diff --git a/.gitignore b/.gitignore index e60ae439..c12f7871 100644 --- a/.gitignore +++ b/.gitignore @@ -63,3 +63,4 @@ lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/spam/SpamC lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/regression/MyFeatures.java lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/regression/MyLabel.java lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/regression/SGDClassifier.java +/.metadata/ diff --git a/.travis.yml b/.travis.yml index 7875da3f..e05acd5a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,7 +3,6 @@ language: java install: echo "skip" # or anything that returns 0 status script: - - mvn -pl lbjava package - - cd lbjava-examples; sh compileLBJ.sh; cd .. + - mvn compile -P train-all-examples # Train Examples here as well. - mvn test diff --git a/README.md b/README.md index 0ce54fec..f167e40d 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,26 @@ # Learning Based Java -[![Build Status](https://semaphoreci.com/api/v1/projects/02a1d3da-4dc5-41c0-963c-b5605e4abc67/605145/badge.svg)](https://semaphoreci.com/danyaljj/lbjava) +[![Build Status](https://semaphoreci.com/api/v1/cogcomp/lbjava-2/branches/master/badge.svg)](https://semaphoreci.com/cogcomp/lbjava-2) +[![Build status](https://ci.appveyor.com/api/projects/status/vql7prdpsayqe2qb/branch/master?svg=true)](https://ci.appveyor.com/project/bhargavm/lbjava/branch/master) - [LBJava core](lbjava/README.md) - [LBJava examples](lbjava-examples/README.md) - [LBJava maven plugin](lbjava-mvn-plugin/README.md) -## Compiling the whole package -Try the following steps: +## Compiling the whole package + +From the root directory run the following command: + + - Just compile all examples: `mvn compile` + - Compile and train all examples: `mvn compile -P train-all-examples` + See more details [here](lbjava-examples/README.md). 
- - Build the LBJava core: `mvn -pl lbjava package -Djar.finalName=lbjavaCore` - - Generate java files from lbjava definitions: `cd lbjava-examples; sh compileLBJ.sh `. See more details [here](lbjava-examples/README.md). - Test the whole project: `mvn test` + +## Compiling LBJava Core + +- `mvn compile -pl lbjava` + ## External Links [Here](http://cogcomp.cs.illinois.edu/page/software_view/LBJava) is LBJava's homepage. diff --git a/appveyor.yml b/appveyor.yml new file mode 100644 index 00000000..d7d5c241 --- /dev/null +++ b/appveyor.yml @@ -0,0 +1,34 @@ +version: '{build}' +skip_tags: true +clone_depth: 10 +environment: + matrix: + - JAVA_HOME: C:\Program Files\Java\jdk1.7.0 + - JAVA_HOME: C:\Program Files\Java\jdk1.8.0 +branches: + only: + - master + except: + - gh-pages +os: Windows Server 2012 +install: + - ps: | + Add-Type -AssemblyName System.IO.Compression.FileSystem + if (!(Test-Path -Path "C:\maven" )) { + (new-object System.Net.WebClient).DownloadFile('http://apache.mirrors.tds.net/maven/maven-3/3.3.9/binaries/apache-maven-3.3.9-bin.zip', 'C:\maven-bin.zip') + [System.IO.Compression.ZipFile]::ExtractToDirectory("C:\maven-bin.zip", "C:\maven") + } + - cmd: SET PATH=C:\maven\apache-maven-3.3.9\bin;%JAVA_HOME%\bin;%PATH:C:\Ruby193\bin;=% + - cmd: SET MAVEN_OPTS=-XX:MaxPermSize=2g -Xmx4g + - cmd: SET JAVA_OPTS=-XX:MaxPermSize=2g -Xmx4g + - cmd: SET org.ojalgo.OjAlgoUtils.ENVIRONMENT=x86_64 + - cmd: mvn --version + - cmd: java -version +build_script: + - mvn clean + - mvn compile -P train-all-examples +test_script: + - mvn test +cache: + - C:\maven\ + - C:\Users\appveyor\.m2 diff --git a/lbjava-examples/README.md b/lbjava-examples/README.md index 043d3a03..5d1aaaa6 100644 --- a/lbjava-examples/README.md +++ b/lbjava-examples/README.md @@ -8,21 +8,34 @@ Here are a couple of sample classification projects which are using LBJava. 4. [Newsgroup Classification](src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/newsgroup/README.md) 5. 
[Spam Classification](src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/spam/README.md) 6. [Sentiment Classification](src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/sentiment/README.md) +7. [Regression Classification](src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/regression/README.md) ## How to run -Run all examples: +From the root directory, run the appropriate commands: + +To generate *lbj* to *java* and compile examples: + +``` +mvn compile +``` + +To compile and train all examples: ``` -sh compileLBJ.sh all +mvn compile -P train-all-examples ``` -Run each individual example: +To compile individual examples: ``` -sh compileLBJ.sh example_name +mvn compile -P train- ``` -Available example names are: `badges`, `entity`, `newsgroup`, `sentiment`, `setcover`, `spam`. +Available profile names are: `badges`, `entity`, `newsgroup`, `sentiment`, `setcover`, `spam`, `regression`. + +Example: ```mvn compile -P train-spam``` for the generating and training just the Spam classifier example. + + diff --git a/lbjava-examples/compileLBJ.sh b/lbjava-examples/compileLBJ.sh deleted file mode 100755 index d6c3f2be..00000000 --- a/lbjava-examples/compileLBJ.sh +++ /dev/null @@ -1,78 +0,0 @@ -#!/bin/bash - -# generate each .lbj file -generateLBJFiles() { - if [ $# -eq 0 ]; - then - echo "Usage: compileLBJ " - fi - - FILE=$* - JAVA=java - JAVAC=javac - - JAVA="nice "$JAVA - SWITCHES="-ea -XX:MaxPermSize=1g -Xmx8g" - - BIN=target - LBJBIN=target/classes - SRC=src/main/java - GSP=src/main/java - - if [ ! 
-d $LBJBIN ]; then mkdir -p $LBJBIN; fi - CP=$BIN:$LBJBIN:lib/*:lib/illinois-core-utilities-3.0.15.jar:lib/liblinear.jar:../lbjava/target/lbjavaCore.jar - - echo $CP - - $JAVA $SWITCHES -cp $CP edu.illinois.cs.cogcomp.lbjava.Main -x -d $LBJBIN -gsp $GSP -sourcepath $SRC $FILE - $JAVA $SWITCHES -cp $CP edu.illinois.cs.cogcomp.lbjava.Main -c -d $LBJBIN -gsp $GSP -sourcepath $SRC $FILE -} - -# generate all .lbj files -generateAllLBJFiles() { - # declare -a files=("BadgesClassifier.lbj" "EntityRelation.lbj" "NewsGroupClassifier.lbj" "SentimentClassifier.lbj" "SetCover.lbj" "SpamClassifier.lbj") - - for i in "BadgesClassifier.lbj" "EntityRelation.lbj" "NewsGroupClassifier.lbj" "SentimentClassifier.lbj" "SetCover.lbj" "SpamClassifier.lbj" "RegressionClassifier.lbj" - do - echo "Generating output for $i" - generateLBJFiles "src/main/lbj/"$i - done -} - -# running starts here -if [ $# -eq 0 ]; then - echo "Error: One argument is needed." - exit -fi - -if [ $# -gt 1 ]; then - echo "Error: Too many arguments." 
- exit -fi - -case $1 in - "all") - echo "===== Generating output for all ====="; - generateAllLBJFiles - exit;; - "badges") - file_name="BadgesClassifier.lbj";; - "entity") - file_name="EntityRelation.lbj";; - "newsgroup") - file_name="NewsGroupClassifier.lbj";; - "sentiment") - file_name="SentimentClassifier.lbj";; - "setcover") - file_name="SetCover.lbj";; - "spam") - file_name="SpamClassifier.lbj";; - "regression") - file_name="RegressionClassifier.lbj";; - *) - echo "Invalid argument: "$1 - exit;; -esac - -echo "===== Generating output for $1 =====\n" -generateLBJFiles "src/main/lbj/"$file_name \ No newline at end of file diff --git a/lbjava-examples/lib/gurobi-6.0.jar b/lbjava-examples/lib/gurobi-6.0.jar deleted file mode 100755 index a7d3cbef..00000000 Binary files a/lbjava-examples/lib/gurobi-6.0.jar and /dev/null differ diff --git a/lbjava-examples/lib/illinois-core-utilities-3.0.15.jar b/lbjava-examples/lib/illinois-core-utilities-3.0.15.jar deleted file mode 100644 index 18c47a50..00000000 Binary files a/lbjava-examples/lib/illinois-core-utilities-3.0.15.jar and /dev/null differ diff --git a/lbjava-examples/lib/java-cup-0.11a.jar b/lbjava-examples/lib/java-cup-0.11a.jar deleted file mode 100644 index 73ffa0b3..00000000 Binary files a/lbjava-examples/lib/java-cup-0.11a.jar and /dev/null differ diff --git a/lbjava-examples/lib/liblinear.jar b/lbjava-examples/lib/liblinear.jar deleted file mode 100644 index bd72e4ec..00000000 Binary files a/lbjava-examples/lib/liblinear.jar and /dev/null differ diff --git a/lbjava-examples/pom.xml b/lbjava-examples/pom.xml index 86968b10..804e89c8 100755 --- a/lbjava-examples/pom.xml +++ b/lbjava-examples/pom.xml @@ -3,12 +3,18 @@ lbjava-project edu.illinois.cs.cogcomp - 1.2.16 + 1.3.1 4.0.0 LBJavaExamples + + generate + + + + CogcompSoftware @@ -21,49 +27,136 @@ edu.illinois.cs.cogcomp LBJava - 1.2.16 + 1.3.1 edu.illinois.cs.cogcomp lbjava-maven-plugin - 1.2.16 - - - edu.illinois.cs.cogcomp - illinois-core-utilities - 
3.0.15 + 1.3.1 + + org.codehaus.mojo + exec-maven-plugin + 1.3 + + + + ensure-target-folder + generate-sources + + exec + + + mkdir + + -p + target/classes + + + + + edu.illinois.cs.cogcomp lbjava-maven-plugin - 1.2.16 + 1.3.1 - - ${project.basedir}/src/main/lbj/BadgesClassifier.lbj - ${project.basedir}/src/main/lbj/SentimentClassifier.lbj - ${project.basedir}/src/main/lbj/SetCover.lbj - ${project.basedir}/src/main/lbj/EntityRelation.lbj - ${project.basedir}/src/main/lbj/NewsGroupClassifier.lbj - ${project.basedir}/src/main/lbj/SpamClassifier.lbj - ${project.basedir}/src/main/java ${project.basedir}/target/classes - compile + generate-sources clean - generate + ${lbjava.mavenplugin.defaultgoal} + + + ${project.basedir}/src/main/lbj/NNBrownClassifier.lbj + ${project.basedir}/src/main/lbj/BadgesClassifier.lbj + ${project.basedir}/src/main/lbj/SentimentClassifier.lbj + ${project.basedir}/src/main/lbj/SetCover.lbj + ${project.basedir}/src/main/lbj/EntityRelation.lbj + ${project.basedir}/src/main/lbj/NewsGroupClassifier.lbj + ${project.basedir}/src/main/lbj/SpamClassifier.lbj + ${project.basedir}/src/main/lbj/RegressionClassifier.lbj + + + + + optional-compile-step + generate-sources + + clean + compile + + + + ${lbjava.mavenplugin.lbjCompileFile} + + - \ No newline at end of file + + + train-all-examples + + compile + + + + + + train-badges + + ${project.basedir}/src/main/lbj/BadgesClassifier.lbj + + + + train-entity + + ${project.basedir}/src/main/lbj/EntityRelation.lbj + + + + train-newsgroup + + ${project.basedir}/src/main/lbj/NewsGroupClassifier.lbj + + + + train-sentiment + + ${project.basedir}/src/main/lbj/SentimentClassifier.lbj + + + + train-setcover + + ${project.basedir}/src/main/lbj/SetCover.lbj + + + + train-spam + + ${project.basedir}/src/main/lbj/SpamClassifier.lbj + + + + train-regression + + ${project.basedir}/src/main/lbj/RegressionClassifier.lbj + + + + + diff --git a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/Document.java 
b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/Document.java index 0703a1a0..42298c82 100755 --- a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/Document.java +++ b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/Document.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ /** @@ -27,64 +24,64 @@ */ public class Document { - private final String label; - private final List words; - private String guid; - - /** - * Create a new document - * - * @throws IOException - */ - public Document(File file, String label) throws IOException { - this.label = label; - BufferedReader reader = new BufferedReader(new FileReader(file)); - - words = new ArrayList(); - String line = null; - while ((line = reader.readLine()) != null) { - for (String word : line.split("\\s+")) - words.add(word.trim()); - } - - reader.close(); - } - - public Document(File file) throws IOException { - this(file, "unknown"); - } - - public Document(List words) { - - this(words, "unknown"); - } - - public void setGUID(String guid) { - this.guid = guid; - } - - public String getGUID(){ - return this.guid; - } - - - public Document(List words, String label) { - this.words = words; - this.label = label; - } - - public String getLabel() { - return label; - } - - public List getWords() { - return Collections.unmodifiableList(words); - } - - @Override - public String toString() { - // TODO 
Auto-generated method stub - return label + ", " + words; - } + private final String label; + private final List words; + private String guid; + + /** + * Create a new document + * + * @throws IOException + */ + public Document(File file, String label) throws IOException { + this.label = label; + BufferedReader reader = new BufferedReader(new FileReader(file)); + + words = new ArrayList(); + String line = null; + while ((line = reader.readLine()) != null) { + for (String word : line.split("\\s+")) + words.add(word.trim()); + } + + reader.close(); + } + + public Document(File file) throws IOException { + this(file, "unknown"); + } + + public Document(List words) { + + this(words, "unknown"); + } + + public void setGUID(String guid) { + this.guid = guid; + } + + public String getGUID() { + return this.guid; + } + + + public Document(List words, String label) { + this.words = words; + this.label = label; + } + + public String getLabel() { + return label; + } + + public List getWords() { + return Collections.unmodifiableList(words); + } + + @Override + public String toString() { + // TODO Auto-generated method stub + return label + ", " + words; + } } diff --git a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/DocumentReader.java b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/DocumentReader.java index 2a97f2ec..0c01b517 100755 --- a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/DocumentReader.java +++ b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/DocumentReader.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.examples; @@ -26,50 +23,48 @@ * */ public class DocumentReader implements Parser { - + private final List files; - + private int currentFileId; - + /** * */ public DocumentReader(String directory) { File d = new File(directory); - + if (!d.exists()) { System.err.println(directory + " does not exist!"); System.exit(-1); } - + if (!d.isDirectory()) { System.err.println(directory + " is not a directory!"); System.exit(-1); } - + files = new ArrayList(); for (File f : d.listFiles()) { if (f.isDirectory()) { files.addAll(Arrays.asList(f.listFiles())); } } - + Collections.shuffle(files); currentFileId = 0; } - - public void close() { - } - + + public void close() {} + public Object next() { - + if (currentFileId < files.size()) { File file = files.get(currentFileId++); - - String[] split = file.getPath().split(File.separator); - + + String[] split = file.getPath().split("\\" + File.separator); String label = split[split.length - 2]; - + try { return new Document(file, label); } catch (IOException e) { @@ -79,9 +74,9 @@ public Object next() { } } else return null; - + } - + public void reset() { currentFileId = 0; } diff --git a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/badges/BadgeDataReader.java b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/badges/BadgeDataReader.java index c95e27b0..7398fafe 100755 --- a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/badges/BadgeDataReader.java +++ b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/badges/BadgeDataReader.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. 
See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ /** @@ -37,7 +34,7 @@ public class BadgeDataReader implements Parser { private int currentline; private BufferedReader br; - + /** * */ @@ -52,15 +49,14 @@ public BadgeDataReader(String file) { } br.close(); } catch (Exception e) { - + } } - - public void close() { - } - + + public void close() {} + public Object next() { - + if (currentline == lines.size()) { return null; } else { @@ -68,7 +64,7 @@ public Object next() { return lines.get(currentline - 1); } } - + public void reset() { currentline = 0; } diff --git a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/badges/FeatureWeightCalculator.java b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/badges/FeatureWeightCalculator.java index 157ba0c6..ad9c6934 100755 --- a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/badges/FeatureWeightCalculator.java +++ b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/badges/FeatureWeightCalculator.java @@ -1,40 +1,36 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.examples.badges; import edu.illinois.cs.cogcomp.lbjava.classify.Feature; -import edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector; import edu.illinois.cs.cogcomp.lbjava.classify.ScoreSet; import edu.illinois.cs.cogcomp.lbjava.learn.Lexicon; public class FeatureWeightCalculator { - /** - * @param args - */ - public static void main(String[] args) { - // TODO Auto-generated method stub - BadgeClassifier bc = new BadgeClassifier(); - Lexicon lc = bc.getLexicon(); - for (int i = 0; i < lc.size(); i++) { - Feature f = lc.lookupKey(i); - System.out.println(f); - int[] id = new int[1]; - double[] val = new double[1]; - id[0] = i; - val[0] = 1; - ScoreSet ss = bc.scores(id, val); - System.out.println(ss.get("positive")); - } - System.out.println("threshold:" + bc.getThreshold()); - } + /** + * @param args + */ + public static void main(String[] args) { + // TODO Auto-generated method stub + BadgeClassifier bc = new BadgeClassifier(); + Lexicon lc = bc.getLexicon(); + for (int i = 0; i < lc.size(); i++) { + Feature f = lc.lookupKey(i); + System.out.println(f); + int[] id = new int[1]; + double[] val = new double[1]; + id[0] = i; + val[0] = 1; + ScoreSet ss = bc.scores(id, val); + System.out.println(ss.get("positive")); + } + System.out.println("threshold:" + bc.getThreshold()); + } } diff --git a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/badges/README.md b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/badges/README.md index a8353476..2a9dee08 100755 --- a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/badges/README.md +++ 
b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/badges/README.md @@ -1,7 +1,9 @@ -# +--- +title: Badges +authors: Vivek Srikumar, Stephen Mayhew, Daniel Khashabi +lead: Classify a simple dataset of names +layout: page +--- This a sample classification project based on Learning Based Java (LBJava). -Author: Vivek Srikumar -Updated: Stephen Mayhew, Daniel Khashabi - diff --git a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/entityRelation/Conll04_InstanceReader.java b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/entityRelation/Conll04_InstanceReader.java index d2bc0344..ca6588e5 100644 --- a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/entityRelation/Conll04_InstanceReader.java +++ b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/entityRelation/Conll04_InstanceReader.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.examples.entityRelation; @@ -22,188 +19,186 @@ import java.util.Vector; public class Conll04_InstanceReader implements Parser { - public Vector instances; - public Vector sentences; - public Vector relations; - - public String[] entityLabels,relLabels; - private int currentInstanceId; - - - public Conll04_InstanceReader(String filename){ - instances=new Vector(); - relations=new Vector(); - sentences=new Vector(); - entityLabels=new String[0]; - relLabels=new String[0]; -// } - - - //public void readData(String filename) throws Exception { - //BufferedReader br=new BufferedReader(new FileReader(filename)); - List lines = null; - try { - lines = LineIO.read(filename); - } catch (FileNotFoundException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - String line; - String[] tokens; - - - ConllRawToken c=new ConllRawToken(); - - ConllRelation r; - int currSentId=0; - boolean sentEnd=false; - ConllRawSentence sent=new ConllRawSentence(currSentId); - - ArrayList entityal = new ArrayList(); - ArrayList relal = new ArrayList(); - - boolean relationSeen=false; - int sentindex = 0; - while(sentindex < lines.size()){ - line = lines.get(sentindex); - sentindex ++; - - //System.out.println(sentindex + " " + line); - if(line.isEmpty()){ - sentEnd=true; - -/* if(!sentEnd){ - currSentId++; - sentEnd=true; - - sentences.add(sent); - - sent=new ConllRawSentence(currSentId); - }*/ - continue; - } - - tokens=line.split("\t|\n"); - int s=tokens.length; - if(s==3){ - relationSeen=true; - r=new ConllRelation(); -// r.sentId1=currSentId-1; -// r.sentId2=currSentId-1; - r.wordId1=Integer.parseInt(tokens[0]); - r.wordId2=Integer.parseInt(tokens[1]); - r.relType=tokens[2]; - 
relations.add(r); - sent.addRelations(r); -// sentences.elementAt(sentences.size()-1).addRelations(r); - if(!relal.contains(tokens[2])){ - relal.add(tokens[2]); - } - } - else{ - //System.out.println("tokens[1]="+tokens[1]+"done"); - if(sentEnd){ - //if(!relationSeen) - { - sentences.add(sent); -/* if(currSentId < 700) - System.out.println("sid:" + currSentId); - else System.out.println("sid:" + (currSentId + 51)); - for(int ind = 0;ind < sent.sentTokens.size();ind ++) - System.out.print(sent.sentTokens.get(ind).phrase + " "); - System.out.println(); - */ - currSentId++; - } - sent=new ConllRawSentence(currSentId); - } - - c=new ConllRawToken(); - -/* if(currSentId < 700) - assert (currSentId == Integer.parseInt(tokens[0])); - else - { - assert(currSentId == Integer.parseInt(tokens[0]) - 51); - if(currSentId != Integer.parseInt(tokens[0]) - 51) - System.out.println("fuck you here"); - }*/ - - c.entType=tokens[1]; - c.sentId=currSentId; - c.wordId=Integer.parseInt(tokens[2]); - c.setPOS(tokens[4]); - c.setPhrase(tokens[5]); - - sent.addTokens(c); - if(!tokens[1].trim().equals("O")){ - instances.add(c); - sent.setCurrentTokenAsEntity(); - if(!entityal.contains(tokens[1])){ - entityal.add(tokens[1]); - } - } - - sentEnd=false; - relationSeen=false; - } - } - - entityLabels=entityal.toArray(entityLabels); - relLabels=relal.toArray(relLabels); - - } - - - public void printData(){ - System.out.println("printing total "+sentences.size()+" sentences"); - for(int i=0;i instances; + public Vector sentences; + public Vector relations; + + public String[] entityLabels, relLabels; + private int currentInstanceId; + + + public Conll04_InstanceReader(String filename) { + instances = new Vector(); + relations = new Vector(); + sentences = new Vector(); + entityLabels = new String[0]; + relLabels = new String[0]; + // } + + + // public void readData(String filename) throws Exception { + // BufferedReader br=new BufferedReader(new FileReader(filename)); + List lines = null; + try { 
+ lines = LineIO.read(filename); + } catch (FileNotFoundException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + String line; + String[] tokens; + + + ConllRawToken c = new ConllRawToken(); + + ConllRelation r; + int currSentId = 0; + boolean sentEnd = false; + ConllRawSentence sent = new ConllRawSentence(currSentId); + + ArrayList entityal = new ArrayList(); + ArrayList relal = new ArrayList(); + + boolean relationSeen = false; + int sentindex = 0; + while (sentindex < lines.size()) { + line = lines.get(sentindex); + sentindex++; + + // System.out.println(sentindex + " " + line); + if (line.isEmpty()) { + sentEnd = true; + + /* + * if(!sentEnd){ currSentId++; sentEnd=true; + * + * sentences.add(sent); + * + * sent=new ConllRawSentence(currSentId); } + */ + continue; + } + + tokens = line.split("\t|\n"); + int s = tokens.length; + if (s == 3) { + relationSeen = true; + r = new ConllRelation(); + // r.sentId1=currSentId-1; + // r.sentId2=currSentId-1; + r.wordId1 = Integer.parseInt(tokens[0]); + r.wordId2 = Integer.parseInt(tokens[1]); + r.relType = tokens[2]; + relations.add(r); + sent.addRelations(r); + // sentences.elementAt(sentences.size()-1).addRelations(r); + if (!relal.contains(tokens[2])) { + relal.add(tokens[2]); + } + } else { + // System.out.println("tokens[1]="+tokens[1]+"done"); + if (sentEnd) { + // if(!relationSeen) + { + sentences.add(sent); + /* + * if(currSentId < 700) System.out.println("sid:" + currSentId); else + * System.out.println("sid:" + (currSentId + 51)); for(int ind = 0;ind < + * sent.sentTokens.size();ind ++) + * System.out.print(sent.sentTokens.get(ind).phrase + " "); + * System.out.println(); + */ + currSentId++; + } + sent = new ConllRawSentence(currSentId); + } + + c = new ConllRawToken(); + + /* + * if(currSentId < 700) assert (currSentId == Integer.parseInt(tokens[0])); else { + * assert(currSentId == Integer.parseInt(tokens[0]) - 51); if(currSentId != + * Integer.parseInt(tokens[0]) - 51) 
System.out.println("fuck you here"); } + */ + + c.entType = tokens[1]; + c.sentId = currSentId; + c.wordId = Integer.parseInt(tokens[2]); + c.setPOS(tokens[4]); + c.setPhrase(tokens[5]); + + sent.addTokens(c); + if (!tokens[1].trim().equals("O")) { + instances.add(c); + sent.setCurrentTokenAsEntity(); + if (!entityal.contains(tokens[1])) { + entityal.add(tokens[1]); + } + } + + sentEnd = false; + relationSeen = false; + } + } + + entityLabels = entityal.toArray(entityLabels); + relLabels = relal.toArray(relLabels); + + } + + + public void printData() { + System.out.println("printing total " + sentences.size() + " sentences"); + for (int i = 0; i < sentences.size(); i++) { + // sentences.elementAt(i).printSentence(); + sentences.elementAt(i).printEntities(); + sentences.elementAt(i).printRelations(); + } + System.out.println("printing total " + instances.size() + " instances"); + for (int i = 0; i < instances.size(); i++) { + instances.elementAt(i).printInstance(); + } + System.out.println("printing total " + relations.size() + " relations"); + for (int i = 0; i < relations.size(); i++) { + relations.elementAt(i).printRelation(); + } + } + + // public static void main(String[] args) throws Exception{ + // System.out.println("here"); + // Conll04_InstanceReader cr=new Conll04_InstanceReader("./data/conll04.corp"); + // //cr.readData("./data/conll04.corp"); + // cr.printData(); + // } + public void close() {} + + public Object next() { + + if (currentInstanceId < instances.size()) { + + ConllRawToken file = instances.get(currentInstanceId++); + + // String[] split = file.getPath().split("\\" + File.separator); + + // String label = split[split.length - 2]; + + return file;// Document(file, label); + } else + return null; + } + + public void reset() { + currentInstanceId = 0; + } + + public static void main(String[] args) throws Exception { + System.out.println("here"); + Conll04_InstanceReader cr = + new Conll04_InstanceReader( + 
"/Users/parisakordjamshidi/wolfe-0.1.0/LBJ/data/conll04.corp"); + + // cr.readData("/home/roth/rsamdan2/Project/EMStructuredPrediction/UnsupRelationExtraction/data/conll04.corp"); + cr.printData(); } diff --git a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/entityRelation/Conll04_RelationReaderNew.java b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/entityRelation/Conll04_RelationReaderNew.java index cd627006..0480476c 100644 --- a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/entityRelation/Conll04_RelationReaderNew.java +++ b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/entityRelation/Conll04_RelationReaderNew.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.examples.entityRelation; @@ -21,8 +18,7 @@ import java.util.Vector; /** - * Created by haowu on 2/9/15. - * Modified by kordjams and khashab2 + * Created by haowu on 2/9/15. Modified by kordjams and khashab2 */ public class Conll04_RelationReaderNew implements Parser { @@ -39,12 +35,14 @@ public class Conll04_RelationReaderNew implements Parser { /** * This constructor reads the CONLL data. + * * @param fileLocation - * @param readerType: Characterizes whether the Next() functions runs on the "relations" or on the "entities". 
- * Possible values are "token" (for entity instances) - * and "pair" (for relation instances) + * @param readerType: Characterizes whether the Next() functions runs on the "relations" or on + * the "entities". Possible values are "token" (for entity instances) and "pair" (for + * relation instances) */ - // TODO: add independent setter for "readerType", rather than reading the data multiple times in the LBJ file + // TODO: add independent setter for "readerType", rather than reading the data multiple times in + // the LBJ file public Conll04_RelationReaderNew(String fileLocation, String readerType) { boolean verbose = false; instances = new Vector(); @@ -78,7 +76,7 @@ public Conll04_RelationReaderNew(String fileLocation, String readerType) { line = lines.get(sentindex); sentindex++; - if( verbose ) + if (verbose) System.out.println(sentindex + " " + line); if (line.isEmpty()) { sentEnd = true; @@ -93,23 +91,25 @@ public Conll04_RelationReaderNew(String fileLocation, String readerType) { relation.wordId2 = Integer.parseInt(tokens[1]); relation.relType = tokens[2]; relations.add(relation); - if( verbose ) - System.out.println("WORD1:"+relation.s.sentTokens.elementAt(relation.wordId1).phrase); + if (verbose) + System.out.println("WORD1:" + + relation.s.sentTokens.elementAt(relation.wordId1).phrase); sent.addRelations(relation); - sentences.elementAt(sentences.size()-1).addRelations(relation); + sentences.elementAt(sentences.size() - 1).addRelations(relation); if (!relationAll.contains(tokens[2])) { relationAll.add(tokens[2]); } } else { - if( verbose ) - System.out.println("tokens[1]="+tokens[1]+"done"); + if (verbose) + System.out.println("tokens[1]=" + tokens[1] + "done"); if (sentEnd) { { sentences.add(sent); - if( verbose ) { + if (verbose) { if (currSentId < 700) System.out.println("sid:" + currSentId); - else System.out.println("sid:" + (currSentId + 51)); + else + System.out.println("sid:" + (currSentId + 51)); for (int ind = 0; ind < sent.sentTokens.size(); 
ind++) System.out.print(sent.sentTokens.get(ind).phrase + " "); System.out.println(); @@ -145,9 +145,14 @@ public Conll04_RelationReaderNew(String fileLocation, String readerType) { for (int counter = 0; counter < relations.size(); counter++) { int sindex = relations.elementAt(counter).sentId; - relations.elementAt(counter).s.sentTokens.addAll(0, sentences.elementAt(sindex).sentTokens); - relations.elementAt(counter).e1 = sentences.elementAt(sindex).sentTokens.elementAt(relations.elementAt(counter).wordId1); - relations.elementAt(counter).e2 = sentences.elementAt(sindex).sentTokens.elementAt(relations.elementAt(counter).wordId2); + relations.elementAt(counter).s.sentTokens.addAll(0, + sentences.elementAt(sindex).sentTokens); + relations.elementAt(counter).e1 = + sentences.elementAt(sindex).sentTokens + .elementAt(relations.elementAt(counter).wordId1); + relations.elementAt(counter).e2 = + sentences.elementAt(sindex).sentTokens + .elementAt(relations.elementAt(counter).wordId2); } } @@ -164,8 +169,12 @@ public void printData() { System.out.println("printing total " + relations.size() + " relations"); for (int i = 0; i < relations.size(); i++) { relations.elementAt(i).printRelation(); - System.out.println("WORD1:" + relations.elementAt(i).s.sentTokens.elementAt(relations.elementAt(i).wordId1).phrase); - System.out.println("WORD2:" + relations.elementAt(i).s.sentTokens.elementAt(relations.elementAt(i).wordId2).phrase); + System.out + .println("WORD1:" + + relations.elementAt(i).s.sentTokens.elementAt(relations.elementAt(i).wordId1).phrase); + System.out + .println("WORD2:" + + relations.elementAt(i).s.sentTokens.elementAt(relations.elementAt(i).wordId2).phrase); } } @@ -178,7 +187,7 @@ public Object next() { if (type.equals("Token")) { if (currentTokenId < instances.size()) { ConllRawToken file = instances.get(currentTokenId++); - return file;//Document(file, label); + return file;// Document(file, label); } else return null; } @@ -233,19 +242,21 @@ public static 
ConllRawToken OrgCandidate(ConllRelation t) { /** * Testing the reader + * * @param args * @throws Exception */ public static void main(String[] args) throws Exception { System.out.println("Start reading tokens ... "); Conll04_RelationReaderNew cr = new Conll04_RelationReaderNew("data/conll04.corp", "Token"); -// Conll04_RelationReaderNew cr = new Conll04_RelationReaderNew("data/conll04_test.corp", "Token"); -// cr.printData(); + // Conll04_RelationReaderNew cr = new Conll04_RelationReaderNew("data/conll04_test.corp", + // "Token"); + // cr.printData(); ConllRawToken tok = (ConllRawToken) cr.next(); - while( tok != null ) { + while (tok != null) { System.out.println(tok); tok = (ConllRawToken) cr.next(); } } -} \ No newline at end of file +} diff --git a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/entityRelation/README.md b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/entityRelation/README.md index 31d16793..a508575f 100755 --- a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/entityRelation/README.md +++ b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/entityRelation/README.md @@ -1,4 +1,10 @@ -# Entity Relation Classification +--- +title: Entity Relation Classification +authors: Hao Wu, Parisa Kordjamshidi, Daniel Khashabi +layout: page +lead: Build a simple classifier for entities and relations +--- + This a sample classification project based on Learning Based Java (LBJava). ## Details @@ -8,5 +14,4 @@ The specifications are included in the file This file contains a joint inference model, for applying constraints between labels of entity-classifiers and relation classifiers. 
-## Credits -Hao Wu, Parisa Kordjamshidi, Daniel Khashabi + diff --git a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/entityRelation/datastruct/ConllRawInstance.java b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/entityRelation/datastruct/ConllRawInstance.java index 0cf37ebd..47f6becd 100644 --- a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/entityRelation/datastruct/ConllRawInstance.java +++ b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/entityRelation/datastruct/ConllRawInstance.java @@ -1,51 +1,57 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.examples.entityRelation.datastruct; public class ConllRawInstance { - public int sentId,wordId; - public String entType, POS, phrase; - -/* public void setPhrase(String unProcPhrase){ - String[] parts=unProcPhrase.split("/"); - - }*/ - - public void printInstance(){ - System.out.println("sent: "+sentId+" wordId: "+wordId+" phrase: "+phrase+" POS: "+POS+" entity type: "+entType); - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - - ConllRawInstance that = (ConllRawInstance) o; - - if (sentId != that.sentId) return false; - if (wordId != that.wordId) return false; - if (POS != null ? 
!POS.equals(that.POS) : that.POS != null) return false; - if (entType != null ? !entType.equals(that.entType) : that.entType != null) return false; - if (phrase != null ? !phrase.equals(that.phrase) : that.phrase != null) return false; - - return true; - } - - @Override - public int hashCode() { - int result = sentId; - result = 31 * result + wordId; - result = 31 * result + (entType != null ? entType.hashCode() : 0); - result = 31 * result + (POS != null ? POS.hashCode() : 0); - result = 31 * result + (phrase != null ? phrase.hashCode() : 0); - return result; - } + public int sentId, wordId; + public String entType, POS, phrase; + + /* + * public void setPhrase(String unProcPhrase){ String[] parts=unProcPhrase.split("/"); + * + * } + */ + + public void printInstance() { + System.out.println("sent: " + sentId + " wordId: " + wordId + " phrase: " + phrase + + " POS: " + POS + " entity type: " + entType); + } + + @Override + public boolean equals(Object o) { + if (this == o) + return true; + if (o == null || getClass() != o.getClass()) + return false; + + ConllRawInstance that = (ConllRawInstance) o; + + if (sentId != that.sentId) + return false; + if (wordId != that.wordId) + return false; + if (POS != null ? !POS.equals(that.POS) : that.POS != null) + return false; + if (entType != null ? !entType.equals(that.entType) : that.entType != null) + return false; + if (phrase != null ? !phrase.equals(that.phrase) : that.phrase != null) + return false; + + return true; + } + + @Override + public int hashCode() { + int result = sentId; + result = 31 * result + wordId; + result = 31 * result + (entType != null ? entType.hashCode() : 0); + result = 31 * result + (POS != null ? POS.hashCode() : 0); + result = 31 * result + (phrase != null ? 
phrase.hashCode() : 0); + return result; + } } diff --git a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/entityRelation/datastruct/ConllRawSentence.java b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/entityRelation/datastruct/ConllRawSentence.java index 8b684874..85698a98 100755 --- a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/entityRelation/datastruct/ConllRawSentence.java +++ b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/entityRelation/datastruct/ConllRawSentence.java @@ -1,120 +1,132 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.examples.entityRelation.datastruct; import java.util.Vector; + public class ConllRawSentence { - public Vector sentTokens; - public Vector relations; - public int sentId; - public Vector entityIndices; - - public ConllRawSentence(int sentId){ - sentTokens = new Vector(); - relations= new Vector(); - entityIndices=new Vector(); - this.sentId=sentId; - } - - public void addTokens(ConllRawToken c){ - sentTokens.add(c); - } - - public void addRelations(ConllRelation r){ - relations.add(r); - } - - public void setCurrentTokenAsEntity(){ - entityIndices.add(sentTokens.size()-1); - } - - public Vector getEntitiesInSentence(){ - Vector entities=new Vector(); - for(int i=0;isentTokens.size()-1) break; - window[index]=sentTokens.elementAt(i+loc); -// String 
a=sentTokens.elementAt(i+loc).getPhrase(isLowerCase); -// window[index]="Window"+i+":"+a; - index++; - // window[index]="Window:"+a; - // index++; - } - return window; - } - - public void printEntities(){ - for(int i=0;i sentTokens.size() - 1) + break; + window[index] = sentTokens.elementAt(i + loc); + // String a=sentTokens.elementAt(i+loc).getPhrase(isLowerCase); + // window[index]="Window"+i+":"+a; + index++; + // window[index]="Window:"+a; + // index++; + } + return window; + } + + public void printEntities() { + for (int i = 0; i < entityIndices.size(); i++) { + sentTokens.elementAt(entityIndices.elementAt(i)).printInstance(); + } + } + + public void printRelations() { + for (int i = 0; i < relations.size(); i++) { + relations.elementAt(i).printRelation(); + } + } + + public void printSentence() { + for (int i = 0; i < sentTokens.size(); i++) { + System.out.println("sent id= " + sentId); + sentTokens.elementAt(i).printInstance(); + } + } + + + @Override + public boolean equals(Object o) { + if (this == o) + return true; + if (o == null || getClass() != o.getClass()) + return false; + + ConllRawSentence that = (ConllRawSentence) o; + + if (sentId != that.sentId) + return false; + if (entityIndices != null ? !entityIndices.equals(that.entityIndices) + : that.entityIndices != null) + return false; + if (relations != null ? !relations.equals(that.relations) : that.relations != null) + return false; + if (sentTokens != null ? !sentTokens.equals(that.sentTokens) : that.sentTokens != null) + return false; + + return true; + } + + @Override + public int hashCode() { + int result = sentTokens != null ? sentTokens.hashCode() : 0; + result = 31 * result + (relations != null ? relations.hashCode() : 0); + result = 31 * result + sentId; + result = 31 * result + (entityIndices != null ? 
entityIndices.hashCode() : 0); + return result; + } } diff --git a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/entityRelation/datastruct/ConllRawToken.java b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/entityRelation/datastruct/ConllRawToken.java index affd3087..6d118946 100755 --- a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/entityRelation/datastruct/ConllRawToken.java +++ b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/entityRelation/datastruct/ConllRawToken.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.examples.entityRelation.datastruct; @@ -14,120 +11,131 @@ import java.util.Arrays; public class ConllRawToken { - public int sentId,wordId; - public String entType, POS, phrase; - public String[] splitWords, splitPOS; - public ArrayList relatedTokens=null; - public ConllRelation t; - public ConllRawSentence s=new ConllRawSentence(sentId); - public void setPhrase(String phrase){ - this.phrase=phrase; - splitWords=phrase.split("/"); - - } - - public void setPOS(String POS){ - this.POS=POS; - splitPOS=POS.split("/"); - } - - public int getLength(){ - return splitWords.length; - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - - ConllRawToken that = (ConllRawToken) o; - - if 
(sentId != that.sentId) return false; - if (wordId != that.wordId) return false; - if (POS != null ? !POS.equals(that.POS) : that.POS != null) return false; - if (entType != null ? !entType.equals(that.entType) : that.entType != null) return false; - if (phrase != null ? !phrase.equals(that.phrase) : that.phrase != null) return false; - if (relatedTokens != null ? !relatedTokens.equals(that.relatedTokens) : that.relatedTokens != null) - return false; - if (s != null ? !s.equals(that.s) : that.s != null) return false; - if (!Arrays.equals(splitPOS, that.splitPOS)) return false; - if (!Arrays.equals(splitWords, that.splitWords)) return false; - if (t != null ? !t.equals(that.t) : that.t != null) return false; - - return true; - } - - @Override - public int hashCode() { - int result = sentId; - result = 31 * result + wordId; - result = 31 * result + (entType != null ? entType.hashCode() : 0); - result = 31 * result + (POS != null ? POS.hashCode() : 0); - result = 31 * result + (phrase != null ? phrase.hashCode() : 0); - result = 31 * result + (splitWords != null ? Arrays.hashCode(splitWords) : 0); - result = 31 * result + (splitPOS != null ? Arrays.hashCode(splitPOS) : 0); - result = 31 * result + (relatedTokens != null ? relatedTokens.hashCode() : 0); - result = 31 * result + (t != null ? t.hashCode() : 0); - result = 31 * result + (s != null ? 
s.hashCode() : 0); - return result; - } - - public String getPhrase(boolean isLowerCase){ - if(isLowerCase){ - return (new String(phrase)).toLowerCase(); - } - - return phrase; - } - - public String[] getWords(boolean isLowerCase){ - if(isLowerCase){ - String[] returnString=new String[splitWords.length]; - for(int i=0;i relatedTokens = null; + public ConllRelation t; + public ConllRawSentence s = new ConllRawSentence(sentId); + + public void setPhrase(String phrase) { + this.phrase = phrase; + splitWords = phrase.split("/"); + + } + + public void setPOS(String POS) { + this.POS = POS; + splitPOS = POS.split("/"); + } + + public int getLength() { + return splitWords.length; + } + + @Override + public boolean equals(Object o) { + if (this == o) + return true; + if (o == null || getClass() != o.getClass()) + return false; + + ConllRawToken that = (ConllRawToken) o; + + if (sentId != that.sentId) + return false; + if (wordId != that.wordId) + return false; + if (POS != null ? !POS.equals(that.POS) : that.POS != null) + return false; + if (entType != null ? !entType.equals(that.entType) : that.entType != null) + return false; + if (phrase != null ? !phrase.equals(that.phrase) : that.phrase != null) + return false; + if (relatedTokens != null ? !relatedTokens.equals(that.relatedTokens) + : that.relatedTokens != null) + return false; + if (s != null ? !s.equals(that.s) : that.s != null) + return false; + if (!Arrays.equals(splitPOS, that.splitPOS)) + return false; + if (!Arrays.equals(splitWords, that.splitWords)) + return false; + if (t != null ? !t.equals(that.t) : that.t != null) + return false; + + return true; + } + + @Override + public int hashCode() { + int result = sentId; + result = 31 * result + wordId; + result = 31 * result + (entType != null ? entType.hashCode() : 0); + result = 31 * result + (POS != null ? POS.hashCode() : 0); + result = 31 * result + (phrase != null ? phrase.hashCode() : 0); + result = 31 * result + (splitWords != null ? 
Arrays.hashCode(splitWords) : 0); + result = 31 * result + (splitPOS != null ? Arrays.hashCode(splitPOS) : 0); + result = 31 * result + (relatedTokens != null ? relatedTokens.hashCode() : 0); + result = 31 * result + (t != null ? t.hashCode() : 0); + result = 31 * result + (s != null ? s.hashCode() : 0); + return result; + } + + public String getPhrase(boolean isLowerCase) { + if (isLowerCase) { + return (new String(phrase)).toLowerCase(); + } + + return phrase; + } + + public String[] getWords(boolean isLowerCase) { + if (isLowerCase) { + String[] returnString = new String[splitWords.length]; + for (int i = 0; i < splitWords.length; i++) { + returnString[i] = splitWords[i].toLowerCase(); + } + return returnString; + } + return splitWords; + } + + public void setRelation(ConllRelation r) { + t = r; + } + + public ConllRelation getRelation() { + + return (t); + } + + public ConllRelation getparteners(ConllRawToken e) { + for (int i = 0; i < s.relations.size(); i++) { + if (s.relations.elementAt(i).e1.wordId == e.wordId + || s.relations.elementAt(i).e2.wordId == e.wordId) { + + } + } + // example.ConllRelation t= new example.ConllRelation(); + // t.e2=this; + // t.e1=e; + // t.sentId=e.sentId; + // t.wordId2=this.wordId; + // t.wordId1=e.wordId; + + return (t); + } + + + public void printInstance() { + System.out.println("sent: " + sentId + " wordId: " + wordId + " phrase: " + phrase + + " POS: " + POS + " entity type: " + entType); + } + + @Override + public String toString() { + return "ConllRawToken{" + "sentId=" + sentId + ", wordId=" + wordId + ", entType='" + + entType + '\'' + ", phrase='" + phrase + '\'' + '}'; + } +} diff --git a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/entityRelation/datastruct/ConllRelation.java b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/entityRelation/datastruct/ConllRelation.java index 968d4b2d..97effda3 100644 --- 
a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/entityRelation/datastruct/ConllRelation.java +++ b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/entityRelation/datastruct/ConllRelation.java @@ -1,67 +1,69 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.examples.entityRelation.datastruct; public class ConllRelation { - public int wordId1, wordId2; - public int sentId; - public ConllRawSentence s = new ConllRawSentence(sentId); - public String relType; - public ConllRawToken e1; - public ConllRawToken e2; + public int wordId1, wordId2; + public int sentId; + public ConllRawSentence s = new ConllRawSentence(sentId); + public String relType; + public ConllRawToken e1; + public ConllRawToken e2; - public void printRelation(){ - System.out.println(" word1: "+wordId1+" word2: "+wordId2+" reltype: "+relType); - } + public void printRelation() { + System.out.println(" word1: " + wordId1 + " word2: " + wordId2 + " reltype: " + relType); + } - @Override - public String toString() { - return "ConllRelation{" + - "wordId1=" + wordId1 + - ", wordId2=" + wordId2 + - ", sentId=" + sentId + - //", s=" + s + // D: this throws exception and I couldn't figure out why. I am commenting it out. 
- ", relType='" + relType + '\'' + - ", e1=" + e1 + - ", e2=" + e2 + - '}'; - } + @Override + public String toString() { + return "ConllRelation{" + "wordId1=" + wordId1 + ", wordId2=" + wordId2 + ", sentId=" + + sentId + + // ", s=" + s + // D: this throws exception and I couldn't figure out why. I am + // commenting it out. + ", relType='" + relType + '\'' + ", e1=" + e1 + ", e2=" + e2 + '}'; + } - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; + @Override + public boolean equals(Object o) { + if (this == o) + return true; + if (o == null || getClass() != o.getClass()) + return false; - ConllRelation that = (ConllRelation) o; + ConllRelation that = (ConllRelation) o; - if (sentId != that.sentId) return false; - if (wordId1 != that.wordId1) return false; - if (wordId2 != that.wordId2) return false; - if (e1 != null ? !e1.equals(that.e1) : that.e1 != null) return false; - if (e2 != null ? !e2.equals(that.e2) : that.e2 != null) return false; - if (relType != null ? !relType.equals(that.relType) : that.relType != null) return false; - if (s != null ? !s.equals(that.s) : that.s != null) return false; + if (sentId != that.sentId) + return false; + if (wordId1 != that.wordId1) + return false; + if (wordId2 != that.wordId2) + return false; + if (e1 != null ? !e1.equals(that.e1) : that.e1 != null) + return false; + if (e2 != null ? !e2.equals(that.e2) : that.e2 != null) + return false; + if (relType != null ? !relType.equals(that.relType) : that.relType != null) + return false; + if (s != null ? !s.equals(that.s) : that.s != null) + return false; - return true; - } + return true; + } - @Override - public int hashCode() { - int result = wordId1; - result = 31 * result + wordId2; - result = 31 * result + sentId; - result = 31 * result + (s != null ? s.hashCode() : 0); - result = 31 * result + (relType != null ? relType.hashCode() : 0); - result = 31 * result + (e1 != null ? 
e1.hashCode() : 0); - result = 31 * result + (e2 != null ? e2.hashCode() : 0); - return result; - } -} \ No newline at end of file + @Override + public int hashCode() { + int result = wordId1; + result = 31 * result + wordId2; + result = 31 * result + sentId; + result = 31 * result + (s != null ? s.hashCode() : 0); + result = 31 * result + (relType != null ? relType.hashCode() : 0); + result = 31 * result + (e1 != null ? e1.hashCode() : 0); + result = 31 * result + (e2 != null ? e2.hashCode() : 0); + return result; + } +} diff --git a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/entityRelation/features/ConllEntityFeatureExtractor.java b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/entityRelation/features/ConllEntityFeatureExtractor.java index 07eeacde..efeb8ec1 100644 --- a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/entityRelation/features/ConllEntityFeatureExtractor.java +++ b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/entityRelation/features/ConllEntityFeatureExtractor.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.examples.entityRelation.features; @@ -20,184 +17,184 @@ public class ConllEntityFeatureExtractor { - Vector gazets; - public GazeteerReader locGazet, perGazet; - public ConllEntityFeatureExtractor(){ -// gazets=new Vector(); - } - - public void addGazets(GazeteerReader g){ - gazets.add(g); - } - - public HashMap extractFeatures(ConllRawToken c_t){ - HashMap features=new HashMap(); - features.put("WORD:"+c_t.phrase,1.0); - features.put("POS:"+c_t.POS,1.0); - return features; - } - - public HashMap extractEntityFeatures(ConllRawSentence s, int index, boolean isLowerCase){ - HashMap features=new HashMap(); - ConllRawToken ct=s.sentTokens.elementAt(index); - features.put("PHRASE:"+ct.getPhrase(isLowerCase),1.0); - features.put("LEN:"+ct.getLength(),1.0); - String[] allWords=ct.getWords(isLowerCase); - GazeteerReader g; - for(int i=0;i extractFeatures(ConllRawToken ct1,ConllRawToken ct2, ConllRawSentence s){ - HashMap features=new HashMap(); - features.put("WORD1:"+ct1.phrase,1.0); - features.put("WORD2:"+ct2.phrase,1.0); - features.put("POS1:"+ct1.POS,1.0); - features.put("POS2:"+ct2.POS,1.0); - return features; - } - - - public HashMap extractFeatures(ConllRelation c,ConllRawSentence s){ - HashMap features=new HashMap(); - features.put("WORD1:"+s.sentTokens.elementAt(c.wordId1).phrase,1.0); - features.put("WORD2:"+s.sentTokens.elementAt(c.wordId2).phrase,1.0); - -// features.put("POS1:"+s.sentTokens.elementAt(c.wordId1).POS,1.0); -// features.put("POS2:"+s.sentTokens.elementAt(c.wordId1).POS,1.0); - return features; - } + Vector gazets; + public GazeteerReader locGazet, perGazet; + + public ConllEntityFeatureExtractor() { + // gazets=new Vector(); + } + + public void 
addGazets(GazeteerReader g) { + gazets.add(g); + } + + public HashMap extractFeatures(ConllRawToken c_t) { + HashMap features = new HashMap(); + features.put("WORD:" + c_t.phrase, 1.0); + features.put("POS:" + c_t.POS, 1.0); + return features; + } + + public HashMap extractEntityFeatures(ConllRawSentence s, int index, + boolean isLowerCase) { + HashMap features = new HashMap(); + ConllRawToken ct = s.sentTokens.elementAt(index); + features.put("PHRASE:" + ct.getPhrase(isLowerCase), 1.0); + features.put("LEN:" + ct.getLength(), 1.0); + String[] allWords = ct.getWords(isLowerCase); + GazeteerReader g; + for (int i = 0; i < allWords.length; i++) { + features.put("WORD:" + allWords[i], 1.0); + } + + ConllRawToken[] window = s.returnWindow(index, 2, 2); + int id; + for (int i = 0; i < window.length; i++) { + id = window[i].wordId - index; + // features.put("WINDOW "+id+window[i].getPhrase(isLowerCase),1.0); + features.put("POS_WINDOW " + id + window[i].POS, 1.0); + } + + if (containsSubPhrase("ing", ct.getWords(isLowerCase))) { + features.put("Ing:", 1.0); + } + if (containsSubPhrase("ment", ct.getWords(isLowerCase))) { + features.put("Ment:", 1.0); + } + + + if (locGazet.isContainedIn(ct)) { + features.put("LOC:", 1.0); + } else if (perGazet.containsAny(ct)) { // PRECISION OF LOCATION GAZET IS HIGHER WE CAN + // IMPROVE PRECISION OF PERSON USING THAT + features.put("PER:", 1.0); + + } + + /* + * for(int i=0;i extractFeatures(ConllRawToken ct1, ConllRawToken ct2, + ConllRawSentence s) { + HashMap features = new HashMap(); + features.put("WORD1:" + ct1.phrase, 1.0); + features.put("WORD2:" + ct2.phrase, 1.0); + features.put("POS1:" + ct1.POS, 1.0); + features.put("POS2:" + ct2.POS, 1.0); + return features; + } + + + public HashMap extractFeatures(ConllRelation c, ConllRawSentence s) { + HashMap features = new HashMap(); + features.put("WORD1:" + s.sentTokens.elementAt(c.wordId1).phrase, 1.0); + features.put("WORD2:" + s.sentTokens.elementAt(c.wordId2).phrase, 1.0); + + 
// features.put("POS1:"+s.sentTokens.elementAt(c.wordId1).POS,1.0); + // features.put("POS2:"+s.sentTokens.elementAt(c.wordId1).POS,1.0); + return features; + } } diff --git a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/entityRelation/reader/Conll04Reader.java b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/entityRelation/reader/Conll04Reader.java index 60e508da..c0082bb7 100644 --- a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/entityRelation/reader/Conll04Reader.java +++ b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/entityRelation/reader/Conll04Reader.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.examples.entityRelation.reader; @@ -20,154 +17,150 @@ import java.util.Vector; public class Conll04Reader { - public Vector instances; - public Vector sentences; - public Vector relations; - - public String[] entityLabels,relLabels; - - - - public Conll04Reader(){ - instances=new Vector(); - relations=new Vector(); - sentences=new Vector(); - entityLabels=new String[0]; - relLabels=new String[0]; - } - - - - public void readData(String filename) throws Exception { - //BufferedReader br=new BufferedReader(new FileReader(filename)); - List lines = LineIO.read(filename); - String line; - String[] tokens; - - - ConllRawToken c=new ConllRawToken(); - ConllRelation r; - int currSentId=0; - boolean sentEnd=false; - ConllRawSentence sent=new ConllRawSentence(currSentId); - - ArrayList entityal = new ArrayList(); - ArrayList relal = new ArrayList(); - - boolean relationSeen=false; - int sentindex = 0; - while(sentindex < lines.size()){ - line = lines.get(sentindex); - sentindex ++; - - //System.out.println(sentindex + " " + line); - if(line.isEmpty()){ - sentEnd=true; - -/* if(!sentEnd){ - currSentId++; - sentEnd=true; - - sentences.add(sent); - - sent=new ConllRawSentence(currSentId); - }*/ - continue; - } - - tokens=line.split("\t|\n"); - int s=tokens.length; - if(s==3){ - relationSeen=true; - r=new ConllRelation(); -// r.sentId1=currSentId-1; -// r.sentId2=currSentId-1; - r.wordId1=Integer.parseInt(tokens[0]); - r.wordId2=Integer.parseInt(tokens[1]); - r.relType=tokens[2]; - relations.add(r); - sent.addRelations(r); -// sentences.elementAt(sentences.size()-1).addRelations(r); - if(!relal.contains(tokens[2])){ - relal.add(tokens[2]); - } - } - else{ - 
//System.out.println("tokens[1]="+tokens[1]+"done"); - if(sentEnd){ - //if(!relationSeen) - { - sentences.add(sent); -/* if(currSentId < 700) - System.out.println("sid:" + currSentId); - else System.out.println("sid:" + (currSentId + 51)); - for(int ind = 0;ind < sent.sentTokens.size();ind ++) - System.out.print(sent.sentTokens.get(ind).phrase + " "); - System.out.println(); - */ - currSentId++; - } - sent=new ConllRawSentence(currSentId); - } - - c=new ConllRawToken(); - -/* if(currSentId < 700) - assert (currSentId == Integer.parseInt(tokens[0])); - else - { - assert(currSentId == Integer.parseInt(tokens[0]) - 51); - if(currSentId != Integer.parseInt(tokens[0]) - 51) - System.out.println("fuck you here"); - }*/ - - c.entType=tokens[1]; - c.sentId=currSentId; - c.wordId=Integer.parseInt(tokens[2]); - c.setPOS(tokens[4]); - c.setPhrase(tokens[5]); - - sent.addTokens(c); - if(!tokens[1].trim().equals("O")){ - instances.add(c); - sent.setCurrentTokenAsEntity(); - if(!entityal.contains(tokens[1])){ - entityal.add(tokens[1]); - } - } - - sentEnd=false; - relationSeen=false; - } - } - relal.add("None"); - entityLabels=entityal.toArray(entityLabels); - relLabels=relal.toArray(relLabels); - - } - - - public void printData(){ - System.out.println("printing total "+sentences.size()+" sentences"); - for(int i=0;i instances; + public Vector sentences; + public Vector relations; + + public String[] entityLabels, relLabels; + + + + public Conll04Reader() { + instances = new Vector(); + relations = new Vector(); + sentences = new Vector(); + entityLabels = new String[0]; + relLabels = new String[0]; + } + + + + public void readData(String filename) throws Exception { + // BufferedReader br=new BufferedReader(new FileReader(filename)); + List lines = LineIO.read(filename); + String line; + String[] tokens; + + + ConllRawToken c = new ConllRawToken(); + ConllRelation r; + int currSentId = 0; + boolean sentEnd = false; + ConllRawSentence sent = new ConllRawSentence(currSentId); + + 
ArrayList entityal = new ArrayList(); + ArrayList relal = new ArrayList(); + + boolean relationSeen = false; + int sentindex = 0; + while (sentindex < lines.size()) { + line = lines.get(sentindex); + sentindex++; + + // System.out.println(sentindex + " " + line); + if (line.isEmpty()) { + sentEnd = true; + + /* + * if(!sentEnd){ currSentId++; sentEnd=true; + * + * sentences.add(sent); + * + * sent=new ConllRawSentence(currSentId); } + */ + continue; + } + + tokens = line.split("\t|\n"); + int s = tokens.length; + if (s == 3) { + relationSeen = true; + r = new ConllRelation(); + // r.sentId1=currSentId-1; + // r.sentId2=currSentId-1; + r.wordId1 = Integer.parseInt(tokens[0]); + r.wordId2 = Integer.parseInt(tokens[1]); + r.relType = tokens[2]; + relations.add(r); + sent.addRelations(r); + // sentences.elementAt(sentences.size()-1).addRelations(r); + if (!relal.contains(tokens[2])) { + relal.add(tokens[2]); + } + } else { + // System.out.println("tokens[1]="+tokens[1]+"done"); + if (sentEnd) { + // if(!relationSeen) + { + sentences.add(sent); + /* + * if(currSentId < 700) System.out.println("sid:" + currSentId); else + * System.out.println("sid:" + (currSentId + 51)); for(int ind = 0;ind < + * sent.sentTokens.size();ind ++) + * System.out.print(sent.sentTokens.get(ind).phrase + " "); + * System.out.println(); + */ + currSentId++; + } + sent = new ConllRawSentence(currSentId); + } + + c = new ConllRawToken(); + + /* + * if(currSentId < 700) assert (currSentId == Integer.parseInt(tokens[0])); else { + * assert(currSentId == Integer.parseInt(tokens[0]) - 51); if(currSentId != + * Integer.parseInt(tokens[0]) - 51) System.out.println("fuck you here"); } + */ + + c.entType = tokens[1]; + c.sentId = currSentId; + c.wordId = Integer.parseInt(tokens[2]); + c.setPOS(tokens[4]); + c.setPhrase(tokens[5]); + + sent.addTokens(c); + if (!tokens[1].trim().equals("O")) { + instances.add(c); + sent.setCurrentTokenAsEntity(); + if (!entityal.contains(tokens[1])) { + 
entityal.add(tokens[1]); + } + } + + sentEnd = false; + relationSeen = false; + } + } + relal.add("None"); + entityLabels = entityal.toArray(entityLabels); + relLabels = relal.toArray(relLabels); + + } + + + public void printData() { + System.out.println("printing total " + sentences.size() + " sentences"); + for (int i = 0; i < sentences.size(); i++) { + // sentences.elementAt(i).printSentence(); + sentences.elementAt(i).printEntities(); + sentences.elementAt(i).printRelations(); + } + System.out.println("printing total " + instances.size() + " instances"); + for (int i = 0; i < instances.size(); i++) { + instances.elementAt(i).printInstance(); + } + System.out.println("printing total " + relations.size() + " relations"); + for (int i = 0; i < relations.size(); i++) { + relations.elementAt(i).printRelation(); + } + } + + public static void main(String[] args) throws Exception { + System.out.println("here"); + Conll04Reader cr = new Conll04Reader(); + cr.readData("/home/roth/rsamdan2/Project/EMStructuredPrediction/UnsupRelationExtraction/data/conll04.corp"); + cr.printData(); + } } diff --git a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/entityRelation/reader/GazeteerReader.java b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/entityRelation/reader/GazeteerReader.java index 55118e39..e0174d4b 100755 --- a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/entityRelation/reader/GazeteerReader.java +++ b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/entityRelation/reader/GazeteerReader.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.examples.entityRelation.reader; @@ -17,112 +14,113 @@ import java.util.ArrayList; public class GazeteerReader { - public String listName; - public ArrayList listGazet; - public ArrayList listWords; - - public GazeteerReader(String fileName, String listName, boolean isLowerCase){ - try { - this.listName=listName; - listGazet=new ArrayList(); - listWords=new ArrayList(); - BufferedReader reader = new BufferedReader(new FileReader(fileName)); - String line = null; - while ((line = reader.readLine()) != null) { - line = line.trim(); - if(isLowerCase){ - line=line.toLowerCase(); - } - listGazet.add(line); - listWords.add(line.split(" |\n|\t")); - } - - reader.close(); - } catch (Exception e) { - e.printStackTrace(); - } - } - - public void addFile(String fileName, boolean isLowerCase){ - try { - BufferedReader reader = new BufferedReader(new FileReader(fileName)); - String line = null; - while ((line = reader.readLine()) != null) { - line = line.trim(); - if(isLowerCase){ - line=line.toLowerCase(); - } - listGazet.add(line); - listWords.add(line.split(" |\n|\t")); - } - - reader.close(); - } catch (Exception e) { - e.printStackTrace(); - } - } - - public boolean testMembership(String cand){ - return listGazet.contains(cand); - } - - public boolean testMembership(String[] cand){ - String[] phrases; - for(int i=0;i1){ - return testMembership(ct.getWords(true)); - } - return testMembership(ct.getPhrase(true)); - } - - public boolean subArray(String[] big, String[] small, int index){ - for(int i=0;i listGazet; + public ArrayList listWords; + + public GazeteerReader(String fileName, String listName, boolean isLowerCase) { + try { + this.listName = listName; + listGazet = new ArrayList(); 
+ listWords = new ArrayList(); + BufferedReader reader = new BufferedReader(new FileReader(fileName)); + String line = null; + while ((line = reader.readLine()) != null) { + line = line.trim(); + if (isLowerCase) { + line = line.toLowerCase(); + } + listGazet.add(line); + listWords.add(line.split(" |\n|\t")); + } + + reader.close(); + } catch (Exception e) { + e.printStackTrace(); + } + } + + public void addFile(String fileName, boolean isLowerCase) { + try { + BufferedReader reader = new BufferedReader(new FileReader(fileName)); + String line = null; + while ((line = reader.readLine()) != null) { + line = line.trim(); + if (isLowerCase) { + line = line.toLowerCase(); + } + listGazet.add(line); + listWords.add(line.split(" |\n|\t")); + } + + reader.close(); + } catch (Exception e) { + e.printStackTrace(); + } + } + + public boolean testMembership(String cand) { + return listGazet.contains(cand); + } + + public boolean testMembership(String[] cand) { + String[] phrases; + for (int i = 0; i < listGazet.size(); i++) { + phrases = listGazet.get(i).split(" |\n|\t"); + if (phrases.length != cand.length) + return false; + + for (int j = 0; j < cand.length; j++) { + if (!phrases[j].equals(cand[j])) { + return false; + } + } + } + return true; + } + + public boolean testMembership(ConllRawToken ct) { + if (ct.getLength() > 1) { + return testMembership(ct.getWords(true)); + } + return testMembership(ct.getPhrase(true)); + } + + public boolean subArray(String[] big, String[] small, int index) { + for (int i = 0; i < small.length; i++) { + + if (!big[i + index].equals(small[i])) { + return false; + } + } + return true; + } + + public boolean containsAny(ConllRawToken ct) { + String[] temp1, temp2; + temp2 = ct.getWords(true); + for (int i = 0; i < listWords.size(); i++) { + temp1 = listWords.get(i); + for (int j = 0; j <= temp2.length - temp1.length; j++) { + if (subArray(temp2, temp1, j)) { + return true; + } + } + } + return false; + } + + public boolean 
isContainedIn(ConllRawToken ct) { + String[] temp1, temp2; + temp2 = ct.getWords(true); + for (int i = 0; i < listWords.size(); i++) { + temp1 = listWords.get(i); + for (int j = 0; j <= temp1.length - temp2.length; j++) { + if (subArray(temp1, temp2, j)) { + return true; + } + } + } + return false; + } } diff --git a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/entityRelation/testEntityRelationClassifier.java b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/entityRelation/testEntityRelationClassifier.java index 176582c3..35c4c2fa 100644 --- a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/entityRelation/testEntityRelationClassifier.java +++ b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/entityRelation/testEntityRelationClassifier.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.examples.entityRelation; @@ -25,7 +22,7 @@ public static void independentClassifiers() { OrganizationClassifier orgClassifier = new OrganizationClassifier(); Conll04_RelationReaderNew cr = new Conll04_RelationReaderNew("data/conll04.corp", "Token"); ConllRawToken entity = (ConllRawToken) cr.next(); - while( entity != null ) { + while (entity != null) { System.out.println(entity); System.out.println(orgClassifier.discreteValue(entity)); entity = (ConllRawToken) cr.next(); @@ -35,7 +32,7 @@ public static void independentClassifiers() { PersonClassifier personClassifier = new PersonClassifier(); cr = new Conll04_RelationReaderNew("data/conll04.corp", "Token"); entity = (ConllRawToken) cr.next(); - while( entity != null ) { + while (entity != null) { System.out.println(entity); System.out.println(personClassifier.discreteValue(entity)); entity = (ConllRawToken) cr.next(); @@ -47,7 +44,7 @@ public static void independentClassifiers() { ConllRelation relation = (ConllRelation) cr.next(); System.out.println(relation.toString()); - while( relation != null ) { + while (relation != null) { System.out.println(relation); System.out.println(worksForRelationClassifier.discreteValue(relation)); relation = (ConllRelation) cr.next(); @@ -67,17 +64,17 @@ public static void jointClassifiers() { Conll04_RelationReaderNew cr = new Conll04_RelationReaderNew("data/conll04.corp", "Pair"); System.out.println("Starting to classify the instances ... 
"); ConllRelation pair = (ConllRelation) cr.next(); - while( pair != null ) { + while (pair != null) { System.out.println(pair); pair = (ConllRelation) cr.next(); System.out.println(relArgClassifier.discreteValue(pair)); -// System.out.println(argpClassifier.discreteValue(tok)); -// System.out.println(argpClassifier.discreteValue(tok)); + // System.out.println(argpClassifier.discreteValue(tok)); + // System.out.println(argpClassifier.discreteValue(tok)); } } public static void main(String[] args) { independentClassifiers(); - //jointClassifiers(); + // jointClassifiers(); } } diff --git a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/neuralnet/BrownReader.java b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/neuralnet/BrownReader.java new file mode 100644 index 00000000..e29ef775 --- /dev/null +++ b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/neuralnet/BrownReader.java @@ -0,0 +1,238 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.examples.neuralnet; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.IOException; +import java.util.Arrays; + +import edu.illinois.cs.cogcomp.lbjava.parse.Parser; + +/** + * @author redman + */ +public class BrownReader implements Parser{ + + /** the input data. */ + float [][] inputs; + + /** the labels. */ + float [][] outputs; + + /** indexes the current example. */ + int index = 0; + + /** the maximum number of input features. */ + int inputCardinality = -1; + + /** the maximum integer classification. 
*/ + int outputCardinality = 1; + + /** + * read input data from the input file, the output data from the out file. + * @param infile the input data. + * @param outfile the output data. + * @throws IOException + */ + public BrownReader (String infile) { + try { + inputs = getExampleInputs(infile); + this.inputCardinality = inputs[0].length; + outputs = getExampleOutputs(infile); + if (inputs.length != outputs.length) + throw new RuntimeException("Need the same number of inputs and outputs."); + } catch (IOException e) { + throw new RuntimeException("Could not read example data.",e); + } + } + + /** + * read input data from the input file, the output data from the out file. + * @param infile the input data. + * @param trainingInputs the previously read training inputs. + * @throws IOException + */ + public BrownReader (String infile, int numberInputFeatures, int numberExamples) { + try { + this.inputCardinality = numberInputFeatures; + inputs = getExampleInputs(infile, numberInputFeatures); + outputs = getExampleOutputs(infile, inputs.length, numberExamples); + if (inputs.length != outputs.length) + throw new RuntimeException("Need the same number of inputs and outputs."); + } catch (IOException e) { + throw new RuntimeException("Could not read example data.",e); + } + } + + @Override + public void close() { + index = 0; + } + + @Override + public Object next() { + NeuralNetExample nne = null; + if (index < inputs.length) { + nne = new NeuralNetExample(inputs[index], outputs[index]); + index++; + } + return nne; + } + + @Override + public void reset() { + index = 0; + } + + /** + * get the examples form an NIST dataset, return everything at once. There are + * 60k examples, at 28x28 pixel values per example, so 60000 x 28 x 28 floats = + * 47 million floats. These are input examples, so they are image data. + * @param filename + * @return the input examples. 
+ * @throws IOException + */ + private float[][] getExampleInputs(String filename) throws IOException { + int count = 0; + try (BufferedReader br = new BufferedReader(new FileReader(new File(filename)))) { + String line; + while ((line=br.readLine()) != null) { + count++; + String[] splits = line.split("[,:]"); + for (int i = 1; i < splits.length; i++) { + int featureindex = Integer.parseInt(splits[i]); + if (featureindex > this.inputCardinality) + this.inputCardinality = featureindex; + } + } + } + float[][] data = new float[count][++this.inputCardinality]; + for (float[] a : data) + Arrays.fill(a, 0.0f); + + try (BufferedReader br = new BufferedReader(new FileReader(new File(filename)))) { + String line; + count = 0; + while ((line=br.readLine()) != null) { + String[] splits = line.split("[,:]"); + for (int i = 0; i < splits.length; i++) { + int featureindex = Integer.parseInt(splits[i]); + data[count][featureindex] = 1.0f; + } + count++; + } + } + return data; + } + + /** + * scale the range of input feature vector to the provided example set, of data to train on. + * @param string + * @param examples + * @return the testing input deck. 
+ * @throws IOException + * @throws FileNotFoundException + */ + private float[][] getExampleInputs(String filename, int cardinality) throws FileNotFoundException, IOException { + int count = 0; + try (BufferedReader br = new BufferedReader(new FileReader(new File(filename)))) { + String line; + while ((line=br.readLine()) != null) { + count++; + String[] splits = line.split("[,:]"); + for (int i = 1; i < splits.length; i++) { + int featureindex = Integer.parseInt(splits[i]); + if (featureindex > this.inputCardinality) + this.inputCardinality = featureindex; + } + } + } + float[][] data = new float[count][cardinality]; + for (float[] a : data) + Arrays.fill(a, 0.0f); + try (BufferedReader br = new BufferedReader(new FileReader(new File(filename)))) { + String line; + count = 0; + while ((line=br.readLine()) != null) { + String[] splits = line.split("[,:]"); + for (int i = 0; i < splits.length; i++) { + int featureindex = Integer.parseInt(splits[i]); + data[count][featureindex] = 1.0f; + } + count++; + } + } + return data; + } + + /** + * get the examples form an NIST dataset, return everything at once. There are + * 60k examples, at 28x28 pixel values per example, so 60000 x 28 x 28 floats = + * 47 million floats. These are input examples, so they are image data. + * @param filename + * @return the input examples. 
+ * @throws IOException + */ + private float[][] getExampleOutputs(String filename) throws IOException { + int count = 0; + this.outputCardinality = -1; + try (BufferedReader br = new BufferedReader(new FileReader(new File(filename)))) { + String line = null; + while ((line=br.readLine()) != null) { + count++; + String[] splits = line.split("[,:]"); + int label = Integer.parseInt(splits[0]); + if (label > this.outputCardinality) + this.outputCardinality = label; + } + } + float[][] data = new float[count][1]; + for (float[] a : data) + Arrays.fill(a, 0.0f); + try (BufferedReader br = new BufferedReader(new FileReader(new File(filename)))) { + String line; + count = 0; + float range = this.outputCardinality; + while ((line=br.readLine()) != null) { + String[] splits = line.split("[,:]"); + int featureindex = Integer.parseInt(splits[0]); + data[count][0] = featureindex/range; + count++; + } + } + return data; + } + + /** + * get the example outputs. + * @param filename file with the values. + * @param outputs the training examples. + * @return the testing examples. + * @throws FileNotFoundException + * @throws IOException + */ + private float[][] getExampleOutputs(String filename, int numouts, int card) throws FileNotFoundException, IOException { + float[][] data = new float[numouts][1]; + try (BufferedReader br = new BufferedReader(new FileReader(new File(filename)))) { + String line; + int count = 0; + float range = card; + while ((line=br.readLine()) != null) { + String[] splits = line.split("[,:]"); + int featureindex = Integer.parseInt(splits[0]); + // convert to a number 0 - 1, then to a number -1 to 1. 
+ data[count][0] = featureindex/range; + count++; + } + } + return data; + } +} diff --git a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/neuralnet/Debug.java b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/neuralnet/Debug.java new file mode 100644 index 00000000..13f51eaf --- /dev/null +++ b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/neuralnet/Debug.java @@ -0,0 +1,238 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.examples.neuralnet; + +import edu.illinois.cs.cogcomp.lbjava.classify.Classifier; + +/** + * This was used for debugging during development, thought it might be useful in the future + * although it is completely useless right now. + * @author redman + */ +@SuppressWarnings("unused") +public class Debug { + /** running ANN by default. */ + static private final String NN = "NeuralNet"; + + /** running gradient descent. */ + static private final String SGD = "StoichasticGradientDescent"; + + /** the method we are running. */ + static private String method = NN; + + /** scales the weight deltas for each iteration. */ + static private float learningRate = .3f; + + /** this prevents local minimum capture. */ + static private float momentum = .6f; + + /** this prevents local minimum capture. */ + static private int hiddenLayerSize = 20; + + /** this prevents local minimum capture. */ + static private int epochs = 100; + + /** The number of threads to support. */ + @SuppressWarnings("unused") + static private int threads = 1; + + /** + * parse the arguments. + * @param args the command arguments. 
+ */ + static private void parseArgs(String[] args) { + for (int i = 0; i < args.length; i++) { + if (args[i].equals("-l")) + learningRate = Float.parseFloat(args[++i]); + else if (args[i].equals("-m")) + momentum = Float.parseFloat(args[++i]); + else if (args[i].equals("-e")) + epochs = Integer.parseInt(args[++i]); + else if (args[i].equals("-t")) + threads = Integer.parseInt(args[++i]); + else if (args[i].equals("-h")) + hiddenLayerSize = Integer.parseInt(args[++i]); + else if (args[0].equals("-gd")) + method = SGD; + else if (args[i].equals("-help")) { + System.out.println("-t the number of threads to deploy.\n" + + "-l the learning rate.\n" + + "-m momentum.\n" + + "-e number of epochs.\n" + + "-h hidden layer size.\n" + + "-gd use gradient descent.\n" + + "-help this output."); + System.exit(0); + } else + System.out.println("Unexpected argument : "+args[i]); + } + } + + /** + * Print a set of any pair of floating point arrays, labels can be passed in, if + * null is passed for the ol parameter, no second array is printed. + * @param il the input label. + * @param input the input vector. + * @param ol the output label. + * @param output the output vector. + */ + static void printInOut(String il, float[] input, String ol, float[] output) { + System.out.print(il+" "); + for (float in : input) { + System.out.format(" %.18f",in); + } + if (ol!=null) { + System.out.print(" "+ol+" "); + for (float in : output) { + System.out.format(" %.18f",in); + } + } + System.out.println(); + } + + /** + * Print the input and outputs all on one line. + * @param il the input label. + * @param input the input vector. + * @param ol the output label. + * @param output the output vector. 
+ */ + static void printInOutC(String il, float[] input, String ol, float[] output) { + System.out.println(il+" "); + int c = 0; + for (float in : input) { + System.out.format(c+il+": %.18f\n",in); + c++; + } + if (ol!=null) { + System.out.println(" "+ol+" "); + c = 0; + for (float in : output) { + System.out.format(c+ol+": %.18f\n",in); + c++; + } + } + } + + /** + * Compute the value, compare to the label, and accumulate predicted error. + * @param br the brown data reader. + * @param classifier the learner. + */ + static double computeHits (BrownReader br, Classifier classifier) { + int i = 0; + int bads = 0; + while (true) { + NeuralNetExample nne = (NeuralNetExample)br.next(); + if (nne == null) { + // done; + return (1.0f - ((double)bads/(double)i)) * 100f; + } else { + double value = classifier.realValue(nne); + double tru = nne.getOutputLabels()[0]; + double abserr = Math.abs(value - tru); + if (abserr > .25) { + bads++; + } + i++; + } + + } + } + + /** + * @param args + + public static void main(String[] args) { + parseArgs(args); + if (method == NN) { + // read the data to know how many input features there are. + BrownReader br = new BrownReader("data/brown/their-brown80.feat"); + + // first create the classifier and train it up. + NNBrownDataClassifier nn = new NNBrownDataClassifier(); + nn.setInputCount(br.inputCardinality); + nn.setHiddenCount(hiddenLayerSize); + nn.setOutputCount(1); + nn.setEpochs(epochs); + nn.setMomentum(momentum); + nn.setLearningRate(learningRate); + nn.forget(); + + int epochs = nn.getEpochs(); + long time = System.currentTimeMillis(); + // read training data. + try { + // train. 
+ ArrayList trainingExamples = new ArrayList<>(); + while(true) { + Object o = br.next(); + trainingExamples.add(o); + if (o == null) + break; + nn.learn(o); + } + Random r = new Random(); + for(int i = 0 ; i < epochs-1; i++) { + for (int j = 0; j < trainingExamples.size(); j++) { + int oidx = r.nextInt(trainingExamples.size()); + Object o = trainingExamples.get(oidx); + if (o == null) + break; + nn.learn(o); + } + } + + } finally { + br.close(); + } + + // now we have a trained up model, let's test it. + br = new BrownReader("data/brown/their-brown20.feat",br.inputs[0].length, br.outputCardinality); + double accuracy = computeHits(br, nn); + double seconds = ((System.currentTimeMillis() - time)/1000.0); + + // epochs, rate, momentum, hiddens, accuracy, time + System.out.format("%d,%.2f,%.2f,%d,%.4f,%.4f\n",epochs,learningRate,momentum,hiddenLayerSize,accuracy,seconds); + } else { + + // first create the classifier and train it up. + SGDBrownDataClassifier sdg = new SGDBrownDataClassifier(); + sdg.forget(); + Learner.Parameters p = sdg.getParameters(); + p.rounds = epochs; + + System.out.println("Reading data SGD"); + BrownReader br = new BrownReader("data/brown/their-brown80.feat"); + ArrayList trainingExamples = new ArrayList<>(); + while(true) { + Object o = br.next(); + trainingExamples.add(o); + if (o == null) + break; + sdg.learn(o); + } + System.out.println("Training SGD"); + Random r = new Random(); + for(int i = 0 ; i < p.rounds-1; i++) { + for (int j = 0; j < trainingExamples.size(); j++) { + int oidx = r.nextInt(trainingExamples.size()); + Object o = trainingExamples.get(oidx); + if (o == null) + break; + sdg.learn(o); + } + } + System.out.println("Training up done."); + + // now we have a trained up model, let's test it. 
+ br = new BrownReader("data/brown/their-brown20.feat",br.inputs[0].length, br.inputs.length); + computeHits(br, sdg); + } + }*/ +} diff --git a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/neuralnet/NeuralNetExample.java b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/neuralnet/NeuralNetExample.java new file mode 100644 index 00000000..065e0659 --- /dev/null +++ b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/neuralnet/NeuralNetExample.java @@ -0,0 +1,48 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.examples.neuralnet; + +/** + * Data container for LBJava. + * @author redman + */ +public class NeuralNetExample { + + /** the inputs. */ + public float[] inputs; + + /** the labeled data. */ + public float[] outputs; + + /** + * create with inputs and outputs. + * + * @param ins + * @param outs + */ + NeuralNetExample(float[] ins, float [] outs) { + this.inputs = ins; + this.outputs = outs; + } + + /** + * Get the input features. + * @return input features. + */ + public float[] getInputFeatures() { + return inputs; + } + + /** + * @return the output features(truth data). 
+ */ + public float[] getOutputLabels() { + return outputs; + } + +} diff --git a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/neuralnet/NeuralNetwork.java b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/neuralnet/NeuralNetwork.java new file mode 100644 index 00000000..dc5caa2d --- /dev/null +++ b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/neuralnet/NeuralNetwork.java @@ -0,0 +1,210 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.examples.neuralnet; + +import java.io.IOException; +import java.util.Random; + +import edu.illinois.cs.cogcomp.lbjava.neuralnet.Activator; +import edu.illinois.cs.cogcomp.lbjava.neuralnet.Layer; +import edu.illinois.cs.cogcomp.lbjava.neuralnet.NNTrainingInterface; +import edu.illinois.cs.cogcomp.lbjava.neuralnet.SimpleNNTrainer; +import edu.illinois.cs.cogcomp.lbjava.neuralnet.ThreadedNNTrainer; + +/** + * This class will manage a neural network, it will train it up if necessary, create + * and manage all the layers and nodes internally, and respond to activations. + * @author redman + */ +public class NeuralNetwork implements Activator { + + /** debug flag. */ + static final boolean debug = false; + + /** the layers of the neural network. */ + private Layer[] layers; + + /** scales the weight deltas for each iteration. */ + static private float learningRate = .3f; + + /** this prevents local minimum capture. */ + static private float momentum = .6f; + + /** this prevents local minimum capture. */ + static private int hiddenLayerSize = 20; + + /** this prevents local minimum capture. 
*/ + static private int epochs = 100; + + /** this prevents local minimum capture. */ + static private int threads = 1; + + /** + * parse the arguments. + * @param args the command arguments. + */ + static private void parseArgs(String[] args) { + for (int i = 0; i < args.length; i++) { + if (args[i].equals("-l")) + learningRate = Float.parseFloat(args[++i]); + else if (args[i].equals("-m")) + momentum = Float.parseFloat(args[++i]); + else if (args[i].equals("-e")) + epochs = Integer.parseInt(args[++i]); + else if (args[i].equals("-t")) + threads = Integer.parseInt(args[++i]); + else if (args[i].equals("-h")) + hiddenLayerSize = Integer.parseInt(args[++i]); + else if (args[i].equals("-help")) { + System.out.println("-t the number of threads to deploy.\n-l the learning rate.\n-m momentum.\n-e number of epochs.\n-h hidden layer size."); + System.exit(0); + } else + System.out.println("Unexpected argument : "+args[i]); + } + } + /** + * Given the number of input layers and outputs, and the sizes of all layers, + * set up an untrained neural net. + * @param layerSizes the number of neurons in each layer, also corresponds to the number of outputs of that layer. + * @param learningRate the learning rage. + * @param momentum the momentum. + */ + NeuralNetwork(int[] layerSizes) { + layers = new Layer[layerSizes.length-1]; + + // each layer has a number of inputs defined by the outputs of the previous layer, or + // the number inputs passed in, outputs is the number of neurons in the layer since each + // neuron produces one output. + Random r = new Random (1234); + for (int i = 0; i < layerSizes.length-1; i++) { + this.layers[i] = new Layer(layerSizes[i], layerSizes[i+1], r); + } + } + + /** + * @see edu.illinois.cs.cogcomp.lbjava.neuralnet.Activator#activateLayers(float[], edu.illinois.cs.cogcomp.lbjava.neuralnet.Layer[]) + */ + @Override + public float[] prediction(float[] inputs) { + // set up our counts. 
+ int layerCount = layers.length; + + // storage for each output of each layer, and the error computed for each activation. + float [][] activations = new float[layerCount][]; + + // This array contains inputs from previous layer + float [] currentinputs = inputs; + for (int i = 0 ; i < layerCount ; i++) { + + // compute the activations for this layer. + Layer layer = layers[i]; + activations[i] = layer.activate(currentinputs); + currentinputs = activations[i]; + } + return activations[layerCount-1]; + } + + /** + * Train up the NN model given training data, a learner algorith, + * and convergence criteria. + * @param inputs the input data. + * @param outputs the desired output. + * @param learner the learning algorithm. + * @param epochs number of iterations to run. + * @param converge the convergence criteria. + */ + public void train(float[][] inputs, float[][]outputs, NNTrainingInterface learner, int epochs) { + if (inputs.length != outputs.length) + throw new RuntimeException("There must be the same number of input data records and output data records to train."); + learner.train(inputs, outputs, epochs); + } + + /** + * Test will try learning an XOR model. 
+ * @param args + * @throws IOException + */ + public static void main(String[] args) throws IOException { + parseArgs(args); + float[][] examples = null; + float[][] outputs = null; + float[][] texamples = null; + float[][] toutputs = null; + int [] hls = null; + int outputrange = 0; + if (args.length != 0) { + int [] thls = {28*28, hiddenLayerSize, 1}; + hls = thls; + System.out.println("reading data from disk."); + /*examples = DatasetReader.getExampleInputs("./data/NIST/train-images-idx3-ubyte"); + outputs = DatasetReader.getExampleOutputs("./data/NIST/train-labels-idx1-ubyte"); + texamples = DatasetReader.getExampleInputs("./data/NIST/t10k-images-idx3-ubyte"); + toutputs = DatasetReader.getExampleOutputs("./data/NIST/t10k-labels-idx1-ubyte"); + */ + BrownReader br = new BrownReader("data/brown/their-brown80.feat"); + examples = br.inputs; + outputs = br.outputs; + outputrange = br.outputCardinality; + br = new BrownReader("data/brown/their-brown20.feat", examples[0].length, br.outputCardinality); + texamples = br.inputs; + toutputs = br.outputs; + thls[0] = examples[0].length; + thls[2] = outputs[0].length; + } else { + int [] thls = {2, 2, 1}; + hls = thls; + examples = new float[][] { new float[] { 0, 0 }, new float[] { 0, 1 }, new float[] { 1, 0 }, new float[] { 1, 1 } }; + outputs = new float[][] { new float[] { 0 }, new float[] { 1 }, new float[] { 1 }, new float[] { 0 } }; + texamples = new float[][] { new float[] { 0, 0 }, new float[] { 0, 1 }, new float[] { 1, 0 }, new float[] { 1, 1 } }; + toutputs = new float[][] { new float[] { 0 }, new float[] { 1 }, new float[] { 1 }, new float[] { 0 } }; + } + int good = 0; + { + System.out.println("Start run: epochs="+epochs+" lr="+learningRate+" mom="+momentum+" hidden="+hiddenLayerSize+" threads:"+threads); + NeuralNetwork nn = new NeuralNetwork(hls); + NNTrainingInterface learner = null; + if (threads <= 1) { + learner = new SimpleNNTrainer(nn.layers, learningRate, momentum); + } else { + learner = new 
ThreadedNNTrainer(nn.layers, learningRate, momentum); + } + long time = System.currentTimeMillis(); + learner.train(examples, outputs, epochs); + time = (System.currentTimeMillis() - time)/1000l; + System.out.format("Took %d to train up a simple model, on to testing.\n",time); + System.out.println("\nCompute accuracy against training"); + + // provide some output now. + for (int inputIdx = 0; inputIdx < examples.length; inputIdx++) { + float[] outs = nn.prediction(examples[inputIdx]); + float pred = outs[0]*outputrange; + float label = outputs[inputIdx][0]*outputrange; + if (Math.round(pred) == Math.round(label)) { + good++; + } + } + System.out.format("Of %d, %d were good, accuracy %.4f",examples.length, good, ((float)good/(float)examples.length)); + good = 0; + System.out.println("\nCompute accuracy against hold out set."); + + // provide some output now. + for (int inputIdx = 0; inputIdx < texamples.length; inputIdx++) { + float[] outs = nn.prediction(texamples[inputIdx]); + float pred = outs[0]*outputrange; + float label = toutputs[inputIdx][0]*outputrange; + if (Math.round(pred) == Math.round(label)) { + System.out.format("+ %d label %.10f pred %.10f\n", inputIdx,label,pred); + good++; + } else { + System.out.format("- %d label %.10f pred %.10f\n", inputIdx,label,pred); + } + } + System.out.format("Of %d, %d were good, accuracy %.4f",texamples.length, good, ((float)good/(float)texamples.length)); + } + } +} diff --git a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/neuralnet/README.md b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/neuralnet/README.md new file mode 100755 index 00000000..5fc8dbe6 --- /dev/null +++ b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/neuralnet/README.md @@ -0,0 +1,13 @@ +--- +title: Badges +authors: Vivek Srikumar, Stephen Mayhew, Daniel Khashabi +lead: Classify a simple dataset of names +layout: page +--- + +This classifier does spelling correction, it uses 
data which can be found on the CogComp web site, +do a search for "Brown Corpus Data for Context Sensitive Spelling Correction" to find this data. The +data must be placed in the directory where you run the training process for this to work. + +Training can be done by simply running the NeuralNetwork class manually + diff --git a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/newsgroup/ClassifyText.java b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/newsgroup/ClassifyText.java index 57853ba0..c42ff307 100755 --- a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/newsgroup/ClassifyText.java +++ b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/newsgroup/ClassifyText.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ /** @@ -25,20 +22,20 @@ */ public class ClassifyText { - public static void main(String[] args) throws IOException { + public static void main(String[] args) throws IOException { - if (args.length != 1) { - System.out.println("Usage: ClassifyText file"); - System.exit(-1); - } + if (args.length != 1) { + System.out.println("Usage: ClassifyText file"); + System.exit(-1); + } - // This is the black box text classifier - NewsGroupClassifier classifier = new NewsGroupClassifier(); + // This is the black box text classifier + NewsGroupClassifier classifier = new NewsGroupClassifier(); - Document document = new Document(new File(args[0])); + Document document = new Document(new File(args[0])); - // Ask the classifier to label the document - String label = classifier.discreteValue(document); - System.out.println(label); - } + // Ask the classifier to label the document + String label = classifier.discreteValue(document); + System.out.println(label); + } } diff --git a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/newsgroup/README.md b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/newsgroup/README.md index a8353476..e0d48cf0 100755 --- a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/newsgroup/README.md +++ b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/newsgroup/README.md @@ -1,7 +1,10 @@ -# +--- +title: NewsGroup classification +authors: Vivek Srikumar, Stephen Mayhew, Daniel Khashabi +lead: Build a text classifier for the 20 NewsGroups dataset. +layout: page +--- This a sample classification project based on Learning Based Java (LBJava). 
-Author: Vivek Srikumar -Updated: Stephen Mayhew, Daniel Khashabi diff --git a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/regression/MyData.java b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/regression/MyData.java index 41e8740c..14f84e26 100644 --- a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/regression/MyData.java +++ b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/regression/MyData.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.examples.regression; @@ -27,16 +24,13 @@ public class MyData { /** * Constructor + * * @param line string contains features and label * - * Note: - * - The last element is the label. - * - A single space seperates each feature and the label. + * Note: - The last element is the label. - A single space seperates each feature and the + * label. 
* - * Example: - * line is string "1.0 2.0 3.0 -1" - * feature vector is [1.0, 2.0, 3.0] - * label is -1 + * Example: line is string "1.0 2.0 3.0 -1" feature vector is [1.0, 2.0, 3.0] label is -1 */ public MyData(String line) { this.features = new ArrayList<>(); @@ -45,12 +39,13 @@ public MyData(String line) { features.add(Double.parseDouble(each)); } - label = features.get(features.size()-1); - features.remove(features.size()-1); + label = features.get(features.size() - 1); + features.remove(features.size() - 1); } /** * Getter for feature vector + * * @return feature vector */ public List getFeatures() { @@ -59,9 +54,10 @@ public List getFeatures() { /** * Getter for label + * * @return label */ public double getLabel() { return label; } -} \ No newline at end of file +} diff --git a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/regression/MyDataReader.java b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/regression/MyDataReader.java index 959e4fa7..188560cc 100644 --- a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/regression/MyDataReader.java +++ b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/regression/MyDataReader.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.examples.regression; @@ -21,8 +18,8 @@ * * It reads through a file and parse each example as MyData * - * Note: assuming each line consists of features and label for each example, - * please see MyData for more information on the format + * Note: assuming each line consists of features and label for each example, please see + * MyData for more information on the format * * @author Yiming Jiang */ @@ -36,6 +33,7 @@ public class MyDataReader implements Parser { /** * Constructor + * * @param filePath the path to the file */ public MyDataReader(String filePath) { @@ -57,21 +55,21 @@ public MyDataReader(String filePath) { /** * Iterator method + * * @return the next example in MyData */ @Override public Object next() { if (currentLineNumber < lines.size()) { MyData ret = new MyData(lines.get(currentLineNumber)); - this.currentLineNumber ++; + this.currentLineNumber++; return ret; } return null; } @Override - public void close() { - } + public void close() {} /** * Reset the line number tracker @@ -80,4 +78,4 @@ public void close() { public void reset() { this.currentLineNumber = 0; } -} \ No newline at end of file +} diff --git a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/regression/README.md b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/regression/README.md index a0773b8a..e860d054 100644 --- a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/regression/README.md +++ b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/regression/README.md @@ -1,7 +1,10 @@ -# Regression Example +--- +title: Regression Example +authors: Yiming Jiang +lead: See how to use LBJava for regression +layout: page 
+--- This is a sample regression project using Stochastic Gradient Descent learning algorithm. It shows how to define `.lbj` file with `real` type and how to use learning algorithms with `real` output. - -Author: Yiming Jiang \ No newline at end of file diff --git a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/regression/SGDMain.java b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/regression/SGDMain.java index 7582d4d2..8936cc39 100644 --- a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/regression/SGDMain.java +++ b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/regression/SGDMain.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.examples.regression; @@ -24,7 +21,8 @@ public class SGDMain { public static void main(String[] args) { /* read the training data set */ - MyDataReader train = new MyDataReader(System.getProperty("user.dir")+"/data/regression/train.txt"); + MyDataReader train = + new MyDataReader(System.getProperty("user.dir") + "/data/regression/train.txt"); /* programmatically create SGDClassifier and set the learning rate */ StochasticGradientDescent learner = new SGDClassifier(); @@ -37,7 +35,8 @@ public static void main(String[] args) { trainer.train(1000); /* read the testing data set */ - MyDataReader test = new MyDataReader(System.getProperty("user.dir")+"/data/regression/test.txt"); + MyDataReader test = + new MyDataReader(System.getProperty("user.dir") + "/data/regression/test.txt"); /* test the testing data set against gold */ Classifier oracle = new MyLabel(); diff --git a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/sentiment/README.md b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/sentiment/README.md index a8353476..71edfe46 100755 --- a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/sentiment/README.md +++ b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/sentiment/README.md @@ -1,7 +1,10 @@ -# +--- +title: Sentiment Classifier +layout: page +author: Vivek Srikumar, Stephen Mayhew, Daniel Khashabi +lead: Sentiment analysis in LBJava +--- This a sample classification project based on Learning Based Java (LBJava). 
-Author: Vivek Srikumar -Updated: Stephen Mayhew, Daniel Khashabi diff --git a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/sentiment/SentimentDataReader.java b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/sentiment/SentimentDataReader.java index b03b557e..e2691012 100755 --- a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/sentiment/SentimentDataReader.java +++ b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/sentiment/SentimentDataReader.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ /** @@ -32,77 +29,76 @@ */ public class SentimentDataReader implements Parser { - private List docs; - private int current; + private List docs; + private int current; - public SentimentDataReader(String dir, boolean train) { + public SentimentDataReader(String dir, boolean train) { - docs = new ArrayList(); - try { - if (train) { - String positiveFile = dir + File.separator + "positive.review"; + docs = new ArrayList(); + try { + if (train) { + String positiveFile = dir + File.separator + "positive.review"; - read(positiveFile); + read(positiveFile); - String negativeFile = dir + File.separator + "negative.review"; + String negativeFile = dir + File.separator + "negative.review"; - read(negativeFile); - } else { - String testFile = dir + File.separator + "unlabeled.review"; - read(testFile); - } - Collections.shuffle(docs); - current 
= 0; + read(negativeFile); + } else { + String testFile = dir + File.separator + "unlabeled.review"; + read(testFile); + } + Collections.shuffle(docs); + current = 0; - } catch (Exception e) { - e.printStackTrace(); - System.exit(-1); - } - } + } catch (Exception e) { + e.printStackTrace(); + System.exit(-1); + } + } - /** - * @param negativeFile - * @param string - * @throws FileNotFoundException - */ - private void read(String file) throws Exception { - BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file))); - String line = br.readLine(); + /** + * @param negativeFile + * @param string + * @throws FileNotFoundException + */ + private void read(String file) throws Exception { + BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file))); + String line = br.readLine(); - while (line != null) { - String[] parts = line.split("\\s+"); + while (line != null) { + String[] parts = line.split("\\s+"); - String label = parts[parts.length - 1].split(":")[1].trim(); + String label = parts[parts.length - 1].split(":")[1].trim(); - List words = new ArrayList(); - for (int i = 0; i < parts.length - 1; i++) { - String[] p = parts[i].split(":"); + List words = new ArrayList(); + for (int i = 0; i < parts.length - 1; i++) { + String[] p = parts[i].split(":"); - for (int j = 0; j < Integer.parseInt(p[1]); j++) - words.add(p[0].trim()); - } + for (int j = 0; j < Integer.parseInt(p[1]); j++) + words.add(p[0].trim()); + } - Collections.shuffle(words); + Collections.shuffle(words); - docs.add(new Document(words, label)); - line = br.readLine(); - } + docs.add(new Document(words, label)); + line = br.readLine(); + } - br.close(); - } + br.close(); + } - public void close() { - } + public void close() {} - public Object next() { - if (current < docs.size()) - return docs.get(current++); - else - return null; - } + public Object next() { + if (current < docs.size()) + return docs.get(current++); + else + return null; + } - 
public void reset() { - current = 0; - } + public void reset() { + current = 0; + } } diff --git a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/setCover/City.java b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/setCover/City.java index 53dece95..de5ab648 100755 --- a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/setCover/City.java +++ b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/setCover/City.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.examples.setCover; @@ -16,39 +13,37 @@ public class City { - TreeMap neighborhoods = - new TreeMap(); - - public City(String file){ - LineByLine parser = new LineByLine(file) { - public Object next() { - return readLine(); - } - }; - - String line = null; - while((line = (String) parser.next()) != null){ - String[] definition = line.split("\\s+"); - Integer index = new Integer(definition[0]); - Neighborhood n = getNeighborhood(index); - for(int i = 1; i < definition.length; i++){ - n.addNeighbor(getNeighborhood(new Integer(definition[i]))); - } + TreeMap neighborhoods = new TreeMap(); + + public City(String file) { + LineByLine parser = new LineByLine(file) { + public Object next() { + return readLine(); + } + }; + + String line = null; + while ((line = (String) parser.next()) != null) { + String[] definition = line.split("\\s+"); + 
Integer index = new Integer(definition[0]); + Neighborhood n = getNeighborhood(index); + for (int i = 1; i < definition.length; i++) { + n.addNeighbor(getNeighborhood(new Integer(definition[i]))); + } + } } - } - public Collection getNeighborhoods(){ - return neighborhoods.values(); - } - - public Neighborhood getNeighborhood(Integer index) { - Neighborhood n = neighborhoods.get(index); - if (n == null) { - n = new Neighborhood(index, this); - neighborhoods.put(index, n); + public Collection getNeighborhoods() { + return neighborhoods.values(); } - return n; - } -} + public Neighborhood getNeighborhood(Integer index) { + Neighborhood n = neighborhoods.get(index); + if (n == null) { + n = new Neighborhood(index, this); + neighborhoods.put(index, n); + } + return n; + } +} diff --git a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/setCover/ContainsStation.java b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/setCover/ContainsStation.java index 4d69fa3f..8e4a9b3e 100755 --- a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/setCover/ContainsStation.java +++ b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/setCover/ContainsStation.java @@ -1,32 +1,31 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.examples.setCover; import edu.illinois.cs.cogcomp.lbjava.classify.ScoreSet; -public class ContainsStation extends DumbLearner -{ - public ContainsStation() { super("edu.illinois.cs.cogcomp.lbjava.examples.setCover.ContainsStation"); } +public class ContainsStation extends DumbLearner { + public ContainsStation() { + super("edu.illinois.cs.cogcomp.lbjava.examples.setCover.ContainsStation"); + } - public String getInputType() { return "edu.illinois.cs.cogcomp.lbjava.examples.setCover.Neighborhood"; } - public String[] allowableValues() { - return new String[]{ "false", "true" }; - } + public String getInputType() { + return "edu.illinois.cs.cogcomp.lbjava.examples.setCover.Neighborhood"; + } - public ScoreSet scores(Object example) - { - ScoreSet result = new ScoreSet(); - result.put("false", 0); - result.put("true", -1); - return result; - } -} + public String[] allowableValues() { + return new String[] {"false", "true"}; + } + public ScoreSet scores(Object example) { + ScoreSet result = new ScoreSet(); + result.put("false", 0); + result.put("true", -1); + return result; + } +} diff --git a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/setCover/DumbLearner.java b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/setCover/DumbLearner.java index ea46c0fb..40e064b6 100755 --- a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/setCover/DumbLearner.java +++ b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/setCover/DumbLearner.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. 
See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.examples.setCover; @@ -16,29 +13,39 @@ import edu.illinois.cs.cogcomp.lbjava.classify.ScoreSet; import edu.illinois.cs.cogcomp.lbjava.learn.Learner; -public abstract class DumbLearner extends Learner -{ - public DumbLearner() { this(""); } - public DumbLearner(String n) { super(n); } - - public void learn(int[] exampleFeatures, double[] exampleValues, - int[] exampleLabels, double[] labelValues) { } - public FeatureVector classify(int[] exampleFeatures, double[] exampleValues) - { return null; } - public ScoreSet scores(int[] f, double[] v) { return scores(null); } - - public FeatureVector classify(Object example) { - String prediction = scores(example).highScoreValue(); - return - new FeatureVector( - new DiscretePrimitiveStringFeature( - containingPackage, name, "", prediction, - valueIndexOf(prediction), (short) allowableValues().length)); - } - - public void write(PrintStream out) { } - - public int hashCode() { return name.hashCode(); } - public boolean equals(Object o) { return getClass().equals(o.getClass()); } -} +public abstract class DumbLearner extends Learner { + public DumbLearner() { + this(""); + } + + public DumbLearner(String n) { + super(n); + } + + public void learn(int[] exampleFeatures, double[] exampleValues, int[] exampleLabels, + double[] labelValues) {} + + public FeatureVector classify(int[] exampleFeatures, double[] exampleValues) { + return null; + } + + public ScoreSet scores(int[] f, double[] v) { + return scores(null); + 
} + public FeatureVector classify(Object example) { + String prediction = scores(example).highScoreValue(); + return new FeatureVector(new DiscretePrimitiveStringFeature(containingPackage, name, "", + prediction, valueIndexOf(prediction), (short) allowableValues().length)); + } + + public void write(PrintStream out) {} + + public int hashCode() { + return name.hashCode(); + } + + public boolean equals(Object o) { + return getClass().equals(o.getClass()); + } +} diff --git a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/setCover/Neighborhood.java b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/setCover/Neighborhood.java index e31646d4..c91ea67b 100755 --- a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/setCover/Neighborhood.java +++ b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/setCover/Neighborhood.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.examples.setCover; @@ -14,38 +11,46 @@ public class Neighborhood implements Comparable { - City parent; - Integer number; - TreeSet neighbors = new TreeSet(); - - public Neighborhood(Integer n, City p){ - number = n; - parent = p; - } - - public Integer getNumber(){ - return number; - } + City parent; + Integer number; + TreeSet neighbors = new TreeSet(); - public TreeSet getNeighbors() { return neighbors; } + public Neighborhood(Integer n, City p) { + number = n; + parent = p; + } - public void addNeighbor(Neighborhood n){ - neighbors.add(n); - } + public Integer getNumber() { + return number; + } - public City getParentCity() { return parent; } + public TreeSet getNeighbors() { + return neighbors; + } - public int compareTo(Neighborhood n) { - return number.compareTo(n.number); - } + public void addNeighbor(Neighborhood n) { + neighbors.add(n); + } - public int hashCode() { return number.hashCode(); } + public City getParentCity() { + return parent; + } - public boolean equals(Object o) { - if (!(o instanceof Neighborhood)) return false; - return ((Neighborhood) o).number.equals(number); - } + public int compareTo(Neighborhood n) { + return number.compareTo(n.number); + } - public String toString() { return "neighborhood #" + number; } -} + public int hashCode() { + return number.hashCode(); + } + + public boolean equals(Object o) { + if (!(o instanceof Neighborhood)) + return false; + return ((Neighborhood) o).number.equals(number); + } + public String toString() { + return "neighborhood #" + number; + } +} diff --git a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/setCover/README.md 
b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/setCover/README.md index 51a387cd..3232ea9d 100755 --- a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/setCover/README.md +++ b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/setCover/README.md @@ -1,4 +1,9 @@ -# Set Cover problem +--- +title: Set Cover Problem +layout: page +lead: Solve the set cover problem +authors: Daniel Khashabi +--- ## Description This tarball contains the LBJ implementation of the solution to a set cover @@ -7,7 +12,7 @@ the problem is formulated as an Integer Linear Program. In LBJ, we'll write the constraints in First Order Logic, and they'll be translated into the same linear inequalities shown on the web page: -http://mat.gsia.cmu.edu/orclass/integer/node8.html +`http://mat.gsia.cmu.edu/orclass/integer/node8.html` Classes City and Neighborhood are used as the internal representation of our problem's data. An instance of class City will become the "head" object of an @@ -24,24 +29,37 @@ operates similarly to ContainsStation except that it respects the constraints. SetCoverSolver is a program that takes raw data representing a City as input and produces the constrained predictions. -=== How to run === -To run: -./test.sh +## How to run +To run: -To clean: -./clean.sh +``` +./test.sh +``` + +To clean: +``` +./clean.sh +``` Note: You need to have Gurobi, the optimization package installed on your computer. 
-You can download it from: www.gurobi.com, and set a global variable in ~/.bashrc, pointing to the Gurobi: - export GUROBI_HOME=/shared/austen/khashab2/Gurobi/gurobi603/linux64 - export PATH="${PATH}:${GUROBI_HOME}/bin" - export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${GUROBI_HOME}/lib" +You can download it from: www.gurobi.com, and set a global variable in `~/.bashrc`, pointing to the Gurobi: + +``` +export GUROBI_HOME=/shared/austen/khashab2/Gurobi/gurobi603/linux64 +export PATH="${PATH}:${GUROBI_HOME}/bin" +export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${GUROBI_HOME}/lib" +``` + You also need to get the Gurobi license and put it somewhere on your machine. - export GRB_LICENSE_FILE=ADDRESS_TO_LICENSE_FILE +``` +export GRB_LICENSE_FILE=ADDRESS_TO_LICENSE_FILE +``` + +## Files In This Project -=== Files In This Project === +``` ├── class ├── clean.sh ├── example.txt @@ -61,4 +79,4 @@ You also need to get the Gurobi license and put it somewhere on your machine. │   ├── Neighborhood.java │   └── SetCoverSolver.java └── test.sh - +``` diff --git a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/setCover/SetCoverSolver.java b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/setCover/SetCoverSolver.java index 9f70eede..abea7265 100755 --- a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/setCover/SetCoverSolver.java +++ b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/setCover/SetCoverSolver.java @@ -1,27 +1,23 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.examples.setCover; public class SetCoverSolver { - public static void main(String[] args){ - - containsStationConstrained classifier = new containsStationConstrained(); - - for(String file : args){ - City c = new City(file); - for (Neighborhood n : c.getNeighborhoods()){ - System.out.println(n.getNumber() + ": " + classifier.discreteValue(n)); - } - } - } -} + public static void main(String[] args) { + + containsStationConstrained classifier = new containsStationConstrained(); + for (String file : args) { + City c = new City(file); + for (Neighborhood n : c.getNeighborhoods()) { + System.out.println(n.getNumber() + ": " + classifier.discreteValue(n)); + } + } + } +} diff --git a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/spam/README.md b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/spam/README.md index 50ddf4b0..4169a867 100644 --- a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/spam/README.md +++ b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/spam/README.md @@ -1,2 +1,12 @@ -# Spam Classification -TODO \ No newline at end of file +--- +title: Spam Classification +layout: page +lead: Classify documents as spam or ham using LBJava +author: Stephen Mayhew +--- + +Coming soon! 
+ +{% highlight java %} +{% include_relative SpamClassifierApplication.java %} +{% endhighlight %} diff --git a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/spam/SpamClassifierApplication.java b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/spam/SpamClassifierApplication.java index 1dffbdb8..8d0b4f4b 100755 --- a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/spam/SpamClassifierApplication.java +++ b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/spam/SpamClassifierApplication.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.examples.spam; @@ -19,32 +16,32 @@ public class SpamClassifierApplication { - public static void main(String[] args) { - // Here we call the classifier we have created, just like - // any other java object. - SpamClassifier sc = new SpamClassifier(); - + public static void main(String[] args) { + // Here we call the classifier we have created, just like + // any other java object. 
+ SpamClassifier sc = new SpamClassifier(); + + System.out.print("Enter text to be classified (Ctrl-D to quit):\n>> "); + + // Read data in + Scanner scanner = new Scanner(System.in); + while (scanner.hasNextLine()) { + String email = scanner.nextLine(); + String[] words = email.split(" "); + + List docwords = new ArrayList(); + docwords.addAll(Arrays.asList(words)); + + // Recall: the SpamClassifier understands how + // to deal with Documents, so we create one. + Document doc = new Document(docwords); + + // Now we predict the label of that Document. + String label = sc.discreteValue(doc); + + // Hopefully this is correct! + System.out.println("Classified as: " + label); System.out.print("Enter text to be classified (Ctrl-D to quit):\n>> "); - - // Read data in - Scanner scanner = new Scanner(System.in); - while (scanner.hasNextLine()) { - String email = scanner.nextLine(); - String[] words = email.split(" "); - - List docwords = new ArrayList(); - docwords.addAll(Arrays.asList(words)); - - // Recall: the SpamClassifier understands how - // to deal with Documents, so we create one. - Document doc = new Document(docwords); - - // Now we predict the label of that Document. - String label = sc.discreteValue(doc); - - // Hopefully this is correct! 
- System.out.println("Classified as: " + label); - System.out.print("Enter text to be classified (Ctrl-D to quit):\n>> "); - } - } + } + } } diff --git a/lbjava-examples/src/main/lbj/BadgesClassifier.lbj b/lbjava-examples/src/main/lbj/BadgesClassifier.lbj index 843a168c..c373e0b4 100755 --- a/lbjava-examples/src/main/lbj/BadgesClassifier.lbj +++ b/lbjava-examples/src/main/lbj/BadgesClassifier.lbj @@ -30,7 +30,7 @@ discrete{"positive", "negative"} BadgeLabel(String line) <- { discrete BadgeClassifier(String line) <- learn BadgeLabel using IdealFeatures - from new BadgeDataReader("../data/badges/badges.train") + from new BadgeDataReader("data/badges/badges.train") 5 rounds with SparseAveragedPerceptron { @@ -38,7 +38,7 @@ learn BadgeLabel thickness = 3.5; } - testFrom new BadgeDataReader("../data/badges/badges.test") + testFrom new BadgeDataReader("data/badges/badges.test") progressOutput 2000 end diff --git a/lbjava-examples/src/main/lbj/EntityRelation.lbj b/lbjava-examples/src/main/lbj/EntityRelation.lbj index 019ecc9e..61325dab 100644 --- a/lbjava-examples/src/main/lbj/EntityRelation.lbj +++ b/lbjava-examples/src/main/lbj/EntityRelation.lbj @@ -3,9 +3,8 @@ package edu.illinois.cs.cogcomp.lbjava.examples.entityRelation; import java.util.List; import edu.illinois.cs.cogcomp.lbjava.examples.entityRelation.datastruct.*; import edu.illinois.cs.cogcomp.lbjava.examples.entityRelation.Conll04_RelationReaderNew; -import edu.illinois.cs.cogcomp.lbjava.infer.GurobiHook; -import edu.illinois.cs.cogcomp.lbjava.infer.GurobiHook; +import edu.illinois.cs.cogcomp.infer.ilp.OJalgoHook; discrete% EntityFeatures(ConllRawToken t) <- { sense t.POS; @@ -26,13 +25,13 @@ discrete {"Peop","nPeop"} personLabel(ConllRawToken t) <- { discrete PersonClassifier(ConllRawToken t) <- learn personLabel using EntityFeatures - from new Conll04_RelationReaderNew("../data/enitityRelations/conll04_train.corp", "Token") + from new Conll04_RelationReaderNew("data/enitityRelations/conll04_train.corp", 
"Token") 50 rounds with SparsePerceptron { learningRate = 0.1 ; thickness = 3.5; } - testFrom new Conll04_RelationReaderNew("../data/enitityRelations/conll04_test.corp", "Token") + testFrom new Conll04_RelationReaderNew("data/enitityRelations/conll04_test.corp", "Token") progressOutput 200 end @@ -47,14 +46,14 @@ discrete {"Loc","nLoc"} locLabel(ConllRawToken t) <- { discrete LocClassifier(ConllRawToken t) <- learn locLabel using EntityFeatures - from new Conll04_RelationReaderNew("../data/enitityRelations/conll04_train.corp", "Token") + from new Conll04_RelationReaderNew("data/enitityRelations/conll04_train.corp", "Token") 50 rounds with SparsePerceptron { learningRate = 0.1 ; thickness = 3.5; } - testFrom new Conll04_RelationReaderNew("../data/enitityRelations/conll04_test.corp","Token") + testFrom new Conll04_RelationReaderNew("data/enitityRelations/conll04_test.corp","Token") progressOutput 200 end @@ -68,14 +67,14 @@ discrete {"Org","nOrg"} orgLabel(ConllRawToken t) <- { discrete OrganizationClassifier(ConllRawToken t) <- learn orgLabel using EntityFeatures - from new Conll04_RelationReaderNew("../data/enitityRelations/conll04_train.corp", "Token") + from new Conll04_RelationReaderNew("data/enitityRelations/conll04_train.corp", "Token") 50 rounds with SparsePerceptron { learningRate = 0.1 ; thickness = 3.5; } - testFrom new Conll04_RelationReaderNew("../data/enitityRelations/conll04_test.corp", "Token") + testFrom new Conll04_RelationReaderNew("data/enitityRelations/conll04_test.corp", "Token") progressOutput 200 end @@ -89,14 +88,14 @@ discrete {"Work_For","nWork_For"} workLabel(ConllRelation t) <- { discrete work_forClassifier(ConllRelation t) <- learn workLabel using RelationFeatures - from new Conll04_RelationReaderNew("../data/enitityRelations/conll04_train.corp", "Pair") + from new Conll04_RelationReaderNew("data/enitityRelations/conll04_train.corp", "Pair") 50 rounds with SparsePerceptron { learningRate = 0.1 ; thickness = 3.5; } - testFrom new 
Conll04_RelationReaderNew("../data/enitityRelations/conll04_test.corp", "Pair") + testFrom new Conll04_RelationReaderNew("data/enitityRelations/conll04_test.corp", "Pair") progressOutput 200 end @@ -116,7 +115,7 @@ inference JointER head ConllRelation t { ConllRawToken e { return e.getRelation(); } // normalizedby new Softmax() subjectto { @PersonWorkFor(t) /\ @OrganizationWorkFor(t); } - with new ILPInference(new GurobiHook()) + with new ILPInference(new OJalgoHook()) } discrete RelArgsClassifier(ConllRelation r) <- @@ -124,4 +123,4 @@ discrete RelArgsClassifier(ConllRelation r) <- discrete Arg_pClassifier(ConllRawToken t) <- JointER(PersonClassifier) discrete ArgOClassifier (ConllRawToken t) <- - JointER(OrganizationClassifier) \ No newline at end of file + JointER(OrganizationClassifier) diff --git a/lbjava-examples/src/main/lbj/NNBrownClassifier.lbj b/lbjava-examples/src/main/lbj/NNBrownClassifier.lbj new file mode 100644 index 00000000..49118805 --- /dev/null +++ b/lbjava-examples/src/main/lbj/NNBrownClassifier.lbj @@ -0,0 +1,39 @@ +package edu.illinois.cs.cogcomp.lbjava.examples.neuralnet; + +import edu.illinois.cs.cogcomp.lbjava.neuralnet.*; + +/** +The input features are simple the set of real values +that serve as input to the neural net. + */ +real[] NNInputVector(NeuralNetExample input) <- { + float[] datapoints = input.getInputFeatures(); + for (int i = 0; i < datapoints.length; i++) { + sense datapoints[i]; + } +} + +/** +The output vector contains the data outputs, for this class +just one floating point number. +*/ +real NNOutputVector(NeuralNetExample d) <- { + float[] datapoints = d.getOutputLabels(); + return datapoints[0]; +} + +/** +A learned text classifier; its definition comes from data. 
+*/ +real NNBrownDataClassifier(NeuralNetExample d) <- + learn NNOutputVector + using NNInputVector + from new BrownReader("data/brown/their-brown80.feat") + with NeuralNetLearner { + inputCount = 4000; + hiddenCount=100; + outputCount=1; + learningRate=.3f; + momentum=.7f; + } +end \ No newline at end of file diff --git a/lbjava-examples/src/main/lbj/NewsGroupClassifier.lbj b/lbjava-examples/src/main/lbj/NewsGroupClassifier.lbj index 90f6c61b..d61d77ff 100755 --- a/lbjava-examples/src/main/lbj/NewsGroupClassifier.lbj +++ b/lbjava-examples/src/main/lbj/NewsGroupClassifier.lbj @@ -24,7 +24,7 @@ discrete NewsGroupLabel(Document d) <- { return d.getLabel(); } discrete NewsGroupClassifier(Document d) <- learn NewsGroupLabel using WordFeatures, BigramFeatures - from new DocumentReader("../data/20news/train") + from new DocumentReader("data/20news/train") 5 rounds with SparseNetworkLearner { @@ -34,7 +34,7 @@ learn NewsGroupLabel p.thickness = 5; baseLTU = new SparseAveragedPerceptron(p); } - testFrom new DocumentReader("../data/20news/test") + testFrom new DocumentReader("data/20news/test") progressOutput 2000 end diff --git a/lbjava-examples/src/main/lbj/SentimentClassifier.lbj b/lbjava-examples/src/main/lbj/SentimentClassifier.lbj index 7ae144d6..37010093 100755 --- a/lbjava-examples/src/main/lbj/SentimentClassifier.lbj +++ b/lbjava-examples/src/main/lbj/SentimentClassifier.lbj @@ -19,7 +19,7 @@ discrete SentimentLabel(Document d) <- { return d.getLabel(); } discrete SentimentClassifier(Document d) <- learn SentimentLabel using WordFeatures - from new SentimentDataReader("../data/sentiment/books", true) + from new SentimentDataReader("data/sentiment/books", true) 5 rounds with SparseNetworkLearner { @@ -29,7 +29,7 @@ learn SentimentLabel p.thickness = 5; baseLTU = new SparseAveragedPerceptron(p); } - testFrom new SentimentDataReader("../data/sentiment/books", false) + testFrom new SentimentDataReader("data/sentiment/books", false) progressOutput 2000 end diff --git 
a/lbjava-examples/src/main/lbj/SetCover.lbj b/lbjava-examples/src/main/lbj/SetCover.lbj index e6b6828a..60f3b06b 100755 --- a/lbjava-examples/src/main/lbj/SetCover.lbj +++ b/lbjava-examples/src/main/lbj/SetCover.lbj @@ -4,7 +4,7 @@ import edu.illinois.cs.cogcomp.lbjava.examples.setCover.City; import edu.illinois.cs.cogcomp.lbjava.examples.setCover.Neighborhood; import edu.illinois.cs.cogcomp.lbjava.examples.setCover.ContainsStation; -import edu.illinois.cs.cogcomp.lbjava.infer.GurobiHook; +import edu.illinois.cs.cogcomp.infer.ilp.OJalgoHook; constraint noEmptyNeighborhoods(City c) { forall (Neighborhood n in c.getNeighborhoods()) @@ -16,7 +16,7 @@ constraint noEmptyNeighborhoods(City c) { inference SetCover head City c { Neighborhood n { return n.getParentCity(); } subjectto { @noEmptyNeighborhoods(c); } - with new ILPInference(new GurobiHook()) + with new ILPInference(new OJalgoHook()) } discrete{false, true} containsStationConstrained(Neighborhood n) <- diff --git a/lbjava-examples/src/main/lbj/SpamClassifier.lbj b/lbjava-examples/src/main/lbj/SpamClassifier.lbj index d536a9c7..7dad4476 100755 --- a/lbjava-examples/src/main/lbj/SpamClassifier.lbj +++ b/lbjava-examples/src/main/lbj/SpamClassifier.lbj @@ -24,7 +24,7 @@ discrete{"spam", "ham"} SpamLabel(Document d) <- { return d.getLabel(); } discrete SpamClassifier(Document d) <- learn SpamLabel using WordFeatures, BigramFeatures - from new DocumentReader("../data/spam/train") + from new DocumentReader("data/spam/train") 50 rounds with SparseAveragedPerceptron { @@ -32,7 +32,7 @@ learn SpamLabel thickness = 3.5; } - testFrom new DocumentReader("../data/spam/test") + testFrom new DocumentReader("data/spam/test") progressOutput 2000 end diff --git a/lbjava-examples/target/.gitignore b/lbjava-examples/target/.gitignore deleted file mode 100644 index 5e7d2734..00000000 --- a/lbjava-examples/target/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -# Ignore everything in this directory -* -# Except this file -!.gitignore diff 
--git a/lbjava-mvn-plugin/README.md b/lbjava-mvn-plugin/README.md index b056d93a..1be00051 100644 --- a/lbjava-mvn-plugin/README.md +++ b/lbjava-mvn-plugin/README.md @@ -9,7 +9,6 @@ - [How it works](#how) - [Eclipse](#ecplise) - ### Overview The purpose of this project is to make it easier to work with LBJava and Maven. This is a plugin written for Maven. @@ -23,29 +22,31 @@ Instructions for usage are in the [Usage](#usage) section. Put the following into your pom.xml: - - - ... - - - edu.illinois.cs.cogcomp - lbjava-maven-plugin - LBJAVA-VERSION - - lbjava/class/output/ - path/to/intended/lbjavasrc/output - path/to/src - - path/to/lbjavafile.lbj - path/to/second/lbjavafile2.lbj - etc/etc.lbj - - - - - ... - - +```xml + + + ... + + + edu.illinois.cs.cogcomp + lbjava-maven-plugin + LBJAVA-VERSION + + lbjava/class/output/ + path/to/intended/lbjavasrc/output + path/to/src + + path/to/lbjavafile.lbj + path/to/second/lbjavafile2.lbj + etc/etc.lbj + + + + + ... + + +``` Parameters are: @@ -60,41 +61,45 @@ are simply the flags given to LBJava's Main class. These are documented in the L **NB**: In case you get a repository error you should add edu.illinois.cs.cogcomp as a plugin repository: - - ... +```xml + + ... - - CogcompSoftware - CogcompSoftware - http://cogcomp.cs.illinois.edu/m2repo/ - + + CogcompSoftware + CogcompSoftware + http://cogcomp.cs.illinois.edu/m2repo/ + - ... - + ... + +``` ### Example setup Here's an example without the optional parameters. This is as simple as it gets, folks. - - - ... - - - edu.illinois.cs.cogcomp - lbjava-maven-plugin - LBJAVA-VERSION - - - lbjava/MyClassifier.lbj - - - - - ... - - +```xml + + + ... + + + edu.illinois.cs.cogcomp + lbjava-maven-plugin + LBJAVA-VERSION + + + lbjava/MyClassifier.lbj + + + + + ... 
+ + +``` diff --git a/lbjava-mvn-plugin/pom.xml b/lbjava-mvn-plugin/pom.xml index c3743d23..71bfa199 100644 --- a/lbjava-mvn-plugin/pom.xml +++ b/lbjava-mvn-plugin/pom.xml @@ -5,7 +5,7 @@ lbjava-project edu.illinois.cs.cogcomp - 1.2.16 + 1.3.1 lbjava-maven-plugin @@ -76,7 +76,7 @@ edu.illinois.cs.cogcomp LBJava - 1.2.16 + 1.3.1 jar compile @@ -126,6 +126,7 @@ + compile mojo-descriptor descriptor @@ -176,12 +177,4 @@ - - - CogcompSoftware - CogcompSoftware - scp://bilbo.cs.illinois.edu:/mounts/bilbo/disks/0/www/cogcomp/html/m2repo - - - diff --git a/lbjava-mvn-plugin/src/main/java/edu/illinois/cs/cogcomp/CleanMojo.java b/lbjava-mvn-plugin/src/main/java/edu/illinois/cs/cogcomp/CleanMojo.java index fdf3b092..b027d5e2 100644 --- a/lbjava-mvn-plugin/src/main/java/edu/illinois/cs/cogcomp/CleanMojo.java +++ b/lbjava-mvn-plugin/src/main/java/edu/illinois/cs/cogcomp/CleanMojo.java @@ -1,19 +1,19 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp; +import java.io.File; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; +import edu.illinois.cs.cogcomp.lbjava.util.FileUtils; import org.apache.commons.lang.StringUtils; import org.apache.maven.plugin.AbstractMojo; import org.apache.maven.plugin.MojoExecutionException; @@ -28,62 +28,78 @@ @Mojo(name = "clean", requiresDependencyResolution = ResolutionScope.COMPILE) public class CleanMojo extends AbstractMojo { - /** - * This will be ${project.build.outputDirectory} if not specified. - */ - @Parameter(defaultValue = "${project.build.outputDirectory}") - private String dFlag; - - /** - * This is maven default (src/main/java) if not specified. - */ - @Parameter(defaultValue = "src/main/java") - private String gspFlag; - - /** - * This is maven default (src/main/java) if not specified. - */ - @Parameter(defaultValue = "src/main/java") - private String sourcepathFlag; - - /** - * The only required parameter. 
- */ - @Parameter(required = true) - private String[] lbjavaInputFileList; - - @Parameter(property = "project.compileClasspathElements", required = true, readonly = true) - private List classpath; - - @Parameter(property = "project.build.outputDirectory", required = true, readonly = true) - private String outputdir; - - public void execute() throws MojoExecutionException { - - classpath.add(dFlag); - classpath.add(gspFlag); - String newpath = StringUtils.join(classpath, ":"); - - // We need to reverse the order we do the cleaning since there might be dependencies across files - List fileList = Arrays.asList(lbjavaInputFileList); - Collections.reverse(fileList); - for (String lbjInputFile : fileList) { - getLog().info("Calling Java edu.illinois.cs.cogcomp.lbjava.Main with the -x flag (for cleaning)..."); - try { - // The -x flag makes all the difference. - String[] args = new String[] { "java", "-cp", newpath, "edu.illinois.cs.cogcomp.lbjava.Main", "-x", - "-d", dFlag, "-gsp", gspFlag, "-sourcepath", sourcepathFlag, lbjInputFile }; - - ProcessBuilder pr = new ProcessBuilder(args); - pr.inheritIO(); - Process p = pr.start(); - p.waitFor(); - - } catch (Exception e) { - e.printStackTrace(); - System.out.println("Yeah, an error."); - } - } - - } + /** + * This will be ${project.build.outputDirectory} if not specified. + */ + @Parameter(defaultValue = "${project.build.outputDirectory}") + private String dFlag; + + /** + * This is maven default (src/main/java) if not specified. + */ + @Parameter(defaultValue = "${project.basedir}/src/main/java") + private String gspFlag; + + /** + * This is maven default (src/main/java) if not specified. + */ + @Parameter(defaultValue = "${project.basedir}/src/main/java") + private String sourcepathFlag; + + /** + * The only required parameter. 
+ */ + @Parameter(required = true) + private String[] lbjavaInputFileList; + + @Parameter(property = "project.compileClasspathElements", required = true, readonly = true) + private List classpath; + + @Parameter(property = "project.build.outputDirectory", required = true, readonly = true) + private String outputdir; + + public void execute() throws MojoExecutionException { + dFlag = FileUtils.getPlatformIndependentFilePath(dFlag); + gspFlag = FileUtils.getPlatformIndependentFilePath(gspFlag); + sourcepathFlag = FileUtils.getPlatformIndependentFilePath(sourcepathFlag); + + classpath.add(dFlag); + classpath.add(gspFlag); + + String newpath = StringUtils.join(classpath, File.pathSeparator); + + // We need to reverse the order we do the cleaning since there might be dependencies across + // files + List fileList = Arrays.asList(lbjavaInputFileList); + Collections.reverse(fileList); + for (String lbjInputFile : fileList) { + if (StringUtils.isEmpty(lbjInputFile)) { + // making the optional-compile-step parameter happy. + continue; + } + + getLog().info( + "Calling Java edu.illinois.cs.cogcomp.lbjava.Main with the -x flag (for cleaning)..."); + + lbjInputFile = FileUtils.getPlatformIndependentFilePath(lbjInputFile); + + try { + // The -x flag makes all the difference. 
+ String[] args = + new String[] {"java", "-cp", newpath, + "edu.illinois.cs.cogcomp.lbjava.Main", "-x", "-d", dFlag, "-gsp", + gspFlag, "-sourcepath", sourcepathFlag, lbjInputFile}; + + ProcessBuilder pr = new ProcessBuilder(args); + pr.inheritIO(); + Process p = pr.start(); + p.waitFor(); + + } catch (Exception e) { + e.printStackTrace(); + System.out.println("Yeah, an error."); + } + } + + } } diff --git a/lbjava-mvn-plugin/src/main/java/edu/illinois/cs/cogcomp/CompileMojo.java b/lbjava-mvn-plugin/src/main/java/edu/illinois/cs/cogcomp/CompileMojo.java index 8cc64b21..8d287ab8 100644 --- a/lbjava-mvn-plugin/src/main/java/edu/illinois/cs/cogcomp/CompileMojo.java +++ b/lbjava-mvn-plugin/src/main/java/edu/illinois/cs/cogcomp/CompileMojo.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp; @@ -13,6 +10,7 @@ import java.io.File; import java.util.List; +import edu.illinois.cs.cogcomp.lbjava.util.FileUtils; import org.apache.commons.lang.StringUtils; import org.apache.maven.plugin.AbstractMojo; import org.apache.maven.plugin.MojoExecutionException; @@ -21,68 +19,82 @@ import org.apache.maven.plugins.annotations.ResolutionScope; /** - * Compiles & executes LBJava code + * Compiles & executes LBJava code * */ @Mojo(name = "compile", requiresDependencyResolution = ResolutionScope.COMPILE) public class CompileMojo extends AbstractMojo { - /** - * This will be ${project.build.outputDirectory} if not specified. - */ - @Parameter(defaultValue = "${project.build.outputDirectory}") - private String dFlag; - - /** - * This is maven default (src/main/java) if not specified. - */ - @Parameter(defaultValue = "src/main/java") - private String gspFlag; - - /** - * This is maven default (src/main/java) if not specified. - */ - @Parameter(defaultValue = "src/main/java") - private String sourcepathFlag; - - /** - * The only required parameter. - */ - @Parameter(required = true) - private String[] lbjavaInputFileList; - - @Parameter(property = "project.compileClasspathElements", required = true, readonly = true) - private List classpath; - - @Parameter(property = "project.build.outputDirectory", required = true, readonly = true) - private String outputdir; - - public void execute() throws MojoExecutionException { - - classpath.add(dFlag); - classpath.add(gspFlag); - String newpath = StringUtils.join(classpath, ":"); - - // If these directories don't exist, make them. 
- new File(dFlag).mkdirs(); - new File(gspFlag).mkdirs(); - - for (String lbjInputFile : lbjavaInputFileList) { - getLog().info("Calling Java edu.illinois.cs.cogcomp.lbjava.Main..."); - try { - String[] args = new String[] { "java", "-cp", newpath, "edu.illinois.cs.cogcomp.lbjava.Main", - "-d", dFlag, "-gsp", gspFlag, "-sourcepath", sourcepathFlag, lbjInputFile }; - - ProcessBuilder pr = new ProcessBuilder(args); - pr.inheritIO(); - Process p = pr.start(); - p.waitFor(); - - } catch (Exception e) { - e.printStackTrace(); - System.out.println("Yeah, an error."); - } - } - - } + /** + * This will be ${project.build.outputDirectory} if not specified. + */ + @Parameter(defaultValue = "${project.build.outputDirectory}") + private String dFlag; + + /** + * This is maven default (src/main/java) if not specified. + */ + @Parameter(defaultValue = "${project.basedir}/src/main/java") + private String gspFlag; + + /** + * This is maven default (src/main/java) if not specified. + */ + @Parameter(defaultValue = "${project.basedir}/src/main/java") + private String sourcepathFlag; + + /** + * The only required parameter. + */ + @Parameter(required = true) + private String[] lbjavaInputFileList; + + @Parameter(property = "project.compileClasspathElements", required = true, readonly = true) + private List classpath; + + @Parameter(property = "project.build.outputDirectory", required = true, readonly = true) + private String outputdir; + + public void execute() throws MojoExecutionException { + dFlag = FileUtils.getPlatformIndependentFilePath(dFlag); + gspFlag = FileUtils.getPlatformIndependentFilePath(gspFlag); + sourcepathFlag = FileUtils.getPlatformIndependentFilePath(sourcepathFlag); + + classpath.add(dFlag); + classpath.add(gspFlag); + + String newpath = StringUtils.join(classpath, File.pathSeparator); + + // If these directories don't exist, make them. 
+ new File(dFlag).mkdirs(); + new File(gspFlag).mkdirs(); + + for (String lbjInputFile : lbjavaInputFileList) { + if (StringUtils.isEmpty(lbjInputFile)) { + // making the optional-compile-parameter happy. + continue; + } + + getLog().info("Calling Java edu.illinois.cs.cogcomp.lbjava.Main..."); + + lbjInputFile = FileUtils.getPlatformIndependentFilePath(lbjInputFile); + + try { + String[] args = + new String[] {"java", "-cp", newpath, + "edu.illinois.cs.cogcomp.lbjava.Main", "-d", dFlag, "-gsp", + gspFlag, "-sourcepath", sourcepathFlag, lbjInputFile}; + + ProcessBuilder pr = new ProcessBuilder(args); + pr.inheritIO(); + Process p = pr.start(); + p.waitFor(); + + } catch (Exception e) { + e.printStackTrace(); + System.out.println("Yeah, an error."); + } + } + + } } diff --git a/lbjava-mvn-plugin/src/main/java/edu/illinois/cs/cogcomp/GenerateMojo.java b/lbjava-mvn-plugin/src/main/java/edu/illinois/cs/cogcomp/GenerateMojo.java index 27d7bdb1..8940a3bf 100644 --- a/lbjava-mvn-plugin/src/main/java/edu/illinois/cs/cogcomp/GenerateMojo.java +++ b/lbjava-mvn-plugin/src/main/java/edu/illinois/cs/cogcomp/GenerateMojo.java @@ -1,15 +1,13 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp; +import edu.illinois.cs.cogcomp.lbjava.util.FileUtils; import org.apache.commons.lang.StringUtils; import org.apache.maven.plugin.AbstractMojo; import org.apache.maven.plugin.MojoExecutionException; @@ -27,62 +25,76 @@ @Mojo(name = "generate", requiresDependencyResolution = ResolutionScope.COMPILE) public class GenerateMojo extends AbstractMojo { - /** - * This will be ${project.build.outputDirectory} if not specified. - */ - @Parameter(defaultValue = "${project.build.outputDirectory}") - private String dFlag; - - /** - * This is maven default (src/main/java) if not specified. - */ - @Parameter(defaultValue = "src/main/java") - private String gspFlag; - - /** - * This is maven default (src/main/java) if not specified. - */ - @Parameter(defaultValue = "src/main/java") - private String sourcepathFlag; - - /** - * The only required parameter. - */ - @Parameter(required = true) - private String[] lbjavaInputFileList; - - @Parameter(property = "project.compileClasspathElements", required = true, readonly = true) - private List classpath; - - @Parameter(property = "project.build.outputDirectory", required = true, readonly = true) - private String outputdir; - - public void execute() throws MojoExecutionException { - - classpath.add(dFlag); - classpath.add(gspFlag); - String newpath = StringUtils.join(classpath, ":"); - - // If these directories don't exist, make them. 
- new File(dFlag).mkdirs(); - new File(gspFlag).mkdirs(); - - for (String lbjInputFile : lbjavaInputFileList) { - getLog().info("Calling Java edu.illinois.cs.cogcomp.lbjava.Main..."); - try { - String[] args = new String[] { "java", "-cp", newpath, "edu.illinois.cs.cogcomp.lbjava.Main", - "-c", "-d", dFlag, "-gsp", gspFlag, "-sourcepath", sourcepathFlag, lbjInputFile }; - - ProcessBuilder pr = new ProcessBuilder(args); - pr.inheritIO(); - Process p = pr.start(); - p.waitFor(); - - } catch (Exception e) { - e.printStackTrace(); - System.out.println("Yeah, an error."); - } - } - - } + /** + * This will be ${project.build.outputDirectory} if not specified. + */ + @Parameter(defaultValue = "${project.build.outputDirectory}") + private String dFlag; + + /** + * This is maven default (src/main/java) if not specified. + */ + @Parameter(defaultValue = "${project.basedir}/src/main/java") + private String gspFlag; + + /** + * This is maven default (src/main/java) if not specified. + */ + @Parameter(defaultValue = "${project.basedir}/src/main/java") + private String sourcepathFlag; + + /** + * The only required parameter. + */ + @Parameter(required = true) + private String[] lbjavaInputFileList; + + @Parameter(property = "project.compileClasspathElements", required = true, readonly = true) + private List classpath; + + @Parameter(property = "project.build.outputDirectory", required = true, readonly = true) + private String outputdir; + + public void execute() throws MojoExecutionException { + dFlag = FileUtils.getPlatformIndependentFilePath(dFlag); + gspFlag = FileUtils.getPlatformIndependentFilePath(gspFlag); + sourcepathFlag = FileUtils.getPlatformIndependentFilePath(sourcepathFlag); + + classpath.add(dFlag); + classpath.add(gspFlag); + + String newpath = StringUtils.join(classpath, File.pathSeparator); + + // If these directories don't exist, make them. 
+ new File(dFlag).mkdirs(); + new File(gspFlag).mkdirs(); + + for (String lbjInputFile : lbjavaInputFileList) { + if (StringUtils.isEmpty(lbjInputFile)) { + // making the optional-compile-step parameter happy. + continue; + } + + getLog().info("Calling Java edu.illinois.cs.cogcomp.lbjava.Main..."); + + lbjInputFile = FileUtils.getPlatformIndependentFilePath(lbjInputFile); + + try { + String[] args = + new String[] {"java", "-cp", newpath, + "edu.illinois.cs.cogcomp.lbjava.Main", "-c", "-d", dFlag, "-gsp", + gspFlag, "-sourcepath", sourcepathFlag, lbjInputFile}; + + ProcessBuilder pr = new ProcessBuilder(args); + pr.inheritIO(); + Process p = pr.start(); + p.waitFor(); + + } catch (Exception e) { + e.printStackTrace(); + System.out.println("Yeah, an error."); + } + } + + } } diff --git a/lbjava/README.md b/lbjava/README.md index 74e37733..d77b6f70 100644 --- a/lbjava/README.md +++ b/lbjava/README.md @@ -11,33 +11,79 @@ leaving him to reason more directly about his application. Visit each link for its content 1. [Introduction](doc/INTRO.md) 2. [Basics and definitions](doc/DEFINITIONS.md) - 3. [A working example: classifiying newsgroup documents into topics](doc/20NEWSGROUP.md) + 3. [A working example: classifying newsgroup documents into topics](doc/20NEWSGROUP.md) 4. [Syntax of LBJava](doc/LBJLANGUAGE.md) 5. [LBJava library](doc/LBJLIBRARY.md) - 6. [Installation and Commandline options](doc/INSTALLATION.md) - 7. [A working example: regression](doc/REGRESSION.md) - 8. [Learning Algorithms](doc/ALGORITHMS.md) + 6. [A working example: regression](doc/REGRESSION.md) + 7. [Learning Algorithms](doc/ALGORITHMS.md) -**Note** - -LBJava uses the Gurobi solver for inference and therefore the Gurobi library needs to be installed -prior to compilation. 
To download and install Gurobi visit [http://www.gurobi.com/](http://www.gurobi.com/) - -Make sure to include Gurobi in your PATH and LD_LIBRARY variables +## Using it as a dependency +To include LBJava in your Maven project, add the following snippet with the + `#version` entry replaced with the version listed in this project's pom.xml file. + Note that you also add to need the + `` element for the CogComp maven repository in the `` element. + +```xml + + ... + + edu.illinois.cs.cogcomp + LBJava + #version# + + ... + + ... + + + CogcompSoftware + CogcompSoftware + http://cogcomp.cs.illinois.edu/m2repo/ + + ``` - export GUROBI_HOME="PATH-TO-GUROBI/linux64" - export PATH="${PATH}:${GUROBI_HOME}/bin" - export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${GUROBI_HOME}/lib" + +## Compiling LBJava code in your projects +To compile the `.lbj` files into Java code you will need to use the [LBJava maven plugin](../lbjava-mvn-plugin/README.md). +Briefly, you need to add the `` and `` snippets into your pom.xml file: +```xml + + + CogcompSoftware + CogcompSoftware + http://cogcomp.cs.illinois.edu/m2repo/ + + +... + + + + edu.illinois.cs.cogcomp + lbjava-maven-plugin + LBJAVA-VERSION + + + lbjava/MyClassifier.lbj + + + + + ``` + +## Compiling the LBJava-core code To compile and package the LBJava code simply run: mvn install -It will create the jar package in target/LBJava-1.0.jar as well as install it in your local Maven repo. - NB: If you need to run `mvn clean` at any point, make sure to create `target/classes` directory before running `mvn compile/install` since it is required for the java-source compilation process. +## Using ILP inference +LBJava uses the solvers included in [illinois-inference](https://gitlab-beta.engr.illinois.edu/cogcomp/inference/) for +inference. We refer the interested reader to the aforementioned library, +for more details and instructions on how to install these libraries. 
+ ## Credits This project was started by [Nicholas Rizzolo](mailto:rizzolo@gmail.com). It was ported to Maven as LBJava (v.1.0) by [Christos Christodoulopoulos](mailto:christod@illinois.edu). diff --git a/lbjava/doc/20NEWSGROUP.md b/lbjava/doc/20NEWSGROUP.md index 6abcde6e..d12d239e 100644 --- a/lbjava/doc/20NEWSGROUP.md +++ b/lbjava/doc/20NEWSGROUP.md @@ -1,4 +1,8 @@ -#3 Tutorial: 20 Newsgroups +--- +title: 20NEWSGROUP +--- + +# 3. Tutorial: 20 Newsgroups We begin our discussion of the LBJava language with a tutorial that illustrates its most common usage. This tutorial is intended for a first time user of the @@ -67,10 +71,10 @@ does not include a parser for newsgroup posts, we can still make use of necessary to read text out of a file. This abstract class also provides implementations of the `reset()` and `close()` methods. The `NewsgroupParser` class in the following snippet, simply extends it to -take advantage of that functionality; it won't be necessary to override ` -reset()` or `close()`. `NewsgroupParser` takes as input a file +take advantage of that functionality; it won't be necessary to override `reset()` +or `close()`. `NewsgroupParser` takes as input a file containing the names of other files, assuming that each of those files -represents a single newgroup post. For brevity, we have hidden in `Post`'s +represents a single newsgroup post. For brevity, we have hidden in `Post`'s constructor the code that actually does the work of filling the fields of a `Post` object. @@ -187,7 +191,7 @@ extraction classifier is `BagOfWords`, the example objects come from `NewsgroupParser`, and the learning algorithm is `SparseNetworkLearner`. We explore each of these ideas in more detail below. 
- - `learn`: We say that `NewsgroupClassifier is trying to mimic `NewsgroupLabel` + - `learn`: We say that `NewsgroupClassifier` is trying to mimic `NewsgroupLabel` because it will attempt to return features with the same values and for the same example objects that `NewsgroupLabel` would have returned them. Note that the particular feature values being returned have @@ -234,12 +238,12 @@ change the names of our classifiers in that case for clarity's sake.) ## 3.3 Using `NewsgroupClassifier` in a Java Program Now that we’ve specified a learned classifier, the next step is to write a pure Java application that will use it once it’s been trained. This section first introduces the methods every automatically -generated LBJ classifier makes available within pure Java code. These methods comprise a simple +generated LBJava classifier makes available within pure Java code. These methods comprise a simple interface for predicting, online learning, and testing with a classifier. ### 3.3.1 Getting Started -We assume here that all learning will take place during the LBJ compilation phase, which we’ll +We assume here that all learning will take place during the LBJava compilation phase, which we’ll discuss in Section 3.4. (It is also possible to learn online, i.e. while the application is running, which we’ll discuss in Section 3.3.3.) To gain access to the learned classifier within your Java program, simply instantiate an object of the classifier’s generated class, which has the same name @@ -262,7 +266,7 @@ training. And that’s it! The programmer is now free to use the classifier’s s/he chooses. There's one important technical point to be aware of here. The instance we just created of -class `NewsgroupClassifier` above does not actually contain the model that LBJ learned for us. +class `NewsgroupClassifier` above does not actually contain the model that LBJava learned for us. It is merely a "clone" object that contains internally a reference to the real classifier. 
Thus, if our Java application creates instances of this class in different places and performs any operation that modifies the behavior of the classifier (like online learning), all instances will appear to be @@ -291,7 +295,7 @@ for (Score score : scoresArray) System.out.println("prediction: " + score.value + ", score: " + score.score); ``` -Finally, LBJ also lets you define real valued classifiers which return doubles in the Java +Finally, LBJava also lets you define real valued classifiers which return doubles in the Java application. If you have such a classifier, you can retreive its prediction on an example object by calling the `realValue(Object)` method: @@ -301,8 +305,8 @@ double prediction = realClassifier.realValue(someExampleObject); ### 3.3.3 Learning -As mentioned above, most classifiers are learned during the LBJ phase of compilation (see -Section 3.4 below). In addition, a classifier generated by the LBJ compiler can also continue +As mentioned above, most classifiers are learned during the LBJava phase of compilation (see +Section 3.4 below). In addition, a classifier generated by the LBJava compiler can also continue learning from labeled examples in the Java application. Since `NewsgroupClassifier` takes a `Post` object as input, we merely have to get our hands on such an object, stick the label in the newsgroup field (since that’s where the `NewsgroupLabel` classifier will look for it), and pass it @@ -312,7 +316,7 @@ Now that we know how to get our classifier to learn, let’s see how to make it The contents of a classifier can be completely cleared out by calling the `forget()` method. After this method is called, the classifier returns to the state it was in before it observed any training examples. One reason to forget everything a classifier has learned is to try new learning -algorithm parameters (e.g. learning rates, thresholds, etc.). All LBJ learning algorithms provide +algorithm parameters (e.g. learning rates, thresholds, etc.). 
All LBJava learning algorithms provide an inner class named `Parameters` that contains default settings for all their parameters. Simply instantiate such an object, overwrite the parameters that need to be updated, and call the `setParameters(Parameters)` method. For example: @@ -342,10 +346,10 @@ classifier.save(); ``` This operation overwrites the model and lexicon files that were originally generated by the -LBJ compiler. A model file stores the values of the learned parameters (not to be confused +LBJava compiler. A model file stores the values of the learned parameters (not to be confused with the manually set learning algorithm parameters mentioned above). A lexicon file stores the classifier’s feature index, used for quick access to the learnable parameters when training for -multiple rounds. These files are written by the LBJ compiler and by the `save()` method (though +multiple rounds. These files are written by the LBJava compiler and by the `save()` method (though only initially; see below) in the same directory where the `NewsgroupClassifier.class` file is written. @@ -363,50 +367,50 @@ It has its own completely independent learnable parameters. Furthermore, if `myM creates them. Either way, we can now train our classifier however we choose and then simply call `c2.save()` to save everything into those files. -## 3.4 Compiling Our Learning Based Program with LBJ +## 3.4 Compiling Our Learning Based Program with LBJava Referring once again to this [newsgroup classifier’s source distribution](http://cogcomp.cs.illinois.edu/software/20news.tgz), we first examine our chosen directory structure starting from the root directory of the distribution. 
-```java +``` $ ls -20news.lbj class lbj test.sh +20news.LBJava class LBJava test.sh README data src train.sh $ ls src/dssi/news NewsgroupParser.java NewsgroupPrediction.java Post.java ``` -We see there is an LBJ source file `20news.lbj` in the root directory, and in `src/dssi/news` +We see there is an LBJava source file `20news.LBJava` in the root directory, and in `src/dssi/news` we find plain Java source files implementing our internal representation (`Post.java`), a parser that instantiates our internal representation (`NewsgroupParser.java`), and a program intended use our trained classifier to make predictions about newsgroups (`NewsgroupPrediction.java`). -Note that the LBJ source file and all these plain Java source files declare `package dssi.news;`. -The root directory also contains two directories `class` and `lbj` which are initially empty. They -will be used to store all compiled Java class files and all Java source files generated by the LBJ +Note that the LBJava source file and all these plain Java source files declare `package dssi.news;`. +The root directory also contains two directories `class` and `LBJava` which are initially empty. They +will be used to store all compiled Java class files and all Java source files generated by the LBJava compiler respectively. Keeping all these files in separate directories is not a requirement, but many developers find it useful to reduce clutter around the source files they are editing -To compile the LBJ source file using all these directories as intended, we run the following +To compile the LBJava source file using all these directories as intended, we run the following command: ``` $ java -Xmx512m -cp $CLASSPATH:class LBJ2.Main \ -sourcepath src \ - -gsp lbj \ + -gsp LBJava \ -d class \ - 20news.lbj + 20news.LBJava ``` -This command runs the LBJ compiler on `20news.lbj`, generating a new Java source file -for each of the classifiers declared therein. 
Since `20news.lbj` mentions both the `Post` and +This command runs the LBJava compiler on `20news.LBJava`, generating a new Java source file +for each of the classifiers declared therein. Since `20news.LBJava` mentions both the `Post` and `NewsgroupParser` classes, their definitions (either compiled class files or their original source files) must be available within a directory structure that mirrors their package names. We -have provided their source files using the `-sourcepath src` command line flag. The `-gsp lbj` -(generated source path) flag tells LBJ to put the new Java source files it generates in the `lbj` -directory, and the `-d class` flag tells LBJ to put class files in the `class` directory. For more -information on the LBJ compiler’s command line usage, see Chapter 6. +have provided their source files using the `-sourcepath src` command line flag. The `-gsp LBJava` +(generated source path) flag tells LBJava to put the new Java source files it generates in the `LBJava` +directory, and the `-d class` flag tells LBJava to put class files in the `class` directory. For more +information on the LBJava compiler’s command line usage, see Chapter 6. But the command does more than that; it also trains any learning classifiers on the specified training data, so that the compiled class files are ready to be used in new Java programs just @@ -432,7 +436,7 @@ Compiling generated code The compiler tells us which classifiers it is generating code for and which it is training. Because we have specified `progressOutput 20000` in `NewsgroupClassifier`’s specification (see -the distribution’s `20news.lbj` file), we also get messages updating us on the progress being +the distribution’s `20news.LBJava` file), we also get messages updating us on the progress being made during training. 
We can see here that the first stage of training is a “pre-extraction” stage in which a feature index is compiled, and all `Post` objects in our training set are converted to feature vectors based on the index. Then the classifier is trained over those vectors for 40 rounds. @@ -441,7 +445,7 @@ The entire process should take under 2 minutes on a modern machine. If you’re curious, you can also look at the files that have been generated: ``` -$ ls lbj/dssi/news +$ ls LBJava/dssi/news BagOfWords.java NewsgroupClassifier.java NewsgroupClassifier.ex NewsgroupLabel.java $ ls class/dssi/news @@ -452,7 +456,7 @@ NewsgroupClassifier.lc Post.class NewsgroupClassifier.lex ``` -The lbj directory now contains a `dssi/news` subdirectory containing our classifier’s Java +The LBJava directory now contains a `dssi/news` subdirectory containing our classifier’s Java implementations, as well as the pre-extracted feature vectors in the `NewsgroupClassifier.ex` file. In the `class/dssi/news` directory, we find the class files compiled from all our hard-coded and generated Java source files, as well as `NewsgroupClassifier.lc` and `NewsgroupClassifier`. @@ -469,7 +473,7 @@ $ javac -cp $CLASSPATH:class \ src/dssi/news/NewsgroupPrediction.java ``` -Notice that the command line flags we gave to the LBJ compiler previously are very similar to +Notice that the command line flags we gave to the LBJava compiler previously are very similar to those we give the Java compiler now. We can test out our new program like this: ``` @@ -492,7 +496,7 @@ going well. ## 3.5 Testing a Discrete Classifier -When a learned classifier returns discrete values, LBJ provides the handy `TestDiscrete` +When a learned classifier returns discrete values, LBJava provides the handy `TestDiscrete` class for measuring the classifier’s prediction performance. This class can be used either as a standalone program or as a library for use inside a Java application. 
In either case, we’ll need to provide `TestDiscrete` with the following three items: @@ -536,7 +540,7 @@ At the bottom of the table will always be the overall accuracy of the classifier screen shot 2015-11-17 at 3 46 18 am The `TestDiscrete` class also supports the notion of a null label, which is a label intended to - represent the absense of a prediction. The 20 Newsgroups task doesn’t make use of this concept, + represent the absence of a prediction. The 20 Newsgroups task doesn’t make use of this concept, but if our task were, e.g., named entity classification in which every phrase is potentially a named entity, then the classifier will likely output a prediction we interpret as meaning “this phrase is not a named entity.” In that case, we will also be interested in overall precision, recall, and F1 @@ -552,28 +556,28 @@ Alternatively, we can call `TestDiscrete` from within our Java application. This if our parser’s constructor isn’t so simple, or when we’d like to do further processing with the performance numbers themselves. The simplest way to do so is to pass instances of our classifier, labeler, and parser to `TestDiscrete`, like this: + +```java +NewsgroupLabel oracle = new NewsgroupLabel(); +Parser parser = new NewsgroupParser("data/20news.test"); +TestDiscrete tester = TestDiscrete.testDiscrete(classifier, oracle, parser); +tester.printPerformance(System.out); +``` - ```java - NewsgroupLabel oracle = new NewsgroupLabel(); - Parser parser = new NewsgroupParser("data/20news.test"); - TestDiscrete tester = TestDiscrete.testDiscrete(classifier, oracle, parser); - tester.printPerformance(System.out); - ``` - - This Java code does exactly the same thing as the command line above. We can also - exert more fine grained control over the computed statistics. Starting from a new instance of - `TestDiscrete`, we can call `reportPrediction(String,String)` every time we acquire both a - prediction value and a label. 
Then we can either call the `printPerformance(PrintStream)` - method to produce the standard output in table form or any of the methods whose names start - with `get` to retrieve individual statistics. The example code below retrieves the overall precision, - recall, F1, and accuracy measures in an array. +This Java code does exactly the same thing as the command line above. We can also +exert more fine grained control over the computed statistics. Starting from a new instance of +`TestDiscrete`, we can call `reportPrediction(String,String)` every time we acquire both a +prediction value and a label. Then we can either call the `printPerformance(PrintStream)` +method to produce the standard output in table form or any of the methods whose names start +with `get` to retrieve individual statistics. The example code below retrieves the overall precision, +recall, F1, and accuracy measures in an array. - ```java - TestDiscrete tester = new TestDiscrete(); - ... - tester.reportPrediction(classifier.discreteValue(ngPost), - oracle.discreteValue(ngPost)); - ... - double[] performance = tester.getOverallStats(); - System.out.println("Overall Accuracy: " + performance[3]); - ``` +```java +TestDiscrete tester = new TestDiscrete(); +... +tester.reportPrediction(classifier.discreteValue(ngPost), + oracle.discreteValue(ngPost)); +... +double[] performance = tester.getOverallStats(); +System.out.println("Overall Accuracy: " + performance[3]); +``` diff --git a/lbjava/doc/ALGORITHMS.md b/lbjava/doc/ALGORITHMS.md index c78cdbc5..1aa083ab 100644 --- a/lbjava/doc/ALGORITHMS.md +++ b/lbjava/doc/ALGORITHMS.md @@ -1 +1,98 @@ -# Training Algorithms +--- +title: ALGORITHMS +--- + +# Learning Algorithms + +Here is a list of learning algorithms in LBJava. 
+ +### Classification + +* [AdaBoost](https://github.com/IllinoisCogComp/lbjava/blob/master/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/AdaBoost.java) +* [AdaGrad]() +* [Binary MIRA](https://github.com/IllinoisCogComp/lbjava/blob/master/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/BinaryMIRA.java) +* [Mux Learner](https://github.com/IllinoisCogComp/lbjava/blob/master/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/MuxLearner.java) +* [Naive Bayes](https://github.com/IllinoisCogComp/lbjava/blob/master/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/NaiveBayes.java) +* [Passive Aggressive](https://github.com/IllinoisCogComp/lbjava/blob/master/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/PassiveAggressive.java) +* [Sparse Averaged Perceptron](https://github.com/IllinoisCogComp/lbjava/blob/master/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseAveragedPerceptron.java) +* [Sparse Confidence Weighted](https://github.com/IllinoisCogComp/lbjava/blob/master/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseConfidenceWeighted.java) +* [Sparse MIRA](https://github.com/IllinoisCogComp/lbjava/blob/master/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseMIRA.java) +* [Support Vector Machine](https://github.com/IllinoisCogComp/lbjava/blob/master/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SupportVectorMachine.java) +* [Sparse Perceptron](https://github.com/IllinoisCogComp/lbjava/blob/master/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparsePerceptron.java) +* [Sparse Winnow](https://github.com/IllinoisCogComp/lbjava/blob/master/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseWinnow.java) +* [Stochastic Gradient Descent]() + + + +### Regression + +* [AdaGrad](https://github.com/IllinoisCogComp/lbjava/blob/master/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/AdaGrad.java) +* [Stochastic Gradient 
Descent](https://github.com/IllinoisCogComp/lbjava/blob/master/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/StochasticGradientDescent.java) + +### Class Architecture Structure + +* [`Learner`](https://github.com/IllinoisCogComp/lbjava/blob/master/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Learner.java) (abstract class) + * [`LinearThresholdUnit`](https://github.com/IllinoisCogComp/lbjava/blob/master/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/LinearThresholdUnit.java) (abstract class) + * [`PassiveAggressive`](https://github.com/IllinoisCogComp/lbjava/blob/master/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/PassiveAggressive.java) + * [`SparsePerceptron`](https://github.com/IllinoisCogComp/lbjava/blob/master/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparsePerceptron.java) + * [`BinaryMIRA`](https://github.com/IllinoisCogComp/lbjava/blob/master/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/BinaryMIRA.java) + * [`SparseAveragedPerceptron`](https://github.com/IllinoisCogComp/lbjava/blob/master/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseAveragedPerceptron.java) + * [`SparseConfidenceWeighted`](https://github.com/IllinoisCogComp/lbjava/blob/master/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseConfidenceWeighted.java) + * [`SparseWinnow`](https://github.com/IllinoisCogComp/lbjava/blob/master/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseWinnow.java) + * [`AdaBoost`](https://github.com/IllinoisCogComp/lbjava/blob/master/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/AdaBoost.java) + * [`AdaGrad`]() + * [`MuxLearner`](https://github.com/IllinoisCogComp/lbjava/blob/master/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/MuxLearner.java) + * [`NaiveBayes`](https://github.com/IllinoisCogComp/lbjava/blob/master/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/NaiveBayes.java) + * 
[`SparseMIRA`](https://github.com/IllinoisCogComp/lbjava/blob/master/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseMIRA.java) + * [`SupportVectorMachine`](https://github.com/IllinoisCogComp/lbjava/blob/master/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SupportVectorMachine.java) + * [`SparseNetworkLearner`](https://github.com/IllinoisCogComp/lbjava/blob/master/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseNetworkLearner.java) + * [`MultiLabelLearner`](https://github.com/IllinoisCogComp/lbjava/blob/master/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/MultiLabelLearner.java) + +### Note on Binary & Multiclass Classification + +##### Please use [`SparseNetworkLearner`](https://github.com/IllinoisCogComp/lbjava/blob/master/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseNetworkLearner.java) for both binary and multiclass classification. + +##### Please avoid using learning algorithms, such as [`SparseWinnow`](https://github.com/IllinoisCogComp/lbjava/blob/master/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseWinnow.java), [`SparsePerceptron`](https://github.com/IllinoisCogComp/lbjava/blob/master/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparsePerceptron.java), and [`SparseAveragedPerceptron`](https://github.com/IllinoisCogComp/lbjava/blob/master/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseAveragedPerceptron.java) directly. + +The code snippets below demonstrate how to use learning algorithms inside [`SparseNetworkLearner`](https://github.com/IllinoisCogComp/lbjava/blob/master/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseNetworkLearner.java) programmatically, and how to set parameters accordingly. 
+ +#### Declarations in `.lbj` file, with only `SparseNetworkLearner` +```java +discrete SparseNetworkClassifier(Post post) <- + learn NewsgroupLabel + using BagOfWords + + with SparseNetworkLearner {} + +end +``` + +#### Declarations in `.lbj` file, with `SparseAveragedPerceptron` inside `SparseNetworkLearner` +```java +discrete SAPClassifier(Post post) <- + learn NewsgroupLabel + using BagOfWords + + + with SparseNetworkLearner { + SparseAveragedPerceptron.Parameters p = new SparseAveragedPerceptron.Parameters(); + p.learningRate = .1; + p.thickness = 3; + baseLTU = new SparseAveragedPerceptron(p); + } + +end +``` + +### Programmatically use `SparseAveragedPerceptron` inside `SparseNetworkLearner` +```java +SparseNetworkClassifier swn = new SparseNetworkClassifier(); +SparseNetworkLearner.Parameters snp = new SparseNetworkLearner.Parameters(); +SparseAveragedPerceptron sap = new SparseAveragedPerceptron(); +SparseAveragedPerceptron.Parameters sapp = new SparseAveragedPerceptron.Parameters(); +sapp.learningRate = .1; +sapp.thickness = 3; +sap.setParameters(sapp); +snp.baseLTU = sap; +swn.setParameters(snp); +``` \ No newline at end of file diff --git a/lbjava/doc/DEFINITIONS.md b/lbjava/doc/DEFINITIONS.md index bf92d272..c49e3229 100644 --- a/lbjava/doc/DEFINITIONS.md +++ b/lbjava/doc/DEFINITIONS.md @@ -1,4 +1,8 @@ -#2 LBJava Definitions +--- +title: DEFINITIONS +--- + +# 2. LBJava Definitions The terms defined below are used throughout this manual. Their definitions have been carefully formulated to facilitate the design of a modeling language diff --git a/lbjava/doc/INSTALLATION.md b/lbjava/doc/INSTALLATION.md deleted file mode 100644 index 4b0b628e..00000000 --- a/lbjava/doc/INSTALLATION.md +++ /dev/null @@ -1,188 +0,0 @@ -# 6. Installation and Command Line Usage - -## 6.1 Installation - -LBJ is written entirely in Java - almost. 
The Java Native Interface (JNI) is utilized to interface -with the GNU Linear Programming Kit (GLPK) which is used to perform inference (see -Section 5.3.2), requiring a small amount of C to complete the connection. This C code must be -compiled as a library so that it can be dynamically linked to the JVM at run-time in any application -that uses inference. Thus, the GNU Autotools became a natural choice for LBJ’s build -system. More information on building and installing LBJ from its source code is presented below. -On the other hand, some users’ applications may not require LBJ’s automated inference -capabilities. In this case, installation is as easy as downloading two jar files from the Cognitive -Computation Group’s website1 and adding them to your `CLASSPATH` environment variable. - -`LBJ2.jar` contains the classes implementing the LBJ compiler. `LBJ2Library.jar` contains the -library classes. If this is your chosen method of installation, you may safely skip to the section -on command line usage below. - -Alternatively, the source code for both the compiler and the library can be downloaded from -the same web site. Download the file `lbj-2.x.x.tar.gz` and unpack it with the following command: - -``` -tar zxf lbj-2.x.x.tar.gz -``` - -The `lbj-2.x.x` directory is created, and all files in the package are placed in that directory. -Of particular interest is the file `configure`. This is a shell script designed to automatically -detect pertinent parameters of your system and to create a set of makefiles that builds LBJ with -respect to those parameters. In particular, this script will detect whether or not you have GLPK -installed. If you do, LBJ will be compiled with inference enabled. -(GLPK is a separate software package that must be downloaded, compiled, and installed before LBJ is configured -in order for LBJ to make use of it.) 
-The `configure` script itself was built automatically by the GNU Autotools, -but you will not need them installed on your -system to make use of it. - -By default, the `configure` script will create makefiles that intend to install LBJ’s JNI libraries -and headers in system directories such as `/usr/local/lib` and `/usr/local/include`. If -you have root privileges on your system, this will work just fine. Otherwise, it will be necessary -to use `configure`’s `--prefix` command line option. For example, running configure -with `--prefix=$HOME` will create makefiles that install LBJ’s libraries and headers in similarly -named subdirectories of your user account’s root directory, such as `~/lib` and `~/include`. The -configure script has many other options as well. Use --help on the command line for more -information. -If you choose to use the `--prefix` command line option, then it is a reasonable assumption -that you also used it when building and installing GLPK. In that case, the following environment -variables must be set before running LBJ’s `configure` script. `CPPFLAGS` is used to supply -command line parameters to the C preprocessor. We will use it to add the directory where -the GLPK headers were installed to the include path. `LDFLAGS` is used to supply command -line parameters to the linker. We will use it to add the directory where the GLPK library was -installed to the list of paths that the linker will search in. For example, in the `bash` shell: -``` -export CPPFLAGS=-I$HOME/include -export LDFLAGS=-L$HOME/lib -``` -or in `csh`: -``` -setenv CPPFLAGS -I${HOME}/include -setenv LDFLAGS -L${HOME}/lib -``` -The last step in making sure that inference will be enabled is to ensure that the file `jni.h` -is on the include path for the C preprocessor. This file comes with your JVM distribution. 
It is -often installed in a standard location already, but if it isn’t, we must set `CPPFLAGS` in such a way -that it adds all the paths we are interested in to the include path. For example, in the `bash` shell: -``` -export JVMHOME=/usr/lib/jvm/java-6-sun -export CPPFLAGS="$CPPFLAGS -I$JVMHOME/include" -export CPPFLAGS="$CPPFLAGS -I$JVMHOME/include/linux" -``` -or in `csh`: -``` -setenv JVMHOME /usr/lib/jvm/java-6-sun -setenv CPPFLAGS "${CPPFLAGS} -I${JVMHOME}/include" -setenv CPPFLAGS "${CPPFLAGS} -I${JVMHOME}/include/linux" -``` -At long last, we are ready to build and install LBJ with the following command: -``` -./configure --prefix=$HOME && make && make install -``` -If all goes well, you will see a message informing you that a library has been installed and that -certain extra steps may be necessary to ensure that this library can be used by other programs. -Follows these instructions. Also, remember to add the `lbj-2.x.x` directory to your `CLASSPATH` -environment variable. -LBJ’s makefile also contains rules for creating the jars that are separately downloadable from -the website and for creating the Javadoc documentation for both compiler and library. To create - -the jars, simply type `make jars`. To create the Javadoc documentation, you must first set the -environment variable `LBJ2 DOC` equal to the directory in which you would like the documentation -created. Then type make doc. -Finally, users of the VIM editor may be interested in lbj.vim, the LBJ syntax highlighting -file provided in the tar ball. If you have not done so already, create a directory named `.vim` -in your home directory. In that directory, create a file named `filetype.vim` containing the -following text: -``` -if exists("did_load_filetypes") - finish -endif -augroup filetypedetect - au! BufRead,BufNewFile *.lbj setf lbj -augroup END -``` -Then create the subdirectory `.vim/syntax` and place the provided `lbj.vim` file in that subdirectory. 
-Now, whenever VIM edits a file whose extension is `.lbj`, LBJ syntax highlighting will be -enabled. - - -## 6.2 Command Line Usage -The LBJ compiler is itself written in Java. It calls `javac` both to compile classes that its source -file depends on and to compile the code it generates. Its command line usage is as follows: -``` -java LBJ2.Main [options] -``` -where `[options]` is zero or more of the following: - -| `options` | Description | -| :------------: |:-------------| -| `-c` | Compile only: This option tells LBJ2 to translate the given source to Java, but not to compile the generated Java sources or do any training. | -| `-d ` | `Any class files generated during compilation will be written in the specified directory, just like javac’s -d command line parameter.| -| `-j ` | Sends the contents of `` to javac as command line arguments while compiling. Don’t forget to put quotes around `` if there is more thanone such argument or if the argument has a parameter. | -| `-t ` | Enables progress output during training of learning classifiers. A message containing the date and time will be printed to STDOUT after every `` training objects have been processed. | -| `-v` | Prints the version number and exits. | -| `-w` | Disables the output of warning messages. | -| `-x` | Clean: This option deletes all files that would have been generated otherwise. No new code is generated, and no training takes place. | -| `-gsp ` | LBJ will potentially generate many Java source files. Use this option to have LBJ write them to the specified directory instead of the current directory. `` must already exist. Note that LBJ will also compile these files which can result in even more class files than there were sources. Those class files will also be written in `` unless the `-d` command line parameter is utilized as well. 
| -| `-sourcepath ` | If the LBJ source depends on classes whose source files cannot be found on the user’s classpath, specify the directories where they can be found using this parameter. It works just like javac’s `-sourcepath` command line parameter. | -| `--parserDebug` | Debug: This option enables debugging output during parsing. | -| `--lexerOutput` | Lexer output: With this option enabled, the lexical token stream will be printed, after which the compiler will terminate. | -| `--parserOutput` | Parser output: With this option enabled, the parsed abstract syntax tree will be printed, after which the compiler will quit. | -| `--semanticOutput` | Semantic analysis output: With this option enabled, some information computed by semantic analysis will be printed, after which the compiler will quit. | - - -By default, all files generated by LBJ will be created in the same directory in which the -LBJ source file is found. To place generated Java sources in a different directory, use the `-gsp` -(or `-generatedsourcepath`) command line option. The lexicon and example files described in -Section 4.1.2.6 are also placed in the directory specified by this option. In addition, the generated -sources’ class files will be created in that directory unless the `-d` command line option is also -specified. This option places all generated class files in the specified directory, just like javac’s --d option. The “learning classifier” file with extension `.lc` (also discussed in Section 4.1.2.6) -will also be placed in the directory specified by the `-d` option. Another option similar to javac -is the `-sourcepath` option for specifying extra directories in which Java source files are found. -Both the `-d` and `-sourcepath` options should be given directly to LBJ if they are given at all. -Do not specify them inside LBJ’s `-j` option. Finally, LBJ does not offer a `-classpath` option. -Simply give this parameter to the JVM instead. 
-For example, say an employee of the XYZ company is building a new software package called -ABC with the help of LBJ. This is a large project, and compiling the LBJ source file will generate -many new Java sources. She places her LBJ source file in a new working directory along -side three new subdirectories: `src`, `class`, and `lbj`. -``` -$ ls -abc.lbj src/ class/ lbj/ -``` -Next, since all the source files in the ABC application will be part of the `com.xyz.abc` package, -she creates the directory structure `com/xyz/abc` as a subdirectory of the src directory. Application -source files are then placed in the `src/com/xyz/abc` directory. Next, at the top of her LBJ -source file she writes the line `package com.xyz.abc;`. Now she is ready to run the following -commands: -``` -$ java -cp $CLASSPATH:class LBJ2.Main -sourcepath src -gsp lbj -d class abc.lbj -. . . -$ javac -classpath $CLASSPATH:class -sourcepath lbj:src -d class src/com/xyz/abc/*.java -$ jar cvf abc.jar -C class com -``` -The first command creates the `com/xyz/abc` directory structure in both of the lbj and class -directories. LBJ then generates new Java sources in the `lbj/com/xyz/abc` directory and class -files in the `class/com/xyz/abc` directory. Now that the necessary classifiers’ implementations -exist, the second command compiles the rest of the application. Finally, the last command prepares -a jar file containing the entire ABC application. Users of ABC need only add `abc.jar` to -their `CLASSPATH`. -There are two other JVM command line parameters that will be of particular interest to -programmers working with large datasets. Both increase the amount of memory that Java is -willing to utilize while running. The first is `-Xmx` which sets the maximum Java heap size. -It should be set as high as possible, but not so high that it causes page-faults for the JVM or -for some other application on the same computer. 
This value must be a multiple of `1024` greater -than 2MB and can be specified in kilobytes (`K`, `k`), megabytes (`M`, `m`), or gigabytes (`G`, `g`). -The second is `-XX:MaxPermSize=` which sets the maximum size of the permanent generation. -This is a special area of the heap which stores, among other things, canonical representations -for the `String`s in a Java application. Since a learned classifier can contain many `String`s, it -may be necessary to set it higher than the default of 64 MB. For more information about the heap -and garbage collection, see [here](http://java.sun.com/docs/hotspot/gc5.0/gc) tuning 5.html. -With these two command line parameters, a typical LBJ compiler command line might look -like: -``` -java -Xmx512m -XX:MaxPermSize=512m LBJ2.Main Test.lbj -``` -When it is necessary to run the compiler with these JVM settings, it will also be necessary to -run the application that uses the generated classifiers with the same or larger settings. - - diff --git a/lbjava/doc/INTRO.md b/lbjava/doc/INTRO.md index 0da125e1..7bf5a70d 100644 --- a/lbjava/doc/INTRO.md +++ b/lbjava/doc/INTRO.md @@ -1,4 +1,8 @@ -#1 Introduction +--- +title: INTRO +--- + +# 1. Introduction Learning Based Java is a modeling language for the rapid development of software systems with one or more learned functions, designed for use with the @@ -8,7 +12,7 @@ programmer's application. With LBJava, the details of feature extraction, learning, model evaluation, and inference are all abstracted away from the programmer, leaving him to reason more directly about his application. -##1.1 Motivation +## 1.1 Motivation Many software systems are in need of functions that are simple to describe but that no one knows how to implement. Recently, more and more designers of such @@ -32,7 +36,7 @@ applied in the wild, the independent predictions made by each function are reconciled according to user specified constraints. 
This approach has been applied successfully to complicated domains such as Semantic Role Labeling. -##1.2 LBJava +## 1.2 LBJava Learning Based Java (LBJava) is a modeling language that expedites the development of learning based programs, designed for use with the JavaTM diff --git a/lbjava/doc/LBJLANGUAGE.md b/lbjava/doc/LBJLANGUAGE.md index 8d1d3d92..d429b4c0 100644 --- a/lbjava/doc/LBJLANGUAGE.md +++ b/lbjava/doc/LBJLANGUAGE.md @@ -1,21 +1,25 @@ -# 4 The LBJ Language -Now that we have defined the building blocks of classifier computation, we next describe LBJ’s +--- +title: LBJava Language +--- + +# 4. The LBJava Language +Now that we have defined the building blocks of classifier computation, we next describe LBJava’s syntax and semantics for programming with these building blocks. -Like a Java source file, an LBJ source file begins with an optional package declaration and +Like a Java source file, an LBJava source file begins with an optional package declaration and an optional list of import declarations. Next follow the definitions of classifiers, constraints, and -inferences. Each will be translated by the LBJ compiler into a Java class of the same name. If +inferences. Each will be translated by the LBJava compiler into a Java class of the same name. If the package declaration is present, those Java classes will all become members of that package. -Import declarations perform the same function in an LBJ source file as in a Java source file. +Import declarations perform the same function in an LBJava source file as in a Java source file. ## 4.1 Classifiers -In LBJ, a classifier can be defined with Java code or composed from the definitions of other +In LBJava, a classifier can be defined with Java code or composed from the definitions of other classifiers using special operators. As such, the syntax of classifier specification allows the programmer to treat classifiers as expressions and assign them to names. 
This section defines the syntax of classifier specification more precisely, including the syntax of classifiers learned from -data. It also details the behavior of the LBJ compiler when classifiers are specified in terms of -training data and when changes are made to an LBJ source file. +data. It also details the behavior of the LBJava compiler when classifiers are specified in terms of +training data and when changes are made to an LBJava source file. ### 4.1.1 Classifier Declarations @@ -59,9 +63,9 @@ keyword will not load their (potentially large) internal representations from di more information about learning classifiers. -Semantically, every named classifier is a static method. In an LBJ source file, references to +Semantically, every named classifier is a static method. In an LBJava source file, references to classifiers are manipulated and passed to other syntactic constructs, similarly to a functional -programming language. The LBJ compiler implements this behavior by storing a classifier’s definition +programming language. The LBJava compiler implements this behavior by storing a classifier’s definition in a static method of a Java class of the same name and providing access to that method through objects of that class. As we will see, learning classifiers are capable of modifying their definition, and by the semantics of classifier declarations, these modifications are local to the @@ -70,7 +74,7 @@ continues to train a learning classifier on-line, the changes are immediately vi every object of the classifier’s class. Figure 4.1 gives several examples of classifier declarations. These examples illustrate some -key principles LBJ. First, the features produced by a classifier are either discrete or real. If a +key principles LBJava. First, the features produced by a classifier are either discrete or real. If a feature is discrete, the set of allowable values may optionally be specified, contained in curly braces. 
Any literal values including `int`s, `String`s, and `boolean`s may be used in this set. (Internally, they’ll all be converted to `String`s.) @@ -178,7 +182,7 @@ source file, it may occur anywhere in that source file (in other words, a classi defined before it is used). If the named classifier has an external declaration it must either be fully qualified (e.g., `myPackage.myClassifier`) or it must be imported by an import declaration at the top of the source file. The class file or Java source file containing the implementation -of an imported classifier must exist prior to running the LBJ compiler on the source file that +of an imported classifier must exist prior to running the LBJava compiler on the source file that imports it. ### 4.1.2.2 Method Bodies @@ -190,7 +194,7 @@ then the `return` statement’s expression must evaluate to a double. Otherwise, anything - even an object - and the resulting value will be converted to a `String`. Each method body takes its argument and feature `return` type from the header of the classifier declaration it is contained in (except when in the presence of a classifier cast expression, discussed in Section -4.1.2.3). For more information on method bodies in LBJ, see Section 4.1.3. +4.1.2.3). For more information on method bodies in LBJava, see Section 4.1.3. #### 4.1.2.3 Classifier Cast Expressions @@ -209,7 +213,7 @@ end ``` Of course, we can see that the hard-coded classifier defined on the fly in this example returns -a discrete (`boolean`) value. Without the cast in front of this method body, the LBJ compiler +a discrete (`boolean`) value. Without the cast in front of this method body, the LBJava compiler would have assumed it to have a real valued feature return type, and an error would have been produced. 
When a classifier cast expression is applied to a classifier expression that contains other @@ -221,7 +225,7 @@ classifier expressions, the cast propagates down to those classifier expressions A conjunction is written with the double ampersand operator (`&&`) in between two classifier expressions (see Figure 4.1 for an example). The conjunction of two classifiers results in a new classifier that combines the values of the features returned by its argument classifiers. The nature -of the combination depends on the feature return types of the argument classifiers. Table bellow +of the combination depends on the feature return types of the argument classifiers. Table below enumerates all possibilities and gives the feature return type of the resulting conjunctive classifier. @@ -242,7 +246,7 @@ two argument classifiers. #### 4.1.2.5 Composite Generators -“Composite generator” is LBJ terminology for a comma separated list of classifier expressions. +“Composite generator” is LBJava terminology for a comma separated list of classifier expressions. When classifier expressions are listed separated by commas, the result is a feature generator that simply returns all the features returned by each classifier in the list. @@ -270,13 +274,13 @@ classifier expression in the `using` clause does all the feature extraction on e both training and evaluation. It will often be a composite generator. The instance creation expression in the `from` clause should create an object of a class that -implements the `LBJ2.parser.Parser` interface in the library (see Section 5.4.1). This clause -is optional. If it appears, the LBJ compiler will automatically perform training on the learner +implements the `parser.Parser` interface in the library (see Section 5.4.1). This clause +is optional. If it appears, the LBJava compiler will automatically perform training on the learner represented by this learning classifier expression at compile-time. 
Whether it appears or not, the programmer may continue training the learner on-line in the application via methods defined in -`LBJ2.learn.Learner` in the library (see Section 5.2.1). +`learn.Learner` in the library (see Section 5.2.1). -When the `from` clause appears, the LBJ compiler retrieves objects from the specified parser +When the `from` clause appears, the LBJava compiler retrieves objects from the specified parser until it finally returns `null`. One at a time, the feature extraction classifier is applied to each object, and the results are sent to the learning algorithm for processing. However, many learning algorithms perform much better after being given multiple opportunities to learn from each @@ -285,7 +289,7 @@ specifies a number of rounds, or the number of passes over the training data to the classifier during training. The instance creation expression in the `with` clause should create an object of a class derived -from the `LBJ2.learn.Learner` class in the library. This clause is also optional. If it appears, the +from the `learn.Learner` class in the library. This clause is also optional. If it appears, the generated Java class implementing this learning classifier will be derived from the class named in the `with` clause. Otherwise, the default learner for the declared return type of this learning classifier will be substituted with default parameter settings. @@ -296,18 +300,18 @@ evaluation, predicts the label for which it computes the highest score. However, the `valueOf(Object, java.util.Collection)` method which restricts the prediction to one of the labels in the specified collection. In the application, it’s easy enough to call this method in place of `discreteValue(Object)` (discussed in Section 5.1.1), but when this classifier is invoked -elsewhere in an LBJ source file, it translates to an invocation of `discreteValue(Object)`. The +elsewhere in an LBJava source file, it translates to an invocation of `discreteValue(Object)`. 
The evaluate clause (e.g., `evaluate valueOf(o, MyClass.getCollection())`) changes the behavior of `discreteValue(Object)` (or `realValue(Object)` as appropriate) so that it uses the specified Java-expression to produce the prediction. Note that `Java-expression` will be used only during the evaluation and not the training of the learner specifying the `evaluate` clause. -The cval clause enables LBJ’s built-in K-fold cross validation system. K-fold cross validation +The cval clause enables LBJava’s built-in K-fold cross validation system. K-fold cross validation is a statistical technique for assessing the performance of a learned classifier by partitioning the user’s set of training data into K subsets such that a single subset is held aside for testing while -the others are used for training. LBJ automates this process in order to alleviate the need for +the others are used for training. LBJava automates this process in order to alleviate the need for the user to perform his own testing methodologies. The optional `split-strategy` argument to -the cval clause can be used to specify the method with which LBJ will split the data set into +the cval clause can be used to specify the method with which LBJava will split the data set into subsets (folds). If the `split-strategy` argument is not provided, the default value taken is `sequential`. The user may choose from the following four split strategies: @@ -320,7 +324,7 @@ subsets (folds). If the `split-strategy` argument is not provided, the default v i.e. `[ — 1 — | — 2 — | ... | — K — ]` - `kth` - The `kth` split strategy also attempts to partition the set of examples in to `K` equally - sized subsets with a round-robin style assignement scheme. The `x`’th example encountered + sized subsets with a round-robin style assignment scheme. The `x`’th example encountered is assigned to the `(x%K)`’th subset. i.e. `[ 1 2 3 4 ... K 1 2 3 4 ... K ... ]` @@ -329,7 +333,7 @@ subsets (folds). 
If the `split-strategy` argument is not provided, the default v are as equally sized as possible. - `manual` - The user may write their parser so that it returns the unique instance of the - `LBJ2.parse.FoldSeparator` class (see the `separator` field) wherever a fold boundary is + `parse.FoldSeparator` class (see the `separator` field) wherever a fold boundary is desired. Each time this object appears, it represents a partition between two folds. Thus, if the k-fold cross validation is desired, it should appear k − 1 times. The integer provided after the `cval` keyword is ignored and may be omitted in this case. @@ -337,15 +341,15 @@ subsets (folds). If the `split-strategy` argument is not provided, the default v The `testingMetric` and `alpha` clauses are sub-clauses of `cval`, and, consequently, have no effect when the `cval` clause is not present. The `testingMetric` clause gives the user the opportunity to provide a custom testing methodology. The object provided to the `testingMetric` -clause must implement the `LBJ2.learn.TestingMetric` interface. If this clause is not provided, -then it will default to the `LBJ2.learn.Accuracy` metric, which simply returns the ratio of correct +clause must implement the `learn.TestingMetric` interface. If this clause is not provided, +then it will default to the `learn.Accuracy` metric, which simply returns the ratio of correct predictions made by the classifier on the testing fold to the total number of examples contained within said fold. -LBJ’s cross validation system provides a confidence interval according to the measurements +LBJava’s cross validation system provides a confidence interval according to the measurements made by the testing function. With the `alpha` clause, the user may define the width of this -confidence interval. The double-precision argument provided to the alpha clause causes LBJ to -calculate a (1 − a)% confidence interval. For example, `alpha .07` causes LBJ to print a 93% +confidence interval. 
The double-precision argument provided to the alpha clause causes LBJava to +calculate a (1 − a)% confidence interval. For example, `alpha .07` causes LBJava to print a 93% confidence interval, according to the testing measurements made. If this clause is not provided, the default value taken is .05, resulting in a 95% confidence interval. @@ -368,7 +372,7 @@ between progress messages. This variable can also be set via a command line para Expression takes precedence. If no value is provided, then the default value taken is 0, causing progress messages to be given only at the beginning and end of each training pass. -When the LBJ compiler finally processes a learning classifier expression, it generates not +When the LBJava compiler finally processes a learning classifier expression, it generates not only a Java source file implementing the classifier, but also a file containing the results of the computations done during training. This file will have the same name as the classifier but with a `.lc` extension (“`lc`” stands for “learning classifier”). The directory in which this file and also @@ -379,7 +383,7 @@ command line parameters discussed in Section 6.2. Inference is the process through which classifiers constrained in terms of each other reconcile their outputs. More information on the specification of constraints and inference procedures can -be found in Sections 4.2 and 4.3 respectively. In LBJ, the application of an inference to a learning +be found in Sections 4.2 and 4.3 respectively. In LBJava, the application of an inference to a learning classifier participating in that inference results in a new classifier whose output respects the inference’s constraints. Inferences are applied to learning classifiers via the inference invocation, which looks just like a method invocation with a single argument. @@ -403,7 +407,7 @@ mechanism is the `sense` statement, described in Section 4.1.3.1. 
When a classifier’s only purpose is to provide information to a `Learner` (see Section 5.2.1), the `Feature` data type (see Section 5.1.2) is the most appropriate mode of communication. -However, in any LBJ source file, the programmer will inevitably design one or more classifiers +However, in any LBJava source file, the programmer will inevitably design one or more classifiers intended to provide information within the programmer’s own code, either in the application or in other classifier method bodies. In these situations, the features’ values (and not their names) are the data of interest. Section 4.1.3.2 discusses a special semantics for classifier invocation. @@ -443,7 +447,7 @@ resulting value will be converted to a `String`. assumed to be Boolean with a value of `true`. #### 4.1.3.2 Invoking Classifiers - Under the right circumstances, any classifier may be invoked inside an LBJ method body just as + Under the right circumstances, any classifier may be invoked inside an LBJava method body just as if it were a method. The syntax of a classifier invocation is simply `name (object )`, where `object` is the object to be classified and `name` follows the same rules as when a classifier is named in a classifier expression (see Section 4.1.2.1). In general, the semantics of such an invocation are @@ -475,11 +479,11 @@ resulting value will be converted to a `String`. #### 4.1.3.3 Syntax Limitations - When the exact computation is known, LBJ intends to allow the programmer to explicitly - define a classifier using arbitrary Java. However, the current version of LBJ suffers from one + When the exact computation is known, LBJava intends to allow the programmer to explicitly + define a classifier using arbitrary Java. However, the current version of LBJava suffers from one major limitation. All J2SE 1.4.2 statement and expression syntax is accepted, excluding class and interface definitions. 
In particular, this means that anonymous classes currently cannot be - defined or instantiated inside an LBJ method body. + defined or instantiated inside an LBJava method body. ## 4.2 Constraints Many modern applications involve the repeated application of one or more learning classifiers in @@ -487,7 +491,7 @@ a coordinated decision making process. Often, the nature of this decision making the output of each learning classifier on a call by call basis to make all these outputs coherent with respect to each other. For example, a classification task may involve classifying some set of objects, at most one of which is allowed to take a given label. If the learned classifier is left to its -own devices, there is no guarantee that this constraint will be respected. Using LBJ’s constraint +own devices, there is no guarantee that this constraint will be respected. Using LBJava’s constraint and inference syntax, constraints such as these are resolved automatically in a principled manner. More specifically, Integer Linear Programming (ILP) is applied to resolve the constraints @@ -496,14 +500,14 @@ maximized. The details of how ILP works are beyond the scope of this user’s ma (Punyakanok, Roth, & Yih , 2008) for more details. This section covers the syntax and semantics of constraint declarations and statements. However, -simply declaring an LBJ constraint has no effect on the classifiers involved. Section 4.3 -introduces the syntax and semantics of LBJ inference procedures, which can then be invoked (as +simply declaring an LBJava constraint has no effect on the classifiers involved. Section 4.3 +introduces the syntax and semantics of LBJava inference procedures, which can then be invoked (as described in Section 4.1.2.7) to produce new classifiers that respect the constraints. ### 4.2.1 Constraint Statements -LBJ constraints are written as arbitrary first order Boolean logic expressions in terms of learning -classifiers and the objects in a Java application. 
The LBJ constraint statement syntax is +LBJava constraints are written as arbitrary first order Boolean logic expressions in terms of learning +classifiers and the objects in a Java application. The LBJava constraint statement syntax is parameterized by Java expressions, so that general constraints may be expressed in terms of the objects of an internal representation whose exact shape is not known until run-time. The usual operators and quantifiers are provided, as well as the `atleast` and `atmost` quantifiers, which are @@ -520,11 +524,11 @@ semicolon. Constraint expressions take one of the following forms: where the expression must evaluate to an object and `name` follows similar rules as classifier names when they are invoked. In particular, if `MyConstraint` is already declared in `SomeOtherPackage`, it may be invoked with `@SomeOtherPackage.MyConstraint(object)`. - - The negation of an LBJ constraint `!constraint` - - The conjunction of two LBJ constraints `constraint /\ constraint` - - The disjunction of two LBJ constraints `constraint \/ constraint` + - The negation of an LBJava constraint `!constraint` + - The conjunction of two LBJava constraints `constraint /\ constraint` + - The disjunction of two LBJava constraints `constraint \/ constraint` - An implication `constraint => constraint` - - The equivalence of two LBJ constraints `constraint <=> constraint` + - The equivalence of two LBJava constraints `constraint <=> constraint` - A universal quantifier `forall (type name in Java-expression )` constraint where the expression must evaluate to a Java Collection containing objects of the specified type, and the constraint may be written in terms of name . @@ -555,8 +559,8 @@ learning classifier invocations are not treated as inference variables. ### 4.2.2 Constraint Declarations -An LBJ constraint declaration declares a Java method whose purpose is to locate the objects -involved in the inference and generate the constraints. 
Syntactically, an LBJ constraint declaration +An LBJava constraint declaration declares a Java method whose purpose is to locate the objects +involved in the inference and generate the constraints. Syntactically, an LBJava constraint declaration starts with a header indicating the name of the constraint and the type of object it takes as input, similar to a method declaration with a single parameter: @@ -580,12 +584,12 @@ discrete{"false", "true"} name (type name ) ``` Thus, a constraint may be invoked as if it were a Java method (i.e., without the `@` symbol described -in Section 4.2.1) anywhere in an LBJ source file, just like a classifier. Such an invocation +in Section 4.2.1) anywhere in an LBJava source file, just like a classifier. Such an invocation will evaluate the constraint in place, rather than constructing its first order representation. ## 4.3 Inference -The syntax of an LBJ inference has the following form: +The syntax of an LBJava inference has the following form: ```java inference name head type name @@ -619,7 +623,7 @@ might be an appropriate head finder method when the head object has type `Senten of the classifiers involved in the inference takes `Words` as input. Second, the body specifies how the scores produced by each learning classifier should be -normalized. The LBJ library contains a set of normalizing functions that may be named here. It +normalized. The LBJava library contains a set of normalizing functions that may be named here. It is not strictly necessary to use normalization methods, but doing so ensures that the scores computed for each possible prediction may be treated as a probability distribution by the inference algorithm. Thus, we may then reason about the inference procedure as optimizing the expected @@ -662,10 +666,10 @@ the `with` clause of a learning classifier expression (see Section 4.1.2.6). ## 4.4 “Makefile” Behavior -An LBJ source file also functions as a makefile in the following sense. 
First, code will only be +An LBJava source file also functions as a makefile in the following sense. First, code will only be generated for a classifier definition when it is determined that a change has been made5 in the -LBJ source for that classifier since the last time the compiler was executed +LBJava source for that classifier since the last time the compiler was executed (When the file(s) containing the translated code for a given classifier do not exist, this is, of course, also interpreted as a change having been made). Second, a learning @@ -674,12 +678,64 @@ More precisely, any classifier whose definition has changed lexically is deemed Furthermore, any classifier that makes use of an affected classifier is also affected. This includes method bodies that invoke affected classifiers and conjunctions and learning classifiers involving at least one affected classifier. A learning classifier will be trained if and only if a change has been -made to its own source code or it is affected. Thus, when an LBJ source contains many learning +made to its own source code or it is affected. Thus, when an LBJava source contains many learning classifiers and a change is made, time will not be wasted re-training those that are unaffected. -In addition, the LBJ compiler will automatically compile any Java source files that it depends +In addition, the LBJava compiler will automatically compile any Java source files that it depends on, so long as the locations of those source files are indicated with the appropriate command -line parameters (see Section 6.2). For example, if the classifiers in an LBJ source file are defined -to take classes from the programmer’s internal representation as input, the LBJ compiler will +line parameters (see Section 6.2). 
For example, if the classifiers in an LBJava source file are defined
+to take classes from the programmer’s internal representation as input, the LBJava compiler will
automatically compile the Java source files containing those class’ implementations if their class
files don’t already exist or are out of date.
+
+## 4.5 Parameter Tuning Syntax
+
+Parameter tuning is essential in machine learning, as it allows the programmers to find the best parameters in the learning algorithms to perform to their maximum extent. LBJava has intuitive syntax to tune parameters easily. Please see the subsections below to learn more.
+
+### 4.5.1 Set of Parameters
+
+We want to try a set of predefined parameters.
+
+The syntax is:
+```
+{{value1, value2, value3}}
+```
+
+For example, we want the algorithm to try 5, 10, 20, 30, 40 iterations.
+
+The `LBJava` code looks like:
+```
+{{5, 10, 20, 30, 40}} rounds
+```
+
+As another example, we want the algorithm to try different learning rates, such as 0.5, 0.1, 0.005.
+
+The `LBJava` code looks like:
+```
+p.learningRate = {{0.5, 0.1, 0.005}};
+```
+
+### 4.5.2 Parameters in Steps
+
+We want to try a set of parameters, within a range, with steps.
+
+Let's denote the range from `start`, to `end`, with step size `step_size`.
+
+The syntax is:
+```
+{{step_size -> start : end}}
+```
+
+For example, we want to try thickness in `SparseAveragedPerceptron`, from 3 to 0.5, with step size 1.
+
+The `LBJava` code looks like:
+```
+p.thickness = {{ 1 -> 3 : 0.5}};
+```
+
+### 4.5.3 Cross Validation
+
+Cross validation is useful, and essential to avoid the overfitting problem. For k-fold cross validation, the syntax is:
+```
+cval k "random"
+```
diff --git a/lbjava/doc/LBJLIBRARY.md b/lbjava/doc/LBJLIBRARY.md
index 2833ebd4..731bf183 100644
--- a/lbjava/doc/LBJLIBRARY.md
+++ b/lbjava/doc/LBJLIBRARY.md
@@ -1,35 +1,39 @@
-# 5.
The LBJ Library
+---
+title: LBJLIBRARY
+---
-The LBJ programming framework is supported by a library of interfaces, learning algorithms,
+# 5. The LBJava Library
+
+The LBJava programming framework is supported by a library of interfaces, learning algorithms,
and implementations of the building blocks described in Chapter 4. This chapter gives a general
overview of each of those codes. .
-The library is currently organized into five packages. `LBJ2.classify` contains classes related
-to features and classification. `LBJ2.learn` contains learner implementations and supporting
-classes. `LBJ2.infer` contains inference algorithm implementations and internal representations
-for constraints and inference structures. `LBJ2.parse` contains the Parser interface and some
-general purpose internal representation classes. Finally, `LBJ2.nlp` contains some basic natural
+The library is currently organized into five packages. `lbjava.classify` contains classes related
+to features and classification. `lbjava.learn` contains learner implementations and supporting
+classes. `lbjava.infer` contains inference algorithm implementations and internal representations
+for constraints and inference structures. `lbjava.parse` contains the Parser interface and some
+general purpose internal representation classes. Finally, `lbjava.nlp` contains some basic natural
language processing internal representations and parsing routines. In the future, we plan to
expand this library, adding more varieties of learners and domain specific parsers and internal
representations.
-## 5.1 `LBJ2.classify`
+## 5.1 `lbjava.classify`
-The most important class in LBJ’s library is `LBJ2.classify.Classifier`. This abstract class
-is the interface through which the application accesses the classifiers defined in the LBJ source
+The most important class in LBJava’s library is `lbjava.classify.Classifier`. This abstract class
+is the interface through which the application accesses the classifiers defined in the LBJava source
file.
However, the programmer should, in general, only have need to become familiar with a few
of the methods defined there.
-One other class that may be of broad interest is the `LBJ2.classify.TestDiscrete` class
+One other class that may be of broad interest is the `lbjava.classify.TestDiscrete` class
(discussed in Section 5.1.8), which can automate the performance evaluation of a discrete learning
classifier on a labeled test set. The other classes in this package are designed mainly for internal
-use by LBJ’s compiler and can be safely ignored by the casual user. More advanced users who
+use by LBJava’s compiler and can be safely ignored by the casual user. More advanced users who
writes their own learners or inference algorithms in the application, for instance, will need to
become familiar with them.
-## 5.1.1 `LBJ2.classify.Classifier`
+## 5.1.1 `lbjava.classify.Classifier`
-Every classifier declaration in an LBJ source file is translated by the compiler into a Java class
+Every classifier declaration in an LBJava source file is translated by the compiler into a Java class
that extends this class. When the programmer wants to call a classifier in the application, he
creates an object of his classifier’s class using its zero argument constructor and calls an
appropriate method on that object. The appropriate method will most likely be one of the
@@ -58,7 +62,7 @@ this method is for internal use by the compiler
of the returned `FeatureVector`.)
- `FeatureVector classify(Object)`:
-   This method is overridden in every classifier implementation generated by the LBJ compiler.
+   This method is overridden in every classifier implementation generated by the compiler.
It returns a `FeatureVector` which may be iterated through to access individual features
(see Section 5.1.3).
@@ -89,52 +93,52 @@ expects as input.
There are several other methods of this class described in the Javadoc documentation. They are
omitted here since the programmer is not expected to need them.
-### 5.1.2 `LBJ2.classify.Feature` +### 5.1.2 `lbjava.classify.Feature` This abstract class is part of the representation of the value produced by a classifier. In particular, the name of a feature, but not its value, is stored here. Classes derived from this class (described below) provide storage for the value of the feature. This class exists mainly for internal use by -the LBJ compiler, and most programmers will not need to be familiar with it. +the compiler, and most programmers will not need to be familiar with it. - - `LBJ2.classify.DiscreteFeature`: + - `lbjava.classify.DiscreteFeature`: The value of a feature returned by a `discrete` classifier is stored as a String in objects of this class. - - `LBJ2.classify.DiscreteArrayFeature`: + - `lbjava.classify.DiscreteArrayFeature`: The `String` value of a feature returned by a `discrete[]` classifier as well as its integer index into the array are stored in objects of this class. - - `LBJ2.classify.RealFeature`: + - `lbjava.classify.RealFeature`: The value of a feature returned by a `real` classifier is stored as a `double` in objects of this class. - - `LBJ2.classify.RealArrayFeature`: + - `lbjava.classify.RealArrayFeature`: The double value of a feature returned by a `real[]` classifier as well as its integer index into the array are stored in objects of this class. -### 5.1.3 `LBJ2.classify.FeatureVector` +### 5.1.3 `lbjava.classify.FeatureVector` `FeatureVector` is a linked-list-style container which stores features that function as labels separately from other features. It contains methods for iterating through the features and labels and adding more of either. Its main function is as the return value of the `Classifier#classify(Object)` -method which is used internally by the LBJ compiler (see Section 5.1.1). Most programmers will +method which is used internally by the compiler (see Section 5.1.1). Most programmers will not need to become intimately familiar with this class. 
-### 5.1.4 `LBJ2.classify.Score`
+### 5.1.4 `lbjava.classify.Score`
This class represents the `double` score produced by a discrete learning classifier is association
with one of its `String` prediction values. Both items are stored in an object of this class.
-This class is used internally by LBJ’s inference infrastructure, which will interpret the score as
+This class is used internally by LBJava’s inference infrastructure, which will interpret the score as
an indication of how much the learning classifier prefers the associated prediction value, higher
scores indicating more preference.
-### 5.1.5 `LBJ2.classify.ScoreSet`
-This is another class used internally by LBJ’s inference infrastructure. An object of this class is
+### 5.1.5 `lbjava.classify.ScoreSet`
+This is another class used internally by LBJava’s inference infrastructure. An object of this class is
intended to contain one `Score` for each possible prediction value a learning classifier is capable
of returning.
-### 5.1.6 `LBJ2.classify.ValueComparer`
+### 5.1.6 `lbjava.classify.ValueComparer`
This simple class derived from `Classifier` is used to convert a multi-value `discrete` classifier
into a Boolean classifier that returns true if and only if the multi-valued classifier evaluated to a
particular value. `ValueComparer` is used internally by `SparseNetworkLearner` (see Section
5.2.6).
### 5.1.7 Vector Returners
-The classes `LBJ2.classify.FeatureVectorReturner` and
-`LBJ2.classify.LabelVectorReturner` are used internally by the LBJ compiler to help implement
+The classes `lbjava.classify.FeatureVectorReturner` and
+`lbjava.classify.LabelVectorReturner` are used internally by the compiler to help implement
the training procedure when the programmer specifies multiple training rounds (see Section
4.1.2.6).
A feature vector returner is substituted as the learning classifier’s feature extraction classifier, and a label vector returner is substituted as the learning classifier’s labeler (see Section @@ -143,7 +147,7 @@ input by the learning classifier to be a `FeatureVector`, which is not normally as will be described in Section 5.4.4, the programmer may still be interested in these classes if he wishes to continue training a learning classifier for additional rounds on the same data without incurring the costs of performing feature extraction. -### 5.1.8 `LBJ2.classify.TestDiscrete` +### 5.1.8 `lbjava.classify.TestDiscrete` This class can be quite useful to quickly evaluate the performance of a newly learned classifier on labeled testing data. It operates either as a stand-alone program or as a class that may be imported into an application for more tailored use. In either case, it will automatically compute @@ -158,18 +162,19 @@ other methods provided by this class to retrieve the computed statistics. More d all these methods as well as the operation of this class as a stand-alone program is available in the on-line Javadoc. -## 5.2 `LBJ2.learn` +## 5.2 `lbjava.learn` The programmer will want to familiarize himself with most of the classes in this package, in -particular those that are derived from the abstract class `LBJ2.learn.Learner`. These are the -learners that may be selected from within an LBJ source file in association with a learning -classifier expression (see Section 4.1.2.6). -### 5.2.1 `LBJ2.learn.Learner` +particular those that are derived from the abstract class `lbjava.learn.Learner`. These are the +learners that may be selected from within an source file in association with a learning +classifier expression. + +### 5.2.1 `lbjava.learn.Learner` `Learner` is an abstract class extending the abstract class `Classifier` (see Section 5.1.1). 
It acts -as an interface between learning classifiers defined in an LBJ source file and applications that -make on-line use of their learning capabilities. The class generated by the LBJ compiler when +as an interface between learning classifiers defined in an source file and applications that +make on-line use of their learning capabilities. The class generated by the compiler when translating a learning classifier expression will always indirectly extend this class. -In addition to the methods inherited from `Classifier`, this class defines the following nonstatic, +In addition to the methods inherited from `Classifier`, this class defines the following non-static, learning related methods. These are not the only methods defined in class `Learner`, and advanced users may be interested in perusing the Javadoc for descriptions of other methods. @@ -178,14 +183,14 @@ advanced users may be interested in perusing the Javadoc for descriptions of oth the training process given a single example object. The most common use of this method will be in conjunction with a supervised learning algorithm, in which case, of course, the true label of the example object must be accessible by the label classifier specified in - the learning classifier expression in the LBJ source file. Note that changes made via this + the learning classifier expression in the source file. Note that changes made via this method will not persist beyond the current execution of the application unless the `save()` method (discussed below) is invoked. - `void doneLearning()`: Some learning algorithms (usually primarily off-line learning algorithms) save part of their computation until after all training objects have been observed. This method informs the learning algorithm that it is time to perform that part of the computation. 
When compiletime - training is indicated in a learning classifier expression, the LBJ compiler will call + training is indicated in a learning classifier expression, the compiler will call this method after training is complete. Similarly, the programmer who performs on-line learning in his application may need to call this method as well, depending on the learning algorithm. @@ -201,20 +206,20 @@ advanced users may be interested in perusing the Javadoc for descriptions of oth changes that have been made from on-line learning will become visible to subsequent executions of applications that invoke this learning classifier. Please note that the `save()` method currently will not work when the classifier’s byte code is packed in a jar file. - - `LBJ2.classify.ScoreSet scores(Object)`: + - `lbjava.classify.ScoreSet scores(Object)`: This method is used internally by inference algorithms which interpret the scores in the returned `ScoreSet` (see Section 5.1.5) as indications of which predictions the learning classifier prefers and how much they are preferred. - - `LBJ2.classify.Classifier getExtractor()`: + - `lbjava.classify.Classifier getExtractor()`: This method gives access to the feature extraction classifier used by this learning classifier. - - `void setExtractor(LBJ2.classify.Classifier)`: + - `void setExtractor(lbjava.classify.Classifier)`: Use this method to change the feature extraction classifier used by this learning classifier. Note that this change will be remembered during subsequent executions of the application if the `save()` method (described above) is later invoked. - - `LBJ2.classify.Classifier getLabeler()`: + - `lbjava.classify.Classifier getLabeler()`: This method gives access to the classifier used by this learning classifier to produce labels for supervised learning. - void setLabeler(LBJ2.classify.Classifier): + void setLabeler(lbjava.classify.Classifier): Use this method to change the labeler used by this learning classifier. 
Note that this change will be remembered during subsequent executions of the application if the save() method (described above) is later invoked. @@ -225,18 +230,18 @@ advanced users may be interested in perusing the Javadoc for descriptions of oth modifications to the learner’s internal representation visible to subsequent executions of applications that invoke this learning classifier like the `save()` method does. -In addition, the following static flag is declared in every learner output by the LBJ compiler. +In addition, the following static flag is declared in every learner output by the compiler. - `public static boolean isTraining`: The `isTraining` variable can be used by the programmer to determine if his learning classifier is currently being trained. This ability may be useful if, for instance, a feature extraction classifier for this learning classifier needs to alter its behavior depending on the - availability of labeled training data. The LBJ compiler will automatically set this flag + availability of labeled training data. The compiler will automatically set this flag `true` during offline training, and it will be initialized `false` in any application using the learning classifier. So, it becomes the programmer’s responsibility to make sure it is set appropriately if any additional online training is to be performed in the application -### 5.2.2 `LBJ2.learn.LinearThresholdUnit` +### 5.2.2 `lbjava.learn.LinearThresholdUnit` A linear threshold unit is a supervised, mistake driven learner for binary classification. The predictions made by such a learner are produced by computing a score for a given example object and then comparing that score to a predefined threshold. While learning, if the prediction does @@ -262,7 +267,7 @@ containing exactly two values (See Section 4.1.1 for more information on value l The learner derived from this class will then learn to produce a higher score when the correct prediction is the second value in the value list. 
-### 5.2.3 `LBJ2.learn.SparsePerceptron` +### 5.2.3 `lbjava.learn.SparsePerceptron` This learner extends class `LinearThresholdUnit` (see Section 5.2.2). It represents its linear function for score computation as a vector of weights corresponding to features. It has an additive update rule, meaning that it promotes and demotes by treating the collection of features @@ -270,14 +275,14 @@ associated with a training object as a vector and using vector addition. Finally as its learning rate, threshold, the thick separator, and others described in the online Javadoc can be configured by the user. -### 5.2.4 `LBJ2.learn.SparseAveragedPerceptron` +### 5.2.4 `lbjava.learn.SparseAveragedPerceptron` Extended from `SparsePerceptron` (see Section 5.2.3), this learner computes an approximation of voted Perceptron by averaging the weight vectors obtained after processing each training example. Its configurable parameters are the same as those of SparsePerceptron, and, in particular, using this algorithm in conjunction with a positive thickness for the thick separator can be particularly effective. -### 5.2.5 `LBJ2.learn.SparseWinnow` +### 5.2.5 `lbjava.learn.SparseWinnow` This learner extends class `LinearThresholdUnit` (see Section 5.2.2). It represents its linear function for score computation as a vector of weights corresponding to features. It has a multiplicative update rule, meaning that it promotes and demotes by multiplying an individual weight @@ -285,7 +290,7 @@ in the weight vector by a function of the corresponding feature. Finally, parame learning rates, threshold, and others described in the online Javadoc can be configured by the user. -### 5.2.6 `LBJ2.learn.SparseNetworkLearner` +### 5.2.6 `lbjava.learn.SparseNetworkLearner` `SparseNetworkLearner` is a multi-class learner, meaning that it can learn to distinguish among two or more discrete label values when classifying an object. 
It is not necessary to know which label values are possible when employing this learner (i.e., it is not necessary for the label producing @@ -302,14 +307,14 @@ by the programmer, or, if no specific learner is specified, the default is Spars with clause in a learning classifier expression (see Section 4.1.2.6) of discrete feature return type, this learner is invoked with default parameters. -### 5.2.7 `LBJ2.learn.NaiveBayes` +### 5.2.7 `lbjava.learn.NaiveBayes` Na¨ıve Bayes is a multi-class learner that uses prediction value counts and feature counts given a particular prediction value to select the most likely prediction value. It is not mistake driven, as `LinearThresholdUnits` are. The scores returned by its `scores(Object)` method are directly interpretable as empirical probabilities. It also has a smoothing parameter configurable by the user for dealing with features that were never encountered during training. -### 5.2.8 `LBJ2.learn.StochasticGradientDescent` +### 5.2.8 `lbjava.learn.StochasticGradientDescent` Gradient descent is a batch learning algorithm for function approximation in which the learner tries to follow the gradient of the error function to the solution of minimal error. This implementation is a stochastic approximation to gradient descent in which the approximated function @@ -319,30 +324,30 @@ is assumed to have linear form. a with clause in a learning classifier expression (see Section 4.1.2.6) of real feature return type, this learner is invoked with default parameters. -### 5.2.9 `LBJ2.learn.Normalizer` +### 5.2.9 `lbjava.learn.Normalizer` A normalizer is a method that takes a set of scores as input and modifies those scores so that they obey particular constraints. 
Class `Normalizer` is an abstract class with a single abstract method
-`normalize(LBJ2.classify.ScoreSet)` (see Section 5.1.5) which is implemented by extending
+`normalize(lbjava.classify.ScoreSet)` (see Section 5.1.5) which is implemented by extending
classes to define this “normalization.” For example:
- - `LBJ2.learn.Sigmoid`:
+ - `lbjava.learn.Sigmoid`:
This `Normalizer` simply replaces each score `s_i` in the given `ScoreSet` with `1 / 1+e^{s_i}`.
After normalization, each score will be greater than 0 and less than 1.
- - `LBJ2.learn.Softmax`:
+ - `lbjava.learn.Softmax`:
This `Normalizer` replaces each score with the fraction of its exponential out of the sum of
all scores’ exponentials. More precisely, each score si is replaced by `exp(s_i)/ \sum_j exp(s_j)`.
After normalization, each score will be positive and they will sum to 1.
- - `LBJ2.learn.IdentityNormalizer`:
+ - `lbjava.learn.IdentityNormalizer`:
This `Normalizer` simply returns the same scores it was passed as input.
-### 5.2.10 `LBJ2.learn.WekaWrapper`
+### 5.2.10 `lbjava.learn.WekaWrapper`
The `WekaWrapper` class is meant to wrap instances of learners from the [WEKA library of learning algorithms](http://www.cs.waikato.ac.nz/ml/weka/).
-The `LBJ2.learn.WekaWrapper` class converts between the internal representations
-of LBJ and WEKA on the fly, so that the more extensive set of algorithms contained within
-WEKA can be applied to projects written in LBJ.
+The `lbjava.learn.WekaWrapper` class converts between the internal representations
+of LBJava and WEKA on the fly, so that the more extensive set of algorithms contained within
+WEKA can be applied to projects written in LBJava.
-The `WekaWrapper` class extends `LBJ2.learn.Learner`, and carries all of the functionality that
+The `WekaWrapper` class extends `lbjava.learn.Learner`, and carries all of the functionality that
can be expected from a learner.
A standard invocation of `WekaWrapper` could look something like this:
@@ -361,44 +366,44 @@ new WekaWrapper(new weka.classifiers.bayes.NaiveBayes())
this class. See section 4.1.1 for further discussion Classifier Declarations.
- When designing a learning classifier which will use a learning algorithm from WEKA, it
is important to note that very very few algorithms in the WEKA library support `String`
- attributes. In LBJ, this means that it will be very hard to find a learning algorithm which
+ attributes. In LBJava, this means that it will be very hard to find a learning algorithm which
will learn using a `discrete` feature extractor which does not have a value list. I.e. value
lists should be provided for discrete feature extracting classifiers whenever possible.
- Feature pre-extraction must be enabled in order to use the `WekaWrapper` class. Feature
preextraction is enabled by using the `preExtract` clause in the `LearningClassifierExpression`
(discussed in 4.1.2.6).
-## `LBJ2.infer`
-The `LBJ2.infer` package contains many classes. The great majority of these classes form the
+## `lbjava.infer`
+The `lbjava.infer` package contains many classes. The great majority of these classes form the
internal representation of both propositional and first order constraint expressions and are used
-internally by LBJ’s inference infrastructure. Only the programmer who designs his own inference
+internally by LBJava’s inference infrastructure. Only the programmer who designs his own inference
algorithm in terms of constraints needs to familiarize himself with these classes. Detailed
descriptions of them are provided in the Javadoc.
There are a few classes, however, that are of broader interest. First, the `Inference` class
-is an abstract class from which all inference algorithms implemented for LBJ are derived. It
+is an abstract class from which all inference algorithms implemented for LBJava are derived.
It is described below along with the particular algorithms that have already been implemented. -Finally, the `InferenceManager` class is used internally by the LBJ library when applications +Finally, the `InferenceManager` class is used internally by the library when applications using inference are running. -### 5.3.1 `LBJ2.infer.Inference` +### 5.3.1 `lbjava.infer.Inference` `Inference` is an abstract class from which all inference algorithms are derived. Executing an inference generally evaluates all the learning classifiers involved on the objects they have been applied to in the constraints, as well as picking new values for their predictions so that the constraints are satisfied. An object of this class keeps track of all the information necessary to perform inference in addition to the information produced by it. Once that inference has been performed, constrained classifiers access the results through this class’s interface to determine -what their constrained predictions are. This is done through the `valueOf(LBJ2.learn.Learner, Object)` +what their constrained predictions are. This is done through the `valueOf(lbjava.learn.Learner, Object)` method described below. - - `String valueOf(LBJ2.learn.Learner, Object)`: + - `String valueOf(lbjava.learn.Learner, Object)`: The arguments to this method are objects representing a learning classifier and an object involved in the inference. Calling this method causes the inference algorithm to run, if it has not been run before. This method then returns the new prediction corresponding to the given learner and object after constraints have been resolved. 
-### 5.3.2 `LBJ2.infer.GLPK`
-This inference algorithm, which may be named in the `with` clause of the LBJ `inference` syntax,
+### 5.3.2 `lbjava.infer.GLPK`
+This inference algorithm, which may be named in the `with` clause of the LBJava `inference` syntax,
uses Integer Linear Programming (ILP) to maximize the expected number of correct predictions
while respecting the constraints. Upon receiving the constraints represented as First Order
Logic (FOL) formulas, this implementation first translates those formulas to a propositional
representation.
@@ -418,19 +423,19 @@ for any given instance of the inference problem.
The resulting ILP problem is then solved by the [GNU Linear Programming Kit (GLPK)](http://www.gnu.org/software/glpk/),
a linear programming library written in C. This software must be downloaded and installed
-separately before installing LBJ, or the `GLPK` inference algorithm will be disabled. If LBJ has
+separately before installing LBJava, or the `GLPK` inference algorithm will be disabled. If LBJava has
already been installed, it must be reconfigured and reinstalled (see Chapter 6.1) after installing
GLPK.
-## 5.4 `LBJ2.parse`
+## 5.4 `lbjava.parse`
This package contains the very simple `Parser` interface, implementers of which are used in
-conjunction with learning classifier expressions in an LBJ source file when off-line training is
+conjunction with learning classifier expressions in an LBJava source file when off-line training is
desired (see Section 4.1.2.6). It also contains some general purpose internal representations
which may be of interest to a programmer who has not yet written the internal representations
or parsers for the application.
-### 5.4.1 `LBJ2.parse.Parser` -The LBJ compiler is capable of automatically training a learning classifier given training data, +### 5.4.1 `lbjava.parse.Parser` +The compiler is capable of automatically training a learning classifier given training data, so long as that training data comes in the form of objects ready to be passed to the learner’s learn(Object) method. Any class that implements the Parser interface can be utilized by the compiler to provide those training objects. This interface simply consists of a single method for @@ -440,23 +445,23 @@ returning another object: This is the only method that an implementing class needs to define. It returns the next training `Object` until no more are available, at which point it returns `null`. -### 5.4.2 `LBJ2.parse.LineByLine` +### 5.4.2 `lbjava.parse.LineByLine` This abstract class extends `Parser` but does not implement the `next()` method. It does, however, define a constructor that opens the file with the specified name and a readLine() method that fetches the next line of text from that file. Exceptions (as may result from not being able to open or read from the file) are automatically handled by printing an error message and exiting the application. -### 5.4.3 `LBJ2.parse.ChildrenFromVectors` +### 5.4.3 `lbjava.parse.ChildrenFromVectors` This parser calls a user specified, `LinkedVector` (see Section 5.4.6) returning `Parser` internally and returns the `LinkedChildren` (see Section 5.4.5) of that vector one at a time through its -`next()` method. One notable `LinkedVector` returning Parser is `LBJ2.nlp.WordSplitter` discussed +`next()` method. One notable `LinkedVector` returning Parser is `lbjava.nlp.WordSplitter` discussed in Section 5.5.2. 
-### 5.4.4 `LBJ2.parse.FeatureVectorParser` -This parser is used internally by the LBJ compiler (and may be used by the programmer as well) +### 5.4.4 `lbjava.parse.FeatureVectorParser` +This parser is used internally by the compiler (and may be used by the programmer as well) to continue training the learning classifier after the first round of training without incurring the -cost of feature extraction. See Section 4.1.2.6 for more information on LBJ’s behavior when the +cost of feature extraction. See Section 4.1.2.6 for more information on ’s behavior when the programmer specifies multiple training rounds. That section describes how lexicon and example files are produced, and these files become the input to `FeatureVectorParser`. @@ -468,9 +473,10 @@ calling `learn(Object)`. After the new training objects have been exhausted, the extractor and labeler must be restored before finally calling `save()`. For example, if a learning classifier named `MyTagger` has been trained for multiple rounds by -the LBJ compiler, the lexicon and example file will be created with the names `MyTagger.lex` +the compiler, the lexicon and example file will be created with the names `MyTagger.lex` and `MyTagger.ex` respectively. Then the following code in an application will continue training the classifier for an additional round: + ```java MyTagger tagger = new MyTagger(); Classifier extractor = tagger.getExtractor(); @@ -485,20 +491,20 @@ tagger.setLabeler(labeler); tagger.save(); ``` -### 5.4.5 `LBJ2.parse.LinkedChild` +### 5.4.5 `lbjava.parse.LinkedChild` Together with `LinkedVector` discussed next, these two classes form the basis for a simple, general purpose internal representation for raw data. `LinkedChild` is an abstract class containing pointers to two other `LinkedChildren`, the “previous” one and the “next” one. It may also store a pointer to its parent, which is a `LinkedVector`. 
Constructors that set up all these links are also provided, simplifying the implementation of the parser. -### 5.4.6 `LBJ2.parse.LinkedVector` +### 5.4.6 `lbjava.parse.LinkedVector` A `LinkedVector` contains any number of `LinkedChildren` and provides random access to them in addition to the serial access provided by their links. It also provides methods for insertion and removal of new children. A `LinkedVector` is itself also a `LinkedChild`, so that hierarchies are easy to construct when sub-classing these two classes. -## 5.5 `LBJ2.nlp` +## 5.5 `lbjava.nlp` The programmer of Natural Language Processing (NLP) applications may find the internal representations and parsing algorithms implemented in this package useful. There are representations of words, sentences, and documents, as well as parsers of some common file formats and algorithms @@ -507,19 +513,19 @@ for word and sentence segmentation. ### 5.5.1 Internal Representations These classes may be used to represent the elements of a natural language document. - - `LBJ2.nlp.Word`: + - `lbjava.nlp.Word`: This simple representation of a word extends the `LinkedChild` class (see Section 5.4.5) and has space for its spelling and part of speech tag. - - `LBJ2.nlp.Sentence`: + - `lbjava.nlp.Sentence`: Objects of the `Sentence` class store only the full text of the sentence in a single `String`. However, a method is provided to heuristically split that text into Word objects contained in a `LinkedVector`. - - `LBJ2.nlp.NLDocument`: + - `lbjava.nlp.NLDocument`: Extended from `LinkedVector`, this class has a constructor that takes the full text of a document as input. Using the methods in `Sentence` and `SentenceSplitter`, it creates a hierarchical representation of a natural language document in which `Words` are contained in `LinkedVectors` representing sentences which are contained in this `LinkedVector`. 
- - `LBJ2.nlp.POS`: + - `lbjava.nlp.POS`: This class may be used to represent a part of speech, but it used more frequently to simply retrieve information about the various parts of speech made standard by the Penn Treebank project (Marcus, Santorini, & Marcinkiewicz , 1994). @@ -529,7 +535,7 @@ The classes listed in this section are all derived from class `LineByLine` (see all contain (at least) a constructor that takes a single `String` representing the name of a file as input. The objects they return are retrieved through the overridden `next()` method. - - `LBJ2.nlp.SentenceSplitter`: + - `lbjava.nlp.SentenceSplitter`: Use this `Parser` to separate sentences out from plain text. The class provides two constructors, one for splitting sentences out of a plain text file, and the other for splitting sentences out of plain text already stored in memory in a `String[]`. The user can then retrieve @@ -539,19 +545,19 @@ input. The objects they return are retrieved through the overridden `next()` met spaces, newlines, etc., is included in the `Sentence` as it appeared in the paragraph. (If the constructor taking a `String[]` as an argument is used, newline characters are inserted into the returned sentences to indicate transitions from one element of the array to the next.) - - `LBJ2.nlp.WordSplitter`: + - `lbjava.nlp.WordSplitter`: This parser takes the plain, unannotated `Sentence`s (see Section 5.5.1) returned by another parser (e.g., `SentenceSplitter`) and splits them into `Word` objects. Entire sentences now represented as `LinkedVectors` (see Section 5.4.6) are then returned one at a time by calls to the `next()` method. - - `LBJ2.nlp.ColumnFormat`: + - `lbjava.nlp.ColumnFormat`: This parser returns a `String[]` representing the rows of a file in column format. The input file is assumed to contain fields of non-whitespace characters separated by any amount of whitespace, one line of which is commonly used to represent a word in a corpus. 
This parser breaks a given line into one `String` per field, omitting all of the whitespace. A common usage of this class will be in extending it to create a new `Parser` that calls `super.next()` and creates a more interesting internal representation with the results. - - `LBJ2.nlp.POSBracketToVector`: + - `lbjava.nlp.POSBracketToVector`: Use this parser to return `LinkedVector` objects representing sentences given file names of POS bracket form files to parse. These files are expected to have one sentence per line, and the format of each line is as follows: diff --git a/lbjava/doc/REGRESSION.md b/lbjava/doc/REGRESSION.md index 5109f122..48a7ab27 100644 --- a/lbjava/doc/REGRESSION.md +++ b/lbjava/doc/REGRESSION.md @@ -1,8 +1,12 @@ -#7 A working example: Regression +--- +title: REGRESSION +--- -As mentioned in [Section 2 Basics and definitions](DEFINITION.md#feature), there are two feature types in LBJava: `discrete` and `real`. In machine learning, classification refers to the problem of predicting the class of unlabeled data for which the output type is `discrete`. On the hther hand, regression refers to the problem that the desired output is continuous or `real`. [Section 3 A working example: classifying newsgroup documents into topics](20NEWSGROUP.md) gives an example of how to use LBJava for `discrete` type and this tutorial is dedicated to `real` type. +# 6. A working example: Regression -##7.1 Setting Up +As mentioned in [Section 2 Basics and definitions](DEFINITION.md#feature), there are two feature types in LBJava: `discrete` and `real`. In machine learning, classification refers to the problem of predicting the class of unlabeled data for which the output type is `discrete`. On the other hand, regression refers to the problem that the desired output is continuous or `real`. 
[Section 3 A working example: classifying newsgroup documents into topics](20NEWSGROUP.md) gives an example of how to use LBJava for `discrete` type and this tutorial is dedicated to `real` type. + +## 6.1 Setting Up Let's name a class as `MyData` and use it for internal representation. @@ -58,13 +62,11 @@ public Object next() { } ``` - - -##7.2 Classifier Declarations +## 6.2 Classifier Declarations For declaring the classifier, we need to use [Section 4 LBJava Language](LBJLANGUAGE.md). -####7.2.1 Feature +#### 6.2.1 Feature The features are declared as following: @@ -92,7 +94,7 @@ If type `real[]` is used, the features become `10 20 10` to classifier. However, Please refer to [Section 4.1.2.4 Conjunctions](LBJLANGUAGE.md) for details on types. -####7.2.2 Label +#### 6.2.2 Label The label is declared as following: @@ -101,7 +103,8 @@ real MyLabel(MyData d) <- { return d.getLabel(); } ``` -####7.2.3 Classifier + +#### 6.2.3 Classifier Since we are using a classifier with real output type, we need to choose a training method compatible this output type. In this example we use Stochastic Gradient Descent. (visit [Training Algorithms](ALGORITHMS.md) for complete list of training algorithms with the expected output types.) @@ -117,9 +120,9 @@ real SGDClassifier(MyData d) <- end ``` -##7.3 Using `SGDClassifier` in a Java Program +## 6.3 Using `SGDClassifier` in a Java Program -###7.3.1 Generate `SGDClassifier` +### 6.3.1 Generate `SGDClassifier` To compile your LBJava file and execute the LBJava code, run the following: @@ -142,6 +145,7 @@ If you only want generate the Java translations of the LBJava code but not execu ``` mvn lbjava:generate ``` + Then to compile all classes run: ``` @@ -164,7 +168,7 @@ or `lbjava:compile-only`) you need to run `lbjava:clean` before compiling again. **Acknowledgement** to Christos Christodoulopoulos. 
-###7.3.1 Use `SGDClassifier` programmatically +### 6.3.1 Use `SGDClassifier` programmatically Once `SGDClassifier` is generated from the previous step, you may invoke it programmatically. @@ -183,7 +187,7 @@ trainer.train(1000); First read training data set into `MyDataReader` and create a `SGDClassifier`. Pass `SGDClassifier` to `BatchTrainer` and invoke method `train` for number of times. -##7.4 Testing a Real Classifier +## 6.4 Testing a Real Classifier Here is the sample code to use `TestReal` class: diff --git a/lbjava/pom.xml b/lbjava/pom.xml index 767b5613..13a58757 100644 --- a/lbjava/pom.xml +++ b/lbjava/pom.xml @@ -3,7 +3,7 @@ lbjava-project edu.illinois.cs.cogcomp - 1.2.16 + 1.3.1 4.0.0 @@ -26,10 +26,9 @@ - gurobi - gurobi - 5.0.1 - true + edu.illinois.cs.cogcomp + illinois-inference + 0.6.0 nz.ac.waikato.cms.weka @@ -47,11 +46,6 @@ 4.11 test - - org.ojalgo - ojalgo - 37.1.1 - org.apache.commons commons-math3 @@ -70,18 +64,6 @@ - - - CogcompSoftware - CogcompSoftware - scp://bilbo.cs.illinois.edu:/mounts/bilbo/disks/0/www/cogcomp/html/m2repo - - - CogcompSoftwareDoc - scp://bilbo.cs.illinois.edu:/mounts/bilbo/disks/0/www/cogcomp/html/software/doc/${project.artifactId} - - - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/ClassifierCSE.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/ClassifierCSE.java index a46ba1e8..f23c976c 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/ClassifierCSE.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/ClassifierCSE.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava; @@ -28,226 +25,213 @@ /** - * This pass performs common subexpression elimination on classifier - * expressions except for {@link ClassifierName}s and - * {@link LearningClassifierExpression}s. - * - * @author Nick Rizzolo + * This pass performs common subexpression elimination on classifier expressions except for + * {@link ClassifierName}s and {@link LearningClassifierExpression}s. + * + * @author Nick Rizzolo **/ -public class ClassifierCSE extends Pass -{ - /** - * Maps each classifier expression to the canonical name that will - * represent it. - **/ - private HashMap expressionToName; - - - /** - * Instantiates a pass that runs on an entire AST. - * - * @param ast The program to run this pass on. - **/ - public ClassifierCSE(AST ast) { super(ast); } - - - /** - * Looks up the given expression in {@link #expressionToName}, returning - * a new {@link ClassifierName} if there was a name associated with it or - * null otherwise. In the case that a name was not already - * associated with the given expression, its own name is set up in - * association with it, with the following exceptions. - * {@link ClassifierName}s are excluded from the map since they are already - * merely names. {@link LearningClassifierExpression}s are also excluded, - * since each learning classifier expression should represent a separate - * and independent learned function even if its specification is identical - * to some other learning classifier. - * - * @param ce The expression to look up. 
- **/ - private ClassifierName lookup(ClassifierExpression ce) { - ClassifierName cached = (ClassifierName) expressionToName.get(ce); - - if (cached == null) { - if (!(ce instanceof ClassifierName) - && !(ce instanceof ClassifierCastExpression) - && !(ce instanceof LearningClassifierExpression)) { - cached = - new ClassifierName(ce.name.toString(), ce.line, ce.byteOffset); - cached.name = cached.referent; - cached.returnType = ce.returnType; - cached.argument = ce.argument; - cached.singleExampleCache = ce.singleExampleCache; - expressionToName.put(ce, cached); - } - - return null; +public class ClassifierCSE extends Pass { + /** + * Maps each classifier expression to the canonical name that will represent it. + **/ + private HashMap expressionToName; + + + /** + * Instantiates a pass that runs on an entire AST. + * + * @param ast The program to run this pass on. + **/ + public ClassifierCSE(AST ast) { + super(ast); } - ClassifierName result = (ClassifierName) cached.clone(); - result.line = cached.line; - result.byteOffset = cached.byteOffset; - result.returnType = cached.returnType; - result.argument = cached.argument; - result.singleExampleCache = cached.singleExampleCache; - return result; - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param ast The node to process. - **/ - public void run(AST ast) { - expressionToName = new HashMap(); - runOnChildren(ast); - expressionToName = null; - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param ca The node to process. 
- **/ - public void run(ClassifierAssignment ca) { - ca.expression.runPass(this); - ClassifierName name = lookup(ca.expression); - if (name != null) { - name.name = ca.expression.name; - name.returnType = ca.expression.returnType; - name.singleExampleCache = ca.expression.singleExampleCache; - ca.expression = name; - SemanticAnalysis.representationTable.put(name.name.toString(), name); + + /** + * Looks up the given expression in {@link #expressionToName}, returning a new + * {@link ClassifierName} if there was a name associated with it or null otherwise. + * In the case that a name was not already associated with the given expression, its own name is + * set up in association with it, with the following exceptions. {@link ClassifierName}s are + * excluded from the map since they are already merely names. + * {@link LearningClassifierExpression}s are also excluded, since each learning classifier + * expression should represent a separate and independent learned function even if its + * specification is identical to some other learning classifier. + * + * @param ce The expression to look up. 
+ **/ + private ClassifierName lookup(ClassifierExpression ce) { + ClassifierName cached = (ClassifierName) expressionToName.get(ce); + + if (cached == null) { + if (!(ce instanceof ClassifierName) && !(ce instanceof ClassifierCastExpression) + && !(ce instanceof LearningClassifierExpression)) { + cached = new ClassifierName(ce.name.toString(), ce.line, ce.byteOffset); + cached.name = cached.referent; + cached.returnType = ce.returnType; + cached.argument = ce.argument; + cached.singleExampleCache = ce.singleExampleCache; + expressionToName.put(ce, cached); + } + + return null; + } + + ClassifierName result = (ClassifierName) cached.clone(); + result.line = cached.line; + result.byteOffset = cached.byteOffset; + result.returnType = cached.returnType; + result.argument = cached.argument; + result.singleExampleCache = cached.singleExampleCache; + return result; } - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param cce The node to process. - **/ - public void run(ClassifierCastExpression cce) { - cce.expression.runPass(this); - ClassifierName name = lookup(cce.expression); - if (name != null) { - if (cce.expression.name.toString().indexOf("$$") == -1) { - name.name = cce.expression.name; - name.returnType = cce.expression.returnType; - name.singleExampleCache = cce.expression.singleExampleCache; - SemanticAnalysis.representationTable.put(name.name.toString(), name); - } - else SemanticAnalysis.representationTable.remove(cce.name.toString()); - - cce.expression = name; + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param ast The node to process. + **/ + public void run(AST ast) { + expressionToName = new HashMap(); + runOnChildren(ast); + expressionToName = null; } - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param cc The node to process. - **/ - public void run(CodedClassifier cc) { - } - - - /** - * Runs this pass on all nodes of the indicated type. 
- * - * @param cg The node to process. - **/ - public void run(CompositeGenerator cg) { - for (ClassifierExpressionList.ClassifierExpressionListIterator I = - cg.components.listIterator(); - I.hasNext(); ) { - ClassifierExpression ce = I.nextItem(); - ce.runPass(this); - ClassifierName name = lookup(ce); - if (name != null) { - SemanticAnalysis.representationTable.remove(ce.name.toString()); - I.set(name); - } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param ca The node to process. + **/ + public void run(ClassifierAssignment ca) { + ca.expression.runPass(this); + ClassifierName name = lookup(ca.expression); + if (name != null) { + name.name = ca.expression.name; + name.returnType = ca.expression.returnType; + name.singleExampleCache = ca.expression.singleExampleCache; + ca.expression = name; + SemanticAnalysis.representationTable.put(name.name.toString(), name); + } } - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param c The node to process. - **/ - public void run(Conjunction c) { - c.left.runPass(this); - ClassifierName name = lookup(c.left); - if (name != null) { - SemanticAnalysis.representationTable.remove(c.left.name.toString()); - c.left = name; + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param cce The node to process. 
+ **/ + public void run(ClassifierCastExpression cce) { + cce.expression.runPass(this); + ClassifierName name = lookup(cce.expression); + if (name != null) { + if (cce.expression.name.toString().indexOf("$$") == -1) { + name.name = cce.expression.name; + name.returnType = cce.expression.returnType; + name.singleExampleCache = cce.expression.singleExampleCache; + SemanticAnalysis.representationTable.put(name.name.toString(), name); + } else + SemanticAnalysis.representationTable.remove(cce.name.toString()); + + cce.expression = name; + } } - c.right.runPass(this); - name = lookup(c.right); - if (name != null) { - SemanticAnalysis.representationTable.remove(c.right.name.toString()); - c.right = name; + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param cc The node to process. + **/ + public void run(CodedClassifier cc) {} + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param cg The node to process. + **/ + public void run(CompositeGenerator cg) { + for (ClassifierExpressionList.ClassifierExpressionListIterator I = + cg.components.listIterator(); I.hasNext();) { + ClassifierExpression ce = I.nextItem(); + ce.runPass(this); + ClassifierName name = lookup(ce); + if (name != null) { + SemanticAnalysis.representationTable.remove(ce.name.toString()); + I.set(name); + } + } } - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param i The node to process. - **/ - public void run(InferenceInvocation i) { - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param lce The node to process. - **/ - public void run(LearningClassifierExpression lce) { - ClassifierName name; - if (lce.labeler != null) { - lce.labeler.runPass(this); - name = lookup(lce.labeler); - if (name != null) { - SemanticAnalysis.representationTable - .remove(lce.labeler.name.toString()); - lce.labeler = name; - } + + + /** + * Runs this pass on all nodes of the indicated type. 
+ * + * @param c The node to process. + **/ + public void run(Conjunction c) { + c.left.runPass(this); + ClassifierName name = lookup(c.left); + if (name != null) { + SemanticAnalysis.representationTable.remove(c.left.name.toString()); + c.left = name; + } + + c.right.runPass(this); + name = lookup(c.right); + if (name != null) { + SemanticAnalysis.representationTable.remove(c.right.name.toString()); + c.right = name; + } } - lce.extractor.runPass(this); - name = lookup(lce.extractor); - if (name != null) { - SemanticAnalysis.representationTable - .remove(lce.extractor.name.toString()); - lce.extractor = name; + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param i The node to process. + **/ + public void run(InferenceInvocation i) {} + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param lce The node to process. + **/ + public void run(LearningClassifierExpression lce) { + ClassifierName name; + if (lce.labeler != null) { + lce.labeler.runPass(this); + name = lookup(lce.labeler); + if (name != null) { + SemanticAnalysis.representationTable.remove(lce.labeler.name.toString()); + lce.labeler = name; + } + } + + lce.extractor.runPass(this); + name = lookup(lce.extractor); + if (name != null) { + SemanticAnalysis.representationTable.remove(lce.extractor.name.toString()); + lce.extractor = name; + } } - } - /** - * Runs this pass on all nodes of the indicated type. - * - * @param c The node to process. - **/ - public void run(ConstraintDeclaration c) { - } + /** + * Runs this pass on all nodes of the indicated type. + * + * @param c The node to process. + **/ + public void run(ConstraintDeclaration c) {} - /** - * Runs this pass on all nodes of the indicated type. - * - * @param i The node to process. - **/ - public void run(InferenceDeclaration i) { - } + /** + * Runs this pass on all nodes of the indicated type. + * + * @param i The node to process. 
+ **/ + public void run(InferenceDeclaration i) {} } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/Clean.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/Clean.java index 581a52ea..b0f44ef0 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/Clean.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/Clean.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava; @@ -28,206 +25,214 @@ /** - * To be run after SemanticAnalysis, this pass compiles the list - * of files created by the LBJava compiler and removes them. - * - * @see edu.illinois.cs.cogcomp.lbjava.SemanticAnalysis - * @author Nick Rizzolo + * To be run after SemanticAnalysis, this pass compiles the list of files created by + * the LBJava compiler and removes them. + * + * @see edu.illinois.cs.cogcomp.lbjava.SemanticAnalysis + * @author Nick Rizzolo **/ -public class Clean extends Pass -{ - /** The list of files to be deleted. */ - private LinkedList files; - /** The path at which a cleanable Java file will be found. */ - private String javaPath; - /** The path at which a cleanable class file will be found. */ - private String classPath; - - - /** - * Instantiates a pass that runs on an entire AST. - * - * @param ast The program to run this pass on. - **/ - public Clean(AST ast) { super(ast); } - - - /** - * Runs this pass on all nodes of the indicated type. 
- * - * @param ast The node to process. - **/ - public void run(AST ast) { - files = new LinkedList(); - - if (Main.generatedSourceDirectory != null) - javaPath = Main.generatedSourceDirectory + File.separator; - else javaPath = ""; - - if (Main.classDirectory != null) - classPath = Main.classDirectory + File.separator; - else classPath = ""; - - runOnChildren(ast); - - for (Iterator I = files.iterator(); I.hasNext(); ) { - Object o = I.next(); - if (o instanceof String) { - String name = (String) o; - File f = new File(name); - if (f.exists() && !f.delete()) - reportError(0, "Could not delete '" + name + "'."); - } - else { - String[] names = (String[]) o; - for (int i = 0; i < names.length; ++i) { - File f = new File(names[i]); - if (f.exists() && !f.delete()) - reportError(0, "Could not delete '" + names[i] + "'."); - } - } +public class Clean extends Pass { + /** The list of files to be deleted. */ + private LinkedList files; + /** The path at which a cleanable Java file will be found. */ + private String javaPath; + /** The path at which a cleanable class file will be found. */ + private String classPath; + + + /** + * Instantiates a pass that runs on an entire AST. + * + * @param ast The program to run this pass on. + **/ + public Clean(AST ast) { + super(ast); } - } - - - /** - * Adds the default files (*.java and *.class) - * to the remove list. - * - * @param n The node to add files with respect to. - **/ - protected void defaultFiles(CodeGenerator n) { - files.add(javaPath + n.getName() + ".java"); - files.add(classPath + n.getName() + ".class"); - } - - - /** - * Adds files generated for anonymous classes associated with a particular - * named classifier to the remove list. - * - * @param name The name of the classifier with which anonymous classes may - * be associated. 
- **/ - protected void anonymousFiles(Name name) { - final String prefix = name + "$"; - String directory = javaPath; - if (directory.length() == 0) directory = System.getProperty("user.dir"); - else directory = directory.substring(0, directory.length() - 1); - - String[] toAdd = new String[0]; - if (new File(directory).exists()) { - toAdd = new File(directory).list( - new FilenameFilter() { - public boolean accept(File directory, String n) { - return n.startsWith(prefix); + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param ast The node to process. + **/ + public void run(AST ast) { + files = new LinkedList(); + + if (Main.generatedSourceDirectory != null) + javaPath = Main.generatedSourceDirectory + File.separator; + else + javaPath = ""; + + if (Main.classDirectory != null) + classPath = Main.classDirectory + File.separator; + else + classPath = ""; + + runOnChildren(ast); + + for (Iterator I = files.iterator(); I.hasNext();) { + Object o = I.next(); + if (o instanceof String) { + String name = (String) o; + File f = new File(name); + if (f.exists() && !f.delete()) + reportError(0, "Could not delete '" + name + "'."); + } else { + String[] names = (String[]) o; + for (int i = 0; i < names.length; ++i) { + File f = new File(names[i]); + if (f.exists() && !f.delete()) + reportError(0, "Could not delete '" + names[i] + "'."); + } } - }); + } } - for (int i = 0; i < toAdd.length; ++i) - toAdd[i] = directory + File.separator + toAdd[i]; - files.add(toAdd); - directory = classPath; - if (directory.length() == 0) directory = System.getProperty("user.dir"); - else directory = directory.substring(0, directory.length() - 1); + /** + * Adds the default files (*.java and *.class) to the remove list. + * + * @param n The node to add files with respect to. 
+ **/ + protected void defaultFiles(CodeGenerator n) { + files.add(javaPath + n.getName() + ".java"); + files.add(classPath + n.getName() + ".class"); + } + + + /** + * Adds files generated for anonymous classes associated with a particular named classifier to + * the remove list. + * + * @param name The name of the classifier with which anonymous classes may be associated. + **/ + protected void anonymousFiles(Name name) { + final String prefix = name + "$"; + String directory = javaPath; + if (directory.length() == 0) + directory = System.getProperty("user.dir"); + else + directory = directory.substring(0, directory.length() - 1); + + String[] toAdd = new String[0]; + if (new File(directory).exists()) { + toAdd = new File(directory).list(new FilenameFilter() { + public boolean accept(File directory, String n) { + return n.startsWith(prefix); + } + }); + } + + for (int i = 0; i < toAdd.length; ++i) + toAdd[i] = directory + File.separator + toAdd[i]; + files.add(toAdd); - if (new File(directory).exists()) { - toAdd = new File(directory).list( - new FilenameFilter() { + directory = classPath; + if (directory.length() == 0) + directory = System.getProperty("user.dir"); + else + directory = directory.substring(0, directory.length() - 1); + + if (new File(directory).exists()) { + toAdd = new File(directory).list(new FilenameFilter() { public boolean accept(File directory, String n) { - return n.startsWith(prefix); + return n.startsWith(prefix); } - }); + }); + } + + for (int i = 0; i < toAdd.length; ++i) + toAdd[i] = directory + File.separator + toAdd[i]; + files.add(toAdd); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param cn The node to process. + **/ + public void run(ClassifierName cn) { + if (cn.name == cn.referent) + return; + defaultFiles(cn); } - for (int i = 0; i < toAdd.length; ++i) - toAdd[i] = directory + File.separator + toAdd[i]; - files.add(toAdd); - } - - - /** - * Runs this pass on all nodes of the indicated type. 
- * - * @param cn The node to process. - **/ - public void run(ClassifierName cn) { - if (cn.name == cn.referent) return; - defaultFiles(cn); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param cc The node to process. - **/ - public void run(CodedClassifier cc) { defaultFiles(cc); } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param cg The node to process. - **/ - public void run(CompositeGenerator cg) { - defaultFiles(cg); - anonymousFiles(cg.name); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param c The node to process. - **/ - public void run(Conjunction c) { - defaultFiles(c); - anonymousFiles(c.name); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param i The node to process. - **/ - public void run(InferenceInvocation i) { defaultFiles(i); } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param lce The node to process. - **/ - public void run(LearningClassifierExpression lce) { - defaultFiles(lce); - anonymousFiles(lce.name); - files.add(classPath + lce.name + ".lc"); - files.add(javaPath + lce.name + ".ex"); - files.add(javaPath + lce.name + ".test.ex"); - files.add(javaPath + lce.name + ".lex"); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param c The node to process. - **/ - public void run(ConstraintDeclaration c) { defaultFiles(c); } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param i The node to process. - **/ - public void run(InferenceDeclaration i) { - defaultFiles(i); - defaultFiles(i.constraint); - } -} + /** + * Runs this pass on all nodes of the indicated type. + * + * @param cc The node to process. + **/ + public void run(CodedClassifier cc) { + defaultFiles(cc); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param cg The node to process. 
+ **/ + public void run(CompositeGenerator cg) { + defaultFiles(cg); + anonymousFiles(cg.name); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param c The node to process. + **/ + public void run(Conjunction c) { + defaultFiles(c); + anonymousFiles(c.name); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param i The node to process. + **/ + public void run(InferenceInvocation i) { + defaultFiles(i); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param lce The node to process. + **/ + public void run(LearningClassifierExpression lce) { + defaultFiles(lce); + anonymousFiles(lce.name); + files.add(classPath + lce.name + ".lc"); + files.add(javaPath + lce.name + ".ex"); + files.add(javaPath + lce.name + ".test.ex"); + files.add(javaPath + lce.name + ".lex"); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param c The node to process. + **/ + public void run(ConstraintDeclaration c) { + defaultFiles(c); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param i The node to process. + **/ + public void run(InferenceDeclaration i) { + defaultFiles(i); + defaultFiles(i.constraint); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/CodeGenerator.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/CodeGenerator.java index 2fc2b373..9219b24e 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/CodeGenerator.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/CodeGenerator.java @@ -1,33 +1,27 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava; /** - * All IR classes for which code is generated implement this interface. - * - * @author Nick Rizzolo + * All IR classes for which code is generated implement this interface. + * + * @author Nick Rizzolo **/ -public interface CodeGenerator -{ - /** Returns the name of the code generator. */ - public String getName(); +public interface CodeGenerator { + /** Returns the name of the code generator. */ + public String getName(); - /** - * Returns the line number on which this AST node is found in the source - * (starting from line 0). - **/ - public int getLine(); + /** + * Returns the line number on which this AST node is found in the source (starting from line 0). + **/ + public int getLine(); - /** Returns a shallow textual representation of the AST node. */ - public StringBuffer shallow(); + /** Returns a shallow textual representation of the AST node. */ + public StringBuffer shallow(); } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/DeclarationNames.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/DeclarationNames.java index 3d981b6d..7771a7c5 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/DeclarationNames.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/DeclarationNames.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava; @@ -16,30 +13,29 @@ /** - * - * @author Nick Rizzolo + * + * @author Nick Rizzolo **/ -public class DeclarationNames extends Pass -{ - /** - * Instantiates a pass that runs on an entire {@link AST}. - * - * @param ast The program to run this pass on. - **/ - public DeclarationNames(AST ast) { super(ast); } +public class DeclarationNames extends Pass { + /** + * Instantiates a pass that runs on an entire {@link AST}. + * + * @param ast The program to run this pass on. + **/ + public DeclarationNames(AST ast) { + super(ast); + } - /** - * Runs this pass on all nodes of the indicated type. - * - * @param list The node to process. - **/ - public void run(DeclarationList list) { - for (DeclarationList.DeclarationListIterator I = list.listIterator(); - I.hasNext(); ) { - Declaration d = I.nextItem(); - System.out.println(d.name); + /** + * Runs this pass on all nodes of the indicated type. + * + * @param list The node to process. + **/ + public void run(DeclarationList list) { + for (DeclarationList.DeclarationListIterator I = list.listIterator(); I.hasNext();) { + Declaration d = I.nextItem(); + System.out.println(d.name); + } } - } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/AST.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/AST.java index dafaa233..03d709ad 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/AST.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/AST.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. 
- * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,172 +11,161 @@ /** - * The root node of LBJava's AST. The parser will create only one of these and - * return it as the ultimate result of parsing. It currently contains a - * single (optional) PackageDeclaration, a List of - * ImportDeclarations (also optional), the global - * SymbolTable, and a List containing - * ClassifierAssignments, ConstraintDeclarations, - * and InferenceDeclarations. - * - *

- * The PackageDeclaration specifies what package will contain - * the generated classes. The ImportDeclarations work similarly - * to import declarations in regular Java code: they allow the - * user to name classes in other packages without using their full package - * names. The list of Declarations comprises the LBJava program. - * The global SymbolTable simply associates variable names with - * their type information in the program. - * - * @author Nick Rizzolo + * The root node of LBJava's AST. The parser will create only one of these and return it as the + * ultimate result of parsing. It currently contains a single (optional) + * PackageDeclaration, a List of ImportDeclarations (also + * optional), the global SymbolTable, and a List containing + * ClassifierAssignments, ConstraintDeclarations, and + * InferenceDeclarations. + * + *

+ * The PackageDeclaration specifies what package will contain the generated classes. + * The ImportDeclarations work similarly to import declarations in regular + * Java code: they allow the user to name classes in other packages without using their full package + * names. The list of Declarations comprises the LBJava program. The global + * SymbolTable simply associates variable names with their type information in the + * program. + * + * @author Nick Rizzolo **/ -public class AST extends ASTNode -{ - /** The symbolTable variable mirrors this variable. */ - public static final SymbolTable globalSymbolTable = new SymbolTable(); - - - /** - * (ø) An optional declaration of the package that generated classes - * should be a part of. - **/ - public PackageDeclaration packageDeclaration; - /** - * (¬ø) The list of import statements at the top of the source - * file. - **/ - public ImportList imports; - /** - * (¬ø) The list of classifier, constraint, and inference - * declarations representing the LBJava program. - **/ - public DeclarationList declarations; - - - /** - * Initializes just the statement list. Line and byte offset information - * are taken from the statement list. - * - * @param d The declarations comprising the program. - **/ - public AST(DeclarationList d) { - this(null, new ImportList(), d, d.line, d.byteOffset); - } - - /** - * Initializes both lists. Line and byte offset information are taken from - * the import list. - * - * @param i The import declarations. - * @param d The declarations comprising the program. - **/ - public AST(ImportList i, DeclarationList d) { - this(null, i, d, i.line, i.byteOffset); - } - - /** - * Initializes package declaration and statement list. Line and byte - * offset information are taken from the package declaration. - * - * @param p The package declaration. - * @param d The declarations comprising the program. 
- **/ - public AST(PackageDeclaration p, DeclarationList d) { - this(p, new ImportList(), d, p.line, p.byteOffset); - } - - /** - * Initializes all member variables. Line and byte offset information are - * taken from the package declaration. - * - * @param p The package declaration. - * @param i The import declarations. - * @param d The declarations comprising the program. - **/ - public AST(PackageDeclaration p, ImportList i, DeclarationList d) { - this(p, i, d, p.line, p.byteOffset); - } - - /** - * Full constructor. - * - * @param p The package declaration. - * @param i The import declarations. - * @param d The declarations comprising the program. - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public AST(PackageDeclaration p, ImportList i, DeclarationList d, int line, - int byteOffset) { - super(line, byteOffset); - packageDeclaration = p; - imports = i; - declarations = d; - symbolTable = globalSymbolTable; - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = - new ASTNodeIterator(packageDeclaration == null ? 2 : 3); - if (packageDeclaration != null) I.children[0] = packageDeclaration; - I.children[I.children.length - 2] = imports; - I.children[I.children.length - 1] = declarations; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. 
- **/ - public Object clone() { - PackageDeclaration p = null; - if (packageDeclaration != null) - p = (PackageDeclaration) packageDeclaration.clone(); - - return new AST(p, (ImportList) imports.clone(), - (DeclarationList) declarations.clone(), -1, -1); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - if (packageDeclaration != null) { - packageDeclaration.write(buffer); - buffer.append(" "); +public class AST extends ASTNode { + /** The symbolTable variable mirrors this variable. */ + public static final SymbolTable globalSymbolTable = new SymbolTable(); + + + /** + * (ø) An optional declaration of the package that generated classes should be a part of. + **/ + public PackageDeclaration packageDeclaration; + /** + * (¬ø) The list of import statements at the top of the source file. + **/ + public ImportList imports; + /** + * (¬ø) The list of classifier, constraint, and inference declarations representing + * the LBJava program. + **/ + public DeclarationList declarations; + + + /** + * Initializes just the statement list. Line and byte offset information are taken from the + * statement list. + * + * @param d The declarations comprising the program. + **/ + public AST(DeclarationList d) { + this(null, new ImportList(), d, d.line, d.byteOffset); } - imports.write(buffer); - buffer.append(" "); - declarations.write(buffer); - } -} + /** + * Initializes both lists. Line and byte offset information are taken from the + * import list. + * + * @param i The import declarations. + * @param d The declarations comprising the program. 
+ **/ + public AST(ImportList i, DeclarationList d) { + this(null, i, d, i.line, i.byteOffset); + } + + /** + * Initializes package declaration and statement list. Line and byte offset information are + * taken from the package declaration. + * + * @param p The package declaration. + * @param d The declarations comprising the program. + **/ + public AST(PackageDeclaration p, DeclarationList d) { + this(p, new ImportList(), d, p.line, p.byteOffset); + } + /** + * Initializes all member variables. Line and byte offset information are taken from the + * package declaration. + * + * @param p The package declaration. + * @param i The import declarations. + * @param d The declarations comprising the program. + **/ + public AST(PackageDeclaration p, ImportList i, DeclarationList d) { + this(p, i, d, p.line, p.byteOffset); + } + + /** + * Full constructor. + * + * @param p The package declaration. + * @param i The import declarations. + * @param d The declarations comprising the program. + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + public AST(PackageDeclaration p, ImportList i, DeclarationList d, int line, int byteOffset) { + super(line, byteOffset); + packageDeclaration = p; + imports = i; + declarations = d; + symbolTable = globalSymbolTable; + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(packageDeclaration == null ? 
2 : 3); + if (packageDeclaration != null) + I.children[0] = packageDeclaration; + I.children[I.children.length - 2] = imports; + I.children[I.children.length - 1] = declarations; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + PackageDeclaration p = null; + if (packageDeclaration != null) + p = (PackageDeclaration) packageDeclaration.clone(); + + return new AST(p, (ImportList) imports.clone(), (DeclarationList) declarations.clone(), -1, + -1); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + if (packageDeclaration != null) { + packageDeclaration.write(buffer); + buffer.append(" "); + } + + imports.write(buffer); + buffer.append(" "); + declarations.write(buffer); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ASTNode.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ASTNode.java index 404d50c4..d46ec4d4 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ASTNode.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ASTNode.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,148 +11,134 @@ /** - * Abstract node class that every AST node extends.

- * - * Every AST node must - *

    - *
  • - * have a unique ID number and store the line and byte offset where it - * was found in the source file. - *
  • - * include a special symbol at the beginning of the Javadoc comment of - * each of its AST node child member variables indicating whether that - * member variable is allowed to be set to null. The symbol - * (ø) is used to indicate that the variable is allowed to be - * null, and the symbol (¬ø) indicates that the - * variable will never be null. - *
  • - * return an ASTNodeIterator through the abstract - * iterator() method that traverses its children in the - * order in which the code would be executed. If the node is composed of - * declarative constructs rather than executable code, the iterator will - * traverse those constructs in the order in which they appear in the - * source. - *
  • - * override the clone() method such that it returns a deep - * copy of itself, except that the line and byte offset of the copy are - * both set to -1. - *
  • - * override the runPass(Pass) method so that it simply calls - * the Pass's run(.) method whose argument type - * is the type of the node. - *
  • - * return a String representation of itself through the - * toString() and write(StringBuffer) methods. - * Only the write(StringBuffer) method need be overridden by - * each node, as toString() will simply invoke - * write(StringBuffer) to produce its result. (This is much - * more efficient than having toString() call its childrens' - * toString() methods recursively through the AST and - * concatenating them all together.) The String produced - * will not be very readable (e.g., it won't contain any new lines), but - * it will be compilable by the LBJava compiler. - *
- * - * Most ASTNodes will also contain more than one constructor. - * There will be one constructor that includes all references to its children - * as well as line and byte offset information, etc. This constructor is - * commonly used by the node's clone() method. In addition, - * there may be at least one constructor designed to be more useful for the - * JavaCUP parser, taking TokenValues as input. - * - * @author Nick Rizzolo + * Abstract node class that every AST node extends.
+ *
+ * + * Every AST node must + *
    + *
  • + * have a unique ID number and store the line and byte offset where it was found in the source file. + *
  • + * include a special symbol at the beginning of the Javadoc comment of each of its AST node child + * member variables indicating whether that member variable is allowed to be set to + * null. The symbol (ø) is used to indicate that the variable is allowed to be + * null, and the symbol (¬ø) indicates that the variable will never be + * null. + *
  • + * return an ASTNodeIterator through the abstract iterator() method that + * traverses its children in the order in which the code would be executed. If the node is composed + * of declarative constructs rather than executable code, the iterator will traverse those + * constructs in the order in which they appear in the source. + *
  • + * override the clone() method such that it returns a deep copy of itself, except that + * the line and byte offset of the copy are both set to -1. + *
  • + * override the runPass(Pass) method so that it simply calls the Pass's + * run(.) method whose argument type is the type of the node. + *
  • + * return a String representation of itself through the toString() and + * write(StringBuffer) methods. Only the write(StringBuffer) method need + * be overridden by each node, as toString() will simply invoke + * write(StringBuffer) to produce its result. (This is much more efficient than having + * toString() call its childrens' toString() methods recursively through + * the AST and concatenating them all together.) The String produced will not be very + * readable (e.g., it won't contain any new lines), but it will be compilable by the LBJava + * compiler. + *
+ * + * Most ASTNodes will also contain more than one constructor. There will be one + * constructor that includes all references to its children as well as line and byte offset + * information, etc. This constructor is commonly used by the node's clone() method. In + * addition, there may be at least one constructor designed to be more useful for the JavaCUP + * parser, taking TokenValues as input. + * + * @author Nick Rizzolo **/ -public abstract class ASTNode -{ - /** Keeps track of how many nodes have been created. */ - private static int nextID = 0; - /** Stores the ID of this node as provided by nextID. */ - public int nodeID; - - /** The line on which the source code represented by this node is found. */ - public int line; - /** - * The byte offset from the beginning of the source file at which the - * source code represented by this node is found. - **/ - public int byteOffset; - /** The table of variable types representing this node's scope. */ - public SymbolTable symbolTable; - - - /** Default constructor. */ - public ASTNode() { this(-1, -1); } - - - /** - * Initializing constructor. This constructor is called via the - * super operator from every other node. - * - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public ASTNode(int line, int byteOffset) { - nodeID = nextID++; - this.line = line; - this.byteOffset = byteOffset; - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - abstract public ASTNodeIterator iterator(); - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. 
- **/ - public Object clone() { - System.err.println("WARNING: clone() not defined for class '" - + this.getClass().getName() + "'"); - return null; - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - abstract public void runPass(Pass pass); - - - /** - * Calls the write(StringBuffer) method to produce a string - * representation of this node. - * - * @return A textual representation of this node. - **/ - public String toString() { - StringBuffer buffer = new StringBuffer(); - write(buffer); - return buffer.toString(); - } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - abstract public void write(StringBuffer buffer); +public abstract class ASTNode { + /** Keeps track of how many nodes have been created. */ + private static int nextID = 0; + /** Stores the ID of this node as provided by nextID. */ + public int nodeID; + + /** The line on which the source code represented by this node is found. */ + public int line; + /** + * The byte offset from the beginning of the source file at which the source code represented by + * this node is found. + **/ + public int byteOffset; + /** The table of variable types representing this node's scope. */ + public SymbolTable symbolTable; + + + /** Default constructor. */ + public ASTNode() { + this(-1, -1); + } + + + /** + * Initializing constructor. This constructor is called via the super operator from + * every other node. + * + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. 
+ **/ + public ASTNode(int line, int byteOffset) { + nodeID = nextID++; + this.line = line; + this.byteOffset = byteOffset; + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + abstract public ASTNodeIterator iterator(); + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + System.err.println("WARNING: clone() not defined for class '" + this.getClass().getName() + + "'"); + return null; + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + abstract public void runPass(Pass pass); + + + /** + * Calls the write(StringBuffer) method to produce a string representation of this + * node. + * + * @return A textual representation of this node. + **/ + public String toString() { + StringBuffer buffer = new StringBuffer(); + write(buffer); + return buffer.toString(); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. + **/ + abstract public void write(StringBuffer buffer); } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ASTNodeIterator.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ASTNodeIterator.java index 12c2e8e2..ded1fcbb 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ASTNodeIterator.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ASTNodeIterator.java @@ -1,67 +1,64 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. 
- * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; /** - * Used to iterate though the children of an AST node. It is assumed that - * the children array never contains a null - * reference. - * - * @author Nick Rizzolo + * Used to iterate though the children of an AST node. It is assumed that the children + * array never contains a null reference. + * + * @author Nick Rizzolo **/ -public class ASTNodeIterator -{ - /** The nodes iterated through by this iterator. */ - public ASTNode[] children; - /** Index into the children array. */ - protected int index; +public class ASTNodeIterator { + /** The nodes iterated through by this iterator. */ + public ASTNode[] children; + /** Index into the children array. */ + protected int index; - /** Initializes index, but not children. */ - public ASTNodeIterator() { index = 0; } + /** Initializes index, but not children. */ + public ASTNodeIterator() { + index = 0; + } - /** - * The children array will have the specified length. - * - * @param l The number of children to iterate through. - **/ - public ASTNodeIterator(int l) { - this(); - children = new ASTNode[l]; - } + /** + * The children array will have the specified length. + * + * @param l The number of children to iterate through. + **/ + public ASTNodeIterator(int l) { + this(); + children = new ASTNode[l]; + } - /** - * Determines whether there are any child nodes left to be accessed. - * - * @return true iff there are child nodes remaining. 
- **/ - public boolean hasNext() { - return children != null && index < children.length; - } + /** + * Determines whether there are any child nodes left to be accessed. + * + * @return true iff there are child nodes remaining. + **/ + public boolean hasNext() { + return children != null && index < children.length; + } - /** - * Returns the next child AST node. - * - * @return The next child AST node. - **/ - public ASTNode next() { - return children == null || index == children.length - ? null : children[index++]; - } + /** + * Returns the next child AST node. + * + * @return The next child AST node. + **/ + public ASTNode next() { + return children == null || index == children.length ? null : children[index++]; + } - /** Restarts the iterator. */ - public void reset() { index = 0; } + /** Restarts the iterator. */ + public void reset() { + index = 0; + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Argument.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Argument.java index 2805394d..eae0a3ce 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Argument.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Argument.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,135 +11,141 @@ /** - * An "argument" is the specification of a classifier's input parameter. 
- * - * @author Nick Rizzolo + * An "argument" is the specification of a classifier's input parameter. + * + * @author Nick Rizzolo **/ -public class Argument extends ASTNode -{ - /** Whether or not the argument was modified as final. */ - private boolean isFinal; - /** (¬ø) The type of the argument. */ - private Type type; - /** (¬ø) The name of the argument. */ - private String name; - - - /** - * Initializing constructor. Line and byte offset information are taken - * from the type's representation. - * - * @param t Reference to the object representing the argument's type. - * @param n Reference to the object representing the argument's name. - **/ - public Argument(Type t, String n) { this(t, n, false); } - - /** - * Initializing constructor. Line and byte offset information are taken - * from the type's representation. - * - * @param t Reference to the object representing the argument's type. - * @param n Reference to the object representing the argument's name. - * @param f Whether or not the argument was modified as final. - **/ - public Argument(Type t, String n, boolean f) { - super(t.line, t.byteOffset); - type = t; - name = n; - isFinal = f; - } - - - /** - * Retrieves the value of the isFinal member variable. - * - * @return The value stored in isFinal. - **/ - public boolean getFinal() { return isFinal; } - - - /** - * Retrieves the type portion of the argument. - * - * @return The value stored in type. - **/ - public Type getType() { return type; } - - - /** - * Retrieves the name portion of the argument. - * - * @return The value stored in name. - **/ - public String getName() { return name; } - - - /** - * The hash code of an Argument is simply the hash code of its - * name. - **/ - public int hashCode() { return name.hashCode(); } - - - /** - * Two Arguments are equivalent when their names and types are - * equivalent. - * - * @param o The object with which this object is to be compared. - * @return true iff the two objects are equivalent. 
- **/ - public boolean equals(Object o) { - if (!(o instanceof Argument)) return false; - Argument a = (Argument) o; - return a.name.equals(name) && a.type.equals(type); - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(1); - I.children[0] = type; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return new Argument((Type) type.clone(), name, isFinal); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - if (isFinal) buffer.append("final "); - type.write(buffer); - buffer.append(" " + name); - } +public class Argument extends ASTNode { + /** Whether or not the argument was modified as final. */ + private boolean isFinal; + /** (¬ø) The type of the argument. */ + private Type type; + /** (¬ø) The name of the argument. */ + private String name; + + + /** + * Initializing constructor. Line and byte offset information are taken from the type's + * representation. + * + * @param t Reference to the object representing the argument's type. + * @param n Reference to the object representing the argument's name. + **/ + public Argument(Type t, String n) { + this(t, n, false); + } + + /** + * Initializing constructor. 
Line and byte offset information are taken from the type's + * representation. + * + * @param t Reference to the object representing the argument's type. + * @param n Reference to the object representing the argument's name. + * @param f Whether or not the argument was modified as final. + **/ + public Argument(Type t, String n, boolean f) { + super(t.line, t.byteOffset); + type = t; + name = n; + isFinal = f; + } + + + /** + * Retrieves the value of the isFinal member variable. + * + * @return The value stored in isFinal. + **/ + public boolean getFinal() { + return isFinal; + } + + + /** + * Retrieves the type portion of the argument. + * + * @return The value stored in type. + **/ + public Type getType() { + return type; + } + + + /** + * Retrieves the name portion of the argument. + * + * @return The value stored in name. + **/ + public String getName() { + return name; + } + + + /** + * The hash code of an Argument is simply the hash code of its name. + **/ + public int hashCode() { + return name.hashCode(); + } + + + /** + * Two Arguments are equivalent when their names and types are equivalent. + * + * @param o The object with which this object is to be compared. + * @return true iff the two objects are equivalent. + **/ + public boolean equals(Object o) { + if (!(o instanceof Argument)) + return false; + Argument a = (Argument) o; + return a.name.equals(name) && a.type.equals(type); + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(1); + I.children[0] = type; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. 
+ **/ + public Object clone() { + return new Argument((Type) type.clone(), name, isFinal); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + if (isFinal) + buffer.append("final "); + type.write(buffer); + buffer.append(" " + name); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ArrayCreationExpression.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ArrayCreationExpression.java index dca31d36..19c9976e 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ArrayCreationExpression.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ArrayCreationExpression.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,160 +11,152 @@ /** - * This class represents an expression creating an array. - * - * @author Nick Rizzolo + * This class represents an expression creating an array. 
+ * + * @author Nick Rizzolo **/ -public class ArrayCreationExpression extends Expression -{ - /** - * (¬ø) The most basic type of elements in the array (i.e., it - * will not be an ArrayType). - **/ - public Type elementType; - /** - * (¬ø) Describes the size of each dimension in the new array. - **/ - public ExpressionList sizes; - /** - * The total number of dimensions, including those for - * which no size is given. - **/ - public int dimensions; - /** (ø) Initial values for the new array. */ - public ArrayInitializer initializer; - - - /** - * Initializing constructor. - * - * @param t The element type. - * @param l The list of dimension size expressions. - * @param d The total number of dimensions in the array. - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public ArrayCreationExpression(Type t, ExpressionList l, int d, int line, - int byteOffset) { - super(line, byteOffset); - elementType = t; - sizes = l; - dimensions = d; - initializer = null; - } - - /** - * Initializing constructor. - * - * @param t The element type. - * @param d The total number of dimensions in the array. - * @param a An initializing expression for the array. - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public ArrayCreationExpression(Type t, int d, ArrayInitializer a, int line, - int byteOffset) { - this(t, new ExpressionList(), d, line, byteOffset); - initializer = a; - } - - - /** - * Returns a hash code value for java hash structures. - * - * @return A hash code for this object. - **/ - public int hashCode() { - return elementType.hashCode() + sizes.hashCode() - + (initializer == null ? 
0 : initializer.hashCode()); - } - - - /** - * Indicates whether some other object is "equal to" this one. - * - * @return true iff this object is the same as the argument. - **/ - public boolean equals(Object o) { - if (!(o instanceof ArrayCreationExpression)) return false; - ArrayCreationExpression a = (ArrayCreationExpression) o; - return elementType.equals(a.elementType) && sizes.equals(a.sizes) - && (initializer == null && a.initializer == null - || initializer != null && initializer.equals(a.initializer)); - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(initializer == null ? 2 : 3); - I.children[0] = elementType; - I.children[1] = sizes; - if (initializer != null) I.children[2] = initializer; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return initializer == null - ? new ArrayCreationExpression((Type) elementType.clone(), - (ExpressionList) sizes.clone(), - dimensions, -1, -1) - : new ArrayCreationExpression((Type) elementType.clone(), - dimensions, - (ArrayInitializer) initializer.clone(), - -1, -1); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. 
- **/ - public void write(StringBuffer buffer) { - if (parenthesized) buffer.append("("); - buffer.append("new "); - elementType.write(buffer); - - int i = 0; - for (ASTNodeIterator I = sizes.iterator(); I.hasNext(); ++i) { - buffer.append("["); - I.next().write(buffer); - buffer.append("]"); +public class ArrayCreationExpression extends Expression { + /** + * (¬ø) The most basic type of elements in the array (i.e., it will not be an + * ArrayType). + **/ + public Type elementType; + /** + * (¬ø) Describes the size of each dimension in the new array. + **/ + public ExpressionList sizes; + /** + * The total number of dimensions, including those for which no size is given. + **/ + public int dimensions; + /** (ø) Initial values for the new array. */ + public ArrayInitializer initializer; + + + /** + * Initializing constructor. + * + * @param t The element type. + * @param l The list of dimension size expressions. + * @param d The total number of dimensions in the array. + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + public ArrayCreationExpression(Type t, ExpressionList l, int d, int line, int byteOffset) { + super(line, byteOffset); + elementType = t; + sizes = l; + dimensions = d; + initializer = null; } - for (; i < dimensions; ++i) buffer.append("[]"); - if (initializer != null) initializer.write(buffer); - if (parenthesized) buffer.append(")"); - } -} + /** + * Initializing constructor. + * + * @param t The element type. + * @param d The total number of dimensions in the array. + * @param a An initializing expression for the array. + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. 
+ **/ + public ArrayCreationExpression(Type t, int d, ArrayInitializer a, int line, int byteOffset) { + this(t, new ExpressionList(), d, line, byteOffset); + initializer = a; + } + + + /** + * Returns a hash code value for java hash structures. + * + * @return A hash code for this object. + **/ + public int hashCode() { + return elementType.hashCode() + sizes.hashCode() + + (initializer == null ? 0 : initializer.hashCode()); + } + + /** + * Indicates whether some other object is "equal to" this one. + * + * @return true iff this object is the same as the argument. + **/ + public boolean equals(Object o) { + if (!(o instanceof ArrayCreationExpression)) + return false; + ArrayCreationExpression a = (ArrayCreationExpression) o; + return elementType.equals(a.elementType) + && sizes.equals(a.sizes) + && (initializer == null && a.initializer == null || initializer != null + && initializer.equals(a.initializer)); + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(initializer == null ? 2 : 3); + I.children[0] = elementType; + I.children[1] = sizes; + if (initializer != null) + I.children[2] = initializer; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return initializer == null ? new ArrayCreationExpression((Type) elementType.clone(), + (ExpressionList) sizes.clone(), dimensions, -1, -1) : new ArrayCreationExpression( + (Type) elementType.clone(), dimensions, (ArrayInitializer) initializer.clone(), -1, + -1); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. 
+ **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + if (parenthesized) + buffer.append("("); + buffer.append("new "); + elementType.write(buffer); + + int i = 0; + for (ASTNodeIterator I = sizes.iterator(); I.hasNext(); ++i) { + buffer.append("["); + I.next().write(buffer); + buffer.append("]"); + } + + for (; i < dimensions; ++i) + buffer.append("[]"); + if (initializer != null) + initializer.write(buffer); + if (parenthesized) + buffer.append(")"); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ArrayInitializer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ArrayInitializer.java index cef6c44d..29207fec 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ArrayInitializer.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ArrayInitializer.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,120 +11,116 @@ /** - * Represents those expressions that can be used to set all the values in an - * array. - * - * @author Nick Rizzolo + * Represents those expressions that can be used to set all the values in an array. 
+ * + * @author Nick Rizzolo **/ -public class ArrayInitializer extends Expression -{ - /** - * (¬ø) The list of expressions that represent the values in the - * array. - **/ - public ExpressionList values; - - - /** Default constructor. */ - public ArrayInitializer() { this(new ExpressionList(), -1, -1); } - - /** - * Initializing constructor. - * - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public ArrayInitializer(int line, int byteOffset) { - this(new ExpressionList(), line, byteOffset); - } - - /** - * Full constructor. - * - * @param v The expressions that represent the values in the - * array. - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public ArrayInitializer(ExpressionList v, int line, int byteOffset) { - super(line, byteOffset); - values = v; - } - - - /** - * Returns a hash code value for java hash structures. - * - * @return A hash code for this object. - **/ - public int hashCode() { return values.hashCode(); } - - - /** - * Indicates whether some other object is "equal to" this one. - * - * @return true iff this object is the same as the argument. - **/ - public boolean equals(Object o) { - if (!(o instanceof ArrayInitializer)) return false; - ArrayInitializer a = (ArrayInitializer) o; - return values.equals(a.values); - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. 
- **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(1); - I.children[0] = values; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return new ArrayInitializer((ExpressionList) values.clone(), -1, -1); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - if (parenthesized) buffer.append("("); - buffer.append("{"); - values.write(buffer); - buffer.append("}"); - if (parenthesized) buffer.append(")"); - } +public class ArrayInitializer extends Expression { + /** + * (¬ø) The list of expressions that represent the values in the array. + **/ + public ExpressionList values; + + + /** Default constructor. */ + public ArrayInitializer() { + this(new ExpressionList(), -1, -1); + } + + /** + * Initializing constructor. + * + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + public ArrayInitializer(int line, int byteOffset) { + this(new ExpressionList(), line, byteOffset); + } + + /** + * Full constructor. + * + * @param v The expressions that represent the values in the array. + * @param line The line on which the source code represented by this node is found. 
+ * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + public ArrayInitializer(ExpressionList v, int line, int byteOffset) { + super(line, byteOffset); + values = v; + } + + + /** + * Returns a hash code value for java hash structures. + * + * @return A hash code for this object. + **/ + public int hashCode() { + return values.hashCode(); + } + + + /** + * Indicates whether some other object is "equal to" this one. + * + * @return true iff this object is the same as the argument. + **/ + public boolean equals(Object o) { + if (!(o instanceof ArrayInitializer)) + return false; + ArrayInitializer a = (ArrayInitializer) o; + return values.equals(a.values); + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(1); + I.children[0] = values; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new ArrayInitializer((ExpressionList) values.clone(), -1, -1); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. 
+ **/ + public void write(StringBuffer buffer) { + if (parenthesized) + buffer.append("("); + buffer.append("{"); + values.write(buffer); + buffer.append("}"); + if (parenthesized) + buffer.append(")"); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ArrayType.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ArrayType.java index ff224b2b..be444e92 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ArrayType.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ArrayType.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -16,114 +13,108 @@ /** - * Class for representing array types. - * - * @author Nick Rizzolo + * Class for representing array types. + * + * @author Nick Rizzolo **/ -public class ArrayType extends Type -{ - /** (¬ø) Represents the type of each element in the array. */ - public Type type; - - - /** - * Initializing constructor. Line and byte offset information are taken - * from the argument. - * - * @param t The type of each element in the array. - */ - public ArrayType(Type t) { - super(t.line, t.byteOffset); - type = t; - } - - - /** - * Returns an object representing the class that this type - * represents. - * - * @return An object representing the class that this type - * represents. 
- **/ - public Class typeClass() { - if (myClass == null) { - myClass = type.typeClass(); - if (myClass != null) { - try { myClass = Array.newInstance(myClass, 0).getClass(); } - catch (Exception e) { - System.err.println("Could not get the class for an array with " - + "element type '" + type + "'. Aborting..."); - System.exit(1); +public class ArrayType extends Type { + /** (¬ø) Represents the type of each element in the array. */ + public Type type; + + + /** + * Initializing constructor. Line and byte offset information are taken from the argument. + * + * @param t The type of each element in the array. + */ + public ArrayType(Type t) { + super(t.line, t.byteOffset); + type = t; + } + + + /** + * Returns an object representing the class that this type represents. + * + * @return An object representing the class that this type represents. + **/ + public Class typeClass() { + if (myClass == null) { + myClass = type.typeClass(); + if (myClass != null) { + try { + myClass = Array.newInstance(myClass, 0).getClass(); + } catch (Exception e) { + System.err.println("Could not get the class for an array with " + + "element type '" + type + "'. Aborting..."); + System.exit(1); + } + } } - } + + return myClass; + } + + + /** + * Two ArrayTypes are equivalent if their child types are equivalent. + * + * @param t The Type whose equality with this object needs to be tested. + * @return true if the two Types are equal, and false + * otherwise. + **/ + public boolean equals(Object t) { + return t instanceof ArrayType && type.equals(((ArrayType) t).type); + } + + + /** A hash code based on the hash code of {@link #type}. */ + public int hashCode() { + return 31 * type.hashCode() + 17; } - return myClass; - } - - - /** - * Two ArrayTypes are equivalent if their child types are - * equivalent. - * - * @param t The Type whose equality with this object needs to - * be tested. - * @return true if the two Types are equal, and - * false otherwise. 
- **/ - public boolean equals(Object t) { - return t instanceof ArrayType && type.equals(((ArrayType) t).type); - } - - - /** A hash code based on the hash code of {@link #type}. */ - public int hashCode() { - return 31 * type.hashCode() + 17; - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(1); - I.children[0] = type; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { return new ArrayType((Type) type.clone()); } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - type.write(buffer); - buffer.append("[]"); - } -} + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(1); + I.children[0] = type; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new ArrayType((Type) type.clone()); + } + + + /** + * Ensures that the correct run() method is called for this type of node. 
+ * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + type.write(buffer); + buffer.append("[]"); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/AssertStatement.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/AssertStatement.java index c8eb9bfb..fbfb6307 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/AssertStatement.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/AssertStatement.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,132 +11,122 @@ /** - * Represents an assertion statement. - * - * @author Nick Rizzolo + * Represents an assertion statement. + * + * @author Nick Rizzolo **/ -public class AssertStatement extends Statement -{ - /** - * (¬ø) The condition that must hold; otherwise, an assertion - * error is generated. - **/ - public Expression condition; - /** - * (ø) Represents the error message in the assertion error, if any. - **/ - public Expression message; - - - /** - * Initializing constructor. 
- * - * @param c The condition that must hold. - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public AssertStatement(Expression c, int line, int byteOffset) { - this(c, null, line, byteOffset); - } - - /** - * Full constructor. - * - * @param c The condition that must hold. - * @param m The error message. - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public AssertStatement(Expression c, Expression m, int line, int byteOffset) - { - super(line, byteOffset); - condition = c; - message = m; - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(message == null ? 1 : 2); - I.children[0] = condition; - if (message != null) I.children[1] = message; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return new AssertStatement( - (Expression) condition.clone(), - (message == null ? null : (Expression) message.clone()), -1, -1); - } - - - /** Returns a hash code for this {@link ASTNode}. */ - public int hashCode() { - int result = 31 * condition.hashCode(); - if (message != null) result += 7 * message.hashCode(); - return result; - } - - - /** - * Distinguishes this {@link ASTNode} from other objects according to its - * contents recursively. - * - * @param o Another object. - * @return true iff this node is equal to o. 
- **/ - public boolean equals(Object o) { - if (!(o instanceof AssertStatement)) return false; - AssertStatement a = (AssertStatement) o; - return - condition.equals(a.condition) - && (message == null ? a.message == null : message.equals(a.message)); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - buffer.append("assert "); - condition.write(buffer); - if (message != null) { - buffer.append(" : "); - message.write(buffer); +public class AssertStatement extends Statement { + /** + * (¬ø) The condition that must hold; otherwise, an assertion error is generated. + **/ + public Expression condition; + /** + * (ø) Represents the error message in the assertion error, if any. + **/ + public Expression message; + + + /** + * Initializing constructor. + * + * @param c The condition that must hold. + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + public AssertStatement(Expression c, int line, int byteOffset) { + this(c, null, line, byteOffset); } - buffer.append(";"); - } -} + /** + * Full constructor. + * + * @param c The condition that must hold. + * @param m The error message. + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. 
+ **/ + public AssertStatement(Expression c, Expression m, int line, int byteOffset) { + super(line, byteOffset); + condition = c; + message = m; + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(message == null ? 1 : 2); + I.children[0] = condition; + if (message != null) + I.children[1] = message; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new AssertStatement((Expression) condition.clone(), (message == null ? null + : (Expression) message.clone()), -1, -1); + } + + + /** Returns a hash code for this {@link ASTNode}. */ + public int hashCode() { + int result = 31 * condition.hashCode(); + if (message != null) + result += 7 * message.hashCode(); + return result; + } + + + /** + * Distinguishes this {@link ASTNode} from other objects according to its contents recursively. + * + * @param o Another object. + * @return true iff this node is equal to o. + **/ + public boolean equals(Object o) { + if (!(o instanceof AssertStatement)) + return false; + AssertStatement a = (AssertStatement) o; + return condition.equals(a.condition) + && (message == null ? a.message == null : message.equals(a.message)); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. 
+ **/ + public void write(StringBuffer buffer) { + buffer.append("assert "); + condition.write(buffer); + if (message != null) { + buffer.append(" : "); + message.write(buffer); + } + buffer.append(";"); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Assignment.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Assignment.java index f4317fff..0e5333cd 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Assignment.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Assignment.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,112 +11,109 @@ /** - * Represents the assignment of a value to a storage location. - * - * @author Nick Rizzolo + * Represents the assignment of a value to a storage location. + * + * @author Nick Rizzolo **/ -public class Assignment extends StatementExpression -{ - /** (¬ø) The assignment operation. */ - public Operator operation; - /** (¬ø) The left hand side of the assignment. */ - public Expression left; - /** (¬ø) The right hand side of the assignment. */ - public Expression right; - - - /** - * Initializing constructor. Line and byte offset information are taken - * from the representation of the operator. - * - * @param op Reference to the operator's representation. - * @param l Reference to the left hand side's representation. 
- * @param r Reference to the right hand side's representation. - **/ - public Assignment(Operator op, Expression l, Expression r) { - super(op.line, op.byteOffset); - operation = op; - left = l; - right = r; - } - - - /** - * Returns a hash code value for java hash structures. - * - * @return A hash code for this object. - **/ - public int hashCode() { - return left.hashCode() + operation.hashCode() + right.hashCode(); - } - - - /** - * Indicates whether some other object is "equal to" this one. - * - * @return true iff this object is the same as the argument. - **/ - public boolean equals(Object o) { - if (!(o instanceof Assignment)) return false; - Assignment a = (Assignment) o; - return left.equals(a.left) && operation.equals(a.operation) - && right.equals(a.right); - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(3); - I.children[0] = right; - I.children[1] = operation; - I.children[2] = left; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return new Assignment((Operator) operation.clone(), - (Expression) left.clone(), - (Expression) right.clone()); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. 
- **/ - public void write(StringBuffer buffer) { - if (parenthesized) buffer.append("("); - left.write(buffer); - buffer.append(" "); - operation.write(buffer); - buffer.append(" "); - right.write(buffer); - if (parenthesized) buffer.append(")"); - } +public class Assignment extends StatementExpression { + /** (¬ø) The assignment operation. */ + public Operator operation; + /** (¬ø) The left hand side of the assignment. */ + public Expression left; + /** (¬ø) The right hand side of the assignment. */ + public Expression right; + + + /** + * Initializing constructor. Line and byte offset information are taken from the representation + * of the operator. + * + * @param op Reference to the operator's representation. + * @param l Reference to the left hand side's representation. + * @param r Reference to the right hand side's representation. + **/ + public Assignment(Operator op, Expression l, Expression r) { + super(op.line, op.byteOffset); + operation = op; + left = l; + right = r; + } + + + /** + * Returns a hash code value for java hash structures. + * + * @return A hash code for this object. + **/ + public int hashCode() { + return left.hashCode() + operation.hashCode() + right.hashCode(); + } + + + /** + * Indicates whether some other object is "equal to" this one. + * + * @return true iff this object is the same as the argument. + **/ + public boolean equals(Object o) { + if (!(o instanceof Assignment)) + return false; + Assignment a = (Assignment) o; + return left.equals(a.left) && operation.equals(a.operation) && right.equals(a.right); + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. 
+ **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(3); + I.children[0] = right; + I.children[1] = operation; + I.children[2] = left; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new Assignment((Operator) operation.clone(), (Expression) left.clone(), + (Expression) right.clone()); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + if (parenthesized) + buffer.append("("); + left.write(buffer); + buffer.append(" "); + operation.write(buffer); + buffer.append(" "); + right.write(buffer); + if (parenthesized) + buffer.append(")"); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/AtLeastQuantifierExpression.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/AtLeastQuantifierExpression.java index 0b5f7cba..a2225c9c 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/AtLeastQuantifierExpression.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/AtLeastQuantifierExpression.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -17,144 +14,126 @@ /** - * An "at least" quantifier has the form: - *
- * atleast expression of argument in - * (expression) constraint-expression - *
- * where the first expression must evaluate to an - * int, the second expression must evaluate - * to a Collection, and the "at least" quantifier expression is - * sastisfied iff when taking settings of argument from - * the Collection, constraint-expression is - * satisfied at least as many times as the integer the first - * expression evaluates to. - * - * @author Nick Rizzolo + * An "at least" quantifier has the form:
atleast expression of + * argument in (expression) constraint-expression
where the first + * expression must evaluate to an int, the second + * expression must evaluate to a Collection, and the "at least" + * quantifier expression is sastisfied iff when taking settings of argument from + * the Collection, constraint-expression is satisfied at least as + * many times as the integer the first expression evaluates to. + * + * @author Nick Rizzolo **/ -public class AtLeastQuantifierExpression - extends QuantifiedConstraintExpression -{ - /** - * (¬ø) This expression evaluates to an integer representing the - * minimum number of objects that must satisfy the child constraint - * expression in order for this quantified constraint expression to be - * satisfied. - **/ - public Expression lowerBound; - /** - * Filled in by SemanticAnalysis, this flag is set if - * lowerBound contains any quantified variables. - **/ - public boolean lowerBoundIsQuantified; - - - /** - * Full constructor. - * - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - * @param lb The lower bound expression. - * @param a The quantification variable specification. - * @param c Evaluates to the collection of objects. - * @param co The quantified constraint. - **/ - public AtLeastQuantifierExpression(int line, int byteOffset, Expression lb, - Argument a, Expression c, - ConstraintExpression co) { - super(line, byteOffset, a, c, co); - lowerBound = lb; - } - - /** - * Parser's constructor. Line and byte offset information are taken from - * the token. - * - * @param t The token containing line and byte offset information. - * @param lb The lower bound expression. - * @param a The quantification variable specification. - * @param c Evaluates to the collection of objects. - * @param co The quantified constraint. 
- **/ - public AtLeastQuantifierExpression(TokenValue t, Expression lb, Argument a, - Expression c, ConstraintExpression co) { - this(t.line, t.byteOffset, lb, a, c, co); - } - - - /** - * Returns a set of Arguments storing the name and type of - * each variable that is a subexpression of this expression. This method - * cannot be run before SemanticAnalysis runs. - **/ - public HashSet getVariableTypes() { - HashSet result = super.getVariableTypes(); - result.addAll(lowerBound.getVariableTypes()); - return result; - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(4); - I.children[0] = lowerBound; - I.children[1] = argument; - I.children[2] = collection; - I.children[3] = constraint; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return - new AtLeastQuantifierExpression( - -1, -1, (Expression) lowerBound.clone(), - (Argument) argument.clone(), - (Expression) collection.clone(), - (ConstraintExpression) constraint.clone()); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. 
- **/ - public void write(StringBuffer buffer) { - buffer.append("atleast "); - lowerBound.write(buffer); - buffer.append(" of "); - argument.write(buffer); - buffer.append(" in ("); - collection.write(buffer); - buffer.append(") "); - constraint.write(buffer); - } +public class AtLeastQuantifierExpression extends QuantifiedConstraintExpression { + /** + * (¬ø) This expression evaluates to an integer representing the minimum number of + * objects that must satisfy the child constraint expression in order for this quantified + * constraint expression to be satisfied. + **/ + public Expression lowerBound; + /** + * Filled in by SemanticAnalysis, this flag is set if lowerBound + * contains any quantified variables. + **/ + public boolean lowerBoundIsQuantified; + + + /** + * Full constructor. + * + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + * @param lb The lower bound expression. + * @param a The quantification variable specification. + * @param c Evaluates to the collection of objects. + * @param co The quantified constraint. + **/ + public AtLeastQuantifierExpression(int line, int byteOffset, Expression lb, Argument a, + Expression c, ConstraintExpression co) { + super(line, byteOffset, a, c, co); + lowerBound = lb; + } + + /** + * Parser's constructor. Line and byte offset information are taken from the token. + * + * @param t The token containing line and byte offset information. + * @param lb The lower bound expression. + * @param a The quantification variable specification. + * @param c Evaluates to the collection of objects. + * @param co The quantified constraint. 
+ **/ + public AtLeastQuantifierExpression(TokenValue t, Expression lb, Argument a, Expression c, + ConstraintExpression co) { + this(t.line, t.byteOffset, lb, a, c, co); + } + + + /** + * Returns a set of Arguments storing the name and type of each variable that is a + * subexpression of this expression. This method cannot be run before + * SemanticAnalysis runs. + **/ + public HashSet getVariableTypes() { + HashSet result = super.getVariableTypes(); + result.addAll(lowerBound.getVariableTypes()); + return result; + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(4); + I.children[0] = lowerBound; + I.children[1] = argument; + I.children[2] = collection; + I.children[3] = constraint; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new AtLeastQuantifierExpression(-1, -1, (Expression) lowerBound.clone(), + (Argument) argument.clone(), (Expression) collection.clone(), + (ConstraintExpression) constraint.clone()); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. 
+ **/ + public void write(StringBuffer buffer) { + buffer.append("atleast "); + lowerBound.write(buffer); + buffer.append(" of "); + argument.write(buffer); + buffer.append(" in ("); + collection.write(buffer); + buffer.append(") "); + constraint.write(buffer); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/AtMostQuantifierExpression.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/AtMostQuantifierExpression.java index 06372d06..85007005 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/AtMostQuantifierExpression.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/AtMostQuantifierExpression.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -17,144 +14,126 @@ /** - * An "at most" quantifier has the form: - *
- * atmost expression of argument in - * (expression) constraint-expression - *
- * where the first expression must evaluate to an - * int, the second expression must evaluate - * to a Collection, and the "at most" quantifier expression is - * sastisfied iff when taking settings of argument from - * the Collection, constraint-expression is - * satisfied at most as many times as the integer the first - * expression evaluates to. - * - * @author Nick Rizzolo + * An "at most" quantifier has the form:
atmost expression of + * argument in (expression) constraint-expression
where the first + * expression must evaluate to an int, the second + * expression must evaluate to a Collection, and the "at most" + * quantifier expression is sastisfied iff when taking settings of argument from + * the Collection, constraint-expression is satisfied at most as + * many times as the integer the first expression evaluates to. + * + * @author Nick Rizzolo **/ -public class AtMostQuantifierExpression - extends QuantifiedConstraintExpression -{ - /** - * (¬ø) This expression evaluates to an integer representing the - * maximum number of objects that must satisfy the child constraint - * expression in order for this quantified constraint expression to be - * satisfied. - **/ - public Expression upperBound; - /** - * Filled in by SemanticAnalysis, this flag is set if - * upperBound contains any quantified variables. - **/ - public boolean upperBoundIsQuantified; - - - /** - * Full constructor. - * - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - * @param ub The upper bound expression. - * @param a The quantification variable specification. - * @param c Evaluates to the collection of objects. - * @param co The quantified constraint. - **/ - public AtMostQuantifierExpression(int line, int byteOffset, - Expression ub, Argument a, Expression c, - ConstraintExpression co) { - super(line, byteOffset, a, c, co); - upperBound = ub; - } - - /** - * Parser's constructor. Line and byte offset information are taken from - * the token. - * - * @param t The token containing line and byte offset information. - * @param ub The upper bound expression. - * @param a The quantification variable specification. - * @param c Evaluates to the collection of objects. - * @param co The quantified constraint. 
- **/ - public AtMostQuantifierExpression(TokenValue t, Expression ub, Argument a, - Expression c, ConstraintExpression co) { - this(t.line, t.byteOffset, ub, a, c, co); - } - - - /** - * Returns a set of Arguments storing the name and type of - * each variable that is a subexpression of this expression. This method - * cannot be run before SemanticAnalysis runs. - **/ - public HashSet getVariableTypes() { - HashSet result = super.getVariableTypes(); - result.addAll(upperBound.getVariableTypes()); - return result; - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(4); - I.children[0] = upperBound; - I.children[1] = argument; - I.children[2] = collection; - I.children[3] = constraint; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return - new AtMostQuantifierExpression( - -1, -1, (Expression) upperBound.clone(), - (Argument) argument.clone(), - (Expression) collection.clone(), - (ConstraintExpression) constraint.clone()); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. 
- **/ - public void write(StringBuffer buffer) { - buffer.append("atmost "); - upperBound.write(buffer); - buffer.append(" of "); - argument.write(buffer); - buffer.append(" in ("); - collection.write(buffer); - buffer.append(") "); - constraint.write(buffer); - } +public class AtMostQuantifierExpression extends QuantifiedConstraintExpression { + /** + * (¬ø) This expression evaluates to an integer representing the maximum number of + * objects that must satisfy the child constraint expression in order for this quantified + * constraint expression to be satisfied. + **/ + public Expression upperBound; + /** + * Filled in by SemanticAnalysis, this flag is set if upperBound + * contains any quantified variables. + **/ + public boolean upperBoundIsQuantified; + + + /** + * Full constructor. + * + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + * @param ub The upper bound expression. + * @param a The quantification variable specification. + * @param c Evaluates to the collection of objects. + * @param co The quantified constraint. + **/ + public AtMostQuantifierExpression(int line, int byteOffset, Expression ub, Argument a, + Expression c, ConstraintExpression co) { + super(line, byteOffset, a, c, co); + upperBound = ub; + } + + /** + * Parser's constructor. Line and byte offset information are taken from the token. + * + * @param t The token containing line and byte offset information. + * @param ub The upper bound expression. + * @param a The quantification variable specification. + * @param c Evaluates to the collection of objects. + * @param co The quantified constraint. 
+ **/ + public AtMostQuantifierExpression(TokenValue t, Expression ub, Argument a, Expression c, + ConstraintExpression co) { + this(t.line, t.byteOffset, ub, a, c, co); + } + + + /** + * Returns a set of Arguments storing the name and type of each variable that is a + * subexpression of this expression. This method cannot be run before + * SemanticAnalysis runs. + **/ + public HashSet getVariableTypes() { + HashSet result = super.getVariableTypes(); + result.addAll(upperBound.getVariableTypes()); + return result; + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(4); + I.children[0] = upperBound; + I.children[1] = argument; + I.children[2] = collection; + I.children[3] = constraint; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new AtMostQuantifierExpression(-1, -1, (Expression) upperBound.clone(), + (Argument) argument.clone(), (Expression) collection.clone(), + (ConstraintExpression) constraint.clone()); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. 
+ **/ + public void write(StringBuffer buffer) { + buffer.append("atmost "); + upperBound.write(buffer); + buffer.append(" of "); + argument.write(buffer); + buffer.append(" in ("); + collection.write(buffer); + buffer.append(") "); + constraint.write(buffer); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/BinaryConstraintExpression.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/BinaryConstraintExpression.java index d83c324c..48173045 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/BinaryConstraintExpression.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/BinaryConstraintExpression.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,91 +11,86 @@ /** - * This class represents a constraint expression involving a binary operator. - * - * @author Nick Rizzolo + * This class represents a constraint expression involving a binary operator. + * + * @author Nick Rizzolo **/ -public class BinaryConstraintExpression extends ConstraintExpression -{ - /** (¬ø) The binary operation. */ - public Operator operation; - /** (¬ø) The left hand side of the binary expression. */ - public ConstraintExpression left; - /** (¬ø) The right hand side of the binary expression. 
*/ - public ConstraintExpression right; +public class BinaryConstraintExpression extends ConstraintExpression { + /** (¬ø) The binary operation. */ + public Operator operation; + /** (¬ø) The left hand side of the binary expression. */ + public ConstraintExpression left; + /** (¬ø) The right hand side of the binary expression. */ + public ConstraintExpression right; - /** - * Initializing constructor. Line and byte offset information are taken - * from the representation of the operator. - * - * @param op Reference to the operator's representation. - * @param l Reference to the left hand side's representation. - * @param r Reference to the right hand side's representation. - **/ - public BinaryConstraintExpression(Operator op, ConstraintExpression l, - ConstraintExpression r) { - super(op.line, op.byteOffset); - operation = op; - left = l; - right = r; - } + /** + * Initializing constructor. Line and byte offset information are taken from the representation + * of the operator. + * + * @param op Reference to the operator's representation. + * @param l Reference to the left hand side's representation. + * @param r Reference to the right hand side's representation. + **/ + public BinaryConstraintExpression(Operator op, ConstraintExpression l, ConstraintExpression r) { + super(op.line, op.byteOffset); + operation = op; + left = l; + right = r; + } - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(3); - I.children[0] = left; - I.children[1] = operation; - I.children[2] = right; - return I; - } + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. 
+ **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(3); + I.children[0] = left; + I.children[1] = operation; + I.children[2] = right; + return I; + } - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return - new BinaryConstraintExpression((Operator) operation.clone(), - (ConstraintExpression) left.clone(), - (ConstraintExpression) right.clone()); - } + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new BinaryConstraintExpression((Operator) operation.clone(), + (ConstraintExpression) left.clone(), (ConstraintExpression) right.clone()); + } - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - if (parenthesized) buffer.append("("); - left.write(buffer); - buffer.append(" "); - operation.write(buffer); - buffer.append(" "); - right.write(buffer); - if (parenthesized) buffer.append(")"); - } + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. 
+ **/ + public void write(StringBuffer buffer) { + if (parenthesized) + buffer.append("("); + left.write(buffer); + buffer.append(" "); + operation.write(buffer); + buffer.append(" "); + right.write(buffer); + if (parenthesized) + buffer.append(")"); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/BinaryExpression.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/BinaryExpression.java index 03aa8518..c8b2cab8 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/BinaryExpression.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/BinaryExpression.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,112 +11,109 @@ /** - * This class represents an expression involving a binary operator. - * - * @author Nick Rizzolo + * This class represents an expression involving a binary operator. + * + * @author Nick Rizzolo **/ -public class BinaryExpression extends Expression -{ - /** (¬ø) The binary operation. */ - public Operator operation; - /** (¬ø) The left hand side of the binary expression. */ - public Expression left; - /** (¬ø) The right hand side of the binary expression. */ - public Expression right; - - - /** - * Initializing constructor. Line and byte offset information are taken - * from the representation of the operator. - * - * @param op Reference to the operator's representation. 
- * @param l Reference to the left hand side's representation. - * @param r Reference to the right hand side's representation. - **/ - public BinaryExpression(Operator op, Expression l, Expression r) { - super(op.line, op.byteOffset); - operation = op; - left = l; - right = r; - } - - - /** - * Returns a hash code value for java hash structures. - * - * @return A hash code for this object. - **/ - public int hashCode() { - return left.hashCode() + operation.hashCode() + right.hashCode(); - } - - - /** - * Indicates whether some other object is "equal to" this one. - * - * @return true iff this object is the same as the argument. - **/ - public boolean equals(Object o) { - if (!(o instanceof BinaryExpression)) return false; - BinaryExpression b = (BinaryExpression) o; - return left.equals(b.left) && operation.equals(b.operation) - && right.equals(b.right); - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(3); - I.children[0] = left; - I.children[1] = operation; - I.children[2] = right; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return new BinaryExpression((Operator) operation.clone(), - (Expression) left.clone(), - (Expression) right.clone()); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. 
- **/ - public void write(StringBuffer buffer) { - if (parenthesized) buffer.append("("); - left.write(buffer); - buffer.append(" "); - operation.write(buffer); - buffer.append(" "); - right.write(buffer); - if (parenthesized) buffer.append(")"); - } +public class BinaryExpression extends Expression { + /** (¬ø) The binary operation. */ + public Operator operation; + /** (¬ø) The left hand side of the binary expression. */ + public Expression left; + /** (¬ø) The right hand side of the binary expression. */ + public Expression right; + + + /** + * Initializing constructor. Line and byte offset information are taken from the representation + * of the operator. + * + * @param op Reference to the operator's representation. + * @param l Reference to the left hand side's representation. + * @param r Reference to the right hand side's representation. + **/ + public BinaryExpression(Operator op, Expression l, Expression r) { + super(op.line, op.byteOffset); + operation = op; + left = l; + right = r; + } + + + /** + * Returns a hash code value for java hash structures. + * + * @return A hash code for this object. + **/ + public int hashCode() { + return left.hashCode() + operation.hashCode() + right.hashCode(); + } + + + /** + * Indicates whether some other object is "equal to" this one. + * + * @return true iff this object is the same as the argument. + **/ + public boolean equals(Object o) { + if (!(o instanceof BinaryExpression)) + return false; + BinaryExpression b = (BinaryExpression) o; + return left.equals(b.left) && operation.equals(b.operation) && right.equals(b.right); + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. 
+ **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(3); + I.children[0] = left; + I.children[1] = operation; + I.children[2] = right; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new BinaryExpression((Operator) operation.clone(), (Expression) left.clone(), + (Expression) right.clone()); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + if (parenthesized) + buffer.append("("); + left.write(buffer); + buffer.append(" "); + operation.write(buffer); + buffer.append(" "); + right.write(buffer); + if (parenthesized) + buffer.append(")"); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Block.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Block.java index 284e923f..b12ba266 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Block.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Block.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,121 +11,120 @@ /** - * A block is just a list of statements in between curly braces. - * - * @author Nick Rizzolo + * A block is just a list of statements in between curly braces. + * + * @author Nick Rizzolo **/ -public class Block extends Statement -{ - /** (¬ø) The list of statements. */ - private StatementList statements; - - /** - * Initializing constructor. - * - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public Block(int line, int byteOffset) { - super(line, byteOffset); - statements = new StatementList(); - } - - /** - * Initializing constructor. Line and byte offset information are taken - * from the statement list's representation. - * - * @param list The statement list. - **/ - public Block(StatementList list) { - super(list.line, list.byteOffset); - statements = list; - } - - - /** - * Returns the statement list. - * - * @return This block's statement list. - **/ - public StatementList statementList() { return statements; } - - - /** - * Transforms the list into an array of statements. - * - * @return An array of statements containing references to every statement - * in the list. - **/ - public Statement[] toArray() { return statements.toArray(); } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. 
- **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(1); - I.children[0] = statements; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return new Block((StatementList) statements.clone()); - } - - - /** Returns a hash code for this {@link ASTNode}. */ - public int hashCode() { return statements.hashCode(); } - - - /** - * Distinguishes this {@link ASTNode} from other objects according to its - * contents recursively. - * - * @param o Another object. - * @return true iff this node is equal to o. - **/ - public boolean equals(Object o) { - if (!(o instanceof Block)) return false; - Block b = (Block) o; - return statements.equals(b.statements); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - buffer.append("{ "); - statements.write(buffer); - buffer.append(" }"); - } +public class Block extends Statement { + /** (¬ø) The list of statements. */ + private StatementList statements; + + /** + * Initializing constructor. + * + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + public Block(int line, int byteOffset) { + super(line, byteOffset); + statements = new StatementList(); + } + + /** + * Initializing constructor. 
Line and byte offset information are taken from the statement + * list's representation. + * + * @param list The statement list. + **/ + public Block(StatementList list) { + super(list.line, list.byteOffset); + statements = list; + } + + + /** + * Returns the statement list. + * + * @return This block's statement list. + **/ + public StatementList statementList() { + return statements; + } + + + /** + * Transforms the list into an array of statements. + * + * @return An array of statements containing references to every statement in the list. + **/ + public Statement[] toArray() { + return statements.toArray(); + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(1); + I.children[0] = statements; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new Block((StatementList) statements.clone()); + } + + + /** Returns a hash code for this {@link ASTNode}. */ + public int hashCode() { + return statements.hashCode(); + } + + + /** + * Distinguishes this {@link ASTNode} from other objects according to its contents recursively. + * + * @param o Another object. + * @return true iff this node is equal to o. + **/ + public boolean equals(Object o) { + if (!(o instanceof Block)) + return false; + Block b = (Block) o; + return statements.equals(b.statements); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. 
The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + buffer.append("{ "); + statements.write(buffer); + buffer.append(" }"); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/BreakStatement.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/BreakStatement.java index e5ee604a..19c8da69 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/BreakStatement.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/BreakStatement.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,91 +11,90 @@ /** - * Represents a break statement. - * - * @author Nick Rizzolo + * Represents a break statement. + * + * @author Nick Rizzolo **/ -public class BreakStatement extends Statement -{ - /** (ø) The label identifying the loop to break out of, if any. */ - public String label; - - - /** - * Full constructor. - * - * @param l The label of the loop to break out of. - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. 
- **/ - public BreakStatement(String l, int line, int byteOffset) { - super(line, byteOffset); - label = l; - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { return new ASTNodeIterator(0); } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { return new BreakStatement(label, -1, -1); } - - - /** Returns a hash code for this {@link ASTNode}. */ - public int hashCode() { - return label != null ? label.hashCode() : 7; - } - - - /** - * Distinguishes this {@link ASTNode} from other objects according to its - * contents recursively. - * - * @param o Another object. - * @return true iff this node is equal to o. - **/ - public boolean equals(Object o) { - if (!(o instanceof BreakStatement)) return false; - BreakStatement b = (BreakStatement) o; - return label == null ? b.label == null : label.equals(b.label); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - buffer.append("break"); - if (label != null) buffer.append(" " + label); - buffer.append(";"); - } +public class BreakStatement extends Statement { + /** (ø) The label identifying the loop to break out of, if any. */ + public String label; + + + /** + * Full constructor. + * + * @param l The label of the loop to break out of. 
+ * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + public BreakStatement(String l, int line, int byteOffset) { + super(line, byteOffset); + label = l; + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + return new ASTNodeIterator(0); + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new BreakStatement(label, -1, -1); + } + + + /** Returns a hash code for this {@link ASTNode}. */ + public int hashCode() { + return label != null ? label.hashCode() : 7; + } + + + /** + * Distinguishes this {@link ASTNode} from other objects according to its contents recursively. + * + * @param o Another object. + * @return true iff this node is equal to o. + **/ + public boolean equals(Object o) { + if (!(o instanceof BreakStatement)) + return false; + BreakStatement b = (BreakStatement) o; + return label == null ? b.label == null : label.equals(b.label); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. 
+ **/ + public void write(StringBuffer buffer) { + buffer.append("break"); + if (label != null) + buffer.append(" " + label); + buffer.append(";"); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/CastExpression.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/CastExpression.java index cd4dee90..dd156573 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/CastExpression.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/CastExpression.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,100 +11,96 @@ /** - * Representation of an expression that casts a value to another type. - * - * @author Nick Rizzolo + * Representation of an expression that casts a value to another type. + * + * @author Nick Rizzolo **/ -public class CastExpression extends Expression -{ - /** (¬ø) The type to cast to. */ - public Type type; - /** (¬ø) The expression whose value should be casted. */ - public Expression expression; - - - /** - * Initializing constructor. Line and byte offset information are taken - * from the type's representation. - * - * @param t Reference to the object representing the cast's type. - * @param e Reference to the object representing the expression to cast. 
- **/ - public CastExpression(Type t, Expression e) { - super(t.line, t.byteOffset); - type = t; - expression = e; - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(2); - I.children[0] = type; - I.children[1] = expression; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return - new CastExpression((Type) type.clone(), - (Expression) expression.clone()); - } - - - /** Determines if this object is equivalent to another object. */ - public boolean equals(Object o) { - if (!(o instanceof CastExpression)) return false; - CastExpression c = (CastExpression) o; - return type.equals(c.type) && expression.equals(c.expression); - } - - - /** - * A hash code based on the hash codes of {@link #type} and - * {@link #expression}. - **/ - public int hashCode() { - return 31 * type.hashCode() + 17 * expression.hashCode(); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - if (parenthesized) buffer.append("("); - buffer.append("("); - type.write(buffer); - buffer.append(") "); - expression.write(buffer); - if (parenthesized) buffer.append(")"); - } +public class CastExpression extends Expression { + /** (¬ø) The type to cast to. 
*/ + public Type type; + /** (¬ø) The expression whose value should be casted. */ + public Expression expression; + + + /** + * Initializing constructor. Line and byte offset information are taken from the type's + * representation. + * + * @param t Reference to the object representing the cast's type. + * @param e Reference to the object representing the expression to cast. + **/ + public CastExpression(Type t, Expression e) { + super(t.line, t.byteOffset); + type = t; + expression = e; + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(2); + I.children[0] = type; + I.children[1] = expression; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new CastExpression((Type) type.clone(), (Expression) expression.clone()); + } + + + /** Determines if this object is equivalent to another object. */ + public boolean equals(Object o) { + if (!(o instanceof CastExpression)) + return false; + CastExpression c = (CastExpression) o; + return type.equals(c.type) && expression.equals(c.expression); + } + + + /** + * A hash code based on the hash codes of {@link #type} and {@link #expression}. + **/ + public int hashCode() { + return 31 * type.hashCode() + 17 * expression.hashCode(); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. 
+ * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + if (parenthesized) + buffer.append("("); + buffer.append("("); + type.write(buffer); + buffer.append(") "); + expression.write(buffer); + if (parenthesized) + buffer.append(")"); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/CatchClause.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/CatchClause.java index 06451fc3..f9b808d7 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/CatchClause.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/CatchClause.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,104 +11,97 @@ /** - * Represents a catch clause on a try statement. - * - * @author Nick Rizzolo + * Represents a catch clause on a try statement. + * + * @author Nick Rizzolo **/ -public class CatchClause extends ASTNode -{ - /** (¬ø) The catch's input specification */ - public Argument argument; - /** (¬ø) The code to execute when an exception is caught. */ - public Block block; - - - /** - * Full constructor. - * - * @param a The catch's input specification. - * @param b The code to execute when an exception is caught. - * @param line The line on which the source code represented by this - * node is found. 
- * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public CatchClause(Argument a, Block b, int line, int byteOffset) { - super(line, byteOffset); - argument = a; - block = b; - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(2); - I.children[0] = argument; - I.children[1] = block; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return new CatchClause((Argument) argument.clone(), (Block) block.clone(), - -1, -1); - } - - - /** Returns a hash code for this {@link ASTNode}. */ - public int hashCode() { - return 31 * argument.hashCode() + 7 * block.hashCode(); - } - - - /** - * Distinguishes this {@link ASTNode} from other objects according to its - * contents recursively. - * - * @param o Another object. - * @return true iff this node is equal to o. - **/ - public boolean equals(Object o) { - if (!(o instanceof CatchClause)) return false; - CatchClause c = (CatchClause) o; - return argument.equals(c.argument) && block.equals(c.block); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. 
- **/ - public void write(StringBuffer buffer) { - buffer.append("catch ("); - argument.write(buffer); - buffer.append(") "); - block.write(buffer); - } +public class CatchClause extends ASTNode { + /** (¬ø) The catch's input specification */ + public Argument argument; + /** (¬ø) The code to execute when an exception is caught. */ + public Block block; + + + /** + * Full constructor. + * + * @param a The catch's input specification. + * @param b The code to execute when an exception is caught. + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + public CatchClause(Argument a, Block b, int line, int byteOffset) { + super(line, byteOffset); + argument = a; + block = b; + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(2); + I.children[0] = argument; + I.children[1] = block; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new CatchClause((Argument) argument.clone(), (Block) block.clone(), -1, -1); + } + + + /** Returns a hash code for this {@link ASTNode}. */ + public int hashCode() { + return 31 * argument.hashCode() + 7 * block.hashCode(); + } + + + /** + * Distinguishes this {@link ASTNode} from other objects according to its contents recursively. + * + * @param o Another object. + * @return true iff this node is equal to o. 
+ **/ + public boolean equals(Object o) { + if (!(o instanceof CatchClause)) + return false; + CatchClause c = (CatchClause) o; + return argument.equals(c.argument) && block.equals(c.block); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + buffer.append("catch ("); + argument.write(buffer); + buffer.append(") "); + block.write(buffer); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/CatchList.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/CatchList.java index ff024732..384dd91c 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/CatchList.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/CatchList.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -16,132 +13,132 @@ /** - * Currently, this is just a wrapper class for LinkedList. The - * code that uses it looks a little cleaner when casts are all taken care of - * automatically. 
- * - * @author Nick Rizzolo + * Currently, this is just a wrapper class for LinkedList. The code that uses it looks + * a little cleaner when casts are all taken care of automatically. + * + * @author Nick Rizzolo **/ -public class CatchList extends List -{ - /** Default constructor. */ - public CatchList() { super(-1, -1, " "); } - - /** - * Initializing constructor. Requires its argument to be - * non-null. - * - * @param s A single CatchClause with which to initialize - * this list. - **/ - public CatchList(CatchClause s) { - super(s.line, s.byteOffset, " "); - list.add(s); - } - - - /** - * Adds another CatchClause to the end of the list. - * - * @param s A reference to the CatchClause to be added. - **/ - public void add(CatchClause s) { list.add(s); } - - - /** - * Adds all the CatchClauses in another - * CatchList to the end of this - * CatchList. - * - * @param s The list to be added. - **/ - public void addAll(CatchList s) { list.addAll(s.list); } - - - /** - * Transforms the list into an array of statements. - * - * @return An array of statements containing references to every statement - * in the list. - **/ - public CatchClause[] toArray() { - return (CatchClause[]) list.toArray(new CatchClause[list.size()]); - } - - - /** - * Returns an iterator used specifically to access the elements of this - * list. - * - * @return An iterator used specifically to access the elements of this - * list. - **/ - public CatchListIterator listIterator() { return new CatchListIterator(); } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { return listIterator(); } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. 
- **/ - public Object clone() { - CatchList clone = new CatchList(); - for (Iterator i = list.iterator(); i.hasNext(); ) - clone.list.add(((CatchClause) i.next()).clone()); - return clone; - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Used to iterate though the children of a list of AST nodes. The entire - * interface of java.util.ListIterator is exposed through this - * class. - * - * @author Nick Rizzolo - **/ - public class CatchListIterator extends NodeListIterator - { +public class CatchList extends List { + /** Default constructor. */ + public CatchList() { + super(-1, -1, " "); + } + /** - * Returns the next AST node in the list. This method may be called - * repeatedly to iterate through the list, or intermixed with calls to - * previous() to go back and forth. (Note that alternating - * calls to next() and previous() will return - * the same element repeatedly.) - * - * @return The next AST node in the list. + * Initializing constructor. Requires its argument to be non-null. + * + * @param s A single CatchClause with which to initialize this list. **/ - public CatchClause nextItem() { return (CatchClause) I.next(); } + public CatchList(CatchClause s) { + super(s.line, s.byteOffset, " "); + list.add(s); + } /** - * Returns the previous element in the list. This method may be called - * repeatedly to iterate through the list backwards, or intermixed with - * calls to next to go back and forth. (Note that alternating calls to - * next and previous will return the same element repeatedly.) - * - * @return The previous AST node in the list. + * Adds another CatchClause to the end of the list. + * + * @param s A reference to the CatchClause to be added. 
**/ - public CatchClause previousItem() { return (CatchClause) I.previous(); } - } -} + public void add(CatchClause s) { + list.add(s); + } + + + /** + * Adds all the CatchClauses in another CatchList to the end of this + * CatchList. + * + * @param s The list to be added. + **/ + public void addAll(CatchList s) { + list.addAll(s.list); + } + + + /** + * Transforms the list into an array of statements. + * + * @return An array of statements containing references to every statement in the list. + **/ + public CatchClause[] toArray() { + return (CatchClause[]) list.toArray(new CatchClause[list.size()]); + } + + + /** + * Returns an iterator used specifically to access the elements of this list. + * + * @return An iterator used specifically to access the elements of this list. + **/ + public CatchListIterator listIterator() { + return new CatchListIterator(); + } + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + return listIterator(); + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + CatchList clone = new CatchList(); + for (Iterator i = list.iterator(); i.hasNext();) + clone.list.add(((CatchClause) i.next()).clone()); + return clone; + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Used to iterate though the children of a list of AST nodes. The entire interface of + * java.util.ListIterator is exposed through this class. + * + * @author Nick Rizzolo + **/ + public class CatchListIterator extends NodeListIterator { + /** + * Returns the next AST node in the list. 
This method may be called repeatedly to iterate + * through the list, or intermixed with calls to previous() to go back and + * forth. (Note that alternating calls to next() and previous() + * will return the same element repeatedly.) + * + * @return The next AST node in the list. + **/ + public CatchClause nextItem() { + return (CatchClause) I.next(); + } + + + /** + * Returns the previous element in the list. This method may be called repeatedly to iterate + * through the list backwards, or intermixed with calls to next to go back and forth. (Note + * that alternating calls to next and previous will return the same element repeatedly.) + * + * @return The previous AST node in the list. + **/ + public CatchClause previousItem() { + return (CatchClause) I.previous(); + } + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ClassifierAssignment.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ClassifierAssignment.java index 24c53896..1fcb9b37 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ClassifierAssignment.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ClassifierAssignment.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -15,216 +12,194 @@ /** - * Represents the assignment of a classifier expression to a method - * signature. 
- * - * @author Nick Rizzolo + * Represents the assignment of a classifier expression to a method signature. + * + * @author Nick Rizzolo **/ -public class ClassifierAssignment extends Declaration -{ - /** - * This value is used in place of the field access which appears as an - * argument to cachedin to indicate that in fact, - * cached was used instead. - **/ - public static final String mapCache = "!!!"; - - - /** (¬ø) The return type of the classifier. */ - public ClassifierReturnType returnType; - /** (¬ø) The input specification of the classifier. */ - public Argument argument; - /** (¬ø) The expression representing the classifier. */ - public ClassifierExpression expression; - /** - * (ø) The expression representing the field to cache this - * classifier's result in. - **/ - public Name cacheIn; - /** - * Whether the classifier will have a single example feature vector cache. - **/ - public boolean singleExampleCache; - - - /** - * Full constructor. Line and byte offset information is taken from the - * type. - * - * @param co A Javadoc comment associated with the declaration. - * @param t The return type of the classifier. - * @param n The classifier's name. - * @param a The input specification of the classifier. - * @param e The expression representing the classifier. - * @param ca The expression representing the field to cache this - * classifier's result in. - * @param sc Whether or not to make a single example cache. - **/ - public ClassifierAssignment(String co, ClassifierReturnType t, Name n, - Argument a, ClassifierExpression e, Name ca, - boolean sc) { - super(co, n, t.line, t.byteOffset); - returnType = t; - argument = a; - expression = e; - cacheIn = ca; - singleExampleCache = sc; - } - - /** - * Parser's constructor. Line and byte offset information is taken from - * the type. - * - * @param t The return type of the classifier. - * @param i The identifier token representing the classifier's name. 
- * @param a The input specification of the classifier. - * @param e The expression representing the classifier. - **/ - public ClassifierAssignment(ClassifierReturnType t, TokenValue i, - Argument a, ClassifierExpression e) { - this(null, t, new Name(i), a, e, null, false); - } - - /** - * Parser's constructor. Line and byte offset information is taken from - * the type. - * - * @param t The return type of the classifier. - * @param i The identifier token representing the classifier's name. - * @param a The input specification of the classifier. - * @param e The expression representing the classifier. - * @param c The expression representing the field to cache this - * classifier's result in. - **/ - public ClassifierAssignment(ClassifierReturnType t, TokenValue i, - Argument a, ClassifierExpression e, Name c) { - this(null, t, new Name(i), a, e, c, false); - } - - /** - * Parser's constructor. Line and byte offset information is taken from - * the type. - * - * @param t The return type of the classifier. - * @param i The identifier token representing the classifier's name. - * @param a The input specification of the classifier. - * @param e The expression representing the classifier. - * @param sc Whether or not to make a single example cache. - **/ - public ClassifierAssignment(ClassifierReturnType t, TokenValue i, - Argument a, ClassifierExpression e, boolean sc) - { - this(null, t, new Name(i), a, e, null, sc); - } - - /** - * Parser's constructor. Line and byte offset information is taken from - * the type. - * - * @param t The return type of the classifier. - * @param i The identifier token representing the classifier's name. - * @param a The input specification of the classifier. - * @param e The expression representing the classifier. - * @param c The expression representing the field to cache this - * classifier's result in. - * @param sc Whether or not to make a single example cache. 
- **/ - public ClassifierAssignment(ClassifierReturnType t, TokenValue i, - Argument a, ClassifierExpression e, Name c, - boolean sc) { - this(null, t, new Name(i), a, e, c, sc); - } - - - /** - * Returns the type of the declaration. - * - * @return The type of the declaration. - **/ - public Type getType() { - return - new ClassifierType(argument.getType(), returnType, - expression instanceof LearningClassifierExpression); - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(cacheIn == null ? 3 : 4); - I.children[0] = returnType; - I.children[1] = argument; - I.children[2] = expression; - if (cacheIn != null) I.children[3] = cacheIn; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return - new ClassifierAssignment( - comment, - (ClassifierReturnType) returnType.clone(), - (Name) name.clone(), - (Argument) argument.clone(), - (ClassifierExpression) expression.clone(), - (Name) (cacheIn == null ? null : cacheIn.clone()), - singleExampleCache); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. 
- **/ - public void write(StringBuffer buffer) { - returnType.write(buffer); - buffer.append(' '); - name.write(buffer); - buffer.append('('); - argument.write(buffer); - buffer.append(") "); - - if (singleExampleCache) buffer.append("cached "); - - if (cacheIn != null) { - buffer.append("cachedin"); - - if (cacheIn.toString().equals(ClassifierAssignment.mapCache)) - buffer.append("map"); - else { - buffer.append(" "); - cacheIn.write(buffer); - } - - buffer.append(' '); +public class ClassifierAssignment extends Declaration { + /** + * This value is used in place of the field access which appears as an argument to + * cachedin to indicate that in fact, cached was used instead. + **/ + public static final String mapCache = "!!!"; + + + /** (¬ø) The return type of the classifier. */ + public ClassifierReturnType returnType; + /** (¬ø) The input specification of the classifier. */ + public Argument argument; + /** (¬ø) The expression representing the classifier. */ + public ClassifierExpression expression; + /** + * (ø) The expression representing the field to cache this classifier's result in. + **/ + public Name cacheIn; + /** + * Whether the classifier will have a single example feature vector cache. + **/ + public boolean singleExampleCache; + + + /** + * Full constructor. Line and byte offset information is taken from the type. + * + * @param co A Javadoc comment associated with the declaration. + * @param t The return type of the classifier. + * @param n The classifier's name. + * @param a The input specification of the classifier. + * @param e The expression representing the classifier. + * @param ca The expression representing the field to cache this classifier's result in. + * @param sc Whether or not to make a single example cache. 
+ **/ + public ClassifierAssignment(String co, ClassifierReturnType t, Name n, Argument a, + ClassifierExpression e, Name ca, boolean sc) { + super(co, n, t.line, t.byteOffset); + returnType = t; + argument = a; + expression = e; + cacheIn = ca; + singleExampleCache = sc; } - buffer.append("<- "); - expression.write(buffer); - } -} + /** + * Parser's constructor. Line and byte offset information is taken from the type. + * + * @param t The return type of the classifier. + * @param i The identifier token representing the classifier's name. + * @param a The input specification of the classifier. + * @param e The expression representing the classifier. + **/ + public ClassifierAssignment(ClassifierReturnType t, TokenValue i, Argument a, + ClassifierExpression e) { + this(null, t, new Name(i), a, e, null, false); + } + + /** + * Parser's constructor. Line and byte offset information is taken from the type. + * + * @param t The return type of the classifier. + * @param i The identifier token representing the classifier's name. + * @param a The input specification of the classifier. + * @param e The expression representing the classifier. + * @param c The expression representing the field to cache this classifier's result in. + **/ + public ClassifierAssignment(ClassifierReturnType t, TokenValue i, Argument a, + ClassifierExpression e, Name c) { + this(null, t, new Name(i), a, e, c, false); + } + + /** + * Parser's constructor. Line and byte offset information is taken from the type. + * + * @param t The return type of the classifier. + * @param i The identifier token representing the classifier's name. + * @param a The input specification of the classifier. + * @param e The expression representing the classifier. + * @param sc Whether or not to make a single example cache. 
+ **/ + public ClassifierAssignment(ClassifierReturnType t, TokenValue i, Argument a, + ClassifierExpression e, boolean sc) { + this(null, t, new Name(i), a, e, null, sc); + } + + /** + * Parser's constructor. Line and byte offset information is taken from the type. + * + * @param t The return type of the classifier. + * @param i The identifier token representing the classifier's name. + * @param a The input specification of the classifier. + * @param e The expression representing the classifier. + * @param c The expression representing the field to cache this classifier's result in. + * @param sc Whether or not to make a single example cache. + **/ + public ClassifierAssignment(ClassifierReturnType t, TokenValue i, Argument a, + ClassifierExpression e, Name c, boolean sc) { + this(null, t, new Name(i), a, e, c, sc); + } + + + /** + * Returns the type of the declaration. + * + * @return The type of the declaration. + **/ + public Type getType() { + return new ClassifierType(argument.getType(), returnType, + expression instanceof LearningClassifierExpression); + } + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(cacheIn == null ? 3 : 4); + I.children[0] = returnType; + I.children[1] = argument; + I.children[2] = expression; + if (cacheIn != null) + I.children[3] = cacheIn; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new ClassifierAssignment(comment, (ClassifierReturnType) returnType.clone(), + (Name) name.clone(), (Argument) argument.clone(), + (ClassifierExpression) expression.clone(), (Name) (cacheIn == null ? 
null + : cacheIn.clone()), singleExampleCache); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + returnType.write(buffer); + buffer.append(' '); + name.write(buffer); + buffer.append('('); + argument.write(buffer); + buffer.append(") "); + + if (singleExampleCache) + buffer.append("cached "); + + if (cacheIn != null) { + buffer.append("cachedin"); + + if (cacheIn.toString().equals(ClassifierAssignment.mapCache)) + buffer.append("map"); + else { + buffer.append(" "); + cacheIn.write(buffer); + } + + buffer.append(' '); + } + + buffer.append("<- "); + expression.write(buffer); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ClassifierCastExpression.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ClassifierCastExpression.java index 1fd0d7a7..74893922 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ClassifierCastExpression.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ClassifierCastExpression.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,131 +11,128 @@ /** - * This class represents a classifier cast expression. - * - * @author Nick Rizzolo + * This class represents a classifier cast expression. + * + * @author Nick Rizzolo **/ -public class ClassifierCastExpression extends ClassifierExpression -{ - /** (¬ø) The return type used to cast. */ - public ClassifierReturnType castType; - /** (¬ø) The expression being casted. */ - public ClassifierExpression expression; - - - /** - * Initializing constructor. Line and byte offset information are taken - * from the type. - * - * @param t The return type used to cast. - * @param e The expression being casted. - **/ - public ClassifierCastExpression(ClassifierReturnType t, - ClassifierExpression e) { - super(t.line, t.byteOffset); - castType = t; - expression = e; - } - - - /** - * Sets the cacheIn member variable to the argument. - * - * @param c The new expression for the cacheIn member - * variable. - **/ - public void setCacheIn(Name c) { expression.setCacheIn(c); } - - - /** - * Returns a hash code value for java hash structures. - * - * @return A hash code for this object. - **/ - public int hashCode() { - return castType.hashCode() + expression.hashCode(); - } - - - /** - * Indicates whether some other object is "equal to" this one. - * - * @return true iff this object is the same as the argument. - **/ - public boolean equals(Object o) { - if (!(o instanceof ClassifierCastExpression)) return false; - ClassifierCastExpression c = (ClassifierCastExpression) o; - return castType.equals(c.castType) && expression.equals(c.expression); - } - - - /** - * Returns an iterator used to successively access the children of this - * node. 
- * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(2); - I.children[0] = castType; - I.children[1] = expression; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return new ClassifierCastExpression( - (ClassifierReturnType) castType.clone(), - (ClassifierExpression) expression.clone()); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - if (parenthesized) buffer.append("("); - buffer.append("("); - castType.write(buffer); - buffer.append(") "); - expression.write(buffer); - if (parenthesized) buffer.append(")"); - } - - - /** - * Creates a StringBuffer containing a shallow representation - * of this ASTNode. - * - * @return A StringBuffer containing a shallow text - * representation of the given node. - **/ - public StringBuffer shallow() { - StringBuffer buffer = new StringBuffer(); - buffer.append("("); - castType.write(buffer); - buffer.append(") " + expression.name); - return buffer; - } +public class ClassifierCastExpression extends ClassifierExpression { + /** (¬ø) The return type used to cast. */ + public ClassifierReturnType castType; + /** (¬ø) The expression being casted. */ + public ClassifierExpression expression; + + + /** + * Initializing constructor. Line and byte offset information are taken from the type. 
+ * + * @param t The return type used to cast. + * @param e The expression being casted. + **/ + public ClassifierCastExpression(ClassifierReturnType t, ClassifierExpression e) { + super(t.line, t.byteOffset); + castType = t; + expression = e; + } + + + /** + * Sets the cacheIn member variable to the argument. + * + * @param c The new expression for the cacheIn member variable. + **/ + public void setCacheIn(Name c) { + expression.setCacheIn(c); + } + + + /** + * Returns a hash code value for java hash structures. + * + * @return A hash code for this object. + **/ + public int hashCode() { + return castType.hashCode() + expression.hashCode(); + } + + + /** + * Indicates whether some other object is "equal to" this one. + * + * @return true iff this object is the same as the argument. + **/ + public boolean equals(Object o) { + if (!(o instanceof ClassifierCastExpression)) + return false; + ClassifierCastExpression c = (ClassifierCastExpression) o; + return castType.equals(c.castType) && expression.equals(c.expression); + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(2); + I.children[0] = castType; + I.children[1] = expression; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new ClassifierCastExpression((ClassifierReturnType) castType.clone(), + (ClassifierExpression) expression.clone()); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. 
The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + if (parenthesized) + buffer.append("("); + buffer.append("("); + castType.write(buffer); + buffer.append(") "); + expression.write(buffer); + if (parenthesized) + buffer.append(")"); + } + + + /** + * Creates a StringBuffer containing a shallow representation of this + * ASTNode. + * + * @return A StringBuffer containing a shallow text representation of the given + * node. + **/ + public StringBuffer shallow() { + StringBuffer buffer = new StringBuffer(); + buffer.append("("); + castType.write(buffer); + buffer.append(") " + expression.name); + return buffer; + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ClassifierExpression.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ClassifierExpression.java index 080840b7..7c0395de 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ClassifierExpression.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ClassifierExpression.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,80 +11,80 @@ /** - * Abstract classifier expression class. 
The member variables of this class - * are filled in either during parsing or during semantic analysis, but in - * any case, they do not represent AST children that exist in the source. - * - * @author Nick Rizzolo + * Abstract classifier expression class. The member variables of this class are filled in either + * during parsing or during semantic analysis, but in any case, they do not represent AST children + * that exist in the source. + * + * @author Nick Rizzolo **/ -public abstract class ClassifierExpression extends ASTNode - implements CodeGenerator -{ - /** - * (ø) The text of a Javadoc comment that may be associated with - * this classifier. - **/ - public String comment; - /** Expression describing what is being declared. */ - public Name name; - /** The return type of the declared classifier. */ - public ClassifierReturnType returnType; - /** Specification of the classifier's input. */ - public Argument argument; - /** Indicates whether this expression was parenthesized in the source. */ - public boolean parenthesized = false; - /** - * (ø) The expression representing the field to cache this - * classifier's result in. - **/ - public Name cacheIn; - /** - * Whether the classifier will have a single example feature vector cache. - **/ - public boolean singleExampleCache; +public abstract class ClassifierExpression extends ASTNode implements CodeGenerator { + /** + * (ø) The text of a Javadoc comment that may be associated with this classifier. + **/ + public String comment; + /** Expression describing what is being declared. */ + public Name name; + /** The return type of the declared classifier. */ + public ClassifierReturnType returnType; + /** Specification of the classifier's input. */ + public Argument argument; + /** Indicates whether this expression was parenthesized in the source. */ + public boolean parenthesized = false; + /** + * (ø) The expression representing the field to cache this classifier's result in. 
+ **/ + public Name cacheIn; + /** + * Whether the classifier will have a single example feature vector cache. + **/ + public boolean singleExampleCache; - /** - * Default constructor. - * - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - ClassifierExpression(int line, int byteOffset) { super(line, byteOffset); } + /** + * Default constructor. + * + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + ClassifierExpression(int line, int byteOffset) { + super(line, byteOffset); + } - /** Returns the name of the ClassifierExpression. */ - public String getName() { return name.toString(); } + /** Returns the name of the ClassifierExpression. */ + public String getName() { + return name.toString(); + } - /** - * Returns the line number on which this AST node is found in the source - * (starting from line 0). This method exists to fulfull the - * implementation of CodeGenerator. - * @see CodeGenerator - **/ - public int getLine() { return line; } + /** + * Returns the line number on which this AST node is found in the source (starting from line 0). + * This method exists to fulfull the implementation of CodeGenerator. + * + * @see CodeGenerator + **/ + public int getLine() { + return line; + } - /** - * Sets the cacheIn member variable to the argument. - * - * @param c The new expression for the cacheIn member - * variable. - **/ - public void setCacheIn(Name c) { cacheIn = c; } + /** + * Sets the cacheIn member variable to the argument. + * + * @param c The new expression for the cacheIn member variable. 
+ **/ + public void setCacheIn(Name c) { + cacheIn = c; + } - /** - * Creates a StringBuffer containing a shallow representation - * of this ClassifierExpression. - * - * @return A StringBuffer containing a shallow text - * representation of the given node. - **/ - abstract public StringBuffer shallow(); + /** + * Creates a StringBuffer containing a shallow representation of this + * ClassifierExpression. + * + * @return A StringBuffer containing a shallow text representation of the given + * node. + **/ + abstract public StringBuffer shallow(); } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ClassifierExpressionList.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ClassifierExpressionList.java index c811d71a..4b89c691 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ClassifierExpressionList.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ClassifierExpressionList.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -16,141 +13,134 @@ /** - * Currently, this is just a wrapper class for LinkedList. The - * code that uses it looks a little cleaner when casts are all taken care of - * automatically. - * - * @author Nick Rizzolo + * Currently, this is just a wrapper class for LinkedList. The code that uses it looks + * a little cleaner when casts are all taken care of automatically. 
+ * + * @author Nick Rizzolo **/ -public class ClassifierExpressionList extends List -{ - /** Default constructor. */ - public ClassifierExpressionList() { super(-1, -1, ", "); } - - - /** - * Initializing constructor. Does not require its argument to be - * non-null. - * - * @param e A single Expression with which to initialize this - * list. - **/ - public ClassifierExpressionList(ClassifierExpression e) { - super(e == null ? -1 : e.line, e == null ? -1 : e.byteOffset, ", "); - list.add(e); - } - - - /** - * Adds another ClassifierExpression to the end of the list. - * - * @param e A reference to the ClassifierExpression to be - * added. - **/ - public void add(ClassifierExpression e) { list.add(e); } - - - /** - * Adds all the ClassifierExpressions in another - * ClassifierExpressionList to the end of this - * ClassifierExpressionList. - * - * @param e The list to be added. - **/ - public void addAll(ClassifierExpressionList e) { list.addAll(e.list); } - - - /** - * Transforms the list into an array of expressions. - * - * @return An array of expressions containing references to every - * expression in the list. - **/ - public ClassifierExpression[] toArray() { - return (ClassifierExpression[]) - list.toArray(new ClassifierExpression[list.size()]); - } - - - /** - * Returns an iterator used specifically to access the elements of this - * list. - * - * @return An iterator used specifically to access the elements of this - * list. - **/ - public ClassifierExpressionListIterator listIterator() { - return new ClassifierExpressionListIterator(); - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { return listIterator(); } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. 
- **/ - public Object clone() { - ClassifierExpressionList clone = new ClassifierExpressionList(); - for (Iterator i = list.iterator(); i.hasNext(); ) - clone.list.add(((ClassifierExpression) i.next()).clone()); - return clone; - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Used to iterate though the children of a list of AST nodes. The entire - * interface of java.util.ListIterator is exposed through this - * class. - * - * @author Nick Rizzolo - **/ - public class ClassifierExpressionListIterator extends NodeListIterator - { +public class ClassifierExpressionList extends List { + /** Default constructor. */ + public ClassifierExpressionList() { + super(-1, -1, ", "); + } + + /** - * Returns the next AST node in the list. This method may be called - * repeatedly to iterate through the list, or intermixed with calls to - * previous() to go back and forth. (Note that alternating - * calls to next() and previous() will return - * the same element repeatedly.) - * - * @return The next AST node in the list. + * Initializing constructor. Does not require its argument to be non-null. + * + * @param e A single Expression with which to initialize this list. **/ - public ClassifierExpression nextItem() { - return (ClassifierExpression) I.next(); + public ClassifierExpressionList(ClassifierExpression e) { + super(e == null ? -1 : e.line, e == null ? -1 : e.byteOffset, ", "); + list.add(e); } /** - * Returns the previous element in the list. This method may be called - * repeatedly to iterate through the list backwards, or intermixed with - * calls to next to go back and forth. (Note that alternating calls to - * next and previous will return the same element repeatedly.) - * - * @return The previous AST node in the list. + * Adds another ClassifierExpression to the end of the list. 
+ * + * @param e A reference to the ClassifierExpression to be added. **/ - public ClassifierExpression previousItem() { - return (ClassifierExpression) I.previous(); + public void add(ClassifierExpression e) { + list.add(e); + } + + + /** + * Adds all the ClassifierExpressions in another + * ClassifierExpressionList to the end of this + * ClassifierExpressionList. + * + * @param e The list to be added. + **/ + public void addAll(ClassifierExpressionList e) { + list.addAll(e.list); + } + + + /** + * Transforms the list into an array of expressions. + * + * @return An array of expressions containing references to every expression in the list. + **/ + public ClassifierExpression[] toArray() { + return (ClassifierExpression[]) list.toArray(new ClassifierExpression[list.size()]); + } + + + /** + * Returns an iterator used specifically to access the elements of this list. + * + * @return An iterator used specifically to access the elements of this list. + **/ + public ClassifierExpressionListIterator listIterator() { + return new ClassifierExpressionListIterator(); } - } -} + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + return listIterator(); + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + ClassifierExpressionList clone = new ClassifierExpressionList(); + for (Iterator i = list.iterator(); i.hasNext();) + clone.list.add(((ClassifierExpression) i.next()).clone()); + return clone; + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. 
+ **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Used to iterate though the children of a list of AST nodes. The entire interface of + * java.util.ListIterator is exposed through this class. + * + * @author Nick Rizzolo + **/ + public class ClassifierExpressionListIterator extends NodeListIterator { + /** + * Returns the next AST node in the list. This method may be called repeatedly to iterate + * through the list, or intermixed with calls to previous() to go back and + * forth. (Note that alternating calls to next() and previous() + * will return the same element repeatedly.) + * + * @return The next AST node in the list. + **/ + public ClassifierExpression nextItem() { + return (ClassifierExpression) I.next(); + } + + + /** + * Returns the previous element in the list. This method may be called repeatedly to iterate + * through the list backwards, or intermixed with calls to next to go back and forth. (Note + * that alternating calls to next and previous will return the same element repeatedly.) + * + * @return The previous AST node in the list. + **/ + public ClassifierExpression previousItem() { + return (ClassifierExpression) I.previous(); + } + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ClassifierName.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ClassifierName.java index 337a506d..0aefdff6 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ClassifierName.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ClassifierName.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -17,159 +14,161 @@ /** - * This class represents identifiers that name classifiers. It is ostensibly - * the same class as Name, but it extends - * ClassifierExpression, helping keep the syntax of classifier - * manipulation separate from Java's method definition syntax. - * - * @see Name - * @author Nick Rizzolo + * This class represents identifiers that name classifiers. It is ostensibly the same class as + * Name, but it extends ClassifierExpression, helping keep the syntax of + * classifier manipulation separate from Java's method definition syntax. + * + * @see Name + * @author Nick Rizzolo **/ -public class ClassifierName extends ClassifierExpression -{ - /** - * (¬ø) The name as it appears in the source code. The member - * variable name defined in ClassifierExpression - * will be used by SemanticAnalysis for other purposes. - * - * @see SemanticAnalysis - * @see ClassifierExpression#name - **/ - public Name referent; +public class ClassifierName extends ClassifierExpression { + /** + * (¬ø) The name as it appears in the source code. The member variable + * name defined in ClassifierExpression will be used by + * SemanticAnalysis for other purposes. + * + * @see SemanticAnalysis + * @see ClassifierExpression#name + **/ + public Name referent; /** - * Is used to distinguish {@link Classifier}s that are defined as fields. - * Used during {@link TranslateToJava}. + * Is used to distinguish {@link Classifier}s that are defined as fields. Used during + * {@link TranslateToJava}. */ public boolean isField; - /** - * Full constructor. Line and byte offset information is taken from the - * name. - * - * @param n A name. 
- **/ - public ClassifierName(Name n) { - super(n.line, n.byteOffset); - referent = n; - } - - /** - * Takes a fully specified name (eg java.lang.String) as input. - * - * @param n A fully specified name. - **/ - public ClassifierName(String n) { this(n, -1, -1); } - - /** - * Takes a fully specified name (eg java.lang.String) as input. - * - * @param n A fully specified name. - **/ - public ClassifierName(String n, int line, int byteOffset) { - super(line, byteOffset); - referent = new Name(n); - } - - - /** - * Returns a hash code value for java hash structures. - * - * @return A hash code for this object. - **/ - public int hashCode() { return referent.hashCode(); } - - - /** - * Indicates whether some other object is "equal to" this one. - * - * @return true iff this object is the same as the argument. - **/ - public boolean equals(Object o) { - if (!(o instanceof ClassifierName)) return false; - ClassifierName n = (ClassifierName) o; - return referent.equals(n.referent); - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(1); - I.children[0] = referent; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return new ClassifierName((Name) referent.clone()); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. 
- **/ - public void write(StringBuffer buffer) { referent.write(buffer); } - - - /** - * Creates a StringBuffer containing a shallow representation - * of this ClassifierExpression. - * - * @return A StringBuffer containing a shallow text - * representation of the given node. - **/ - public StringBuffer shallow() { - StringBuffer buffer = new StringBuffer(); - returnType.write(buffer); - buffer.append(" "); - name.write(buffer); - buffer.append("("); - argument.write(buffer); - buffer.append(") "); - - if (singleExampleCache) buffer.append("cached "); - - if (cacheIn != null) { - buffer.append("cachedin"); - - if (cacheIn.toString().equals(ClassifierAssignment.mapCache)) - buffer.append("map"); - else { - buffer.append(" "); - cacheIn.write(buffer); - } + /** + * Full constructor. Line and byte offset information is taken from the name. + * + * @param n A name. + **/ + public ClassifierName(Name n) { + super(n.line, n.byteOffset); + referent = n; + } - buffer.append(' '); + /** + * Takes a fully specified name (eg java.lang.String) as input. + * + * @param n A fully specified name. + **/ + public ClassifierName(String n) { + this(n, -1, -1); } - buffer.append("<- "); - referent.write(buffer); - return buffer; - } -} + /** + * Takes a fully specified name (eg java.lang.String) as input. + * + * @param n A fully specified name. + **/ + public ClassifierName(String n, int line, int byteOffset) { + super(line, byteOffset); + referent = new Name(n); + } + + + /** + * Returns a hash code value for java hash structures. + * + * @return A hash code for this object. + **/ + public int hashCode() { + return referent.hashCode(); + } + + + /** + * Indicates whether some other object is "equal to" this one. + * + * @return true iff this object is the same as the argument. 
+ **/ + public boolean equals(Object o) { + if (!(o instanceof ClassifierName)) + return false; + ClassifierName n = (ClassifierName) o; + return referent.equals(n.referent); + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(1); + I.children[0] = referent; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new ClassifierName((Name) referent.clone()); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + referent.write(buffer); + } + + + /** + * Creates a StringBuffer containing a shallow representation of this + * ClassifierExpression. + * + * @return A StringBuffer containing a shallow text representation of the given + * node. 
+ **/ + public StringBuffer shallow() { + StringBuffer buffer = new StringBuffer(); + returnType.write(buffer); + buffer.append(" "); + name.write(buffer); + buffer.append("("); + argument.write(buffer); + buffer.append(") "); + + if (singleExampleCache) + buffer.append("cached "); + + if (cacheIn != null) { + buffer.append("cachedin"); + + if (cacheIn.toString().equals(ClassifierAssignment.mapCache)) + buffer.append("map"); + else { + buffer.append(" "); + cacheIn.write(buffer); + } + + buffer.append(' '); + } + + buffer.append("<- "); + referent.write(buffer); + return buffer; + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ClassifierReturnType.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ClassifierReturnType.java index 54229bf6..428511ce 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ClassifierReturnType.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ClassifierReturnType.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -16,275 +13,267 @@ /** - * Represents the return type of a hard-coded classifier. - * - * @author Nick Rizzolo + * Represents the return type of a hard-coded classifier. + * + * @author Nick Rizzolo **/ -public class ClassifierReturnType extends Type -{ - /** Value of the type variable. */ - public static final int DISCRETE = 0; - /** Value of the type variable. 
*/ - public static final int REAL = 1; - /** Value of the type variable. */ - public static final int MIXED = 2; - /** Value of the type variable. */ - public static final int DISCRETE_ARRAY = 3; - /** Value of the type variable. */ - public static final int REAL_ARRAY = 4; - /** Value of the type variable. */ - public static final int MIXED_ARRAY = 5; - /** Value of the type variable. */ - public static final int DISCRETE_GENERATOR = 6; - /** Value of the type variable. */ - public static final int REAL_GENERATOR = 7; - /** Value of the type variable. */ - public static final int MIXED_GENERATOR = 8; - - /** - * - * = { "discrete", "real", "mixed", "discrete[]", "real[]", "mixed[]", - * "discrete%", "real%", "mixed%" } - * - **/ - private static final String[] typeNames = - { - "discrete", "real", "mixed", "discrete[]", "real[]", "mixed[]", - "discrete%", "real%", "mixed%" - }; - - /** - * Produces the name of the primitive type given its index. - * - * @param t The index of the type. (See the static member variables.) - * @return A String holding the name of the type. - **/ - public static String typeName(int t) { return typeNames[t]; } - - /** = { String.class, Double.TYPE } */ - private static final Class[] classes = { String.class, Double.TYPE }; - - - /** - * The index of the type represented by this - * ClassifierReturnType. - **/ - public int type; - /** - * (¬ø) If the type is DISCRETE, this variable represents a list - * of legal values. - **/ - public ConstantList values; - - - /** - * This constructor parses the name of a classifier return type as it would - * appear in the source, assuming value lists have been omitted. - * - * @param s String representing the type's name. - **/ - public ClassifierReturnType(String s) { this(s, new ConstantList()); } - - /** - * This constructor parses the name of a classifier return type as it would - * appear in the source, assuming value lists have been omitted. - * - * @param s String representing the type's name. 
- * @param l The list of legal values. - **/ - public ClassifierReturnType(String s, ConstantList l) { - super(-1, -1); - values = l; - type = 0; - while (type < typeNames.length && !s.equals(typeNames[type])) ++type; - assert type < typeNames.length : "Couldn't find type name: " + s; - } - - /** - * Default constructor. Line and byte offset information, having not been - * supplied, are set to -1. - * - * @param t The index of the primitive type. - **/ - public ClassifierReturnType(int t) { this(t, new ConstantList()); } - - /** - * Default constructor. Line and byte offset information, having not been - * supplied, are set to -1. - * - * @param t The index of the primitive type. - * @param l The list of legal values. - **/ - public ClassifierReturnType(int t, ConstantList l) { this(t, l, -1, -1); } - - /** - * Initializing constructor. - * - * @param t The index of the primitive type. - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public ClassifierReturnType(int t, int line, int byteOffset) { - this(t, new ConstantList(), line, byteOffset); - } - - /** - * Full constructor. - * - * @param t The index of the primitive type. - * @param l The list of legal values. - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public ClassifierReturnType(int t, ConstantList l, int line, int byteOffset) - { - super(line, byteOffset); - type = t; - values = l; - } - - - /** - * Retrieves the name of the base type represented by this object. - * - * @return The name of the base type represented by this object. 
- **/ - public String getTypeName() { return typeName(type); } - - - /** - * Determines whether the feature(s) returned by a classifier of this type - * can become part or all of the features returned by a classifier of the - * specified type. - * - * @param crt The type of the classifier to which features are - * hypothetically being added. - * @return true iff this type is more specific than the - * specified type. - **/ - public boolean isContainableIn(ClassifierReturnType crt) { - if (crt.values.size() > 0) { - if (values.size() == 0 || values.size() > crt.values.size()) - return false; - - HashSet constants = new HashSet(); - for (ASTNodeIterator I = crt.values.iterator(); I.hasNext(); ) - constants.add(I.next()); - - if (!constants.contains(new Constant("*"))) { - for (ConstantList.ConstantListIterator I = values.listIterator(); - I.hasNext(); ) { - Constant c = I.nextItem(); - if (!c.value.equals("*") && !constants.contains(c)) return false; +public class ClassifierReturnType extends Type { + /** Value of the type variable. */ + public static final int DISCRETE = 0; + /** Value of the type variable. */ + public static final int REAL = 1; + /** Value of the type variable. */ + public static final int MIXED = 2; + /** Value of the type variable. */ + public static final int DISCRETE_ARRAY = 3; + /** Value of the type variable. */ + public static final int REAL_ARRAY = 4; + /** Value of the type variable. */ + public static final int MIXED_ARRAY = 5; + /** Value of the type variable. */ + public static final int DISCRETE_GENERATOR = 6; + /** Value of the type variable. */ + public static final int REAL_GENERATOR = 7; + /** Value of the type variable. 
*/ + public static final int MIXED_GENERATOR = 8; + + /** + * + * = { "discrete", "real", "mixed", "discrete[]", "real[]", "mixed[]", + * "discrete%", "real%", "mixed%" } + * + **/ + private static final String[] typeNames = {"discrete", "real", "mixed", "discrete[]", "real[]", + "mixed[]", "discrete%", "real%", "mixed%"}; + + /** + * Produces the name of the primitive type given its index. + * + * @param t The index of the type. (See the static member variables.) + * @return A String holding the name of the type. + **/ + public static String typeName(int t) { + return typeNames[t]; + } + + /** = { String.class, Double.TYPE } */ + private static final Class[] classes = {String.class, Double.TYPE}; + + + /** + * The index of the type represented by this ClassifierReturnType. + **/ + public int type; + /** + * (¬ø) If the type is DISCRETE, this variable represents a list of legal values. + **/ + public ConstantList values; + + + /** + * This constructor parses the name of a classifier return type as it would appear in the + * source, assuming value lists have been omitted. + * + * @param s String representing the type's name. + **/ + public ClassifierReturnType(String s) { + this(s, new ConstantList()); + } + + /** + * This constructor parses the name of a classifier return type as it would appear in the + * source, assuming value lists have been omitted. + * + * @param s String representing the type's name. + * @param l The list of legal values. + **/ + public ClassifierReturnType(String s, ConstantList l) { + super(-1, -1); + values = l; + type = 0; + while (type < typeNames.length && !s.equals(typeNames[type])) + ++type; + assert type < typeNames.length : "Couldn't find type name: " + s; + } + + /** + * Default constructor. Line and byte offset information, having not been supplied, are set to + * -1. + * + * @param t The index of the primitive type. + **/ + public ClassifierReturnType(int t) { + this(t, new ConstantList()); + } + + /** + * Default constructor. 
Line and byte offset information, having not been supplied, are set to + * -1. + * + * @param t The index of the primitive type. + * @param l The list of legal values. + **/ + public ClassifierReturnType(int t, ConstantList l) { + this(t, l, -1, -1); + } + + /** + * Initializing constructor. + * + * @param t The index of the primitive type. + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + public ClassifierReturnType(int t, int line, int byteOffset) { + this(t, new ConstantList(), line, byteOffset); + } + + /** + * Full constructor. + * + * @param t The index of the primitive type. + * @param l The list of legal values. + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + public ClassifierReturnType(int t, ConstantList l, int line, int byteOffset) { + super(line, byteOffset); + type = t; + values = l; + } + + + /** + * Retrieves the name of the base type represented by this object. + * + * @return The name of the base type represented by this object. + **/ + public String getTypeName() { + return typeName(type); + } + + + /** + * Determines whether the feature(s) returned by a classifier of this type can become part or + * all of the features returned by a classifier of the specified type. + * + * @param crt The type of the classifier to which features are hypothetically being added. + * @return true iff this type is more specific than the specified type. 
+ **/ + public boolean isContainableIn(ClassifierReturnType crt) { + if (crt.values.size() > 0) { + if (values.size() == 0 || values.size() > crt.values.size()) + return false; + + HashSet constants = new HashSet(); + for (ASTNodeIterator I = crt.values.iterator(); I.hasNext();) + constants.add(I.next()); + + if (!constants.contains(new Constant("*"))) { + for (ConstantList.ConstantListIterator I = values.listIterator(); I.hasNext();) { + Constant c = I.nextItem(); + if (!c.value.equals("*") && !constants.contains(c)) + return false; + } + } } - } + + return type == crt.type || crt.type == MIXED_GENERATOR || type == DISCRETE + && (crt.type == DISCRETE_ARRAY || crt.type == DISCRETE_GENERATOR) + || type == DISCRETE_ARRAY && crt.type == DISCRETE_GENERATOR || type == REAL + && (crt.type == REAL_ARRAY || crt.type == REAL_GENERATOR) || type == REAL_ARRAY + && crt.type == REAL_GENERATOR; } - return type == crt.type || crt.type == MIXED_GENERATOR - || type == DISCRETE - && (crt.type == DISCRETE_ARRAY || crt.type == DISCRETE_GENERATOR) - || type == DISCRETE_ARRAY && crt.type == DISCRETE_GENERATOR - || type == REAL - && (crt.type == REAL_ARRAY || crt.type == REAL_GENERATOR) - || type == REAL_ARRAY && crt.type == REAL_GENERATOR; - } - - - /** - * Returns an object representing the class that this type - * represents. - * - * @return An object representing the class that this type - * represents. - **/ - public Class typeClass() { return classes[type]; } - - - /** - * Determines whether the argument is equal to this object. - * - * @param o The Object whose equality with this object needs - * to be tested. - * @return true if the two Objects are equal, and - * false otherwise. - **/ - public boolean equals(Object o) { - return o instanceof ClassifierReturnType - && type == ((ClassifierReturnType) o).type; - } - - - /** A hash code based on {@link #type}. 
*/ - public int hashCode() { - return 31 * type + 17; - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(1); - I.children[0] = values; - return I; - } - - - /** - * Creates a new object with the same primitive data. - * - * @return The clone node. - **/ - public Object clone() { return new ClassifierReturnType(type, values); } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - String t = getTypeName(); - - if (values.size() > 0) { - assert t.startsWith("discrete") : "Non-discrete type with value list."; - - buffer.append("discrete{"); - ASTNodeIterator I = values.iterator(); - I.next().write(buffer); - - while (I.hasNext()) { - buffer.append(", "); - I.next().write(buffer); - } - - buffer.append("}"); - - int lastE = t.lastIndexOf('e'); - buffer.append(t.substring(lastE + 1)); + + /** + * Returns an object representing the class that this type represents. + * + * @return An object representing the class that this type represents. + **/ + public Class typeClass() { + return classes[type]; } - else buffer.append(t); - } -} + + /** + * Determines whether the argument is equal to this object. + * + * @param o The Object whose equality with this object needs to be tested. + * @return true if the two Objects are equal, and false + * otherwise. 
+ **/ + public boolean equals(Object o) { + return o instanceof ClassifierReturnType && type == ((ClassifierReturnType) o).type; + } + + + /** A hash code based on {@link #type}. */ + public int hashCode() { + return 31 * type + 17; + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(1); + I.children[0] = values; + return I; + } + + + /** + * Creates a new object with the same primitive data. + * + * @return The clone node. + **/ + public Object clone() { + return new ClassifierReturnType(type, values); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. 
+ **/ + public void write(StringBuffer buffer) { + String t = getTypeName(); + + if (values.size() > 0) { + assert t.startsWith("discrete") : "Non-discrete type with value list."; + + buffer.append("discrete{"); + ASTNodeIterator I = values.iterator(); + I.next().write(buffer); + + while (I.hasNext()) { + buffer.append(", "); + I.next().write(buffer); + } + + buffer.append("}"); + + int lastE = t.lastIndexOf('e'); + buffer.append(t.substring(lastE + 1)); + } else + buffer.append(t); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ClassifierType.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ClassifierType.java index 33d2338d..f80d5ca3 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ClassifierType.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ClassifierType.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,129 +11,128 @@ /** - * A classifier's type is defined by what it takes as input and what it - * returns as output, but it is distinguished only by what it takes as input. - * This class does not represent a syntax that appears in the source - it is - * constructed only during semantic analysis. - * - * @author Nick Rizzolo + * A classifier's type is defined by what it takes as input and what it returns as output, but it is + * distinguished only by what it takes as input. 
This class does not represent a syntax that appears + * in the source - it is constructed only during semantic analysis. + * + * @author Nick Rizzolo **/ -public class ClassifierType extends Type -{ - /** The type of the classifier's input. */ - protected Type input; - /** The type of the classifier's output. */ - protected ClassifierReturnType output; - /** Whether or not the classifier is derived from a learning algorithm. */ - protected boolean learner; - - - /** - * Initializing constructor. - * - * @param i The classifier's input type. - * @param o The classifier's output type. - * @param l Whether or not the classifier is a learner. - **/ - public ClassifierType(Type i, ClassifierReturnType o, boolean l) { - super(-1, -1); - input = i; - output = o; - learner = l; - - try { myClass = Class.forName("edu.illinois.cs.cogcomp.lbjava.classify.Classifier"); } - catch (Exception e) { - System.err.println("Class 'edu.illinois.cs.cogcomp.lbjava.classify.Classifier' not found. " - + "Aborting."); - System.exit(1); +public class ClassifierType extends Type { + /** The type of the classifier's input. */ + protected Type input; + /** The type of the classifier's output. */ + protected ClassifierReturnType output; + /** Whether or not the classifier is derived from a learning algorithm. */ + protected boolean learner; + + + /** + * Initializing constructor. + * + * @param i The classifier's input type. + * @param o The classifier's output type. + * @param l Whether or not the classifier is a learner. + **/ + public ClassifierType(Type i, ClassifierReturnType o, boolean l) { + super(-1, -1); + input = i; + output = o; + learner = l; + + try { + myClass = Class.forName("edu.illinois.cs.cogcomp.lbjava.classify.Classifier"); + } catch (Exception e) { + System.err + .println("Class 'edu.illinois.cs.cogcomp.lbjava.classify.Classifier' not found. " + + "Aborting."); + System.exit(1); + } } - } - - - /** Retrieves the value of the input variable. 
*/ - public Type getInput() { return input; } - - - /** Retrieves the value of the input variable. */ - public ClassifierReturnType getOutput() { return output; } - - - /** Retrieves the value of the learner variable. */ - public boolean isLearner() { return learner; } - - - /** - * Two ClassifierTypes are equivalent when their input types - * match. - * - * @param o The object whose equality with this object needs to be tested. - * @return true if the two objects are equal, and - * false otherwise. - **/ - public boolean equals(Object o) { - return o instanceof ClassifierType - && input.equals(((ClassifierType) o).input); - } - - - /** A hash code based on the hash code of {@link #input}. */ - public int hashCode() { - return 31 * input.hashCode() + 17; - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(2); - I.children[0] = input; - I.children[1] = output; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return new ClassifierType((Type) input.clone(), - (ClassifierReturnType) output.clone(), - learner); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. 
- **/ - public void write(StringBuffer buffer) { - buffer.append("Classifier { "); - output.write(buffer); - buffer.append(" : "); - input.write(buffer); - buffer.append(" : "); - buffer.append(learner); - buffer.append(" }"); - } -} + + /** Retrieves the value of the input variable. */ + public Type getInput() { + return input; + } + + + /** Retrieves the value of the input variable. */ + public ClassifierReturnType getOutput() { + return output; + } + + + /** Retrieves the value of the learner variable. */ + public boolean isLearner() { + return learner; + } + + + /** + * Two ClassifierTypes are equivalent when their input types match. + * + * @param o The object whose equality with this object needs to be tested. + * @return true if the two objects are equal, and false otherwise. + **/ + public boolean equals(Object o) { + return o instanceof ClassifierType && input.equals(((ClassifierType) o).input); + } + + + /** A hash code based on the hash code of {@link #input}. */ + public int hashCode() { + return 31 * input.hashCode() + 17; + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(2); + I.children[0] = input; + I.children[1] = output; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new ClassifierType((Type) input.clone(), (ClassifierReturnType) output.clone(), + learner); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. 
The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + buffer.append("Classifier { "); + output.write(buffer); + buffer.append(" : "); + input.write(buffer); + buffer.append(" : "); + buffer.append(learner); + buffer.append(" }"); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/CodedClassifier.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/CodedClassifier.java index 604db95b..f7889b1e 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/CodedClassifier.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/CodedClassifier.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,132 +11,130 @@ /** - * Represents a hard-coded classifier definition. - * - * @author Nick Rizzolo + * Represents a hard-coded classifier definition. + * + * @author Nick Rizzolo **/ -public class CodedClassifier extends ClassifierExpression -{ - /** - * (¬ø) Statements making up the body of the hard-coded - * classifier. - **/ - public Block body; - - - /** - * Full constructor. - * - * @param b The body of the classifier. 
- **/ - public CodedClassifier(Block b) { - super(b.line, b.byteOffset); - body = b; - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(1); - I.children[0] = body; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return new CodedClassifier((Block) body.clone()); - } - - - /** Returns a hash code for this {@link ASTNode}. */ - public int hashCode() { - return 31 * argument.hashCode() + 17 * body.hashCode(); - } - - - /** - * Distinguishes this {@link ASTNode} from other objects according to its - * contents recursively. - * - * @param o Another object. - * @return true iff this node is equal to o. - **/ - public boolean equals(Object o) { - if (!(o instanceof CodedClassifier)) return false; - CodedClassifier c = (CodedClassifier) o; - return argument.equals(c.argument) && body.equals(c.body); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - if (parenthesized) buffer.append("("); - body.write(buffer); - if (parenthesized) buffer.append(")"); - } - - - /** - * Creates a StringBuffer containing a shallow representation - * of this ASTNode. - * - * @return A StringBuffer containing a shallow text - * representation of the given node. 
- **/ - public StringBuffer shallow() { - StringBuffer buffer = new StringBuffer(); - returnType.write(buffer); - buffer.append(" "); - name.write(buffer); - buffer.append("("); - argument.write(buffer); - buffer.append(") "); - - if (singleExampleCache) buffer.append("cached "); - - if (cacheIn != null) { - buffer.append("cachedin"); - - if (cacheIn.toString().equals(ClassifierAssignment.mapCache)) - buffer.append("map"); - else { - buffer.append(" "); - cacheIn.write(buffer); - } +public class CodedClassifier extends ClassifierExpression { + /** + * (¬ø) Statements making up the body of the hard-coded classifier. + **/ + public Block body; + + + /** + * Full constructor. + * + * @param b The body of the classifier. + **/ + public CodedClassifier(Block b) { + super(b.line, b.byteOffset); + body = b; + } - buffer.append(' '); + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(1); + I.children[0] = body; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new CodedClassifier((Block) body.clone()); + } + + + /** Returns a hash code for this {@link ASTNode}. */ + public int hashCode() { + return 31 * argument.hashCode() + 17 * body.hashCode(); } - buffer.append("<- "); - body.write(buffer); - return buffer; - } -} + /** + * Distinguishes this {@link ASTNode} from other objects according to its contents recursively. + * + * @param o Another object. + * @return true iff this node is equal to o. 
+ **/ + public boolean equals(Object o) { + if (!(o instanceof CodedClassifier)) + return false; + CodedClassifier c = (CodedClassifier) o; + return argument.equals(c.argument) && body.equals(c.body); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + if (parenthesized) + buffer.append("("); + body.write(buffer); + if (parenthesized) + buffer.append(")"); + } + + + /** + * Creates a StringBuffer containing a shallow representation of this + * ASTNode. + * + * @return A StringBuffer containing a shallow text representation of the given + * node. + **/ + public StringBuffer shallow() { + StringBuffer buffer = new StringBuffer(); + returnType.write(buffer); + buffer.append(" "); + name.write(buffer); + buffer.append("("); + argument.write(buffer); + buffer.append(") "); + + if (singleExampleCache) + buffer.append("cached "); + + if (cacheIn != null) { + buffer.append("cachedin"); + + if (cacheIn.toString().equals(ClassifierAssignment.mapCache)) + buffer.append("map"); + else { + buffer.append(" "); + cacheIn.write(buffer); + } + + buffer.append(' '); + } + + buffer.append("<- "); + body.write(buffer); + return buffer; + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/CompositeGenerator.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/CompositeGenerator.java index 51434568..71474310 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/CompositeGenerator.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/CompositeGenerator.java @@ -1,11 +1,8 @@ /** - * This software is 
released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,152 +11,150 @@ /** - * Represents a generator composed from several other classifiers. - * - * @author Nick Rizzolo + * Represents a generator composed from several other classifiers. + * + * @author Nick Rizzolo **/ -public class CompositeGenerator extends ClassifierExpression -{ - /** (¬ø) The list of classifiers composing this classifier. */ - public ClassifierExpressionList components; - - - /** - * Full constructor. The line and byte offset are each set to -1. - * - * @param c The list of components. - **/ - public CompositeGenerator(ClassifierExpressionList c) { - super(-1, -1); - components = c; - } - - /** - * Parser's constructor. - * - * @param e1 One ClassifierExpression to add. - * @param e2 Another ClassifierExpression to add. - **/ - public CompositeGenerator(ClassifierExpression e1, ClassifierExpression e2) - { - super(e1.line, e2.byteOffset); - - if (e1 instanceof CompositeGenerator) - components = ((CompositeGenerator) e1).components; - else components = new ClassifierExpressionList(e1); - - if (e2 instanceof CompositeGenerator) - components.addAll(((CompositeGenerator) e2).components); - else components.add(e2); - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. 
- **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(1); - I.children[0] = components; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return - new CompositeGenerator((ClassifierExpressionList) components.clone()); - } - - - /** Returns a hash code for this {@link ASTNode}. */ - public int hashCode() { - return 17 * components.hashCode(); - } - - - /** - * Distinguishes this {@link ASTNode} from other objects according to its - * contents recursively. - * - * @param o Another object. - * @return true iff this node is equal to o. - **/ - public boolean equals(Object o) { - if (!(o instanceof CompositeGenerator)) return false; - CompositeGenerator c = (CompositeGenerator) o; - return components.equals(c.components); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - if (parenthesized) buffer.append("("); - components.write(buffer); - if (parenthesized) buffer.append(")"); - } - - - /** - * Creates a StringBuffer containing a shallow representation - * of this ASTNode. - * - * @return A StringBuffer containing a shallow text - * representation of the given node. 
- **/ - public StringBuffer shallow() { - StringBuffer buffer = new StringBuffer(); - returnType.write(buffer); - buffer.append(" "); - name.write(buffer); - buffer.append("("); - argument.write(buffer); - buffer.append(") "); - - if (singleExampleCache) buffer.append("cached "); - - if (cacheIn != null) { - buffer.append("cachedin"); - - if (cacheIn.toString().equals(ClassifierAssignment.mapCache)) - buffer.append("map"); - else { - buffer.append(" "); - cacheIn.write(buffer); - } +public class CompositeGenerator extends ClassifierExpression { + /** (¬ø) The list of classifiers composing this classifier. */ + public ClassifierExpressionList components; + + + /** + * Full constructor. The line and byte offset are each set to -1. + * + * @param c The list of components. + **/ + public CompositeGenerator(ClassifierExpressionList c) { + super(-1, -1); + components = c; + } - buffer.append(' '); + /** + * Parser's constructor. + * + * @param e1 One ClassifierExpression to add. + * @param e2 Another ClassifierExpression to add. + **/ + public CompositeGenerator(ClassifierExpression e1, ClassifierExpression e2) { + super(e1.line, e2.byteOffset); + + if (e1 instanceof CompositeGenerator) + components = ((CompositeGenerator) e1).components; + else + components = new ClassifierExpressionList(e1); + + if (e2 instanceof CompositeGenerator) + components.addAll(((CompositeGenerator) e2).components); + else + components.add(e2); + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(1); + I.children[0] = components; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. 
+ **/ + public Object clone() { + return new CompositeGenerator((ClassifierExpressionList) components.clone()); + } + + + /** Returns a hash code for this {@link ASTNode}. */ + public int hashCode() { + return 17 * components.hashCode(); } - buffer.append("<- "); - for (ClassifierExpressionList.ClassifierExpressionListIterator I = - components.listIterator(); - I.hasNext(); ) - buffer.append(I.nextItem().name + ", "); - return buffer; - } -} + /** + * Distinguishes this {@link ASTNode} from other objects according to its contents recursively. + * + * @param o Another object. + * @return true iff this node is equal to o. + **/ + public boolean equals(Object o) { + if (!(o instanceof CompositeGenerator)) + return false; + CompositeGenerator c = (CompositeGenerator) o; + return components.equals(c.components); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + if (parenthesized) + buffer.append("("); + components.write(buffer); + if (parenthesized) + buffer.append(")"); + } + + + /** + * Creates a StringBuffer containing a shallow representation of this + * ASTNode. + * + * @return A StringBuffer containing a shallow text representation of the given + * node. 
+ **/ + public StringBuffer shallow() { + StringBuffer buffer = new StringBuffer(); + returnType.write(buffer); + buffer.append(" "); + name.write(buffer); + buffer.append("("); + argument.write(buffer); + buffer.append(") "); + + if (singleExampleCache) + buffer.append("cached "); + + if (cacheIn != null) { + buffer.append("cachedin"); + + if (cacheIn.toString().equals(ClassifierAssignment.mapCache)) + buffer.append("map"); + else { + buffer.append(" "); + cacheIn.write(buffer); + } + + buffer.append(' '); + } + + buffer.append("<- "); + for (ClassifierExpressionList.ClassifierExpressionListIterator I = + components.listIterator(); I.hasNext();) + buffer.append(I.nextItem().name + ", "); + return buffer; + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Conditional.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Conditional.java index a6f1f208..5c9c5485 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Conditional.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Conditional.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,127 +11,117 @@ /** - * This class represents a conditional expression. Conditional expressions - * have the form (c ? t : e). - * - * @author Nick Rizzolo + * This class represents a conditional expression. Conditional expressions have the form + * (c ? t : e). 
+ * + * @author Nick Rizzolo **/ -public class Conditional extends Expression -{ - /** (¬ø) The condition of the conditional expression. */ - public Expression condition; - /** - * (¬ø) The expression to evaluate if the condition evaluates to - * true. - **/ - public Expression thenClause; - /** - * (¬ø) The expression to evaluate if the condition evaluates to - * false. - **/ - public Expression elseClause; - - - /** - * Full constructor. - * - * @param c The condition. - * @param t The expression to evaluate if the condition is - * true. - * @param e The expression to evaluate if the condition is - * false. - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public Conditional(Expression c, Expression t, Expression e, int line, - int byteOffset) { - super(line, byteOffset); - condition = c; - thenClause = t; - elseClause = e; - } - - - /** - * Returns a hash code value for java hash structures. - * - * @return A hash code for this object. - **/ - public int hashCode() { - return condition.hashCode() + thenClause.hashCode() - + elseClause.hashCode(); - } - - - /** - * Indicates whether some other object is "equal to" this one. - * - * @return true iff this object is the same as the argument. - **/ - public boolean equals(Object o) { - if (!(o instanceof Conditional)) return false; - Conditional c = (Conditional) o; - return condition.equals(c.condition) && thenClause.equals(c.thenClause) - && elseClause.equals(c.elseClause); - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. 
- **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(3); - I.children[0] = condition; - I.children[1] = thenClause; - I.children[2] = elseClause; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return new Conditional((Expression) condition.clone(), - (Expression) thenClause.clone(), - (Expression) elseClause.clone(), -1, -1); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - if (parenthesized) buffer.append("("); - condition.write(buffer); - buffer.append(" ? "); - thenClause.write(buffer); - buffer.append(" : "); - elseClause.write(buffer); - if (parenthesized) buffer.append(")"); - } +public class Conditional extends Expression { + /** (¬ø) The condition of the conditional expression. */ + public Expression condition; + /** + * (¬ø) The expression to evaluate if the condition evaluates to true. + **/ + public Expression thenClause; + /** + * (¬ø) The expression to evaluate if the condition evaluates to false. + **/ + public Expression elseClause; + + + /** + * Full constructor. + * + * @param c The condition. + * @param t The expression to evaluate if the condition is true. + * @param e The expression to evaluate if the condition is false. + * @param line The line on which the source code represented by this node is found. 
+ * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + public Conditional(Expression c, Expression t, Expression e, int line, int byteOffset) { + super(line, byteOffset); + condition = c; + thenClause = t; + elseClause = e; + } + + + /** + * Returns a hash code value for java hash structures. + * + * @return A hash code for this object. + **/ + public int hashCode() { + return condition.hashCode() + thenClause.hashCode() + elseClause.hashCode(); + } + + + /** + * Indicates whether some other object is "equal to" this one. + * + * @return true iff this object is the same as the argument. + **/ + public boolean equals(Object o) { + if (!(o instanceof Conditional)) + return false; + Conditional c = (Conditional) o; + return condition.equals(c.condition) && thenClause.equals(c.thenClause) + && elseClause.equals(c.elseClause); + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(3); + I.children[0] = condition; + I.children[1] = thenClause; + I.children[2] = elseClause; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new Conditional((Expression) condition.clone(), (Expression) thenClause.clone(), + (Expression) elseClause.clone(), -1, -1); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. 
The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + if (parenthesized) + buffer.append("("); + condition.write(buffer); + buffer.append(" ? "); + thenClause.write(buffer); + buffer.append(" : "); + elseClause.write(buffer); + if (parenthesized) + buffer.append(")"); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Conjunction.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Conjunction.java index aa983093..c321e67f 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Conjunction.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Conjunction.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,147 +11,144 @@ /** - * This class represents a classifier conjunction. - * - * @author Nick Rizzolo + * This class represents a classifier conjunction. + * + * @author Nick Rizzolo **/ -public class Conjunction extends ClassifierExpression -{ - /** (¬ø) The left hand side of the conjunction. */ - public ClassifierExpression left; - /** (¬ø) The right hand side of the conjunction. */ - public ClassifierExpression right; - - - /** - * Initializing constructor. - * - * @param l Reference to the left hand side's representation. 
- * @param r Reference to the right hand side's representation. - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public Conjunction(ClassifierExpression l, ClassifierExpression r, int line, - int byteOffset) { - super(line, byteOffset); - left = l; - right = r; - } - - - /** - * Returns a hash code value for java hash structures. - * - * @return A hash code for this object. - **/ - public int hashCode() { - return 31 * left.hashCode() + 17 * right.hashCode(); - } - - - /** - * Indicates whether some other object is "equal to" this one. - * - * @return true iff this object is the same as the argument. - **/ - public boolean equals(Object o) { - if (!(o instanceof Conjunction)) return false; - Conjunction c = (Conjunction) o; - return left.equals(c.left) && right.equals(c.right); - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(2); - I.children[0] = left; - I.children[1] = right; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return new Conjunction((ClassifierExpression) left.clone(), - (ClassifierExpression) right.clone(), -1, -1); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. 
The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - if (parenthesized) buffer.append("("); - left.write(buffer); - buffer.append(" && "); - right.write(buffer); - if (parenthesized) buffer.append(")"); - } - - - /** - * Creates a StringBuffer containing a shallow representation - * of this ASTNode. - * - * @return A StringBuffer containing a shallow text - * representation of the given node. - **/ - public StringBuffer shallow() { - StringBuffer buffer = new StringBuffer(); - returnType.write(buffer); - buffer.append(" "); - name.write(buffer); - buffer.append("("); - argument.write(buffer); - buffer.append(") "); - - if (singleExampleCache) buffer.append("cached "); - - if (cacheIn != null) { - buffer.append("cachedin"); - - if (cacheIn.toString().equals(ClassifierAssignment.mapCache)) - buffer.append("map"); - else { - buffer.append(" "); - cacheIn.write(buffer); - } +public class Conjunction extends ClassifierExpression { + /** (¬ø) The left hand side of the conjunction. */ + public ClassifierExpression left; + /** (¬ø) The right hand side of the conjunction. */ + public ClassifierExpression right; + + + /** + * Initializing constructor. + * + * @param l Reference to the left hand side's representation. + * @param r Reference to the right hand side's representation. + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + public Conjunction(ClassifierExpression l, ClassifierExpression r, int line, int byteOffset) { + super(line, byteOffset); + left = l; + right = r; + } - buffer.append(' '); + + /** + * Returns a hash code value for java hash structures. + * + * @return A hash code for this object. 
+ **/ + public int hashCode() { + return 31 * left.hashCode() + 17 * right.hashCode(); + } + + + /** + * Indicates whether some other object is "equal to" this one. + * + * @return true iff this object is the same as the argument. + **/ + public boolean equals(Object o) { + if (!(o instanceof Conjunction)) + return false; + Conjunction c = (Conjunction) o; + return left.equals(c.left) && right.equals(c.right); + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(2); + I.children[0] = left; + I.children[1] = right; + return I; } - buffer.append("<- "); - left.name.write(buffer); - buffer.append(" && "); - right.name.write(buffer); - return buffer; - } -} + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new Conjunction((ClassifierExpression) left.clone(), + (ClassifierExpression) right.clone(), -1, -1); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + if (parenthesized) + buffer.append("("); + left.write(buffer); + buffer.append(" && "); + right.write(buffer); + if (parenthesized) + buffer.append(")"); + } + + + /** + * Creates a StringBuffer containing a shallow representation of this + * ASTNode. 
+ * + * @return A StringBuffer containing a shallow text representation of the given + * node. + **/ + public StringBuffer shallow() { + StringBuffer buffer = new StringBuffer(); + returnType.write(buffer); + buffer.append(" "); + name.write(buffer); + buffer.append("("); + argument.write(buffer); + buffer.append(") "); + + if (singleExampleCache) + buffer.append("cached "); + + if (cacheIn != null) { + buffer.append("cachedin"); + + if (cacheIn.toString().equals(ClassifierAssignment.mapCache)) + buffer.append("map"); + else { + buffer.append(" "); + cacheIn.write(buffer); + } + + buffer.append(' '); + } + + buffer.append("<- "); + left.name.write(buffer); + buffer.append(" && "); + right.name.write(buffer); + return buffer; + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Constant.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Constant.java index 412df484..c5b76e8d 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Constant.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Constant.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -15,128 +12,131 @@ /** - * Represents constant values. - * - * @author Nick Rizzolo + * Represents constant values. + * + * @author Nick Rizzolo **/ -public class Constant extends Expression -{ - /** (¬ø) The text representing the constant. 
*/ - public String value; - - - /** - * Parser's constructor. - * - * @param token The parser's token for the constant. - **/ - public Constant(TokenValue token) { - this(token.line, token.byteOffset, token.text); - } - - /** - * Initializing constructor. The line and byte offset, having not been - * specified, are both set to -1. - * - * @param value The text representation of the constant. - **/ - public Constant(String value) { this(-1, -1, value); } - - /** - * Full constructor. - * - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - * @param value The text representation of the constant. - **/ - public Constant(int line, int byteOffset, String value) { - super(line, byteOffset); - this.value = value; - } - - - /** - * Returns the contents of value removing unescaped double - * quotes. - * - * @return The contents of value with unescaped double quotes - * removed. - **/ - public String noQuotes() { - StringBuffer result = new StringBuffer(value); - for (int i = 0; i < result.length(); ) { - if (result.charAt(i) == '\\') i += 2; - else if (result.charAt(i) == '"') result.deleteCharAt(i); - else ++i; +public class Constant extends Expression { + /** (¬ø) The text representing the constant. */ + public String value; + + + /** + * Parser's constructor. + * + * @param token The parser's token for the constant. + **/ + public Constant(TokenValue token) { + this(token.line, token.byteOffset, token.text); } - return result.toString(); - } - - - /** - * Returns a hash code value for java hash structures. - * - * @return A hash code for this object. - **/ - public int hashCode() { return new Boolean(value).hashCode(); } - - - /** - * Two constants are equal when their noQuotes() methods - * return the same thing. 
- * - * @see Constant#noQuotes() - * @return true iff this object is the same as the argument. - **/ - public boolean equals(Object o) { - return o instanceof Constant - && ((Constant) o).noQuotes().equals(noQuotes()); - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { return new ASTNodeIterator(0); } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { return new Constant(value); } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - if (parenthesized) buffer.append("("); - buffer.append(value); - if (parenthesized) buffer.append(")"); - } -} + /** + * Initializing constructor. The line and byte offset, having not been specified, are both set + * to -1. + * + * @param value The text representation of the constant. + **/ + public Constant(String value) { + this(-1, -1, value); + } + + /** + * Full constructor. + * + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + * @param value The text representation of the constant. 
+ **/ + public Constant(int line, int byteOffset, String value) { + super(line, byteOffset); + this.value = value; + } + + + /** + * Returns the contents of value removing unescaped double quotes. + * + * @return The contents of value with unescaped double quotes removed. + **/ + public String noQuotes() { + StringBuffer result = new StringBuffer(value); + for (int i = 0; i < result.length();) { + if (result.charAt(i) == '\\') + i += 2; + else if (result.charAt(i) == '"') + result.deleteCharAt(i); + else + ++i; + } + + return result.toString(); + } + + + /** + * Returns a hash code value for java hash structures. + * + * @return A hash code for this object. + **/ + public int hashCode() { + return new Boolean(value).hashCode(); + } + + + /** + * Two constants are equal when their noQuotes() methods return the same thing. + * + * @see Constant#noQuotes() + * @return true iff this object is the same as the argument. + **/ + public boolean equals(Object o) { + return o instanceof Constant && ((Constant) o).noQuotes().equals(noQuotes()); + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + return new ASTNodeIterator(0); + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new Constant(value); + } + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. 
+ **/ + public void write(StringBuffer buffer) { + if (parenthesized) + buffer.append("("); + buffer.append(value); + if (parenthesized) + buffer.append(")"); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ConstantList.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ConstantList.java index bee202d8..17614982 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ConstantList.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ConstantList.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -17,184 +14,186 @@ /** - * Currently, this is just a wrapper class for LinkedList. The - * code that uses it looks a little cleaner when casts are all taken care of - * automatically. - * - * @author Nick Rizzolo + * Currently, this is just a wrapper class for LinkedList. The code that uses it looks + * a little cleaner when casts are all taken care of automatically. + * + * @author Nick Rizzolo **/ -public class ConstantList extends List -{ - /** Default constructor. */ - public ConstantList() { super(-1, -1, ", "); } - - /** - * Initializing constructor. Requires its argument to be - * non-null. - * - * @param c A single Constant with which to initialize this - * list. 
- **/ - public ConstantList(Constant c) { - super(c.line, c.byteOffset, ", "); - list.add(c); - } - - /** - * Creates an entire list from an array of values. - * - * @param a The array of constant values. - **/ - public ConstantList(String[] a) { - super(-1, -1, ", "); - for (int i = 0; i < a.length; ++i) list.add(new Constant(a[i])); - } - - /** - * Creates an entire list from an array of values. - * - * @param a The array of constant values. - **/ - public ConstantList(ByteString[] a) { - super(-1, -1, ", "); - for (int i = 0; i < a.length; ++i) - list.add(new Constant(a[i].toString())); - } - - - /** - * Adds another Constant to the end of the list. - * - * @param c A reference to the Constant to be added. - **/ - public void add(Constant c) { list.add(c); } - - - /** - * Adds all the Constants in another ConstantList - * to the end of this ConstantList. - * - * @param l The list to be added. - **/ - public void addAll(ConstantList l) { list.addAll(l.list); } - - - /** - * Transforms the list into an array of expressions. - * - * @return An array of constants containing references to every constant in - * the list. - **/ - public Constant[] toArray() { - return (Constant[]) list.toArray(new Constant[list.size()]); - } - - - /** - * Two ConstantLists are equal when they contain the same - * elements in the same order as evaluated by the - * Constant.equals(Object) method. - * - * @param o The object to test equality with. - * @return true iff this ConstantList is - * equivalent to the specified object as described above. - **/ - public boolean equals(Object o) { - if (!(o instanceof ConstantList)) return false; - ConstantList list = (ConstantList) o; - if (list.size() != size()) return false; - - ASTNodeIterator I = iterator(); - for (ASTNodeIterator J = list.iterator(); J.hasNext(); ) - if (!I.next().equals(J.next())) return false; - return true; - } - - - /** A hash code based on the hash codes of the elements of the list. 
*/ - public int hashCode() { - int result = 0; - for (ASTNodeIterator I = iterator(); I.hasNext(); ) - result = 31 * result + I.next().hashCode(); - return result; - } - - - /** - * Returns an iterator used specifically to access the elements of this - * list. - * - * @return An iterator used specifically to access the elements of this - * list. - **/ - public ConstantListIterator listIterator() { - return new ConstantListIterator(); - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { return listIterator(); } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - ConstantList clone = new ConstantList(); - for (Iterator i = list.iterator(); i.hasNext(); ) - clone.list.add(((Constant) i.next()).clone()); - return clone; - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Used to iterate though the children of a list of AST nodes. The entire - * interface of java.util.ListIterator is exposed through this - * class. - * - * @author Nick Rizzolo - **/ - public class ConstantListIterator extends NodeListIterator - { +public class ConstantList extends List { + /** Default constructor. */ + public ConstantList() { + super(-1, -1, ", "); + } + /** - * Returns the next AST node in the list. This method may be called - * repeatedly to iterate through the list, or intermixed with calls to - * previous() to go back and forth. (Note that alternating - * calls to next() and previous() will return - * the same element repeatedly.) - * - * @return The next AST node in the list. 
+ * Initializing constructor. Requires its argument to be non-null. + * + * @param c A single Constant with which to initialize this list. **/ - public Constant nextItem() { return (Constant) I.next(); } + public ConstantList(Constant c) { + super(c.line, c.byteOffset, ", "); + list.add(c); + } + /** + * Creates an entire list from an array of values. + * + * @param a The array of constant values. + **/ + public ConstantList(String[] a) { + super(-1, -1, ", "); + for (int i = 0; i < a.length; ++i) + list.add(new Constant(a[i])); + } /** - * Returns the previous element in the list. This method may be called - * repeatedly to iterate through the list backwards, or intermixed with - * calls to next to go back and forth. (Note that alternating calls to - * next and previous will return the same element repeatedly.) - * - * @return The previous AST node in the list. + * Creates an entire list from an array of values. + * + * @param a The array of constant values. **/ - public Constant previousItem() { return (Constant) I.previous(); } - } -} + public ConstantList(ByteString[] a) { + super(-1, -1, ", "); + for (int i = 0; i < a.length; ++i) + list.add(new Constant(a[i].toString())); + } + + + /** + * Adds another Constant to the end of the list. + * + * @param c A reference to the Constant to be added. + **/ + public void add(Constant c) { + list.add(c); + } + + + /** + * Adds all the Constants in another ConstantList to the end of this + * ConstantList. + * + * @param l The list to be added. + **/ + public void addAll(ConstantList l) { + list.addAll(l.list); + } + + + /** + * Transforms the list into an array of expressions. + * + * @return An array of constants containing references to every constant in the list. + **/ + public Constant[] toArray() { + return (Constant[]) list.toArray(new Constant[list.size()]); + } + + + /** + * Two ConstantLists are equal when they contain the same elements in the same + * order as evaluated by the Constant.equals(Object) method. 
+ * + * @param o The object to test equality with. + * @return true iff this ConstantList is equivalent to the specified + * object as described above. + **/ + public boolean equals(Object o) { + if (!(o instanceof ConstantList)) + return false; + ConstantList list = (ConstantList) o; + if (list.size() != size()) + return false; + + ASTNodeIterator I = iterator(); + for (ASTNodeIterator J = list.iterator(); J.hasNext();) + if (!I.next().equals(J.next())) + return false; + return true; + } + + + /** A hash code based on the hash codes of the elements of the list. */ + public int hashCode() { + int result = 0; + for (ASTNodeIterator I = iterator(); I.hasNext();) + result = 31 * result + I.next().hashCode(); + return result; + } + + /** + * Returns an iterator used specifically to access the elements of this list. + * + * @return An iterator used specifically to access the elements of this list. + **/ + public ConstantListIterator listIterator() { + return new ConstantListIterator(); + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + return listIterator(); + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + ConstantList clone = new ConstantList(); + for (Iterator i = list.iterator(); i.hasNext();) + clone.list.add(((Constant) i.next()).clone()); + return clone; + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Used to iterate though the children of a list of AST nodes. The entire interface of + * java.util.ListIterator is exposed through this class. 
+ * + * @author Nick Rizzolo + **/ + public class ConstantListIterator extends NodeListIterator { + /** + * Returns the next AST node in the list. This method may be called repeatedly to iterate + * through the list, or intermixed with calls to previous() to go back and + * forth. (Note that alternating calls to next() and previous() + * will return the same element repeatedly.) + * + * @return The next AST node in the list. + **/ + public Constant nextItem() { + return (Constant) I.next(); + } + + + /** + * Returns the previous element in the list. This method may be called repeatedly to iterate + * through the list backwards, or intermixed with calls to next to go back and forth. (Note + * that alternating calls to next and previous will return the same element repeatedly.) + * + * @return The previous AST node in the list. + **/ + public Constant previousItem() { + return (Constant) I.previous(); + } + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ConstraintDeclaration.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ConstraintDeclaration.java index 498a221d..e87c13d8 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ConstraintDeclaration.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ConstraintDeclaration.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -16,138 +13,131 @@ /** - * Represents the declaration of a constraint. Constraints declared in an - * LBJava source file are mainly used to constrain inferences, but they may also - * be used exactly as if they were a classifier with return type - * discrete{false, true}. - * - * @author Nick Rizzolo + * Represents the declaration of a constraint. Constraints declared in an LBJava source file are + * mainly used to constrain inferences, but they may also be used exactly as if they were a + * classifier with return type discrete{false, true}. + * + * @author Nick Rizzolo **/ -public class ConstraintDeclaration extends Declaration - implements CodeGenerator -{ - /** (¬ø) The input specification of the constraint. */ - public Argument argument; - /** (¬ø) Statements making up the body of the constraint. */ - public Block body; - - - /** - * Full constructor. - * - * @param c A Javadoc comment associated with the declaration. - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - * @param n The constraint's name. - * @param a The input specification of the constraint. - * @param b The code block representing the constraint. - **/ - public ConstraintDeclaration(String c, int line, int byteOffset, Name n, - Argument a, Block b) { - super(c, n, line, byteOffset); - argument = a; - body = b; - } - - /** - * Parser's constructor. Line and byte offset information is taken from - * the first token. - * - * @param t The first token indicates line and byte offset information. 
- * @param i The identifier token representing the constraint's name. - * @param a The input specification of the constraint. - * @param b The code block representing the constraint. - **/ - public ConstraintDeclaration(TokenValue t, TokenValue i, Argument a, - Block b) { - this(null, t.line, t.byteOffset, new Name(i), a, b); - } - - - /** - * Returns the type of the declaration. - * - * @return The type of the declaration. - **/ - public Type getType() { return new ConstraintType(argument.getType()); } - - - /** Returns the name of the ConstraintDeclaration. */ - public String getName() { return name.toString(); } - - - /** - * Returns the line number on which this AST node is found in the source - * (starting from line 0). This method exists to fulfull the - * implementation of CodeGenerator. - * @see CodeGenerator - **/ - public int getLine() { return line; } - - - /** Returns a shallow textual representation of this AST node. */ - public StringBuffer shallow() { - StringBuffer buffer = new StringBuffer(); - write(buffer); - return buffer; - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(2); - I.children[0] = argument; - I.children[1] = body; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return - new ConstraintDeclaration(comment, -1, -1, (Name) name.clone(), - (Argument) argument.clone(), - (Block) body.clone()); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. 
- **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - buffer.append("constraint "); - name.write(buffer); - buffer.append("("); - argument.write(buffer); - buffer.append(") "); - body.write(buffer); - } +public class ConstraintDeclaration extends Declaration implements CodeGenerator { + /** (¬ø) The input specification of the constraint. */ + public Argument argument; + /** (¬ø) Statements making up the body of the constraint. */ + public Block body; + + + /** + * Full constructor. + * + * @param c A Javadoc comment associated with the declaration. + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + * @param n The constraint's name. + * @param a The input specification of the constraint. + * @param b The code block representing the constraint. + **/ + public ConstraintDeclaration(String c, int line, int byteOffset, Name n, Argument a, Block b) { + super(c, n, line, byteOffset); + argument = a; + body = b; + } + + /** + * Parser's constructor. Line and byte offset information is taken from the first token. + * + * @param t The first token indicates line and byte offset information. + * @param i The identifier token representing the constraint's name. + * @param a The input specification of the constraint. + * @param b The code block representing the constraint. + **/ + public ConstraintDeclaration(TokenValue t, TokenValue i, Argument a, Block b) { + this(null, t.line, t.byteOffset, new Name(i), a, b); + } + + + /** + * Returns the type of the declaration. + * + * @return The type of the declaration. 
+ **/ + public Type getType() { + return new ConstraintType(argument.getType()); + } + + + /** Returns the name of the ConstraintDeclaration. */ + public String getName() { + return name.toString(); + } + + + /** + * Returns the line number on which this AST node is found in the source (starting from line 0). + * This method exists to fulfull the implementation of CodeGenerator. + * + * @see CodeGenerator + **/ + public int getLine() { + return line; + } + + + /** Returns a shallow textual representation of this AST node. */ + public StringBuffer shallow() { + StringBuffer buffer = new StringBuffer(); + write(buffer); + return buffer; + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(2); + I.children[0] = argument; + I.children[1] = body; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new ConstraintDeclaration(comment, -1, -1, (Name) name.clone(), + (Argument) argument.clone(), (Block) body.clone()); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. 
+ **/ + public void write(StringBuffer buffer) { + buffer.append("constraint "); + name.write(buffer); + buffer.append("("); + argument.write(buffer); + buffer.append(") "); + body.write(buffer); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ConstraintEqualityExpression.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ConstraintEqualityExpression.java index 3e817c0c..500111b2 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ConstraintEqualityExpression.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ConstraintEqualityExpression.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -16,140 +13,129 @@ /** - * This class represents the atom of the LBJava constraint expression: the - * (in)equality comparison. The application of a learning classifier to an - * example object is here compared to either another learning classifier - * application or to an arbitrary Java expression evaluating to a - * String. - * - * @author Nick Rizzolo + * This class represents the atom of the LBJava constraint expression: the (in)equality comparison. + * The application of a learning classifier to an example object is here compared to either another + * learning classifier application or to an arbitrary Java expression evaluating to a + * String. 
+ * + * @author Nick Rizzolo **/ -public class ConstraintEqualityExpression extends ConstraintExpression -{ - /** - * (¬ø) Represents either an equality or an inequality - * comparison. - **/ - public Operator operation; - /** (¬ø) The expression on the left hand side of the operator. */ - public Expression left; - /** - * Filled in by SemanticAnalysis, this flag is set if - * left represents the invocation of a discrete learner. - **/ - public boolean leftIsDiscreteLearner; - /** - * Filled in by SemanticAnalysis, this flag is set if - * left contains any quantified variables. - **/ - public boolean leftIsQuantified; - /** - * (¬ø) The expression on the right hand side of the operator. - **/ - public Expression right; - /** - * Filled in by SemanticAnalysis, this flag is set if - * right represents the invocation of a discrete learner. - **/ - public boolean rightIsDiscreteLearner; - /** - * Filled in by SemanticAnalysis, this flag is set if - * right contains any quantified variables. - **/ - public boolean rightIsQuantified; - - - /** - * Full constructor. Line and byte offset information are taken from the - * operator. - * - * @param o The equality comparison operator. - * @param l The expression on the left of the operator. - * @param r The expression on the right of the operator. - **/ - public ConstraintEqualityExpression(Operator o, Expression l, Expression r) - { - super(o.line, o.byteOffset); - operation = o; - left = l; - right = r; - } - - - /** - * Returns a set of Arguments storing the name and type of - * each variable that is a subexpression of this expression. This method - * cannot be run before SemanticAnalysis runs. - **/ - public HashSet getVariableTypes() { - HashSet result = left.getVariableTypes(); - result.addAll(right.getVariableTypes()); - return result; - } - - - /** - * Determines if there are any quantified variables in this expression. - * This method cannot be run before SemanticAnalysis runs. 
- **/ - public boolean containsQuantifiedVariable() { - return left.containsQuantifiedVariable() - || right.containsQuantifiedVariable(); - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(3); - I.children[0] = left; - I.children[1] = operation; - I.children[2] = right; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return - new ConstraintEqualityExpression((Operator) operation.clone(), - (Expression) left.clone(), - (Expression) right.clone()); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - left.write(buffer); - buffer.append(" "); - operation.write(buffer); - buffer.append(" "); - right.write(buffer); - } +public class ConstraintEqualityExpression extends ConstraintExpression { + /** + * (¬ø) Represents either an equality or an inequality comparison. + **/ + public Operator operation; + /** (¬ø) The expression on the left hand side of the operator. */ + public Expression left; + /** + * Filled in by SemanticAnalysis, this flag is set if left represents + * the invocation of a discrete learner. + **/ + public boolean leftIsDiscreteLearner; + /** + * Filled in by SemanticAnalysis, this flag is set if left contains + * any quantified variables. 
+ **/ + public boolean leftIsQuantified; + /** + * (¬ø) The expression on the right hand side of the operator. + **/ + public Expression right; + /** + * Filled in by SemanticAnalysis, this flag is set if right represents + * the invocation of a discrete learner. + **/ + public boolean rightIsDiscreteLearner; + /** + * Filled in by SemanticAnalysis, this flag is set if right contains + * any quantified variables. + **/ + public boolean rightIsQuantified; + + + /** + * Full constructor. Line and byte offset information are taken from the operator. + * + * @param o The equality comparison operator. + * @param l The expression on the left of the operator. + * @param r The expression on the right of the operator. + **/ + public ConstraintEqualityExpression(Operator o, Expression l, Expression r) { + super(o.line, o.byteOffset); + operation = o; + left = l; + right = r; + } + + + /** + * Returns a set of Arguments storing the name and type of each variable that is a + * subexpression of this expression. This method cannot be run before + * SemanticAnalysis runs. + **/ + public HashSet getVariableTypes() { + HashSet result = left.getVariableTypes(); + result.addAll(right.getVariableTypes()); + return result; + } + + + /** + * Determines if there are any quantified variables in this expression. This method cannot be + * run before SemanticAnalysis runs. + **/ + public boolean containsQuantifiedVariable() { + return left.containsQuantifiedVariable() || right.containsQuantifiedVariable(); + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(3); + I.children[0] = left; + I.children[1] = operation; + I.children[2] = right; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. 
+ * + * @return The clone node. + **/ + public Object clone() { + return new ConstraintEqualityExpression((Operator) operation.clone(), + (Expression) left.clone(), (Expression) right.clone()); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + left.write(buffer); + buffer.append(" "); + operation.write(buffer); + buffer.append(" "); + right.write(buffer); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ConstraintExpression.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ConstraintExpression.java index 8b4839a3..3f7a504f 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ConstraintExpression.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ConstraintExpression.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,60 +11,57 @@ /** - * Resembling first order logic, a constraint expression consists of equality - * (or inequality) tests and logical operators and evaluates to a Boolean - * value. - * - * @author Nick Rizzolo + * Resembling first order logic, a constraint expression consists of equality (or inequality) tests + * and logical operators and evaluates to a Boolean value. + * + * @author Nick Rizzolo **/ -public abstract class ConstraintExpression extends ASTNode -{ - /** Indicates whether this expression was parenthesized in the source. */ - public boolean parenthesized = false; +public abstract class ConstraintExpression extends ASTNode { + /** Indicates whether this expression was parenthesized in the source. */ + public boolean parenthesized = false; + + /** + * Default constructor. + * + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + ConstraintExpression(int line, int byteOffset) { + super(line, byteOffset); + } - /** - * Default constructor. - * - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - ConstraintExpression(int line, int byteOffset) { super(line, byteOffset); } + /** + * Returns a set of Arguments storing the name and type of each variable that is a + * subexpression of this expression. This method cannot be run before + * SemanticAnalysis runs. 
+ **/ + public HashSet getVariableTypes() { + HashSet result = new HashSet(); + for (ASTNodeIterator I = iterator(); I.hasNext();) { + ASTNode node = I.next(); + if (node instanceof ConstraintExpression) + result.addAll(((ConstraintExpression) node).getVariableTypes()); + } - /** - * Returns a set of Arguments storing the name and type of - * each variable that is a subexpression of this expression. This method - * cannot be run before SemanticAnalysis runs. - **/ - public HashSet getVariableTypes() { - HashSet result = new HashSet(); - for (ASTNodeIterator I = iterator(); I.hasNext(); ) { - ASTNode node = I.next(); - if (node instanceof ConstraintExpression) - result.addAll(((ConstraintExpression) node).getVariableTypes()); + return result; } - return result; - } + /** + * Determines if there are any quantified variables in this expression. This method cannot be + * run before SemanticAnalysis runs. + **/ + public boolean containsQuantifiedVariable() { + for (ASTNodeIterator I = iterator(); I.hasNext();) { + ASTNode node = I.next(); + if (node instanceof ConstraintExpression + && ((ConstraintExpression) node).containsQuantifiedVariable()) + return true; + } - /** - * Determines if there are any quantified variables in this expression. - * This method cannot be run before SemanticAnalysis runs. 
- **/ - public boolean containsQuantifiedVariable() { - for (ASTNodeIterator I = iterator(); I.hasNext(); ) { - ASTNode node = I.next(); - if (node instanceof ConstraintExpression - && ((ConstraintExpression) node).containsQuantifiedVariable()) - return true; + return false; } - - return false; - } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ConstraintInvocation.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ConstraintInvocation.java index b8e34d3c..188d1c8e 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ConstraintInvocation.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ConstraintInvocation.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -17,113 +14,106 @@ /** - * A constraint may be invoked from within another constraint using the - * @ operator. This class is essentially a - * ConstraintExpression wrapper for a - * MethodInvocation. - * - * @author Nick Rizzolo + * A constraint may be invoked from within another constraint using the @ operator. + * This class is essentially a ConstraintExpression wrapper for a + * MethodInvocation. + * + * @author Nick Rizzolo **/ -public class ConstraintInvocation extends ConstraintExpression -{ - /** (¬ø) The invocation. 
*/ - public MethodInvocation invocation; - /** - * Filled in by SemanticAnalysis, this flag is set if - * invocation contains any quantified variables. - **/ - public boolean invocationIsQuantified; - - - /** - * Full constructor. - * - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - * @param m The invocation. - **/ - public ConstraintInvocation(int line, int byteOffset, MethodInvocation m) { - super(line, byteOffset); - invocation = m; - } - - /** - * Parser's constructor. Line and byte offset information is taken from - * the token. - * - * @param t The token providing line and byte offset information. - * @param m The invocation. - **/ - public ConstraintInvocation(TokenValue t, MethodInvocation m) { - this(t.line, t.byteOffset, m); - } - - - /** - * Returns a set of Arguments storing the name and type of - * each variable that is a subexpression of this expression. This method - * cannot be run before SemanticAnalysis runs. - **/ - public HashSet getVariableTypes() { return invocation.getVariableTypes(); } - - - /** - * Determines if there are any quantified variables in this expression. - * This method cannot be run before SemanticAnalysis runs. - **/ - public boolean containsQuantifiedVariable() { - return invocation.containsQuantifiedVariable(); - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(1); - I.children[0] = invocation; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. 
- **/ - public Object clone() { - return - new ConstraintInvocation(-1, -1, (MethodInvocation) invocation.clone()); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - buffer.append("@"); - invocation.write(buffer); - } +public class ConstraintInvocation extends ConstraintExpression { + /** (¬ø) The invocation. */ + public MethodInvocation invocation; + /** + * Filled in by SemanticAnalysis, this flag is set if invocation + * contains any quantified variables. + **/ + public boolean invocationIsQuantified; + + + /** + * Full constructor. + * + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + * @param m The invocation. + **/ + public ConstraintInvocation(int line, int byteOffset, MethodInvocation m) { + super(line, byteOffset); + invocation = m; + } + + /** + * Parser's constructor. Line and byte offset information is taken from the token. + * + * @param t The token providing line and byte offset information. + * @param m The invocation. + **/ + public ConstraintInvocation(TokenValue t, MethodInvocation m) { + this(t.line, t.byteOffset, m); + } + + + /** + * Returns a set of Arguments storing the name and type of each variable that is a + * subexpression of this expression. This method cannot be run before + * SemanticAnalysis runs. 
+ **/ + public HashSet getVariableTypes() { + return invocation.getVariableTypes(); + } + + + /** + * Determines if there are any quantified variables in this expression. This method cannot be + * run before SemanticAnalysis runs. + **/ + public boolean containsQuantifiedVariable() { + return invocation.containsQuantifiedVariable(); + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(1); + I.children[0] = invocation; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new ConstraintInvocation(-1, -1, (MethodInvocation) invocation.clone()); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. 
+ **/ + public void write(StringBuffer buffer) { + buffer.append("@"); + invocation.write(buffer); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ConstraintStatementExpression.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ConstraintStatementExpression.java index 170ccf01..6738bb2e 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ConstraintStatementExpression.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ConstraintStatementExpression.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -16,89 +13,86 @@ /** - * This class is simply a wrapper for a ConstraintExpression so - * that it can be used in an ExpressionStatement. - * - * @author Nick Rizzolo + * This class is simply a wrapper for a ConstraintExpression so that it can be used in + * an ExpressionStatement. + * + * @author Nick Rizzolo **/ -public class ConstraintStatementExpression extends StatementExpression -{ - /** (¬ø) The expression representing the constraint. */ - public ConstraintExpression constraint; - - - /** - * Full constructor. Line and byte offset information is taken from the - * lone argument. - * - * @param c The expression representing a constraint. 
- **/ - public ConstraintStatementExpression(ConstraintExpression c) { - super(c.line, c.byteOffset); - constraint = c; - } - - - /** - * Returns a set of Arguments storing the name and type of - * each variable that is a subexpression of this expression. This method - * cannot be run before SemanticAnalysis runs. - **/ - public HashSet getVariableTypes() { return constraint.getVariableTypes(); } - - - /** - * Determines if there are any quantified variables in this expression. - * This method cannot be run before SemanticAnalysis runs. - **/ - public boolean containsQuantifiedVariable() { - return constraint.containsQuantifiedVariable(); - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(1); - I.children[0] = constraint; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return - new ConstraintStatementExpression( - (ConstraintExpression) constraint.clone()); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { constraint.write(buffer); } +public class ConstraintStatementExpression extends StatementExpression { + /** (¬ø) The expression representing the constraint. */ + public ConstraintExpression constraint; + + + /** + * Full constructor. 
Line and byte offset information is taken from the lone argument. + * + * @param c The expression representing a constraint. + **/ + public ConstraintStatementExpression(ConstraintExpression c) { + super(c.line, c.byteOffset); + constraint = c; + } + + + /** + * Returns a set of Arguments storing the name and type of each variable that is a + * subexpression of this expression. This method cannot be run before + * SemanticAnalysis runs. + **/ + public HashSet getVariableTypes() { + return constraint.getVariableTypes(); + } + + + /** + * Determines if there are any quantified variables in this expression. This method cannot be + * run before SemanticAnalysis runs. + **/ + public boolean containsQuantifiedVariable() { + return constraint.containsQuantifiedVariable(); + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(1); + I.children[0] = constraint; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new ConstraintStatementExpression((ConstraintExpression) constraint.clone()); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. 
+ **/ + public void write(StringBuffer buffer) { + constraint.write(buffer); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ConstraintType.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ConstraintType.java index 00198861..dc3cc69f 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ConstraintType.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ConstraintType.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -15,92 +12,85 @@ /** - * A constraint's type is defined by what it takes as input. This class does - * not represent a syntax that appears in the source - it is constructed only - * during semantic analysis. - * - * @author Nick Rizzolo + * A constraint's type is defined by what it takes as input. This class does not represent a syntax + * that appears in the source - it is constructed only during semantic analysis. + * + * @author Nick Rizzolo **/ -public class ConstraintType extends ClassifierType -{ - /** - * Initializing constructor. - * - * @param i The classifier's input type. - **/ - public ConstraintType(Type i) { - super(i, - new ClassifierReturnType( - ClassifierReturnType.DISCRETE, - new ConstantList(DiscreteFeature.BooleanValues)), - false); +public class ConstraintType extends ClassifierType { + /** + * Initializing constructor. + * + * @param i The classifier's input type. 
+ **/ + public ConstraintType(Type i) { + super(i, new ClassifierReturnType(ClassifierReturnType.DISCRETE, new ConstantList( + DiscreteFeature.BooleanValues)), false); - try { myClass = Class.forName("edu.illinois.cs.cogcomp.lbjava.infer.ParameterizedConstraint"); } - catch (Exception e) { - System.err.println("Class 'edu.illinois.cs.cogcomp.lbjava.infer.ParameterizedConstraint' not " - + "found. Aborting."); - System.exit(1); + try { + myClass = Class.forName("edu.illinois.cs.cogcomp.lbjava.infer.ParameterizedConstraint"); + } catch (Exception e) { + System.err + .println("Class 'edu.illinois.cs.cogcomp.lbjava.infer.ParameterizedConstraint' not " + + "found. Aborting."); + System.exit(1); + } } - } - /** - * Two ConstraintTypes are equivalent when their input types - * match. - * - * @param o The object whose equality with this object needs to be tested. - * @return true if the two objects are equal, and - * false otherwise. - **/ - public boolean equals(Object o) { - return o instanceof ConstraintType - && input.equals(((ConstraintType) o).input); - } + /** + * Two ConstraintTypes are equivalent when their input types match. + * + * @param o The object whose equality with this object needs to be tested. + * @return true if the two objects are equal, and false otherwise. + **/ + public boolean equals(Object o) { + return o instanceof ConstraintType && input.equals(((ConstraintType) o).input); + } - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(1); - I.children[0] = input; - return I; - } + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. 
+ **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(1); + I.children[0] = input; + return I; + } - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { return new ConstraintType((Type) input.clone()); } + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new ConstraintType((Type) input.clone()); + } - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - buffer.append("Constraint { "); - input.write(buffer); - buffer.append(" }"); - } + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. 
+ **/ + public void write(StringBuffer buffer) { + buffer.append("Constraint { "); + input.write(buffer); + buffer.append(" }"); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ContinueStatement.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ContinueStatement.java index c602f8a0..7499f100 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ContinueStatement.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ContinueStatement.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,91 +11,90 @@ /** - * Represents a continue statement. - * - * @author Nick Rizzolo + * Represents a continue statement. + * + * @author Nick Rizzolo **/ -public class ContinueStatement extends Statement -{ - /** (ø) The label identifying the loop to continue, if any. */ - public String label; - - - /** - * Full constructor. - * - * @param l The label of the loop to continue. - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public ContinueStatement(String l, int line, int byteOffset) { - super(line, byteOffset); - label = l; - } - - - /** - * Returns an iterator used to successively access the children of this - * node. 
- * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { return new ASTNodeIterator(0); } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { return new ContinueStatement(label, -1, -1); } - - - /** Returns a hash code for this {@link ASTNode}. */ - public int hashCode() { - return label == null ? 7 : label.hashCode(); - } - - - /** - * Distinguishes this {@link ASTNode} from other objects according to its - * contents recursively. - * - * @param o Another object. - * @return true iff this node is equal to o. - **/ - public boolean equals(Object o) { - if (!(o instanceof ContinueStatement)) return false; - ContinueStatement c = (ContinueStatement) o; - return label == null ? c.label == null : label.equals(c.label); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - buffer.append("continue"); - if (label != null) buffer.append(" " + label); - buffer.append(";"); - } +public class ContinueStatement extends Statement { + /** (ø) The label identifying the loop to continue, if any. */ + public String label; + + + /** + * Full constructor. + * + * @param l The label of the loop to continue. + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. 
+ **/ + public ContinueStatement(String l, int line, int byteOffset) { + super(line, byteOffset); + label = l; + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + return new ASTNodeIterator(0); + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new ContinueStatement(label, -1, -1); + } + + + /** Returns a hash code for this {@link ASTNode}. */ + public int hashCode() { + return label == null ? 7 : label.hashCode(); + } + + + /** + * Distinguishes this {@link ASTNode} from other objects according to its contents recursively. + * + * @param o Another object. + * @return true iff this node is equal to o. + **/ + public boolean equals(Object o) { + if (!(o instanceof ContinueStatement)) + return false; + ContinueStatement c = (ContinueStatement) o; + return label == null ? c.label == null : label.equals(c.label); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. 
+ **/ + public void write(StringBuffer buffer) { + buffer.append("continue"); + if (label != null) + buffer.append(" " + label); + buffer.append(";"); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Declaration.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Declaration.java index c21db93a..c81c72da 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Declaration.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Declaration.java @@ -1,86 +1,71 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; /** - * Abstract representation of declarations such as import and - * package. - * - * @author Nick Rizzolo + * Abstract representation of declarations such as import and package. + * + * @author Nick Rizzolo **/ -abstract public class Declaration extends ASTNode -{ - /** - * (ø) The text of a Javadoc comment that may appear before the - * declaration. - **/ - public String comment; - /** (¬ø) Identifies what is being declared. */ - public Name name; +abstract public class Declaration extends ASTNode { + /** + * (ø) The text of a Javadoc comment that may appear before the declaration. + **/ + public String comment; + /** (¬ø) Identifies what is being declared. */ + public Name name; - /** - * Initializing constructor. - * - * @param n Reference to the name describing what is being - * declared. 
- * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public Declaration(Name n, int line, int byteOffset) { - this(null, n, line, byteOffset); - } + /** + * Initializing constructor. + * + * @param n Reference to the name describing what is being declared. + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + public Declaration(Name n, int line, int byteOffset) { + this(null, n, line, byteOffset); + } - /** - * Full constructor. - * - * @param c The text of a Javadoc comment. - * @param n Reference to the name describing what is being - * declared. - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public Declaration(String c, Name n, int line, int byteOffset) { - super(line, byteOffset); - comment = c; - name = n; - } + /** + * Full constructor. + * + * @param c The text of a Javadoc comment. + * @param n Reference to the name describing what is being declared. + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + public Declaration(String c, Name n, int line, int byteOffset) { + super(line, byteOffset); + comment = c; + name = n; + } - /** - * Returns the type of the declaration. - * - * @return The type of the declaration. - **/ - abstract public Type getType(); + /** + * Returns the type of the declaration. 
+ * + * @return The type of the declaration. + **/ + abstract public Type getType(); - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(1); - I.children[0] = name; - return I; - } + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(1); + I.children[0] = name; + return I; + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/DeclarationList.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/DeclarationList.java index b2777cee..f125546f 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/DeclarationList.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/DeclarationList.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -16,138 +13,132 @@ /** - * Currently, this is just a wrapper class for LinkedList. The - * code that uses it looks a little cleaner when casts are all taken care of - * automatically. - * - * @author Nick Rizzolo + * Currently, this is just a wrapper class for LinkedList. 
The code that uses it looks + * a little cleaner when casts are all taken care of automatically. + * + * @author Nick Rizzolo **/ -public class DeclarationList extends List -{ - /** Default constructor. */ - public DeclarationList() { super(-1, -1, " "); } - - /** - * Initializing constructor. Requires its argument to be - * non-null. - * - * @param d A single Declaration with which to initialize - * this list. - **/ - public DeclarationList(Declaration d) { - super(d.line, d.byteOffset, " "); - list.add(d); - } - - - /** - * Adds another Declaration to the end of the list. - * - * @param d A reference to the Declaration to be added. - **/ - public void add(Declaration d) { list.add(d); } - - - /** - * Adds all the Declarations in another - * DeclarationList to the end of this - * DeclarationList. - * - * @param d The list to be added. - **/ - public void addAll(DeclarationList d) { list.addAll(d.list); } - - - /** - * Transforms the list into an array of statements. - * - * @return An array of statements containing references to every statement - * in the list. - **/ - public Declaration[] toArray() { - return (Declaration[]) list.toArray(new Declaration[list.size()]); - } - - - /** - * Returns an iterator used specifically to access the elements of this - * list. - * - * @return An iterator used specifically to access the elements of this - * list. - **/ - public DeclarationListIterator listIterator() { - return new DeclarationListIterator(); - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { return listIterator(); } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. 
- **/ - public Object clone() { - DeclarationList clone = new DeclarationList(); - for (Iterator i = list.iterator(); i.hasNext(); ) - clone.list.add(((Declaration) i.next()).clone()); - return clone; - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Used to iterate though the children of a list of AST nodes. The entire - * interface of java.util.ListIterator is exposed through this - * class. - * - * @author Nick Rizzolo - **/ - public class DeclarationListIterator extends NodeListIterator - { +public class DeclarationList extends List { + /** Default constructor. */ + public DeclarationList() { + super(-1, -1, " "); + } + /** - * Returns the next AST node in the list. This method may be called - * repeatedly to iterate through the list, or intermixed with calls to - * previous() to go back and forth. (Note that alternating - * calls to next() and previous() will return - * the same element repeatedly.) - * - * @return The next AST node in the list. + * Initializing constructor. Requires its argument to be non-null. + * + * @param d A single Declaration with which to initialize this list. **/ - public Declaration nextItem() { - return (Declaration) I.next(); + public DeclarationList(Declaration d) { + super(d.line, d.byteOffset, " "); + list.add(d); } /** - * Returns the previous element in the list. This method may be called - * repeatedly to iterate through the list backwards, or intermixed with - * calls to next to go back and forth. (Note that alternating calls to - * next and previous will return the same element repeatedly.) - * - * @return The previous AST node in the list. + * Adds another Declaration to the end of the list. + * + * @param d A reference to the Declaration to be added. 
**/ - public Declaration previousItem() { - return (Declaration) I.previous(); + public void add(Declaration d) { + list.add(d); } - } -} + + /** + * Adds all the Declarations in another DeclarationList to the end of + * this DeclarationList. + * + * @param d The list to be added. + **/ + public void addAll(DeclarationList d) { + list.addAll(d.list); + } + + + /** + * Transforms the list into an array of statements. + * + * @return An array of statements containing references to every statement in the list. + **/ + public Declaration[] toArray() { + return (Declaration[]) list.toArray(new Declaration[list.size()]); + } + + + /** + * Returns an iterator used specifically to access the elements of this list. + * + * @return An iterator used specifically to access the elements of this list. + **/ + public DeclarationListIterator listIterator() { + return new DeclarationListIterator(); + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + return listIterator(); + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + DeclarationList clone = new DeclarationList(); + for (Iterator i = list.iterator(); i.hasNext();) + clone.list.add(((Declaration) i.next()).clone()); + return clone; + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Used to iterate though the children of a list of AST nodes. The entire interface of + * java.util.ListIterator is exposed through this class. 
+ * + * @author Nick Rizzolo + **/ + public class DeclarationListIterator extends NodeListIterator { + /** + * Returns the next AST node in the list. This method may be called repeatedly to iterate + * through the list, or intermixed with calls to previous() to go back and + * forth. (Note that alternating calls to next() and previous() + * will return the same element repeatedly.) + * + * @return The next AST node in the list. + **/ + public Declaration nextItem() { + return (Declaration) I.next(); + } + + + /** + * Returns the previous element in the list. This method may be called repeatedly to iterate + * through the list backwards, or intermixed with calls to next to go back and forth. (Note + * that alternating calls to next and previous will return the same element repeatedly.) + * + * @return The previous AST node in the list. + **/ + public Declaration previousItem() { + return (Declaration) I.previous(); + } + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/DoStatement.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/DoStatement.java index 7d8e6e89..42d75823 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/DoStatement.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/DoStatement.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,77 +11,70 @@ /** - * Represents a while loop. 
- * - * @author Nick Rizzolo + * Represents a while loop. + * + * @author Nick Rizzolo **/ -public class DoStatement extends WhileStatement -{ - /** - * Full constructor. - * - * @param b The body of the loop. - * @param c The terminating condition. - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public DoStatement(Statement b, Expression c, int line, int byteOffset) { - super(c, b, line, byteOffset); - } +public class DoStatement extends WhileStatement { + /** + * Full constructor. + * + * @param b The body of the loop. + * @param c The terminating condition. + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + public DoStatement(Statement b, Expression c, int line, int byteOffset) { + super(c, b, line, byteOffset); + } - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(2); - I.children[0] = body; - I.children[1] = condition; - return I; - } + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(2); + I.children[0] = body; + I.children[1] = condition; + return I; + } - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. 
- **/ - public Object clone() { - return new DoStatement((Statement) body.clone(), - (Expression) condition.clone(), -1, -1); - } + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new DoStatement((Statement) body.clone(), (Expression) condition.clone(), -1, -1); + } - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - buffer.append("do "); - body.write(buffer); - buffer.append(" while ("); - condition.write(buffer); - buffer.append(");"); - } + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. 
+ **/ + public void write(StringBuffer buffer) { + buffer.append("do "); + body.write(buffer); + buffer.append(" while ("); + condition.write(buffer); + buffer.append(");"); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/EmptyStatement.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/EmptyStatement.java index a4a44e40..aab9999c 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/EmptyStatement.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/EmptyStatement.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,79 +11,78 @@ /** - * No statement here. - * - * @author Nick Rizzolo + * No statement here. + * + * @author Nick Rizzolo **/ -public class EmptyStatement extends Statement -{ - /** - * Full constructor. - * - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public EmptyStatement(int line, int byteOffset) { - super(line, byteOffset); - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. 
- **/ - public ASTNodeIterator iterator() { return new ASTNodeIterator(0); } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { return new EmptyStatement(-1, -1); } - - - /** Returns a hash code for this {@link ASTNode}. */ - public int hashCode() { - return 31; - } - - - /** - * Distinguishes this {@link ASTNode} from other objects according to its - * contents recursively. - * - * @param o Another object. - * @return true iff this node is equal to o. - **/ - public boolean equals(Object o) { - return o instanceof EmptyStatement; - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { buffer.append(";"); } +public class EmptyStatement extends Statement { + /** + * Full constructor. + * + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + public EmptyStatement(int line, int byteOffset) { + super(line, byteOffset); + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + return new ASTNodeIterator(0); + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. 
+ **/ + public Object clone() { + return new EmptyStatement(-1, -1); + } + + + /** Returns a hash code for this {@link ASTNode}. */ + public int hashCode() { + return 31; + } + + + /** + * Distinguishes this {@link ASTNode} from other objects according to its contents recursively. + * + * @param o Another object. + * @return true iff this node is equal to o. + **/ + public boolean equals(Object o) { + return o instanceof EmptyStatement; + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + buffer.append(";"); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ExistentialQuantifierExpression.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ExistentialQuantifierExpression.java index b5d5067e..75c4885e 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ExistentialQuantifierExpression.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ExistentialQuantifierExpression.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -15,94 +12,78 @@ /** - * An existential quantifier has the form: - *
- * exists argument in (expression) - * constraint-expression - *
- * where expression must evaluate to a - * Collection, and the existential quantifier expression is - * sastisfied iff constraint-expression is satisfied for - * any setting of argument taken from the - * Collection. - * - * @author Nick Rizzolo + * An existential quantifier has the form:
exists argument in + * (expression) constraint-expression
where + * expression must evaluate to a Collection, and the existential + * quantifier expression is sastisfied iff constraint-expression is satisfied + * for any setting of argument taken from the Collection. + * + * @author Nick Rizzolo **/ -public class ExistentialQuantifierExpression - extends QuantifiedConstraintExpression -{ - /** - * Full constructor. - * - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - * @param a The quantification variable specification. - * @param c Evaluates to the collection of objects. - * @param co The quantified constraint. - **/ - public ExistentialQuantifierExpression(int line, int byteOffset, Argument a, - Expression c, - ConstraintExpression co) { - super(line, byteOffset, a, c, co); - } +public class ExistentialQuantifierExpression extends QuantifiedConstraintExpression { + /** + * Full constructor. + * + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + * @param a The quantification variable specification. + * @param c Evaluates to the collection of objects. + * @param co The quantified constraint. + **/ + public ExistentialQuantifierExpression(int line, int byteOffset, Argument a, Expression c, + ConstraintExpression co) { + super(line, byteOffset, a, c, co); + } - /** - * Parser's constructor. Line and byte offset information are taken from - * the token. - * - * @param t The token containing line and byte offset information. - * @param a The quantification variable specification. - * @param c Evaluates to the collection of objects. - * @param co The quantified constraint. 
- **/ - public ExistentialQuantifierExpression(TokenValue t, Argument a, - Expression c, - ConstraintExpression co) { - this(t.line, t.byteOffset, a, c, co); - } + /** + * Parser's constructor. Line and byte offset information are taken from the token. + * + * @param t The token containing line and byte offset information. + * @param a The quantification variable specification. + * @param c Evaluates to the collection of objects. + * @param co The quantified constraint. + **/ + public ExistentialQuantifierExpression(TokenValue t, Argument a, Expression c, + ConstraintExpression co) { + this(t.line, t.byteOffset, a, c, co); + } - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return - new ExistentialQuantifierExpression( - -1, -1, (Argument) argument.clone(), - (Expression) collection.clone(), - (ConstraintExpression) constraint.clone()); - } + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new ExistentialQuantifierExpression(-1, -1, (Argument) argument.clone(), + (Expression) collection.clone(), (ConstraintExpression) constraint.clone()); + } - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. 
- **/ - public void write(StringBuffer buffer) { - buffer.append("exists "); - argument.write(buffer); - buffer.append(" in ("); - collection.write(buffer); - buffer.append(") "); - constraint.write(buffer); - } + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + buffer.append("exists "); + argument.write(buffer); + buffer.append(" in ("); + collection.write(buffer); + buffer.append(") "); + constraint.write(buffer); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Expression.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Expression.java index 604e4a5a..d66ca093 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Expression.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Expression.java @@ -1,93 +1,85 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; import java.util.HashSet; - +import edu.illinois.cs.cogcomp.lbjava.SemanticAnalysis; /** - * Abstract expression class. - * - * @author Nick Rizzolo + * Abstract expression class. + * + * @author Nick Rizzolo **/ -public abstract class Expression extends ASTNode -{ - /** - * The SemanticAnalysis pass will store the type of this - * expression here. 
- **/ - public Type typeCache = null; - /** - * Indicates whether the typeCache variable contains usable - * information. - **/ - public boolean typeCacheFilled = false; - /** Indicates whether this expression was parenthesized in the source. */ - public boolean parenthesized = false; +public abstract class Expression extends ASTNode { + /** + * The SemanticAnalysis pass will store the type of this expression here. + **/ + public Type typeCache = null; + /** + * Indicates whether the typeCache variable contains usable information. + **/ + public boolean typeCacheFilled = false; + /** Indicates whether this expression was parenthesized in the source. */ + public boolean parenthesized = false; + + /** + * Default constructor. + * + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + Expression(int line, int byteOffset) { + super(line, byteOffset); + } - /** - * Default constructor. - * - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - Expression(int line, int byteOffset) { super(line, byteOffset); } + /** + * Supports the SemanticAnalysis pass which needs to notify + * MethodInvocations that are the immediate value child of a + * SenseStatement that it's allowable to invoke an array or generator classifier. + * Only MethodInvocation will need to override this method which does nothing by + * default. + * + * @see SemanticAnalysis + **/ + public void senseValueChild() {} - /** - * Supports the SemanticAnalysis pass which needs to notify - * MethodInvocations that are the immediate value - * child of a SenseStatement that it's allowable to invoke an - * array or generator classifier. 
Only MethodInvocation will - * need to override this method which does nothing by default. - * - * @see SemanticAnalysis - **/ - public void senseValueChild() { } + /** + * Returns a set of Arguments storing the name and type of each variable that is a + * subexpression of this expression. This method cannot be run before + * SemanticAnalysis runs. + **/ + public HashSet getVariableTypes() { + HashSet result = new HashSet(); + for (ASTNodeIterator I = iterator(); I.hasNext();) { + ASTNode node = I.next(); + if (node instanceof Expression) + result.addAll(((Expression) node).getVariableTypes()); + } - /** - * Returns a set of Arguments storing the name and type of - * each variable that is a subexpression of this expression. This method - * cannot be run before SemanticAnalysis runs. - **/ - public HashSet getVariableTypes() { - HashSet result = new HashSet(); - for (ASTNodeIterator I = iterator(); I.hasNext(); ) { - ASTNode node = I.next(); - if (node instanceof Expression) - result.addAll(((Expression) node).getVariableTypes()); + return result; } - return result; - } + /** + * Determines if there are any quantified variables in this expression. This method cannot be + * run before SemanticAnalysis runs. + **/ + public boolean containsQuantifiedVariable() { + for (ASTNodeIterator I = iterator(); I.hasNext();) { + ASTNode node = I.next(); + if (node instanceof Expression && ((Expression) node).containsQuantifiedVariable()) + return true; + } - /** - * Determines if there are any quantified variables in this expression. - * This method cannot be run before SemanticAnalysis runs. 
- **/ - public boolean containsQuantifiedVariable() { - for (ASTNodeIterator I = iterator(); I.hasNext(); ) { - ASTNode node = I.next(); - if (node instanceof Expression - && ((Expression) node).containsQuantifiedVariable()) - return true; + return false; } - - return false; - } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ExpressionList.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ExpressionList.java index 30bcae83..847d6bb4 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ExpressionList.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ExpressionList.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -17,174 +14,171 @@ /** - * Currently, this is just a wrapper class for LinkedList. The - * code that uses it looks a little cleaner when casts are all taken care of - * automatically. - * - * @author Nick Rizzolo + * Currently, this is just a wrapper class for LinkedList. The code that uses it looks + * a little cleaner when casts are all taken care of automatically. + * + * @author Nick Rizzolo **/ -public class ExpressionList extends List -{ - /** Default constructor. */ - public ExpressionList() { super(-1, -1, ", "); } - - - /** - * Initializing constructor. Does not require its argument to be - * non-null. - * - * @param e A single Expression with which to initialize this - * list. 
- **/ - public ExpressionList(Expression e) { - super(e == null ? -1 : e.line, e == null ? -1 : e.byteOffset, ", "); - list.add(e); - } - - /** - * Initializing constructor. Does not require its first argument to be - * non-null, however this is required of the second argument. - * - * @param e A single Expression with which to initialize this - * list. - * @param l A list of Expressions which will also be added to - * the list. - **/ - public ExpressionList(Expression e, ExpressionList l) { - super(e == null ? -1 : e.line, e == null ? -1 : e.byteOffset, ", "); - list.add(e); - addAll(l); - } - - - /** - * Adds another Expression to the end of the list. - * - * @param e A reference to the Expression to be added. - **/ - public void add(Expression e) { list.add(e); } - - - /** - * Adds all the Expressions in another - * ExpressionList to the end of this - * ExpressionList. - * - * @param e The list to be added. - **/ - public void addAll(ExpressionList e) { list.addAll(e.list); } - - - /** - * Returns a set of Arguments storing the name and type of - * each variable that is a subexpression of this expression. This method - * cannot be run before SemanticAnalysis runs. - **/ - public HashSet getVariableTypes() { - HashSet result = new HashSet(); - for (ExpressionListIterator I = listIterator(); I.hasNext(); ) - result.addAll(I.nextItem().getVariableTypes()); - return result; - } - - - /** - * Determines if there are any quantified variables in this expression. - * This method cannot be run before SemanticAnalysis runs. - **/ - public boolean containsQuantifiedVariable() { - for (ExpressionListIterator I = listIterator(); I.hasNext(); ) - if (I.nextItem().containsQuantifiedVariable()) return true; - return false; - } - - - /** - * Transforms the list into an array of expressions. - * - * @return An array of expressions containing references to every - * expression in the list. 
- **/ - public Expression[] toArray() { - return (Expression[]) list.toArray(new Expression[list.size()]); - } - - - /** - * Returns an iterator used specifically to access the elements of this - * list. - * - * @return An iterator used specifically to access the elements of this - * list. - **/ - public ExpressionListIterator listIterator() { - return new ExpressionListIterator(); - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { return listIterator(); } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - ExpressionList clone = new ExpressionList(); - for (Iterator i = list.iterator(); i.hasNext(); ) - clone.list.add(((Expression) i.next()).clone()); - return clone; - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Used to iterate though the children of a list of AST nodes. The entire - * interface of java.util.ListIterator is exposed through this - * class. - * - * @author Nick Rizzolo - **/ - public class ExpressionListIterator extends NodeListIterator - { +public class ExpressionList extends List { + /** Default constructor. */ + public ExpressionList() { + super(-1, -1, ", "); + } + + + /** + * Initializing constructor. Does not require its argument to be non-null. + * + * @param e A single Expression with which to initialize this list. + **/ + public ExpressionList(Expression e) { + super(e == null ? -1 : e.line, e == null ? -1 : e.byteOffset, ", "); + list.add(e); + } + /** - * Returns the next AST node in the list. 
This method may be called - * repeatedly to iterate through the list, or intermixed with calls to - * previous() to go back and forth. (Note that alternating - * calls to next() and previous() will return - * the same element repeatedly.) - * - * @return The next AST node in the list. + * Initializing constructor. Does not require its first argument to be non-null, + * however this is required of the second argument. + * + * @param e A single Expression with which to initialize this list. + * @param l A list of Expressions which will also be added to the list. **/ - public Expression nextItem() { return (Expression) I.next(); } + public ExpressionList(Expression e, ExpressionList l) { + super(e == null ? -1 : e.line, e == null ? -1 : e.byteOffset, ", "); + list.add(e); + addAll(l); + } /** - * Returns the previous element in the list. This method may be called - * repeatedly to iterate through the list backwards, or intermixed with - * calls to next to go back and forth. (Note that alternating calls to - * next and previous will return the same element repeatedly.) - * - * @return The previous AST node in the list. + * Adds another Expression to the end of the list. + * + * @param e A reference to the Expression to be added. **/ - public Expression previousItem() { return (Expression) I.previous(); } - } -} + public void add(Expression e) { + list.add(e); + } + + + /** + * Adds all the Expressions in another ExpressionList to the end of + * this ExpressionList. + * + * @param e The list to be added. + **/ + public void addAll(ExpressionList e) { + list.addAll(e.list); + } + + /** + * Returns a set of Arguments storing the name and type of each variable that is a + * subexpression of this expression. This method cannot be run before + * SemanticAnalysis runs. 
+ **/ + public HashSet getVariableTypes() { + HashSet result = new HashSet(); + for (ExpressionListIterator I = listIterator(); I.hasNext();) + result.addAll(I.nextItem().getVariableTypes()); + return result; + } + + + /** + * Determines if there are any quantified variables in this expression. This method cannot be + * run before SemanticAnalysis runs. + **/ + public boolean containsQuantifiedVariable() { + for (ExpressionListIterator I = listIterator(); I.hasNext();) + if (I.nextItem().containsQuantifiedVariable()) + return true; + return false; + } + + + /** + * Transforms the list into an array of expressions. + * + * @return An array of expressions containing references to every expression in the list. + **/ + public Expression[] toArray() { + return (Expression[]) list.toArray(new Expression[list.size()]); + } + + + /** + * Returns an iterator used specifically to access the elements of this list. + * + * @return An iterator used specifically to access the elements of this list. + **/ + public ExpressionListIterator listIterator() { + return new ExpressionListIterator(); + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + return listIterator(); + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + ExpressionList clone = new ExpressionList(); + for (Iterator i = list.iterator(); i.hasNext();) + clone.list.add(((Expression) i.next()).clone()); + return clone; + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Used to iterate though the children of a list of AST nodes. 
The entire interface of + * java.util.ListIterator is exposed through this class. + * + * @author Nick Rizzolo + **/ + public class ExpressionListIterator extends NodeListIterator { + /** + * Returns the next AST node in the list. This method may be called repeatedly to iterate + * through the list, or intermixed with calls to previous() to go back and + * forth. (Note that alternating calls to next() and previous() + * will return the same element repeatedly.) + * + * @return The next AST node in the list. + **/ + public Expression nextItem() { + return (Expression) I.next(); + } + + + /** + * Returns the previous element in the list. This method may be called repeatedly to iterate + * through the list backwards, or intermixed with calls to next to go back and forth. (Note + * that alternating calls to next and previous will return the same element repeatedly.) + * + * @return The previous AST node in the list. + **/ + public Expression previousItem() { + return (Expression) I.previous(); + } + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ExpressionStatement.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ExpressionStatement.java index 7b88178c..f7f22f9d 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ExpressionStatement.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ExpressionStatement.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,93 +11,89 @@ /** - * An expression statement is a statement composed only of a single - * expression, as opposed to a statement involving control flow. - * - * @author Nick Rizzolo + * An expression statement is a statement composed only of a single expression, as opposed to a + * statement involving control flow. + * + * @author Nick Rizzolo **/ -public class ExpressionStatement extends Statement -{ - /** (¬ø) The expression being used as a statement. */ - public StatementExpression expression; - - - /** - * Initializing constructor. Line and byte offset information are taken - * from the expression's representation. - * - * @param e The expression being used as a statement. - **/ - public ExpressionStatement(StatementExpression e) { - super(e.line, e.byteOffset); - expression = e; - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(1); - I.children[0] = expression; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return new ExpressionStatement((StatementExpression) expression.clone()); - } - - - /** Returns a hash code for this {@link ASTNode}. */ - public int hashCode() { - return 31 * expression.hashCode(); - } - - - /** - * Distinguishes this {@link ASTNode} from other objects according to its - * contents recursively. - * - * @param o Another object. 
- * @return true iff this node is equal to o. - **/ - public boolean equals(Object o) { - if (!(o instanceof ExpressionStatement)) return false; - ExpressionStatement e = (ExpressionStatement) o; - return expression.equals(e.expression); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - expression.write(buffer); - buffer.append(";"); - } +public class ExpressionStatement extends Statement { + /** (¬ø) The expression being used as a statement. */ + public StatementExpression expression; + + + /** + * Initializing constructor. Line and byte offset information are taken from the expression's + * representation. + * + * @param e The expression being used as a statement. + **/ + public ExpressionStatement(StatementExpression e) { + super(e.line, e.byteOffset); + expression = e; + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(1); + I.children[0] = expression; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new ExpressionStatement((StatementExpression) expression.clone()); + } + + + /** Returns a hash code for this {@link ASTNode}. 
*/ + public int hashCode() { + return 31 * expression.hashCode(); + } + + + /** + * Distinguishes this {@link ASTNode} from other objects according to its contents recursively. + * + * @param o Another object. + * @return true iff this node is equal to o. + **/ + public boolean equals(Object o) { + if (!(o instanceof ExpressionStatement)) + return false; + ExpressionStatement e = (ExpressionStatement) o; + return expression.equals(e.expression); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + expression.write(buffer); + buffer.append(";"); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/FieldAccess.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/FieldAccess.java index deded390..acfabe56 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/FieldAccess.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/FieldAccess.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -15,113 +12,111 @@ /** - * This class represents a field access. - * - * @author Nick Rizzolo + * This class represents a field access. + * + * @author Nick Rizzolo **/ -public class FieldAccess extends VariableInstance -{ - /** (¬ø) The expression describing the object to be accessed. */ - public Expression object; - /** (¬ø) The name of the field to be accessed. */ - public String name; - - - /** - * Parser's constructor. Line and byte offset information is taken - * from the name token. - * - * @param o The expression describing the object to be accessed. - * @param n Token representing the name of the field to be accessed. - **/ - public FieldAccess(Expression o, TokenValue n) { - this(o, n.toString(), n.line, n.byteOffset); - } - - /** - * Full constructor. - * - * @param o The expression describing the object to be accessed. - * @param n The name of the field to be accessed. - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public FieldAccess(Expression o, String n, int line, int byteOffset) { - super(line, byteOffset); - object = o; - name = n; - } - - - /** - * Returns a hash code value for java hash structures. - * - * @return A hash code for this object. - **/ - public int hashCode() { return object.hashCode() + name.hashCode(); } - - - /** - * Indicates whether some other object is "equal to" this one. - * - * @return true iff this object is the same as the argument. 
- **/ - public boolean equals(Object o) { - if (!(o instanceof FieldAccess)) return false; - FieldAccess f = (FieldAccess) o; - return object.equals(f.object) && name.equals(f.name); - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(1); - I.children[0] = object; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return new FieldAccess((Expression) object.clone(), name, -1, -1); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - if (parenthesized) buffer.append("("); - object.write(buffer); - buffer.append("." + name); - if (parenthesized) buffer.append(")"); - } +public class FieldAccess extends VariableInstance { + /** (¬ø) The expression describing the object to be accessed. */ + public Expression object; + /** (¬ø) The name of the field to be accessed. */ + public String name; + + + /** + * Parser's constructor. Line and byte offset information is taken from the name token. + * + * @param o The expression describing the object to be accessed. + * @param n Token representing the name of the field to be accessed. + **/ + public FieldAccess(Expression o, TokenValue n) { + this(o, n.toString(), n.line, n.byteOffset); + } + + /** + * Full constructor. 
+ * + * @param o The expression describing the object to be accessed. + * @param n The name of the field to be accessed. + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + public FieldAccess(Expression o, String n, int line, int byteOffset) { + super(line, byteOffset); + object = o; + name = n; + } + + + /** + * Returns a hash code value for java hash structures. + * + * @return A hash code for this object. + **/ + public int hashCode() { + return object.hashCode() + name.hashCode(); + } + + + /** + * Indicates whether some other object is "equal to" this one. + * + * @return true iff this object is the same as the argument. + **/ + public boolean equals(Object o) { + if (!(o instanceof FieldAccess)) + return false; + FieldAccess f = (FieldAccess) o; + return object.equals(f.object) && name.equals(f.name); + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(1); + I.children[0] = object; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new FieldAccess((Expression) object.clone(), name, -1, -1); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. 
+ * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + if (parenthesized) + buffer.append("("); + object.write(buffer); + buffer.append("." + name); + if (parenthesized) + buffer.append(")"); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ForStatement.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ForStatement.java index 5e90e976..127ac371 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ForStatement.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ForStatement.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -16,177 +13,175 @@ /** - * Represents a for loop. - * - * @author Nick Rizzolo + * Represents a for loop. + * + * @author Nick Rizzolo **/ -public class ForStatement extends Statement -{ - /** - * (ø) The initializing expression(s) in the loop header (if any). - **/ - public ExpressionList initializers; - /** (ø) The variable declaration in the loop header (if any). */ - public VariableDeclaration initializer; - /** - * (ø) The expression representing the loop's terminating condition. - **/ - public Expression condition; - /** (ø) The updating expression(s) in the loop header. */ - public ExpressionList updaters; - /** (¬ø) The body of the loop. */ - public Statement body; - - - /** - * Full constructor. 
- * - * @param i The initializers list. - * @param c The terminating condition. - * @param u The updaters list. - * @param b The body of the loop. - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public ForStatement(ExpressionList i, Expression c, ExpressionList u, - Statement b, int line, int byteOffset) { - super(line, byteOffset); - initializers = i; - initializer = null; - condition = c; - updaters = u; - body = b; - } - - /** - * Full constructor. - * - * @param v The initializer variable declaration. - * @param c The terminating condition. - * @param u The updaters list. - * @param b The body of the loop. - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public ForStatement(VariableDeclaration v, Expression c, ExpressionList u, - Statement b, int line, int byteOffset) { - this((ExpressionList) null, c, u, b, line, byteOffset); - initializer = v; - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. 
- **/ - public ASTNodeIterator iterator() { - LinkedList children = new LinkedList(); - if (initializers != null) children.add(initializers); - if (initializer != null) children.add(initializer); - if (condition != null) children.add(condition); - children.add(body); - if (updaters != null) children.add(updaters); - - ASTNodeIterator I = new ASTNodeIterator(); - I.children = (ASTNode[]) children.toArray(new ASTNode[children.size()]); - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - ExpressionList i = - initializers == null ? null : (ExpressionList) initializers.clone(); - VariableDeclaration v = - initializer == null ? null : (VariableDeclaration) initializer.clone(); - Expression c = condition == null ? null : (Expression) condition.clone(); - ExpressionList u = - updaters == null ? null : (ExpressionList) updaters.clone(); - - if (v == null) return new ForStatement(i, c, u, body, -1, -1); - return new ForStatement(v, c, u, body, -1, -1); - } - - - /** Returns a hash code for this {@link ASTNode}. */ - public int hashCode() { - int result = 59 * body.hashCode(); - if (initializers != null) result += 47 * initializers.hashCode(); - if (initializer != null) result += 37 * initializer.hashCode(); - if (condition != null) result += 23 * condition.hashCode(); - if (updaters != null) result += 7 * updaters.hashCode(); - return result; - } - - - /** - * Distinguishes this {@link ASTNode} from other objects according to its - * contents recursively. - * - * @param o Another object. - * @return true iff this node is equal to o. - **/ - public boolean equals(Object o) { - if (!(o instanceof ForStatement)) return false; - ForStatement f = (ForStatement) o; - return - body.equals(f.body) - && (initializers == null ? f.initializers == null - : initializers.equals(f.initializers)) - && (initializer == null ? 
f.initializer == null - : initializer.equals(f.initializer)) - && (condition == null ? f.condition == null - : condition.equals(f.condition)) - && (updaters == null ? f.updaters == null - : updaters.equals(f.updaters)); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - buffer.append("for ("); - - if (initializers != null) initializers.write(buffer); - if (initializer != null) { - initializer.write(buffer); - buffer.append(" "); +public class ForStatement extends Statement { + /** + * (ø) The initializing expression(s) in the loop header (if any). + **/ + public ExpressionList initializers; + /** (ø) The variable declaration in the loop header (if any). */ + public VariableDeclaration initializer; + /** + * (ø) The expression representing the loop's terminating condition. + **/ + public Expression condition; + /** (ø) The updating expression(s) in the loop header. */ + public ExpressionList updaters; + /** (¬ø) The body of the loop. */ + public Statement body; + + + /** + * Full constructor. + * + * @param i The initializers list. + * @param c The terminating condition. + * @param u The updaters list. + * @param b The body of the loop. + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. 
+ **/ + public ForStatement(ExpressionList i, Expression c, ExpressionList u, Statement b, int line, + int byteOffset) { + super(line, byteOffset); + initializers = i; + initializer = null; + condition = c; + updaters = u; + body = b; } - else buffer.append("; "); - - if (condition != null) condition.write(buffer); - buffer.append("; "); - if (updaters != null) updaters.write(buffer); - buffer.append(") "); - body.write(buffer); - } -} + /** + * Full constructor. + * + * @param v The initializer variable declaration. + * @param c The terminating condition. + * @param u The updaters list. + * @param b The body of the loop. + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + public ForStatement(VariableDeclaration v, Expression c, ExpressionList u, Statement b, + int line, int byteOffset) { + this((ExpressionList) null, c, u, b, line, byteOffset); + initializer = v; + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + LinkedList children = new LinkedList(); + if (initializers != null) + children.add(initializers); + if (initializer != null) + children.add(initializer); + if (condition != null) + children.add(condition); + children.add(body); + if (updaters != null) + children.add(updaters); + + ASTNodeIterator I = new ASTNodeIterator(); + I.children = (ASTNode[]) children.toArray(new ASTNode[children.size()]); + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + ExpressionList i = initializers == null ? 
null : (ExpressionList) initializers.clone(); + VariableDeclaration v = + initializer == null ? null : (VariableDeclaration) initializer.clone(); + Expression c = condition == null ? null : (Expression) condition.clone(); + ExpressionList u = updaters == null ? null : (ExpressionList) updaters.clone(); + + if (v == null) + return new ForStatement(i, c, u, body, -1, -1); + return new ForStatement(v, c, u, body, -1, -1); + } + + + /** Returns a hash code for this {@link ASTNode}. */ + public int hashCode() { + int result = 59 * body.hashCode(); + if (initializers != null) + result += 47 * initializers.hashCode(); + if (initializer != null) + result += 37 * initializer.hashCode(); + if (condition != null) + result += 23 * condition.hashCode(); + if (updaters != null) + result += 7 * updaters.hashCode(); + return result; + } + + + /** + * Distinguishes this {@link ASTNode} from other objects according to its contents recursively. + * + * @param o Another object. + * @return true iff this node is equal to o. + **/ + public boolean equals(Object o) { + if (!(o instanceof ForStatement)) + return false; + ForStatement f = (ForStatement) o; + return body.equals(f.body) + && (initializers == null ? f.initializers == null : initializers + .equals(f.initializers)) + && (initializer == null ? f.initializer == null : initializer.equals(f.initializer)) + && (condition == null ? f.condition == null : condition.equals(f.condition)) + && (updaters == null ? f.updaters == null : updaters.equals(f.updaters)); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. 
+ **/ + public void write(StringBuffer buffer) { + buffer.append("for ("); + + if (initializers != null) + initializers.write(buffer); + if (initializer != null) { + initializer.write(buffer); + buffer.append(" "); + } else + buffer.append("; "); + + if (condition != null) + condition.write(buffer); + buffer.append("; "); + if (updaters != null) + updaters.write(buffer); + buffer.append(") "); + body.write(buffer); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/IfStatement.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/IfStatement.java index 5713e2e0..ccaa86ec 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/IfStatement.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/IfStatement.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,142 +11,129 @@ /** - * Represents an if statement. - * - * @author Nick Rizzolo + * Represents an if statement. + * + * @author Nick Rizzolo **/ -public class IfStatement extends Statement -{ - /** - * (¬ø) The condition controlling execution of the - * sub-statements. - **/ - public Expression condition; - /** (¬ø) The statement to execute if the condition is true. */ - public Statement thenClause; - /** - * (ø) The statement to execute if the condition is false, if any. - **/ - public Statement elseClause; - - - /** - * Initializing constructor. 
- * - * @param c The condition controlling execution. - * @param t The statement to execute if the condition is true. - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public IfStatement(Expression c, Statement t, int line, int byteOffset) { - this(c, t, null, line, byteOffset); - } - - /** - * Full constructor. - * - * @param c The condition controlling execution. - * @param t The statement to execute if the condition is true. - * @param e The statement to execute if the condition is false, if - * any. - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public IfStatement(Expression c, Statement t, Statement e, int line, - int byteOffset) { - super(line, byteOffset); - condition = c; - thenClause = t; - elseClause = e; - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(elseClause == null ? 2 : 3); - I.children[0] = condition; - I.children[1] = thenClause; - if (elseClause != null) I.children[2] = elseClause; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return new IfStatement( - (Expression) condition.clone(), - (Statement) thenClause.clone(), - elseClause == null ? null : (Statement) elseClause.clone(), -1, -1); - } - - - /** Returns a hash code for this {@link ASTNode}. 
*/ - public int hashCode() { - int result = 47 * condition.hashCode() + 29 * thenClause.hashCode(); - if (elseClause != null) result += 17 * elseClause.hashCode(); - return result; - } - - - /** - * Distinguishes this {@link ASTNode} from other objects according to its - * contents recursively. - * - * @param o Another object. - * @return true iff this node is equal to o. - **/ - public boolean equals(Object o) { - if (!(o instanceof IfStatement)) return false; - IfStatement i = (IfStatement) o; - return - condition.equals(i.condition) && thenClause.equals(i.thenClause) - && (elseClause == null ? i.elseClause == null - : elseClause.equals(i.elseClause)); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - buffer.append("if ("); - condition.write(buffer); - buffer.append(") "); - thenClause.write(buffer); - if (elseClause != null) { - buffer.append(" else "); - elseClause.write(buffer); +public class IfStatement extends Statement { + /** + * (¬ø) The condition controlling execution of the sub-statements. + **/ + public Expression condition; + /** (¬ø) The statement to execute if the condition is true. */ + public Statement thenClause; + /** + * (ø) The statement to execute if the condition is false, if any. + **/ + public Statement elseClause; + + + /** + * Initializing constructor. + * + * @param c The condition controlling execution. + * @param t The statement to execute if the condition is true. + * @param line The line on which the source code represented by this node is found. 
+ * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + public IfStatement(Expression c, Statement t, int line, int byteOffset) { + this(c, t, null, line, byteOffset); } - } -} + /** + * Full constructor. + * + * @param c The condition controlling execution. + * @param t The statement to execute if the condition is true. + * @param e The statement to execute if the condition is false, if any. + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + public IfStatement(Expression c, Statement t, Statement e, int line, int byteOffset) { + super(line, byteOffset); + condition = c; + thenClause = t; + elseClause = e; + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(elseClause == null ? 2 : 3); + I.children[0] = condition; + I.children[1] = thenClause; + if (elseClause != null) + I.children[2] = elseClause; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new IfStatement((Expression) condition.clone(), (Statement) thenClause.clone(), + elseClause == null ? null : (Statement) elseClause.clone(), -1, -1); + } + + + /** Returns a hash code for this {@link ASTNode}. 
*/ + public int hashCode() { + int result = 47 * condition.hashCode() + 29 * thenClause.hashCode(); + if (elseClause != null) + result += 17 * elseClause.hashCode(); + return result; + } + + + /** + * Distinguishes this {@link ASTNode} from other objects according to its contents recursively. + * + * @param o Another object. + * @return true iff this node is equal to o. + **/ + public boolean equals(Object o) { + if (!(o instanceof IfStatement)) + return false; + IfStatement i = (IfStatement) o; + return condition.equals(i.condition) && thenClause.equals(i.thenClause) + && (elseClause == null ? i.elseClause == null : elseClause.equals(i.elseClause)); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + buffer.append("if ("); + condition.write(buffer); + buffer.append(") "); + thenClause.write(buffer); + if (elseClause != null) { + buffer.append(" else "); + elseClause.write(buffer); + } + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ImportDeclaration.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ImportDeclaration.java index c3d1bb5c..87510ad1 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ImportDeclaration.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ImportDeclaration.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. 
- * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,67 +11,65 @@ /** - * Representation of an import declaration. - * - * @author Nick Rizzolo + * Representation of an import declaration. + * + * @author Nick Rizzolo **/ -public class ImportDeclaration extends Declaration -{ - /** - * Full constructor. - * - * @param n Reference to the object representing the import name. - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public ImportDeclaration(Name n, int line, int byteOffset) { - super(n, line, byteOffset); - } +public class ImportDeclaration extends Declaration { + /** + * Full constructor. + * + * @param n Reference to the object representing the import name. + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + public ImportDeclaration(Name n, int line, int byteOffset) { + super(n, line, byteOffset); + } - /** - * Returns null, since this method should never be called on - * an object of this class. - * - * @return null - **/ - public Type getType() { return null; } + /** + * Returns null, since this method should never be called on an object of this + * class. 
+ * + * @return null + **/ + public Type getType() { + return null; + } - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return new ImportDeclaration((Name) name.clone(), -1, -1); - } + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new ImportDeclaration((Name) name.clone(), -1, -1); + } - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - buffer.append("import "); - name.write(buffer); - buffer.append(";"); - } + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. 
+ **/ + public void write(StringBuffer buffer) { + buffer.append("import "); + name.write(buffer); + buffer.append(";"); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ImportList.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ImportList.java index 56234aea..7b2d125e 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ImportList.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ImportList.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -16,139 +13,132 @@ /** - * Currently, this is just a wrapper class for LinkedList. The - * code that uses it looks a little cleaner when casts are all taken care of - * automatically. - * - * @author Nick Rizzolo + * Currently, this is just a wrapper class for LinkedList. The code that uses it looks + * a little cleaner when casts are all taken care of automatically. + * + * @author Nick Rizzolo **/ -public class ImportList extends List -{ - /** Default constructor. */ - public ImportList() { super(-1, -1, " "); } - - /** - * Initializing constructor. Requires its argument to be - * non-null. - * - * @param d A single ImportDeclaration with which to - * initialize this list. - **/ - public ImportList(ImportDeclaration d) { - super(d.line, d.byteOffset, " "); - list.add(d); - } - - - /** - * Adds another ImportDeclaration to the end of the list. 
- * - * @param d A reference to the ImportDeclaration to be added. - **/ - public void add(ImportDeclaration d) { list.add(d); } - - - /** - * Adds all the ImportDeclarations in another - * ImportList to the end of this - * ImportList. - * - * @param d The list to be added. - **/ - public void addAll(ImportList d) { list.addAll(d.list); } - - - /** - * Transforms the list into an array of statements. - * - * @return An array of statements containing references to every statement - * in the list. - **/ - public ImportDeclaration[] toArray() { - return - (ImportDeclaration[]) list.toArray(new ImportDeclaration[list.size()]); - } - - - /** - * Returns an iterator used specifically to access the elements of this - * list. - * - * @return An iterator used specifically to access the elements of this - * list. - **/ - public ImportListIterator listIterator() { - return new ImportListIterator(); - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { return listIterator(); } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - ImportList clone = new ImportList(); - for (Iterator i = list.iterator(); i.hasNext(); ) - clone.list.add(((ImportDeclaration) i.next()).clone()); - return clone; - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Used to iterate though the children of a list of AST nodes. The entire - * interface of java.util.ListIterator is exposed through this - * class. 
- * - * @author Nick Rizzolo - **/ - public class ImportListIterator extends NodeListIterator - { +public class ImportList extends List { + /** Default constructor. */ + public ImportList() { + super(-1, -1, " "); + } + /** - * Returns the next AST node in the list. This method may be called - * repeatedly to iterate through the list, or intermixed with calls to - * previous() to go back and forth. (Note that alternating - * calls to next() and previous() will return - * the same element repeatedly.) - * - * @return The next AST node in the list. + * Initializing constructor. Requires its argument to be non-null. + * + * @param d A single ImportDeclaration with which to initialize this list. **/ - public ImportDeclaration nextItem() { - return (ImportDeclaration) I.next(); + public ImportList(ImportDeclaration d) { + super(d.line, d.byteOffset, " "); + list.add(d); } /** - * Returns the previous element in the list. This method may be called - * repeatedly to iterate through the list backwards, or intermixed with - * calls to next to go back and forth. (Note that alternating calls to - * next and previous will return the same element repeatedly.) - * - * @return The previous AST node in the list. + * Adds another ImportDeclaration to the end of the list. + * + * @param d A reference to the ImportDeclaration to be added. **/ - public ImportDeclaration previousItem() { - return (ImportDeclaration) I.previous(); + public void add(ImportDeclaration d) { + list.add(d); } - } -} + + /** + * Adds all the ImportDeclarations in another ImportList to the end of + * this ImportList. + * + * @param d The list to be added. + **/ + public void addAll(ImportList d) { + list.addAll(d.list); + } + + + /** + * Transforms the list into an array of statements. + * + * @return An array of statements containing references to every statement in the list. 
+ **/ + public ImportDeclaration[] toArray() { + return (ImportDeclaration[]) list.toArray(new ImportDeclaration[list.size()]); + } + + + /** + * Returns an iterator used specifically to access the elements of this list. + * + * @return An iterator used specifically to access the elements of this list. + **/ + public ImportListIterator listIterator() { + return new ImportListIterator(); + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + return listIterator(); + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + ImportList clone = new ImportList(); + for (Iterator i = list.iterator(); i.hasNext();) + clone.list.add(((ImportDeclaration) i.next()).clone()); + return clone; + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Used to iterate though the children of a list of AST nodes. The entire interface of + * java.util.ListIterator is exposed through this class. + * + * @author Nick Rizzolo + **/ + public class ImportListIterator extends NodeListIterator { + /** + * Returns the next AST node in the list. This method may be called repeatedly to iterate + * through the list, or intermixed with calls to previous() to go back and + * forth. (Note that alternating calls to next() and previous() + * will return the same element repeatedly.) + * + * @return The next AST node in the list. + **/ + public ImportDeclaration nextItem() { + return (ImportDeclaration) I.next(); + } + + + /** + * Returns the previous element in the list. 
This method may be called repeatedly to iterate + * through the list backwards, or intermixed with calls to next to go back and forth. (Note + * that alternating calls to next and previous will return the same element repeatedly.) + * + * @return The previous AST node in the list. + **/ + public ImportDeclaration previousItem() { + return (ImportDeclaration) I.previous(); + } + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/IncrementExpression.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/IncrementExpression.java index 361841b6..854542d1 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/IncrementExpression.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/IncrementExpression.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,130 +11,125 @@ /** - * This class represents both increment and decrement expressions. It's - * functionally equivalent to the UnaryExpression class, except - * that it's derived from StatementExpression instead of just - * Expression, thereby allowing it to be part of an - * ExpressionStatement. - * - * @see UnaryExpression - * @see StatementExpression - * @see ExpressionStatement - * @author Nick Rizzolo + * This class represents both increment and decrement expressions. 
It's functionally equivalent to + * the UnaryExpression class, except that it's derived from + * StatementExpression instead of just Expression, thereby allowing it to + * be part of an ExpressionStatement. + * + * @see UnaryExpression + * @see StatementExpression + * @see ExpressionStatement + * @author Nick Rizzolo **/ -public class IncrementExpression extends StatementExpression -{ - /** (¬ø) Representation of the increment operator. */ - public Operator operation; - /** - * (¬ø) The expression on which the increment operator operates. - **/ - public Expression subexpression; - - - /** - * Initializing constructor. Line and byte offset information is taken - * from the increment operator's representation. - * - * @param op Representation of the increment operator. - * @param sub The expression on which the increment operator operates. - **/ - public IncrementExpression(Operator op, Expression sub) { - super(op.line, op.byteOffset); - operation = op; - subexpression = sub; - } - - - /** - * Returns a hash code value for java hash structures. - * - * @return A hash code for this object. - **/ - public int hashCode() { - return operation.hashCode() + subexpression.hashCode(); - } - - - /** - * Indicates whether some other object is "equal to" this one. - * - * @return true iff this object is the same as the argument. - **/ - public boolean equals(Object o) { - if (!(o instanceof IncrementExpression)) return false; - IncrementExpression i = (IncrementExpression) o; - return operation.equals(i.operation) - && subexpression.equals(i.subexpression); - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. 
- **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(2); - - if (operation.operation == Operator.PRE_INCREMENT - || operation.operation == Operator.PRE_DECREMENT) { - I.children[0] = operation; - I.children[1] = subexpression; +public class IncrementExpression extends StatementExpression { + /** (¬ø) Representation of the increment operator. */ + public Operator operation; + /** + * (¬ø) The expression on which the increment operator operates. + **/ + public Expression subexpression; + + + /** + * Initializing constructor. Line and byte offset information is taken from the increment + * operator's representation. + * + * @param op Representation of the increment operator. + * @param sub The expression on which the increment operator operates. + **/ + public IncrementExpression(Operator op, Expression sub) { + super(op.line, op.byteOffset); + operation = op; + subexpression = sub; } - else { - I.children[0] = subexpression; - I.children[1] = operation; + + + /** + * Returns a hash code value for java hash structures. + * + * @return A hash code for this object. + **/ + public int hashCode() { + return operation.hashCode() + subexpression.hashCode(); } - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return new IncrementExpression((Operator) operation.clone(), - (Expression) subexpression.clone()); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. 
- **/ - public void write(StringBuffer buffer) { - if (parenthesized) buffer.append("("); - - if (operation.operation == Operator.PRE_INCREMENT - || operation.operation == Operator.PRE_DECREMENT) { - operation.write(buffer); - subexpression.write(buffer); + + /** + * Indicates whether some other object is "equal to" this one. + * + * @return true iff this object is the same as the argument. + **/ + public boolean equals(Object o) { + if (!(o instanceof IncrementExpression)) + return false; + IncrementExpression i = (IncrementExpression) o; + return operation.equals(i.operation) && subexpression.equals(i.subexpression); } - else { - subexpression.write(buffer); - operation.write(buffer); + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(2); + + if (operation.operation == Operator.PRE_INCREMENT + || operation.operation == Operator.PRE_DECREMENT) { + I.children[0] = operation; + I.children[1] = subexpression; + } else { + I.children[0] = subexpression; + I.children[1] = operation; + } + + return I; } - if (parenthesized) buffer.append(")"); - } -} + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new IncrementExpression((Operator) operation.clone(), + (Expression) subexpression.clone()); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. 
+ **/ + public void write(StringBuffer buffer) { + if (parenthesized) + buffer.append("("); + + if (operation.operation == Operator.PRE_INCREMENT + || operation.operation == Operator.PRE_DECREMENT) { + operation.write(buffer); + subexpression.write(buffer); + } else { + subexpression.write(buffer); + operation.write(buffer); + } + + if (parenthesized) + buffer.append(")"); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/InferenceDeclaration.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/InferenceDeclaration.java index b2abbb0e..1533afce 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/InferenceDeclaration.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/InferenceDeclaration.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -19,558 +16,515 @@ /** - * Represents an inference specification. - * - * @author Nick Rizzolo + * Represents an inference specification. + * + * @author Nick Rizzolo **/ -public class InferenceDeclaration extends Declaration - implements CodeGenerator -{ - /** If no inference algorithm is specified, this algorithm is used. 
*/ - public static final InstanceCreationExpression defaultInferenceConstructor = - new InstanceCreationExpression( - new Name("ILPInference"), - new ExpressionList( - new InstanceCreationExpression( - new Name("GLPKHook"), - new ExpressionList(), - -1, -1)), - -1, -1); - - - /** - * (¬ø) A specification of the object from which all variables - * can be found. - **/ - public Argument head; - /** - * (¬ø) The methods used to find the head object given objects - * of different types. - **/ - public HeadFinder[] headFinders; - /** - * (¬ø) Declarations describing how the scores produced by - * various learning classifiers should be normalized. - **/ - public NormalizerDeclaration[] normalizerDeclarations; - /** - * (¬ø) The constraint that must be respected during - * optimization. - **/ - public ConstraintDeclaration constraint; - /** - * Counts the number of subjectto clauses for error detection. - **/ - public int subjecttoClauses; - /** (ø) A constructor for the inference algorithm to use. */ - public InstanceCreationExpression algorithm; - /** Counts the number of with clauses for error detection. */ - public int withClauses; - - - /** - * Full constructor. - * - * @param com A Javadoc comment associated with the declaration. - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - * @param n The inference's name. - * @param h The specification of the head object. - * @param f An array of methods used to find the head object. - * @param d An array of normalizer declarations. - * @param con The constraint this inference must respect. - * @param a A constructor for the inference algorithm. 
- **/ - public InferenceDeclaration(String com, int line, int byteOffset, Name n, - Argument h, HeadFinder[] f, - NormalizerDeclaration[] d, - ConstraintDeclaration con, - InstanceCreationExpression a) { - super(com, n, line, byteOffset); - - head = h; - headFinders = f; - if (headFinders == null) headFinders = new HeadFinder[0]; - normalizerDeclarations = d; - if (normalizerDeclarations == null) - normalizerDeclarations = new NormalizerDeclaration[0]; - - if (con == null) { - constraint = - new ConstraintDeclaration( - null, -1, -1, new Name(name + "$subjectto"), h, - new Block( - new StatementList( - new ExpressionStatement( - new ConstraintStatementExpression( - new ConstraintEqualityExpression( - new Operator(Operator.CONSTRAINT_EQUAL), - new Constant("true"), - new Constant("true"))))))); - } - else constraint = con; - - subjecttoClauses = 1; - algorithm = a; - withClauses = algorithm == null ? 0 : 1; - } - - /** - * Parser's constructor. Line and byte offset information is taken from - * the first token. - * - * @param t The first token indicates line and byte offset information. - * @param i The identifier token representing the classifier's name. - * @param h The specification of the head object. - * @param c A list of clauses from the body of the declaration. 
- **/ - public InferenceDeclaration(TokenValue t, TokenValue i, Argument h, - LinkedList c) { - this(null, t.line, t.byteOffset, new Name(i), h, null, null, null, null); - subjecttoClauses = 0; - LinkedList finders = new LinkedList(); - LinkedList normalizers = new LinkedList(); - - for (Iterator I = c.iterator(); I.hasNext(); ) { - Clause clause = (Clause) I.next(); - if (clause.type == Clause.HEAD_FINDER) finders.add(clause.argument); - else if (clause.type == Clause.SUBJECTTO) { - Block b = (Block) clause.argument; - constraint = - new ConstraintDeclaration(null, b.line, b.byteOffset, - new Name(name + "$subjectto"), h, b); - ++subjecttoClauses; - } - else if (clause.type == Clause.WITH) { - algorithm = (InstanceCreationExpression) clause.argument; - ++withClauses; - } - else if (clause.type == Clause.NORMALIZER_DECLARATION) - normalizers.add(clause.argument); - } - - headFinders = - (HeadFinder[]) finders.toArray(new HeadFinder[finders.size()]); - normalizerDeclarations = - (NormalizerDeclaration[]) - normalizers.toArray(new NormalizerDeclaration[normalizers.size()]); - } - - - /** - * Returns true iff at least one of the normalizer - * declarations is specific to a given type. - **/ - public boolean containsTypeSpecificNormalizer() { - for (int i = 0; i < normalizerDeclarations.length; ++i) - if (normalizerDeclarations[i].learner != null) return true; - return false; - } - - - /** - * Returns the type of the declaration. - * - * @return The type of the declaration. - **/ - public Type getType() { - return new InferenceType(head.getType(), headFinders); - } - - - /** Returns the name of the InferenceDeclaration. */ - public String getName() { return name.toString(); } - - - /** - * Returns the line number on which this AST node is found in the source - * (starting from line 0). This method exists to fulfull the - * implementation of CodeGenerator. 
- * @see CodeGenerator - **/ - public int getLine() { return line; } - - - /** - * Returns a shallow textual representation of this AST node. The - * difference between the result of this method and the result of - * write(StringBuffer) is that this method omits the - * subjectto clause. - **/ - public StringBuffer shallow() { - StringBuffer buffer = new StringBuffer(); - buffer.append("inference "); - name.write(buffer); - buffer.append(" head "); - head.write(buffer); - buffer.append(" { "); - - for (int i = 0; i < headFinders.length; ++i) { - headFinders[i].write(buffer); - buffer.append(" "); - } - - for (int i = 0; i < normalizerDeclarations.length; ++i) { - normalizerDeclarations[i].write(buffer); - buffer.append(" "); - } - - if (algorithm != null) { - buffer.append(" with "); - algorithm.write(buffer); - } +public class InferenceDeclaration extends Declaration implements CodeGenerator { + /** If no inference algorithm is specified, this algorithm is used. */ + public static final InstanceCreationExpression defaultInferenceConstructor = + new InstanceCreationExpression(new Name("ILPInference"), new ExpressionList( + new InstanceCreationExpression(new Name("GLPKHook"), new ExpressionList(), -1, + -1)), -1, -1); - buffer.append(" }"); - return buffer; - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. 
- **/ - public ASTNodeIterator iterator() { - int total = headFinders.length + normalizerDeclarations.length + 3; - if (algorithm != null) ++total; - - ASTNodeIterator I = new ASTNodeIterator(total); - I.children[0] = head; - - for (int i = 0; i < headFinders.length; ++i) - I.children[i + 1] = headFinders[i]; - for (int i = 0; i < normalizerDeclarations.length; ++i) - I.children[i + 1 + headFinders.length] = normalizerDeclarations[i]; - - I.children[headFinders.length + normalizerDeclarations.length + 1] = - constraint; - if (algorithm != null) - I.children[headFinders.length + normalizerDeclarations.length + 2] = - algorithm; - - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return - new InferenceDeclaration( - comment, -1, -1, (Name) name.clone(), - (Argument) head.clone(), - (HeadFinder[]) headFinders.clone(), - (NormalizerDeclaration[]) normalizerDeclarations.clone(), - (ConstraintDeclaration) constraint.clone(), - algorithm == null ? null - : (InstanceCreationExpression) algorithm.clone()); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. 
- **/ - public void write(StringBuffer buffer) { - buffer.append("inference "); - name.write(buffer); - buffer.append(" head "); - head.write(buffer); - buffer.append(" { "); - - for (int i = 0; i < headFinders.length; ++i) { - headFinders[i].write(buffer); - buffer.append(" "); - } - - for (int i = 0; i < normalizerDeclarations.length; ++i) { - normalizerDeclarations[i].write(buffer); - buffer.append(" "); - } - buffer.append("subjectto "); - constraint.body.write(buffer); - - if (algorithm != null) { - buffer.append(" with "); - algorithm.write(buffer); - } - - buffer.append(" }"); - } - - - /** - * A head finder is a method that finds the head object for an inference - * given another object. HeadFinder objects are only - * constructed by the InferenceDeclaration constructor and - * only stored in InferenceDeclaration objects. - * - * @author Nick Rizzolo - **/ - public static class HeadFinder extends ASTNode - { - /** (¬ø) Input specification of the head finder method. */ - public Argument argument; - /** (¬ø) Body of the head finder method. */ - public Block body; + /** + * (¬ø) A specification of the object from which all variables can be found. + **/ + public Argument head; + /** + * (¬ø) The methods used to find the head object given objects of different types. + **/ + public HeadFinder[] headFinders; + /** + * (¬ø) Declarations describing how the scores produced by various learning + * classifiers should be normalized. + **/ + public NormalizerDeclaration[] normalizerDeclarations; + /** + * (¬ø) The constraint that must be respected during optimization. + **/ + public ConstraintDeclaration constraint; + /** + * Counts the number of subjectto clauses for error detection. + **/ + public int subjecttoClauses; + /** (ø) A constructor for the inference algorithm to use. */ + public InstanceCreationExpression algorithm; + /** Counts the number of with clauses for error detection. */ + public int withClauses; /** - * Full constructor. 
Line and byte offset information are taken from the - * argument. - * - * @param a The argument to the head finder method. - * @param b The body of the head finder method. + * Full constructor. + * + * @param com A Javadoc comment associated with the declaration. + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + * @param n The inference's name. + * @param h The specification of the head object. + * @param f An array of methods used to find the head object. + * @param d An array of normalizer declarations. + * @param con The constraint this inference must respect. + * @param a A constructor for the inference algorithm. **/ - public HeadFinder(Argument a, Block b) { - super(a.line, a.byteOffset); - argument = a; - body = b; + public InferenceDeclaration(String com, int line, int byteOffset, Name n, Argument h, + HeadFinder[] f, NormalizerDeclaration[] d, ConstraintDeclaration con, + InstanceCreationExpression a) { + super(com, n, line, byteOffset); + + head = h; + headFinders = f; + if (headFinders == null) + headFinders = new HeadFinder[0]; + normalizerDeclarations = d; + if (normalizerDeclarations == null) + normalizerDeclarations = new NormalizerDeclaration[0]; + + if (con == null) { + constraint = + new ConstraintDeclaration(null, -1, -1, new Name(name + "$subjectto"), h, + new Block(new StatementList(new ExpressionStatement( + new ConstraintStatementExpression( + new ConstraintEqualityExpression(new Operator( + Operator.CONSTRAINT_EQUAL), + new Constant("true"), new Constant("true"))))))); + } else + constraint = con; + + subjecttoClauses = 1; + algorithm = a; + withClauses = algorithm == null ? 0 : 1; } - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. 
+ * Parser's constructor. Line and byte offset information is taken from the first token. + * + * @param t The first token indicates line and byte offset information. + * @param i The identifier token representing the classifier's name. + * @param h The specification of the head object. + * @param c A list of clauses from the body of the declaration. **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(2); - I.children[0] = argument; - I.children[1] = body; - return I; + public InferenceDeclaration(TokenValue t, TokenValue i, Argument h, LinkedList c) { + this(null, t.line, t.byteOffset, new Name(i), h, null, null, null, null); + subjecttoClauses = 0; + LinkedList finders = new LinkedList(); + LinkedList normalizers = new LinkedList(); + + for (Iterator I = c.iterator(); I.hasNext();) { + Clause clause = (Clause) I.next(); + if (clause.type == Clause.HEAD_FINDER) + finders.add(clause.argument); + else if (clause.type == Clause.SUBJECTTO) { + Block b = (Block) clause.argument; + constraint = + new ConstraintDeclaration(null, b.line, b.byteOffset, new Name(name + + "$subjectto"), h, b); + ++subjecttoClauses; + } else if (clause.type == Clause.WITH) { + algorithm = (InstanceCreationExpression) clause.argument; + ++withClauses; + } else if (clause.type == Clause.NORMALIZER_DECLARATION) + normalizers.add(clause.argument); + } + + headFinders = (HeadFinder[]) finders.toArray(new HeadFinder[finders.size()]); + normalizerDeclarations = + (NormalizerDeclaration[]) normalizers.toArray(new NormalizerDeclaration[normalizers + .size()]); } /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. + * Returns true iff at least one of the normalizer declarations is specific to a + * given type. 
**/ - public Object clone() { - return - new HeadFinder((Argument) argument.clone(), (Block) body.clone()); + public boolean containsTypeSpecificNormalizer() { + for (int i = 0; i < normalizerDeclarations.length; ++i) + if (normalizerDeclarations[i].learner != null) + return true; + return false; } /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. + * Returns the type of the declaration. + * + * @return The type of the declaration. **/ - public void runPass(Pass pass) { pass.run(this); } + public Type getType() { + return new InferenceType(head.getType(), headFinders); + } - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - argument.write(buffer); - buffer.append(" "); - body.write(buffer); + /** Returns the name of the InferenceDeclaration. */ + public String getName() { + return name.toString(); } - } - - - /** - * A normalizer declaration is a clause of an inference declaration that - * specifies a normalizer to be used in association with a particular - * learning classifier or in general. NormalizerDeclaration - * objects are only constructed by the InferenceDeclaration - * constructor and only stored in InferenceDeclaration - * objects. - * - * @author Nick Rizzolo - **/ - public static class NormalizerDeclaration extends ASTNode - { - /** (ø) The name of the learner to be normalized. */ - public Name learner; - /** (¬ø) Constructs the normalizer to use. */ - public InstanceCreationExpression normalizer; /** - * Full constructor. - * - * @param line The line on which the source code represented by - * this node is found. 
- * @param byteOffset The byte offset from the beginning of the source - * file at which the source code represented by this - * node is found. - * @param l The name of the learner. - * @param n Constructs the normalizer. + * Returns the line number on which this AST node is found in the source (starting from line 0). + * This method exists to fulfull the implementation of CodeGenerator. + * + * @see CodeGenerator **/ - public NormalizerDeclaration(int line, int byteOffset, Name l, - InstanceCreationExpression n) { - super(line, byteOffset); - learner = l; - normalizer = n; + public int getLine() { + return line; } + /** - * Parser's constructor. Line and byte offset information are taken from - * the token. - * - * @param t The token containing line and byte offset information. - * @param l The name of the learner. - * @param n Constructs the normalizer. + * Returns a shallow textual representation of this AST node. The difference between the result + * of this method and the result of write(StringBuffer) is that this method omits + * the subjectto clause. **/ - public NormalizerDeclaration(TokenValue t, Name l, - InstanceCreationExpression n) { - super(t.line, t.byteOffset); - learner = l; - normalizer = n; + public StringBuffer shallow() { + StringBuffer buffer = new StringBuffer(); + buffer.append("inference "); + name.write(buffer); + buffer.append(" head "); + head.write(buffer); + buffer.append(" { "); + + for (int i = 0; i < headFinders.length; ++i) { + headFinders[i].write(buffer); + buffer.append(" "); + } + + for (int i = 0; i < normalizerDeclarations.length; ++i) { + normalizerDeclarations[i].write(buffer); + buffer.append(" "); + } + + if (algorithm != null) { + buffer.append(" with "); + algorithm.write(buffer); + } + + buffer.append(" }"); + return buffer; } /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. 
+ * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. **/ public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(learner == null ? 1 : 2); - if (learner != null) I.children[0] = learner; - I.children[I.children.length - 1] = normalizer; - return I; + int total = headFinders.length + normalizerDeclarations.length + 3; + if (algorithm != null) + ++total; + + ASTNodeIterator I = new ASTNodeIterator(total); + I.children[0] = head; + + for (int i = 0; i < headFinders.length; ++i) + I.children[i + 1] = headFinders[i]; + for (int i = 0; i < normalizerDeclarations.length; ++i) + I.children[i + 1 + headFinders.length] = normalizerDeclarations[i]; + + I.children[headFinders.length + normalizerDeclarations.length + 1] = constraint; + if (algorithm != null) + I.children[headFinders.length + normalizerDeclarations.length + 2] = algorithm; + + return I; } /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. **/ public Object clone() { - return - new NormalizerDeclaration( - -1, -1, learner == null ? null : (Name) learner.clone(), - (InstanceCreationExpression) normalizer.clone()); + return new InferenceDeclaration(comment, -1, -1, (Name) name.clone(), + (Argument) head.clone(), (HeadFinder[]) headFinders.clone(), + (NormalizerDeclaration[]) normalizerDeclarations.clone(), + (ConstraintDeclaration) constraint.clone(), algorithm == null ? null + : (InstanceCreationExpression) algorithm.clone()); } /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. 
+ * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. **/ - public void runPass(Pass pass) { pass.run(this); } + public void runPass(Pass pass) { + pass.run(this); + } /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. **/ public void write(StringBuffer buffer) { - if (learner != null) { - learner.write(buffer); - buffer.append(" "); - } - - buffer.append("normalizedby "); - normalizer.write(buffer); + buffer.append("inference "); + name.write(buffer); + buffer.append(" head "); + head.write(buffer); + buffer.append(" { "); + + for (int i = 0; i < headFinders.length; ++i) { + headFinders[i].write(buffer); + buffer.append(" "); + } + + for (int i = 0; i < normalizerDeclarations.length; ++i) { + normalizerDeclarations[i].write(buffer); + buffer.append(" "); + } + + buffer.append("subjectto "); + constraint.body.write(buffer); + + if (algorithm != null) { + buffer.append(" with "); + algorithm.write(buffer); + } + + buffer.append(" }"); } - } - - - /** - * An intermediate class used during parsing to represent the various - * clauses of an inference declaration. - * - * @author Nick Rizzolo - **/ - public static class Clause - { - /** Value of the type variable. */ - public static final int HEAD_FINDER = 0; - /** Value of the type variable. */ - public static final int SUBJECTTO = 1; - /** Value of the type variable. */ - public static final int WITH = 2; - /** Value of the type variable. */ - public static final int NORMALIZER_DECLARATION = 3; - /** String representations of the type names. 
*/ - public static final String[] typeNames = - new String[]{ "", "subjectto", "with", "" }; - - - /** The type of the clause. */ - public int type; - /** The argument of the clause. */ - public ASTNode argument; /** - * Full constructor. - * - * @param t The type. - * @param a The argument node. - **/ - public Clause(int t, ASTNode a) { - type = t; - argument = a; + * A head finder is a method that finds the head object for an inference given another object. + * HeadFinder objects are only constructed by the InferenceDeclaration + * constructor and only stored in InferenceDeclaration objects. + * + * @author Nick Rizzolo + **/ + public static class HeadFinder extends ASTNode { + /** (¬ø) Input specification of the head finder method. */ + public Argument argument; + /** (¬ø) Body of the head finder method. */ + public Block body; + + + /** + * Full constructor. Line and byte offset information are taken from the argument. + * + * @param a The argument to the head finder method. + * @param b The body of the head finder method. + **/ + public HeadFinder(Argument a, Block b) { + super(a.line, a.byteOffset); + argument = a; + body = b; + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(2); + I.children[0] = argument; + I.children[1] = body; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member + * data objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new HeadFinder((Argument) argument.clone(), (Block) body.clone()); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. 
+ **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + argument.write(buffer); + buffer.append(" "); + body.write(buffer); + } } /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return new Clause(type, (ASTNode) argument.clone()); + * A normalizer declaration is a clause of an inference declaration that specifies a normalizer + * to be used in association with a particular learning classifier or in general. + * NormalizerDeclaration objects are only constructed by the + * InferenceDeclaration constructor and only stored in + * InferenceDeclaration objects. + * + * @author Nick Rizzolo + **/ + public static class NormalizerDeclaration extends ASTNode { + /** (ø) The name of the learner to be normalized. */ + public Name learner; + /** (¬ø) Constructs the normalizer to use. */ + public InstanceCreationExpression normalizer; + + + /** + * Full constructor. + * + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the + * source code represented by this node is found. + * @param l The name of the learner. + * @param n Constructs the normalizer. + **/ + public NormalizerDeclaration(int line, int byteOffset, Name l, InstanceCreationExpression n) { + super(line, byteOffset); + learner = l; + normalizer = n; + } + + /** + * Parser's constructor. Line and byte offset information are taken from the token. + * + * @param t The token containing line and byte offset information. + * @param l The name of the learner. 
+ * @param n Constructs the normalizer. + **/ + public NormalizerDeclaration(TokenValue t, Name l, InstanceCreationExpression n) { + super(t.line, t.byteOffset); + learner = l; + normalizer = n; + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(learner == null ? 1 : 2); + if (learner != null) + I.children[0] = learner; + I.children[I.children.length - 1] = normalizer; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member + * data objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new NormalizerDeclaration(-1, -1, learner == null ? null + : (Name) learner.clone(), (InstanceCreationExpression) normalizer.clone()); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + if (learner != null) { + learner.write(buffer); + buffer.append(" "); + } + + buffer.append("normalizedby "); + normalizer.write(buffer); + } } /** - * Debugging utility method. - * - * @return A textual representation of this expression. - **/ - public String toString() { - if (type == HEAD_FINDER || type == NORMALIZER_DECLARATION) - return argument.toString(); - return typeNames[type] + " " + argument; + * An intermediate class used during parsing to represent the various clauses of an inference + * declaration. 
+ * + * @author Nick Rizzolo + **/ + public static class Clause { + /** Value of the type variable. */ + public static final int HEAD_FINDER = 0; + /** Value of the type variable. */ + public static final int SUBJECTTO = 1; + /** Value of the type variable. */ + public static final int WITH = 2; + /** Value of the type variable. */ + public static final int NORMALIZER_DECLARATION = 3; + /** String representations of the type names. */ + public static final String[] typeNames = new String[] {"", "subjectto", "with", ""}; + + + /** The type of the clause. */ + public int type; + /** The argument of the clause. */ + public ASTNode argument; + + + /** + * Full constructor. + * + * @param t The type. + * @param a The argument node. + **/ + public Clause(int t, ASTNode a) { + type = t; + argument = a; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member + * data objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new Clause(type, (ASTNode) argument.clone()); + } + + + /** + * Debugging utility method. + * + * @return A textual representation of this expression. + **/ + public String toString() { + if (type == HEAD_FINDER || type == NORMALIZER_DECLARATION) + return argument.toString(); + return typeNames[type] + " " + argument; + } } - } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/InferenceInvocation.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/InferenceInvocation.java index c0e36ec4..3c554b08 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/InferenceInvocation.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/InferenceInvocation.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. 
- * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,146 +11,144 @@ /** - * An inference can be invoked as a method with the name of a learning - * classifier involved in that inference as its lone argument. - * - * @author Nick Rizzolo + * An inference can be invoked as a method with the name of a learning classifier involved in that + * inference as its lone argument. + * + * @author Nick Rizzolo **/ -public class InferenceInvocation extends ClassifierExpression -{ - /** (¬ø) The name of the inference to invoke. */ - public Name inference; - /** (¬ø) The name of the argument learning classifier. */ - public Name classifier; - - - /** - * Initializing constructor. Line and byte offset information are taken - * from the name of the inference. - * - * @param i The name of the inference. - * @param c The name of the classifier. - **/ - public InferenceInvocation(Name i, Name c) { - super(i.line, i.byteOffset); - inference = i; - classifier = c; - } - - - /** - * Returns a hash code value for java hash structures. - * - * @return A hash code for this object. - **/ - public int hashCode() { - return inference.hashCode() + classifier.hashCode(); - } - - - /** - * Indicates whether some other object is "equal to" this one. - * - * @return true iff this object is the same as the argument. 
- **/ - public boolean equals(Object o) { - if (!(o instanceof InferenceInvocation)) return false; - InferenceInvocation i = (InferenceInvocation) o; - return inference.equals(i.inference) && classifier.equals(i.classifier); - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(2); - I.children[0] = inference; - I.children[1] = classifier; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return - new InferenceInvocation((Name) inference.clone(), - (Name) classifier.clone()); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - if (parenthesized) buffer.append("("); - inference.write(buffer); - buffer.append("("); - classifier.write(buffer); - buffer.append(")"); - if (parenthesized) buffer.append(")"); - } - - - /** - * Creates a StringBuffer containing a shallow representation - * of this ASTNode. - * - * @return A StringBuffer containing a shallow text - * representation of the given node. 
- **/ - public StringBuffer shallow() { - StringBuffer buffer = new StringBuffer(); - returnType.write(buffer); - buffer.append(" "); - name.write(buffer); - buffer.append("("); - argument.write(buffer); - buffer.append(") "); - - if (singleExampleCache) buffer.append("cached "); - - if (cacheIn != null) { - buffer.append("cachedin"); - - if (cacheIn.toString().equals(ClassifierAssignment.mapCache)) - buffer.append("map"); - else { - buffer.append(" "); - cacheIn.write(buffer); - } +public class InferenceInvocation extends ClassifierExpression { + /** (¬ø) The name of the inference to invoke. */ + public Name inference; + /** (¬ø) The name of the argument learning classifier. */ + public Name classifier; + + + /** + * Initializing constructor. Line and byte offset information are taken from the name of the + * inference. + * + * @param i The name of the inference. + * @param c The name of the classifier. + **/ + public InferenceInvocation(Name i, Name c) { + super(i.line, i.byteOffset); + inference = i; + classifier = c; + } - buffer.append(' '); + + /** + * Returns a hash code value for java hash structures. + * + * @return A hash code for this object. + **/ + public int hashCode() { + return inference.hashCode() + classifier.hashCode(); + } + + + /** + * Indicates whether some other object is "equal to" this one. + * + * @return true iff this object is the same as the argument. + **/ + public boolean equals(Object o) { + if (!(o instanceof InferenceInvocation)) + return false; + InferenceInvocation i = (InferenceInvocation) o; + return inference.equals(i.inference) && classifier.equals(i.classifier); + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. 
+ **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(2); + I.children[0] = inference; + I.children[1] = classifier; + return I; } - buffer.append("<- "); - inference.write(buffer); - buffer.append("("); - classifier.write(buffer); - buffer.append(")"); - return buffer; - } -} + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new InferenceInvocation((Name) inference.clone(), (Name) classifier.clone()); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + if (parenthesized) + buffer.append("("); + inference.write(buffer); + buffer.append("("); + classifier.write(buffer); + buffer.append(")"); + if (parenthesized) + buffer.append(")"); + } + + + /** + * Creates a StringBuffer containing a shallow representation of this + * ASTNode. + * + * @return A StringBuffer containing a shallow text representation of the given + * node. 
+ **/ + public StringBuffer shallow() { + StringBuffer buffer = new StringBuffer(); + returnType.write(buffer); + buffer.append(" "); + name.write(buffer); + buffer.append("("); + argument.write(buffer); + buffer.append(") "); + + if (singleExampleCache) + buffer.append("cached "); + + if (cacheIn != null) { + buffer.append("cachedin"); + + if (cacheIn.toString().equals(ClassifierAssignment.mapCache)) + buffer.append("map"); + else { + buffer.append(" "); + cacheIn.write(buffer); + } + + buffer.append(' '); + } + + buffer.append("<- "); + inference.write(buffer); + buffer.append("("); + classifier.write(buffer); + buffer.append(")"); + return buffer; + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/InferenceType.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/InferenceType.java index 112968c9..92c51598 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/InferenceType.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/InferenceType.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,168 +11,166 @@ /** - * An inference's type is defined by the type of the head object as well as - * the types of objects from which the head can be found. This class does - * not represent a syntax that appears in the source - it is constructed only - * during semantic analysis. 
- * - * @author Nick Rizzolo + * An inference's type is defined by the type of the head object as well as the types of objects + * from which the head can be found. This class does not represent a syntax that appears in the + * source - it is constructed only during semantic analysis. + * + * @author Nick Rizzolo **/ -public class InferenceType extends Type -{ - /** The type of the head object. */ - protected Type headType; - /** The types of the head finder objects. */ - protected Type[] headFinderTypes; - - - /** - * Initializing constructor. - * - * @param h The head object's type. - * @param f The array of head finder types. - **/ - public InferenceType(Type h, Type[] f) { - super(-1, -1); - headType = h; - headFinderTypes = f; - - try { myClass = Class.forName("edu.illinois.cs.cogcomp.lbjava.infer.Inference"); } - catch (Exception e) { - System.err.println("Class 'edu.illinois.cs.cogcomp.lbjava.infer.Inference' not found. " - + "Aborting."); - System.exit(1); +public class InferenceType extends Type { + /** The type of the head object. */ + protected Type headType; + /** The types of the head finder objects. */ + protected Type[] headFinderTypes; + + + /** + * Initializing constructor. + * + * @param h The head object's type. + * @param f The array of head finder types. + **/ + public InferenceType(Type h, Type[] f) { + super(-1, -1); + headType = h; + headFinderTypes = f; + + try { + myClass = Class.forName("edu.illinois.cs.cogcomp.lbjava.infer.Inference"); + } catch (Exception e) { + System.err + .println("Class 'edu.illinois.cs.cogcomp.lbjava.infer.Inference' not found. " + + "Aborting."); + System.exit(1); + } } - } - - /** - * Initializing constructor. - * - * @param h The head object's type. - * @param f The array of head finder arguments from the inference - * declaration. 
- **/ - public InferenceType(Type h, InferenceDeclaration.HeadFinder[] f) { - super(-1, -1); - headType = h; - - if (f == null) headFinderTypes = new Type[0]; - else { - headFinderTypes = new Type[f.length]; - for (int i = 0; i < f.length; ++i) - headFinderTypes[i] = f[i].argument.getType(); + + /** + * Initializing constructor. + * + * @param h The head object's type. + * @param f The array of head finder arguments from the inference declaration. + **/ + public InferenceType(Type h, InferenceDeclaration.HeadFinder[] f) { + super(-1, -1); + headType = h; + + if (f == null) + headFinderTypes = new Type[0]; + else { + headFinderTypes = new Type[f.length]; + for (int i = 0; i < f.length; ++i) + headFinderTypes[i] = f[i].argument.getType(); + } + + try { + myClass = Class.forName("edu.illinois.cs.cogcomp.lbjava.infer.Inference"); + } catch (Exception e) { + System.err + .println("Class 'edu.illinois.cs.cogcomp.lbjava.infer.Inference' not found. " + + "Aborting."); + System.exit(1); + } } - try { myClass = Class.forName("edu.illinois.cs.cogcomp.lbjava.infer.Inference"); } - catch (Exception e) { - System.err.println("Class 'edu.illinois.cs.cogcomp.lbjava.infer.Inference' not found. " - + "Aborting."); - System.exit(1); + + /** Retrieves the value of the headType variable. */ + public Type getHeadType() { + return headType; } - } - - - /** Retrieves the value of the headType variable. */ - public Type getHeadType() { return headType; } - - - /** Retrieves the number of head finder types. */ - public int getFindersLength() { return headFinderTypes.length; } - - - /** - * Retrieves the type of the ith head finder object. - * - * @param i The index of the head finder type requested. - * @return The type at location i in the - * headFinderTypes array member variable, or - * null if i is outside the bounds of that - * array. 
- **/ - public Type getFinderType(int i) { - if (i < 0 || i >= headFinderTypes.length) return null; - return headFinderTypes[i]; - } - - - /** - * Two InferenceTypes are equivalent when their head types - * match. - * - * @param o The object whose equality with this object needs to be tested. - * @return true if the two objects are equal, and - * false otherwise. - **/ - public boolean equals(Object o) { - return o instanceof InferenceType - && headType.equals(((InferenceType) o).headType); - } - - - /** A hash code based on the hash code of {@link #headType}. */ - public int hashCode() { - return 31 * headType.hashCode() + 17; - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(headFinderTypes.length + 1); - I.children[0] = headType; - for (int i = 0; i < headFinderTypes.length; ++i) - I.children[i + 1] = headFinderTypes[i]; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return new InferenceType((Type) headType.clone(), - (Type[]) headFinderTypes.clone()); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. 
- **/ - public void write(StringBuffer buffer) { - buffer.append("Inference { "); - headType.write(buffer); - buffer.append(" : "); - - if (headFinderTypes.length > 0) { - headFinderTypes[0].write(buffer); - for (int i = 1; i < headFinderTypes.length; ++i) { - buffer.append(", "); - headFinderTypes[i].write(buffer); - } + + + /** Retrieves the number of head finder types. */ + public int getFindersLength() { + return headFinderTypes.length; } - buffer.append(" }"); - } -} + /** + * Retrieves the type of the ith head finder object. + * + * @param i The index of the head finder type requested. + * @return The type at location i in the headFinderTypes array member + * variable, or null if i is outside the bounds of that array. + **/ + public Type getFinderType(int i) { + if (i < 0 || i >= headFinderTypes.length) + return null; + return headFinderTypes[i]; + } + + + /** + * Two InferenceTypes are equivalent when their head types match. + * + * @param o The object whose equality with this object needs to be tested. + * @return true if the two objects are equal, and false otherwise. + **/ + public boolean equals(Object o) { + return o instanceof InferenceType && headType.equals(((InferenceType) o).headType); + } + + + /** A hash code based on the hash code of {@link #headType}. */ + public int hashCode() { + return 31 * headType.hashCode() + 17; + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(headFinderTypes.length + 1); + I.children[0] = headType; + for (int i = 0; i < headFinderTypes.length; ++i) + I.children[i + 1] = headFinderTypes[i]; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. 
+ **/ + public Object clone() { + return new InferenceType((Type) headType.clone(), (Type[]) headFinderTypes.clone()); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + buffer.append("Inference { "); + headType.write(buffer); + buffer.append(" : "); + + if (headFinderTypes.length > 0) { + headFinderTypes[0].write(buffer); + for (int i = 1; i < headFinderTypes.length; ++i) { + buffer.append(", "); + headFinderTypes[i].write(buffer); + } + } + + buffer.append(" }"); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/InstanceCreationExpression.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/InstanceCreationExpression.java index aab2a929..e531a63a 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/InstanceCreationExpression.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/InstanceCreationExpression.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,134 +11,122 @@ /** - * This class represents an expression creating a class instance. - * - * @author Nick Rizzolo + * This class represents an expression creating a class instance. + * + * @author Nick Rizzolo **/ -public class InstanceCreationExpression extends MethodInvocation -{ - /** - * Initializing constructor. - * - * @param n The name of the class or inner class to instantiate. - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public InstanceCreationExpression(Name n, int line, int byteOffset) { - this(null, n, new ExpressionList(), line, byteOffset); - } - - /** - * Initializing constructor. - * - * @param n The name of the class or inner class to instantiate. - * @param a The argument expressions passed to the constructor. - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public InstanceCreationExpression(Name n, ExpressionList a, int line, - int byteOffset) { - this(null, n, a, line, byteOffset); - } - - /** - * Initializing constructor. - * - * @param p Represents the parent object containing an inner - * class. - * @param n The name of the class or inner class to instantiate. - * @param line The line on which the source code represented by this - * node is found. 
- * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public InstanceCreationExpression(Expression p, Name n, int line, - int byteOffset) { - this(p, n, new ExpressionList(), line, byteOffset); - } - - /** - * Full constructor. - * - * @param p Represents the parent object containing an inner - * class. - * @param n The name of the class or inner class to instantiate. - * @param a The argument expressions passed to the constructor. - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public InstanceCreationExpression(Expression p, Name n, ExpressionList a, - int line, int byteOffset) { - super(p, n, a, line, byteOffset); - } - - - /** - * Indicates whether some other object is "equal to" this one. - * - * @return true iff this object is the same as the argument. - **/ - public boolean equals(Object o) { - if (!(o instanceof InstanceCreationExpression)) return false; - return super.equals(o); - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return - new InstanceCreationExpression( - (parentObject == null) ? null : (Expression) parentObject.clone(), - (Name) name.clone(), (ExpressionList) arguments.clone(), -1, -1); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. 
- * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - if (parenthesized) buffer.append("("); - - if (parentObject != null) { - parentObject.write(buffer); - buffer.append("."); +public class InstanceCreationExpression extends MethodInvocation { + /** + * Initializing constructor. + * + * @param n The name of the class or inner class to instantiate. + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + public InstanceCreationExpression(Name n, int line, int byteOffset) { + this(null, n, new ExpressionList(), line, byteOffset); } - buffer.append("new "); - name.write(buffer); - buffer.append("("); - arguments.write(buffer); - buffer.append(")"); + /** + * Initializing constructor. + * + * @param n The name of the class or inner class to instantiate. + * @param a The argument expressions passed to the constructor. + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + public InstanceCreationExpression(Name n, ExpressionList a, int line, int byteOffset) { + this(null, n, a, line, byteOffset); + } - if (parenthesized) buffer.append(")"); - } -} + /** + * Initializing constructor. + * + * @param p Represents the parent object containing an inner class. + * @param n The name of the class or inner class to instantiate. + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. 
+ **/ + public InstanceCreationExpression(Expression p, Name n, int line, int byteOffset) { + this(p, n, new ExpressionList(), line, byteOffset); + } + + /** + * Full constructor. + * + * @param p Represents the parent object containing an inner class. + * @param n The name of the class or inner class to instantiate. + * @param a The argument expressions passed to the constructor. + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + public InstanceCreationExpression(Expression p, Name n, ExpressionList a, int line, + int byteOffset) { + super(p, n, a, line, byteOffset); + } + + + /** + * Indicates whether some other object is "equal to" this one. + * + * @return true iff this object is the same as the argument. + **/ + public boolean equals(Object o) { + if (!(o instanceof InstanceCreationExpression)) + return false; + return super.equals(o); + } + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new InstanceCreationExpression((parentObject == null) ? null + : (Expression) parentObject.clone(), (Name) name.clone(), + (ExpressionList) arguments.clone(), -1, -1); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. 
+ **/ + public void write(StringBuffer buffer) { + if (parenthesized) + buffer.append("("); + + if (parentObject != null) { + parentObject.write(buffer); + buffer.append("."); + } + + buffer.append("new "); + name.write(buffer); + buffer.append("("); + arguments.write(buffer); + buffer.append(")"); + + if (parenthesized) + buffer.append(")"); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/InstanceofExpression.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/InstanceofExpression.java index 3e7f4f1b..2dec3317 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/InstanceofExpression.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/InstanceofExpression.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,112 +11,107 @@ /** - * This class represents an instanceof expression. - * - * @author Nick Rizzolo + * This class represents an instanceof expression. + * + * @author Nick Rizzolo **/ -public class InstanceofExpression extends Expression -{ - /** - * (¬ø) The expression on the left hand side of - * instanceof. - **/ - public Expression left; - /** - * (¬ø) The expression on the right hand side of - * instanceof. - **/ - public Type right; - - - /** - * Full constructor. - * - * @param l Reference to the left hand side's representation. 
- * @param r Reference to the right hand side's representation. - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public InstanceofExpression(Expression l, Type r, int line, int byteOffset) - { - super(line, byteOffset); - left = l; - right = r; - } - - - /** - * Returns a hash code value for java hash structures. - * - * @return A hash code for this object. - **/ - public int hashCode() { return left.hashCode() + right.hashCode(); } - - - /** - * Indicates whether some other object is "equal to" this one. - * - * @return true iff this object is the same as the argument. - **/ - public boolean equals(Object o) { - if (!(o instanceof InstanceofExpression)) return false; - InstanceofExpression i = (InstanceofExpression) o; - return left.equals(i.left) && right.equals(i.right); - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(2); - I.children[0] = left; - I.children[1] = right; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return new InstanceofExpression((Expression) left.clone(), - (Type) right.clone(), -1, -1); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. 
- * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - if (parenthesized) buffer.append("("); - left.write(buffer); - buffer.append(" instanceof "); - right.write(buffer); - if (parenthesized) buffer.append(")"); - } +public class InstanceofExpression extends Expression { + /** + * (¬ø) The expression on the left hand side of instanceof. + **/ + public Expression left; + /** + * (¬ø) The expression on the right hand side of instanceof. + **/ + public Type right; + + + /** + * Full constructor. + * + * @param l Reference to the left hand side's representation. + * @param r Reference to the right hand side's representation. + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + public InstanceofExpression(Expression l, Type r, int line, int byteOffset) { + super(line, byteOffset); + left = l; + right = r; + } + + + /** + * Returns a hash code value for java hash structures. + * + * @return A hash code for this object. + **/ + public int hashCode() { + return left.hashCode() + right.hashCode(); + } + + + /** + * Indicates whether some other object is "equal to" this one. + * + * @return true iff this object is the same as the argument. + **/ + public boolean equals(Object o) { + if (!(o instanceof InstanceofExpression)) + return false; + InstanceofExpression i = (InstanceofExpression) o; + return left.equals(i.left) && right.equals(i.right); + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. 
+ **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(2); + I.children[0] = left; + I.children[1] = right; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new InstanceofExpression((Expression) left.clone(), (Type) right.clone(), -1, -1); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + if (parenthesized) + buffer.append("("); + left.write(buffer); + buffer.append(" instanceof "); + right.write(buffer); + if (parenthesized) + buffer.append(")"); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/LabeledStatement.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/LabeledStatement.java index f6dfd090..d47770d5 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/LabeledStatement.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/LabeledStatement.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -15,111 +12,105 @@ /** - * Represents any statement with an identifier label. - * - * @author Nick Rizzolo + * Represents any statement with an identifier label. + * + * @author Nick Rizzolo **/ -public class LabeledStatement extends Statement -{ - /** (¬ø) The label for the statement. */ - public String label; - /** (¬ø) The statement. */ - public Statement statement; - - - /** - * Parser's constructor. Line and byte offset information are taken from - * the label's representation. - * - * @param l The token representing the label. - * @param s The statement. - **/ - public LabeledStatement(TokenValue l, Statement s) { - this(l.toString(), s, l.line, l.byteOffset); - } - - /** - * Full constructor. - * - * @param l The label. - * @param s The statement. - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public LabeledStatement(String l, Statement s, int line, int byteOffset) { - super(line, byteOffset); - label = l; - statement = s; - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(1); - I.children[0] = statement; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. 
- **/ - public Object clone() { - return new LabeledStatement(label, (Statement) statement.clone(), -1, -1); - } - - - /** Returns a hash code for this {@link ASTNode}. */ - public int hashCode() { - return 31 * label.hashCode() + 7 * statement.hashCode(); - } - - - /** - * Distinguishes this {@link ASTNode} from other objects according to its - * contents recursively. - * - * @param o Another object. - * @return true iff this node is equal to o. - **/ - public boolean equals(Object o) { - if (!(o instanceof LabeledStatement)) return false; - LabeledStatement l = (LabeledStatement) o; - return label.equals(l.label) && statement.equals(l.statement); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - buffer.append(label + ": "); - statement.write(buffer); - } +public class LabeledStatement extends Statement { + /** (¬ø) The label for the statement. */ + public String label; + /** (¬ø) The statement. */ + public Statement statement; + + + /** + * Parser's constructor. Line and byte offset information are taken from the label's + * representation. + * + * @param l The token representing the label. + * @param s The statement. + **/ + public LabeledStatement(TokenValue l, Statement s) { + this(l.toString(), s, l.line, l.byteOffset); + } + + /** + * Full constructor. + * + * @param l The label. + * @param s The statement. + * @param line The line on which the source code represented by this node is found. 
+ * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + public LabeledStatement(String l, Statement s, int line, int byteOffset) { + super(line, byteOffset); + label = l; + statement = s; + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(1); + I.children[0] = statement; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new LabeledStatement(label, (Statement) statement.clone(), -1, -1); + } + + + /** Returns a hash code for this {@link ASTNode}. */ + public int hashCode() { + return 31 * label.hashCode() + 7 * statement.hashCode(); + } + + + /** + * Distinguishes this {@link ASTNode} from other objects according to its contents recursively. + * + * @param o Another object. + * @return true iff this node is equal to o. + **/ + public boolean equals(Object o) { + if (!(o instanceof LabeledStatement)) + return false; + LabeledStatement l = (LabeledStatement) o; + return label.equals(l.label) && statement.equals(l.statement); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. 
+ **/ + public void write(StringBuffer buffer) { + buffer.append(label + ": "); + statement.write(buffer); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/LearningClassifierExpression.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/LearningClassifierExpression.java index a1ca8a9d..7895b974 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/LearningClassifierExpression.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/LearningClassifierExpression.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -22,1100 +19,1031 @@ * This class represents expressions that specify classifiers that learn. * * @author Nick Rizzolo -**/ -public class LearningClassifierExpression extends ClassifierExpression -{ - /** - * If no learning algorithm is specified to learn a discrete classifier, - * this learner is used. 
- **/ - public static final InstanceCreationExpression defaultDiscreteLearner = - new InstanceCreationExpression( - new Name("SparseNetworkLearner"), - new ExpressionList( - new InstanceCreationExpression( - new Name("SparseWinnow"), - new ExpressionList( - new Constant("1.35"), - new ExpressionList( - new Constant("0.8"), - new ExpressionList( - new Constant("4"), - new ExpressionList(new Constant("1"))))), - -1, -1)), - -1, -1); - - /** - * If no learning algorithm is specified to learn a real classifier, this - * learner is used. - **/ - public static final InstanceCreationExpression defaultRealLearner = - new InstanceCreationExpression( - new Name("StochasticGradientDescent"), -1, -1); - - /** - * If no alpha clause appears during cross validation, this - * constant is used. - **/ - public static final double defaultAlpha = 0.05; - - /** - * If no preExtract clause appears in the sources, this - * constant is used. - **/ - public static final String defaultPreExtract = "\"diskZip\""; - - - /** The revision status of the LCE's features node. */ - public Integer featuresStatus; - /** The revision status of the LCE's prune node. */ - public Integer pruneStatus; - /** The revision status of the LCE's learning node. */ - public Integer learningStatus; - /** - * This flag is set true iff the changes to the learner's LBJava - * specification require its Java translation to be regenerated and nothing - * more. - **/ - public boolean onlyCodeGeneration; - - /** - * (ø) The classifier this learning classifier gets its labels from. - **/ - public ClassifierExpression labeler; - - /** - * (¬ø) The classifier that does feature extraction for this - * classifier; argument to using. - **/ - public ClassifierExpression extractor; - /** Counts the number of using clauses for error detection. */ - public int usingClauses; - - /** - * (ø) The encoding that the generated classifier will use when - * storing string data in features. 
- **/ - public Constant featureEncoding; - /** - * Counts the number of encoding clauses for error detection. - **/ - public int encodingClauses; - - /** - * (ø) Tells this learning classifier how to get its training data; - * argument to from. - **/ - public InstanceCreationExpression parser; - /** Counts the number of from clauses for error detection. */ - public int fromClauses; - - /** - * (ø) Represents the integer number of training repetitions; - * augments the from clause. - **/ - public Expression rounds; - /** Training starts from this round number. */ - public int startingRound = 1; - - /** (ø) Whether to use "global" or "perClass" feature pruning. */ - public Constant pruneCountType; - /** - * (ø) Whether to use "count" or "percent" counting for feature - * pruning. - **/ - public Constant pruneThresholdType; - /** (ø) The feature pruning threshold. */ - public Constant pruneThreshold; - /** - * (ø) The contents of {@link #pruneCountType} on the previous run - * of the compiler, if any. - **/ - public Constant previousPruneCountType; - - /** - * (ø) Tells this learning classifier how to construct its learning - * algorithm; argument to with. This variable should not - * contain a non-null value if {@link #learnerName} and - * {@link #learnerParameterBlock} contain non-null values. - **/ - public InstanceCreationExpression learnerConstructor; - /** - * (ø) The name of the learner for this classifier; first argument - * to with. If this variable contains a non-null - * value, {@link #learnerParameterBlock} must also be - * non-null, and {@link #learnerConstructor} must be - * null. - **/ - public Name learnerName; - /** - * (ø) A block of statements that set parameters of the learner for - * this classifier; second argument to with. If this variable - * contains a non-null value, {@link #learnerName} must also - * be non-null, and {@link #learnerConstructor} must be - * null. 
- **/ - public Block learnerParameterBlock; - /** Counts the number of with clauses for error detection. */ - public int withClauses; - - /** - * (ø) Tells this learning classifier how to get its testing data; - * argument to testFrom. - **/ - public InstanceCreationExpression testParser; - /** - * Counts the number of testFrom clauses for error detection. - **/ - public int testFromClauses; - - /** - * (ø) Tells this learning classifier how to produce a prediction - * during evaluation; argument to evaluate. - **/ - public Expression evaluation; - /** - * Counts the number of evaluate clauses for error detection. - **/ - public int evaluateClauses; - - /** - * A list of the {@link ParameterSet} objects that appear in - * the argument list. - **/ - public LinkedList parameterSets; - - /** - * (ø) Represents the integer number of subsets to be used in k-fold - * cross validation; first argument to cval. - **/ - public Constant K; - /** - * (ø) Dictates how the training data will be split into subsets for - * use by cross validation; second argument to cval. - **/ - public FoldParser.SplitPolicy splitPolicy; - /** Counts the number of cval clauses for error detection. */ - public int cvalClauses; - - /** - * (ø) Determines how the user wishes cross-validation to test its - * performance; argument to testingMetric. - **/ - public InstanceCreationExpression testingMetric; - /** - * Counts the number of testingMetric clauses, for error - * detection. - **/ - public int testingMetricClauses; - - /** - * (¬ø) The desired confidence level for cross validation's - * confidence interval output; argument to alpha, which can - * only be specified when cval is also specified. - **/ - public Constant alpha; - /** Counts the number of alpha clauses, for error detection. */ - public int alphaClauses; - - /** - * (¬ø) A Boolean or string value indicating how feature vectors - * are to be pre-extracted; argument to preExtract. 
Possible - * values are false, true, "false", - * "true", "none", "memory", - * "disk", and "diskZip". - **/ - public Constant preExtract; - /** - * Counts the number of preExtract clauses for error - * detection. - **/ - public int preExtractClauses; - - /** - * (ø) Integer specifying how often (in examples) to give the user a - * progress update during training; argument to - * progressOutput. - **/ - public Constant progressOutput; - /** - * Counts the number of progressOutput clauses, for error - * detection. - **/ - public int progressOutputClauses; - - /** - * The {@link SemanticAnalysis} pass will let this - * LearningClassifierExpression know if the features it - * generates need to be checked for appropriateness in the context of the - * enclosing {@link ClassifierAssignment} by setting this flag. - **/ - public boolean checkDiscreteValues; - - - /** - * A string representation of the return type information for each feature. - * This information is crucial in the construction of WEKA classifiers. - * - *

Its format follows this convention: - * "<type>_<name>[_<value-list>]:<type>_<name>[_<value-list>:[...]]" - * - *

<type> can be either "num", "str", or "nom", representing - * numerical, string, and nominal attributes respectively. - * - *

Numerical and string attribute encodings do not need a value-list, - * while Nominal attribute encodings are required to contain a value list. - * - * Examples: - *

    - *
  • - * Just a numerical attribute named Dan: "num_Dan:" - *
  • - * A numerical attribute named Dan and a string attribute named Nick: - * "num_Dan:str_Nick:" - *
  • - * A numerical attribute named Dan, a string attribute named Nick, and - * a nominal attribute named Arindam which can take the values "Cool", - * "Uncool", or "Kinda Cool": - * "num_Dan:str_Nick:nom_Arindam_\"Cool\",\"Uncool\",\"Kinda Cool\":" - *
- * - **/ - public StringBuffer attributeString = new StringBuffer(); - - - /** - * Full constructor. The line and byte offset are set to -1. - * - * @param l The expression representing the labeler classifier. - * @param ext Representation of the extractor classifier. - * @param p The expression representing the parser applied to data. - * @param r The constant representing the number of training - * repetitions. - * @param a A learning algorithm instance creation expression. - * @param ln The name of a learning algorithm. - * @param pb A block of statements used to set learning algorithm - * parameters. - * @param enc The expression representing the feature encoding used in - * this learner's lexicon. - * @param tp The expression representing the test parser. - * @param e The expression used to compute a prediction. - * @param k The number of folds for cross validation. - * @param prms The sets of parameter values used in tuning. - * @param s The method used to split the data for cross validation. - * @param t Determines how the user wishes cross-validation to test its - * performance. - * @param al The cross validation confidence interval width. - * @param pre A Boolean indicating whether example vectors will be - * pre-extracted. - * @param pro An integer indicating how often progress updates will be - * output. - * @param pct The prune type ("global" or "perClass"). - * @param ptt The prune count type ("count" or "percent"). - * @param pt The prune count threshold. - * @param ls The "learning status" node, set by RevisionAnalysis. - * @param fs The "features status" node, set by RevisionAnalysis. - * @param ps The "prune status" node, set by RevisionAnalysis. - * @param at The WEKA attribute string. - * @param cdv Value for {@link #checkDiscreteValues}. 
- **/ - public LearningClassifierExpression(ClassifierExpression l, - ClassifierExpression ext, - InstanceCreationExpression p, - Expression r, - InstanceCreationExpression a, Name ln, - Block pb, Constant enc, - InstanceCreationExpression tp, - Expression e, Constant k, - LinkedList prms, - FoldParser.SplitPolicy s, - InstanceCreationExpression t, - Constant al, Constant pre, Constant pro, - Constant pct, Constant ptt, Constant pt, - Integer ls, Integer fs, Integer ps, - StringBuffer at, boolean cdv) { - super(-1, -1); - labeler = l; - extractor = ext; - usingClauses = extractor == null ? 0 : 1; - parser = p; - fromClauses = parser == null ? 0 : 1; - rounds = r; - learnerConstructor = a; - learnerName = ln; - learnerParameterBlock = pb; - featureEncoding = enc; - withClauses = learnerConstructor == null && learnerName == null ? 0 : 1; - testParser = tp; - testFromClauses = testParser == null ? 0 : 1; - evaluation = e; - evaluateClauses = evaluation == null ? 0 : 1; - K = k; - parameterSets = prms; - splitPolicy = s; - cvalClauses = K == null ? 0 : 1; - testingMetric = t; - testingMetricClauses = testingMetric == null ? 0 : 1; - alpha = al; - alphaClauses = al == null ? 0 : 1; - if (alpha == null) alpha = new Constant("" + defaultAlpha); - preExtract = pre; - preExtractClauses = preExtract == null ? 0 : 1; - if (preExtract == null) - preExtract = new Constant(parser == null ? "false" : defaultPreExtract); - progressOutput = pro; - progressOutputClauses = progressOutput == null ? 0 : 1; - pruneCountType = pct; - pruneThresholdType = ptt; - pruneThreshold = pt; - learningStatus = ls; - featuresStatus = fs; - pruneStatus = ps; - attributeString = at; - checkDiscreteValues = cdv; - } - - /** - * Parser's unsupervised learning constructor. - * - * @param cl A list of clauses. - * @param line The line on which the source code represented by this - * node is found. 
- * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public LearningClassifierExpression(LinkedList cl, int line, int byteOffset) - { - this(null, cl, line, byteOffset); - } - - /** - * Parser's supervised learning constructor. - * - * @param l The expression representing the labeler classifier. - * @param cl A list of clauses. - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public LearningClassifierExpression(ClassifierExpression l, LinkedList cl, - int line, int byteOffset) { - super(line, byteOffset); - - labeler = l; - extractor = null; - parser = null; - learnerConstructor = null; - checkDiscreteValues = false; - alpha = new Constant("" + defaultAlpha); - parameterSets = new LinkedList(); - - for (Iterator I = cl.iterator(); I.hasNext(); ) { - Clause c = I.next(); - - if (c.type == Clause.USING) { - extractor = (ClassifierExpression) c.argument; - ++usingClauses; - } - else if (c.type == Clause.FROM) { - parser = (InstanceCreationExpression) c.argument; - rounds = c.rounds; - ++fromClauses; - } - else if (c.type == Clause.WITH) { - if (c.learnerParameterBlock == null) - learnerConstructor = (InstanceCreationExpression) c.argument; - else { - learnerName = (Name) c.argument; - learnerParameterBlock = c.learnerParameterBlock; - } - - ++withClauses; - } - else if (c.type == Clause.ENCODING) { - featureEncoding = (Constant) c.argument; - ++encodingClauses; - } - else if (c.type == Clause.TESTFROM) { - testParser = (InstanceCreationExpression) c.argument; - ++testFromClauses; - } - else if (c.type == Clause.EVALUATE) { - evaluation = (Expression) c.argument; - ++evaluateClauses; - } - else if (c.type == Clause.CVAL) { - if (c.argument != null) - K = (Constant) c.argument; - else 
- // if there was a cval clause, but k was not given, set it to -1 so - // cross validation gets invoked anyway, this occurs legally in the - // case of manual separation - K = new Constant("-1"); - - // handle default action - if (c.splitPolicy != null) splitPolicy = c.splitPolicy; - else splitPolicy = FoldParser.SplitPolicy.sequential; - - ++cvalClauses; - } - else if (c.type == Clause.TESTINGMETRIC) { - testingMetric = (InstanceCreationExpression) c.argument; - ++testingMetricClauses; - } - else if (c.type == Clause.ALPHA) { - alpha = (Constant) c.argument; - ++alphaClauses; - } - else if (c.type == Clause.PREEXTRACT) { - preExtract = (Constant) c.argument; - ++preExtractClauses; - } - else if (c.type == Clause.PROGRESSOUTPUT) { - progressOutput = (Constant) c.argument; - ++progressOutputClauses; - } - else if (c.type == Clause.PRUNE) { - pruneCountType = (Constant) c.pruneCountType; - pruneThresholdType = (Constant) c.pruneThresholdType; - pruneThreshold = (Constant) c.pruneThreshold; - } - } - - if (preExtract == null) - preExtract = new Constant(parser == null ? "false" : defaultPreExtract); - } - - - /** - * Returns a hash code value for java hash structures. - * - * @return A hash code for this object. - **/ - public int hashCode() { - int result = labeler == null ? 
0 : labeler.hashCode(); - result += extractor.hashCode(); - if (parser != null) result += parser.hashCode(); - if (rounds != null) result += rounds.hashCode(); - if (learnerConstructor != null) result += learnerConstructor.hashCode(); - if (learnerName != null) result += learnerName.hashCode(); - if (learnerParameterBlock != null) - result += learnerParameterBlock.hashCode(); - if (featureEncoding != null) result += featureEncoding.hashCode(); - if (testParser != null) result += testParser.hashCode(); - if (evaluation != null) result += evaluation.hashCode(); - if (K != null) result += K.hashCode(); - if (splitPolicy != null) result += splitPolicy.hashCode(); - if (testingMetric != null) result += testingMetric.hashCode(); - result += alpha.hashCode(); - result += preExtract.hashCode(); - if (progressOutput != null) result += progressOutput.hashCode(); - return result; - } - - - /** - * Indicates whether some other object is "equal to" this one. - * - * @return true iff this object is the same as the argument. - **/ - public boolean equals(Object o) { - if (!(o instanceof LearningClassifierExpression)) return false; - LearningClassifierExpression c = (LearningClassifierExpression) o; - return (labeler == null ? c.labeler == null : labeler.equals(c.labeler)) - && extractor.equals(c.extractor) - && (parser == null ? c.parser == null : parser.equals(c.parser)) - && (rounds == null ? c.rounds == null : rounds.equals(c.rounds)) - && (learnerConstructor == null - ? c.learnerConstructor == null - : learnerConstructor.equals(c.learnerConstructor)) - && (learnerName == null ? c.learnerName == null - : learnerName.equals(c.learnerName)) - && (learnerParameterBlock == null - ? c.learnerParameterBlock == null - : learnerParameterBlock.equals(c.learnerParameterBlock)) - && (featureEncoding == null - ? c.featureEncoding == null - : featureEncoding.equals(c.featureEncoding)) - && (testParser == null ? 
c.testParser == null - : testParser.equals(c.testParser)) - && (evaluation == null ? c.evaluation == null - : evaluation.equals(c.evaluation)) - && (K == null ? c.K == null : K.equals(c.K)) - && (splitPolicy == null ? c.splitPolicy == null - : splitPolicy.equals(c.splitPolicy)) - && (testingMetric == null ? c.testingMetric == null - : testingMetric.equals(c.testingMetric)) - && alpha.equals(c.alpha) - && preExtract.equals(c.preExtract) - && (progressOutput == null - ? c.progressOutput == null - : progressOutput.equals(c.progressOutput)) - && (pruneCountType == null - ? c.pruneCountType == null - : pruneCountType.equals(c.pruneCountType)) - && (pruneThresholdType == null - ? c.pruneThresholdType == null - : pruneThresholdType.equals(c.pruneThresholdType)) - && (pruneThreshold == null - ? c.pruneThreshold == null - : pruneThreshold.equals(c.pruneThreshold)); - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. 
- **/ - public ASTNodeIterator iterator() { - LinkedList children = new LinkedList(); - - if (labeler != null) children.add(labeler); - children.add(extractor); - if (parser != null) children.add(parser); - if (rounds != null) children.add(rounds); - if (learnerConstructor != null) children.add(learnerConstructor); - if (learnerName != null) children.add(learnerName); - if (learnerParameterBlock != null) children.add(learnerParameterBlock); - if (featureEncoding != null) children.add(featureEncoding); - if (testParser != null) children.add(testParser); - if (evaluation != null) children.add(evaluation); - if (K != null) children.add(K); - if (testingMetric != null) children.add(testingMetric); - children.add(alpha); - children.add(preExtract); - if (progressOutput != null) children.add(progressOutput); - if (pruneCountType != null) children.add(pruneCountType); - if (pruneThresholdType != null) children.add(pruneThresholdType); - if (pruneThreshold != null) children.add(pruneThreshold); - - ASTNodeIterator I = new ASTNodeIterator(); - I.children = (ASTNode[]) children.toArray(new ASTNode[children.size()]); - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - ClassifierExpression l = - labeler == null ? null : (ClassifierExpression) labeler.clone(); - ClassifierExpression ext = (ClassifierExpression) extractor.clone(); - InstanceCreationExpression p = - parser == null ? null : (InstanceCreationExpression) parser.clone(); - Expression r = rounds == null ? null : (Expression) rounds.clone(); - InstanceCreationExpression a = - learnerConstructor == null - ? null - : (InstanceCreationExpression) learnerConstructor.clone(); - Name ln = learnerName == null ? null : (Name) name.clone(); - Block pb = - learnerParameterBlock == null ? 
null - : (Block) learnerParameterBlock.clone(); - Constant enc = - featureEncoding == null ? null : (Constant) featureEncoding.clone(); - InstanceCreationExpression tp = - testParser == null ? null - : (InstanceCreationExpression) testParser.clone(); - Expression e = - evaluation == null ? null : (Expression) evaluation.clone(); - Constant k = K == null ? null : (Constant) K.clone(); - LinkedList prms = null; - if (parameterSets != null) { - prms = new LinkedList(); - for (Iterator I = parameterSets.iterator(); I.hasNext(); ) - prms.add((ParameterSet) I.next().clone()); - } - FoldParser.SplitPolicy s = splitPolicy; - InstanceCreationExpression t = - testingMetric == null - ? null : (InstanceCreationExpression) testingMetric.clone(); - Constant al = (Constant) alpha.clone(); - Constant pre = (Constant) preExtract.clone(); - Constant pro = - progressOutput == null ? null : (Constant) progressOutput.clone(); - Constant pct = - pruneCountType == null ? null : (Constant) pruneCountType.clone(); - Constant ptt = - pruneThresholdType == null ? null - : (Constant) pruneThresholdType.clone(); - Constant pt = - pruneThreshold == null ? null - : (Constant) pruneThreshold.clone(); - - return - new LearningClassifierExpression( - l, ext, p, r, a, ln, pb, enc, tp, e, k, prms, s, t, al, pre, pro, pct, - ptt, pt, learningStatus, featuresStatus, pruneStatus, - new StringBuffer(attributeString.toString()), checkDiscreteValues); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. 
- **/ - public void write(StringBuffer buffer) { - if (parenthesized) buffer.append("("); - buffer.append("learn"); - - if (labeler != null) { - buffer.append(" "); - labeler.write(buffer); - } - - buffer.append(" using "); - extractor.write(buffer); - - if (parser != null) { - buffer.append(" from "); - parser.write(buffer); - - if (rounds != null) { - buffer.append(" "); - rounds.write(buffer); - buffer.append(" rounds"); - } - } - - if (learnerConstructor != null) { - buffer.append(" with "); - learnerConstructor.write(buffer); - } - else if (learnerName != null) { - buffer.append(" with "); - learnerName.write(buffer); - learnerParameterBlock.write(buffer); - } - - if (featureEncoding != null) { - buffer.append(" encoding "); - featureEncoding.write(buffer); - } - - if (evaluation != null) { - buffer.append(" evaluate "); - evaluation.write(buffer); - } - - if (pruneCountType != null) { - buffer.append(" prune "); - pruneCountType.write(buffer); - buffer.append(" "); - pruneThresholdType.write(buffer); - buffer.append(" "); - pruneThreshold.write(buffer); - } - - if (K != null) { - buffer.append(" cval "); - - if (splitPolicy != FoldParser.SplitPolicy.manual) { - K.write(buffer); - buffer.append(" "); - } - - buffer.append("\"" + splitPolicy + "\""); - - buffer.append(" alpha "); - alpha.write(buffer); - } - - if (testingMetric != null) { - buffer.append(" testingMetric "); - testingMetric.write(buffer); - } - - if (testParser != null) { - buffer.append(" testFrom "); - testParser.write(buffer); - } - - buffer.append(" preExtract "); - preExtract.write(buffer); - - if (progressOutput != null) { - buffer.append(" progressOutput "); - progressOutput.write(buffer); - } - - buffer.append(" end"); - if (parenthesized) buffer.append(")"); - } - - - /** - * Creates a StringBuffer containing a shallow representation - * of this ASTNode. - * - * @return A StringBuffer containing a shallow text - * representation of the given node. 
- **/ - public StringBuffer shallow() { - StringBuffer buffer = new StringBuffer(); - if (comment != null) { - buffer.append(comment); - buffer.append(" "); - } - - returnType.write(buffer); - buffer.append(" "); - name.write(buffer); - buffer.append("("); - argument.write(buffer); - buffer.append(") "); - - if (singleExampleCache) buffer.append("cached "); - - if (cacheIn != null) { - buffer.append("cachedin"); - - if (cacheIn.toString().equals(ClassifierAssignment.mapCache)) - buffer.append("map"); - else { - buffer.append(" "); - cacheIn.write(buffer); - } - - buffer.append(' '); - } - - buffer.append("<- learn"); - - if (labeler != null) buffer.append(" " + labeler.name); - buffer.append(" using " + extractor.name); - - if (parser != null) { - buffer.append(" from "); - parser.write(buffer); - - if (rounds != null) { - buffer.append(" "); - rounds.write(buffer); - buffer.append(" rounds"); - } - } - - if (learnerConstructor != null) { - buffer.append(" with "); - learnerConstructor.write(buffer); - } - else if (learnerName != null) { - buffer.append(" with "); - learnerName.write(buffer); - learnerParameterBlock.write(buffer); - } - - if (featureEncoding != null) { - buffer.append(" encoding "); - featureEncoding.write(buffer); - } - - if (evaluation != null) { - buffer.append(" evaluate "); - evaluation.write(buffer); - } - - if (pruneCountType != null) { - buffer.append(" prune "); - pruneCountType.write(buffer); - buffer.append(" "); - pruneThresholdType.write(buffer); - buffer.append(" "); - pruneThreshold.write(buffer); - } - - if (K != null) { - buffer.append(" cval "); - - if (splitPolicy != FoldParser.SplitPolicy.manual) { - K.write(buffer); - buffer.append(" "); - } - - buffer.append("\"" + splitPolicy + "\""); - - buffer.append(" alpha "); - alpha.write(buffer); - } - - if (testingMetric != null) { - buffer.append(" testingMetric "); - testingMetric.write(buffer); - } - - if (testParser != null) { - buffer.append(" testFrom "); - 
testParser.write(buffer); - } - - buffer.append(" preExtract "); - preExtract.write(buffer); - - if (progressOutput != null) { - buffer.append(" progressOutput "); - progressOutput.write(buffer); - } - - buffer.append(" end"); - return buffer; - } - - - /** - * This class represents a clause in a - * {@link LearningClassifierExpression}. Note that this class is not an - * {@link ASTNode} since it is only an intermediary used during parsing. - * - * @author Nick Rizzolo **/ - public static class Clause - { - /** Value of the type variable. */ - public static final int USING = 0; - /** Value of the type variable. */ - public static final int FROM = 1; - /** Value of the type variable. */ - public static final int WITH = 2; - /** Value of the type variable. */ - public static final int TESTFROM = 3; - /** Value of the type variable. */ - public static final int EVALUATE = 4; - /** Value of the type variable. */ - public static final int CVAL = 5; - /** Value of the type variable */ - public static final int PREEXTRACT = 6; - /** Value of the type variable */ - public static final int PROGRESSOUTPUT = 7; - /** Value of the type variable */ - public static final int TESTINGMETRIC = 8; - /** Value of the type variable */ - public static final int ALPHA = 9; - /** Value of the type variable */ - public static final int PRUNE = 10; - /** Value of the type variable */ - public static final int ENCODING = 11; - /** String representations of the type names. */ - public static final String[] typeNames = - { - "using", "from", "with", "testFrom", "evaluate", "cval", "preExtract", - "progressOutput", "testingMetric", "alpha", "prune", "encoding" - }; - - - /** The type of the clause. */ - public int type; - /** The argument of the clause. 
*/ - public ASTNode argument; - /** - * Represents the number training repetitions; used only by the +public class LearningClassifierExpression extends ClassifierExpression { + /** + * If no learning algorithm is specified to learn a discrete classifier, this learner is used. + **/ + public static final InstanceCreationExpression defaultDiscreteLearner = + new InstanceCreationExpression(new Name("SparseNetworkLearner"), new ExpressionList( + new InstanceCreationExpression(new Name("SparseWinnow"), new ExpressionList( + new Constant("1.35"), new ExpressionList(new Constant("0.8"), + new ExpressionList(new Constant("4"), new ExpressionList( + new Constant("1"))))), -1, -1)), -1, -1); + + /** + * If no learning algorithm is specified to learn a real classifier, this learner is used. + **/ + public static final InstanceCreationExpression defaultRealLearner = + new InstanceCreationExpression(new Name("StochasticGradientDescent"), -1, -1); + + /** + * If no alpha clause appears during cross validation, this constant is used. + **/ + public static final double defaultAlpha = 0.05; + + /** + * If no preExtract clause appears in the sources, this constant is used. + **/ + public static final String defaultPreExtract = "\"diskZip\""; + + + /** The revision status of the LCE's features node. */ + public Integer featuresStatus; + /** The revision status of the LCE's prune node. */ + public Integer pruneStatus; + /** The revision status of the LCE's learning node. */ + public Integer learningStatus; + /** + * This flag is set true iff the changes to the learner's LBJava specification + * require its Java translation to be regenerated and nothing more. + **/ + public boolean onlyCodeGeneration; + + /** + * (ø) The classifier this learning classifier gets its labels from. + **/ + public ClassifierExpression labeler; + + /** + * (¬ø) The classifier that does feature extraction for this classifier; argument to + * using. 
+ **/ + public ClassifierExpression extractor; + /** Counts the number of using clauses for error detection. */ + public int usingClauses; + + /** + * (ø) The encoding that the generated classifier will use when storing string data in + * features. + **/ + public Constant featureEncoding; + /** + * Counts the number of encoding clauses for error detection. + **/ + public int encodingClauses; + + /** + * (ø) Tells this learning classifier how to get its training data; argument to + * from. + **/ + public InstanceCreationExpression parser; + /** Counts the number of from clauses for error detection. */ + public int fromClauses; + + /** + * (ø) Represents the integer number of training repetitions; augments the * from clause. - **/ - public Expression rounds; - /** - * A block of statements intended to be used to set learner parameters; - * used only by the with clause. - **/ - public Block learnerParameterBlock; - /** - * Dictates how cross-validation divides the training data; used only by - * the cval clause. - **/ - public Constant K; - /** Whether to use "global" or "perClass" feature pruning. */ - public Constant pruneCountType; - /** Whether to use "count" or "percent" counting for feature pruning. */ - public Constant pruneThresholdType; - /** The feature pruning threshold. */ - public Constant pruneThreshold; - /** - * Dictates how the training data will be split into subsets for use by - * cross validation. - **/ - public FoldParser.SplitPolicy splitPolicy; - /** - * Determines the parameter with which the confidence interval is - * calculated. Takes the value .05 if the user does not specify - * otherwise. - **/ - public Constant alpha; - /** - * Determines how often to give the user status output during training. - **/ - public Constant progressOutput; - - - /** - * Initializing constructor. + **/ + public Expression rounds; + /** Training starts from this round number. 
*/ + public int startingRound = 1; + + /** (ø) Whether to use "global" or "perClass" feature pruning. */ + public Constant pruneCountType; + /** + * (ø) Whether to use "count" or "percent" counting for feature pruning. + **/ + public Constant pruneThresholdType; + /** (ø) The feature pruning threshold. */ + public Constant pruneThreshold; + /** + * (ø) The contents of {@link #pruneCountType} on the previous run of the compiler, if + * any. + **/ + public Constant previousPruneCountType; + + /** + * (ø) Tells this learning classifier how to construct its learning algorithm; argument + * to with. This variable should not contain a non-null value if + * {@link #learnerName} and {@link #learnerParameterBlock} contain non-null values. + **/ + public InstanceCreationExpression learnerConstructor; + /** + * (ø) The name of the learner for this classifier; first argument to with. + * If this variable contains a non-null value, {@link #learnerParameterBlock} must + * also be non-null, and {@link #learnerConstructor} must be null. + **/ + public Name learnerName; + /** + * (ø) A block of statements that set parameters of the learner for this classifier; + * second argument to with. If this variable contains a non-null + * value, {@link #learnerName} must also be non-null, and + * {@link #learnerConstructor} must be null. + **/ + public Block learnerParameterBlock; + /** Counts the number of with clauses for error detection. */ + public int withClauses; + + /** + * (ø) Tells this learning classifier how to get its testing data; argument to + * testFrom. + **/ + public InstanceCreationExpression testParser; + /** + * Counts the number of testFrom clauses for error detection. + **/ + public int testFromClauses; + + /** + * (ø) Tells this learning classifier how to produce a prediction during evaluation; + * argument to evaluate. + **/ + public Expression evaluation; + /** + * Counts the number of evaluate clauses for error detection. 
+ **/ + public int evaluateClauses; + + /** + * A list of the {@link ParameterSet} objects that appear in the argument list. + **/ + public LinkedList parameterSets; + + /** + * (ø) Represents the integer number of subsets to be used in k-fold cross validation; + * first argument to cval. + **/ + public Constant K; + /** + * (ø) Dictates how the training data will be split into subsets for use by cross + * validation; second argument to cval. + **/ + public FoldParser.SplitPolicy splitPolicy; + /** Counts the number of cval clauses for error detection. */ + public int cvalClauses; + + /** + * (ø) Determines how the user wishes cross-validation to test its performance; argument + * to testingMetric. + **/ + public InstanceCreationExpression testingMetric; + /** + * Counts the number of testingMetric clauses, for error detection. + **/ + public int testingMetricClauses; + + /** + * (¬ø) The desired confidence level for cross validation's confidence interval + * output; argument to alpha, which can only be specified when cval is + * also specified. + **/ + public Constant alpha; + /** Counts the number of alpha clauses, for error detection. */ + public int alphaClauses; + + /** + * (¬ø) A Boolean or string value indicating how feature vectors are to be + * pre-extracted; argument to preExtract. Possible values are false, + * true, "false", "true", "none", + * "memory", "disk", and "diskZip". + **/ + public Constant preExtract; + /** + * Counts the number of preExtract clauses for error detection. + **/ + public int preExtractClauses; + + /** + * (ø) Integer specifying how often (in examples) to give the user a progress update + * during training; argument to progressOutput. + **/ + public Constant progressOutput; + /** + * Counts the number of progressOutput clauses, for error detection. 
+ **/ + public int progressOutputClauses; + + /** + * The {@link SemanticAnalysis} pass will let this LearningClassifierExpression + * know if the features it generates need to be checked for appropriateness in the context of + * the enclosing {@link ClassifierAssignment} by setting this flag. + **/ + public boolean checkDiscreteValues; + + + /** + * A string representation of the return type information for each feature. This information is + * crucial in the construction of WEKA classifiers. * - * @param t The type. - * @param a The argument node. - **/ - public Clause(int t, ASTNode a) { this(t, a, (Constant) null); } + *
+ * <p>
+ * Its format follows this convention: + * "<type>_<name>[_<value-list>]:<type>_<name>[_<value-list>:[...]]" + * + *
+ * <p>
+ * <type> can be either "num", "str", or "nom", representing numerical, string, and + * nominal attributes respectively. + * + *
+ * <p>
+ * Numerical and string attribute encodings do not need a value-list, while Nominal attribute + * encodings are required to contain a value list. + * + * Examples: + *

+ * <ul>
+ *
+ * <li>
+ * Just a numerical attribute named Dan: "num_Dan:"
+ *
+ * <li>
+ * A numerical attribute named Dan and a string attribute named Nick: "num_Dan:str_Nick:"
+ *
+ * <li>
+ * A numerical attribute named Dan, a string attribute named Nick, and a nominal attribute named
+ * Arindam which can take the values "Cool", "Uncool", or "Kinda Cool":
+ * "num_Dan:str_Nick:nom_Arindam_\"Cool\",\"Uncool\",\"Kinda Cool\":"
+ * </ul>
+ * + **/ + public StringBuffer attributeString = new StringBuffer(); - /** - * Full constructor. + + /** + * Full constructor. The line and byte offset are set to -1. + * + * @param l The expression representing the labeler classifier. + * @param ext Representation of the extractor classifier. + * @param p The expression representing the parser applied to data. + * @param r The constant representing the number of training repetitions. + * @param a A learning algorithm instance creation expression. + * @param ln The name of a learning algorithm. + * @param pb A block of statements used to set learning algorithm parameters. + * @param enc The expression representing the feature encoding used in this learner's lexicon. + * @param tp The expression representing the test parser. + * @param e The expression used to compute a prediction. + * @param k The number of folds for cross validation. + * @param prms The sets of parameter values used in tuning. + * @param s The method used to split the data for cross validation. + * @param t Determines how the user wishes cross-validation to test its performance. + * @param al The cross validation confidence interval width. + * @param pre A Boolean indicating whether example vectors will be pre-extracted. + * @param pro An integer indicating how often progress updates will be output. + * @param pct The prune type ("global" or "perClass"). + * @param ptt The prune count type ("count" or "percent"). + * @param pt The prune count threshold. + * @param ls The "learning status" node, set by RevisionAnalysis. + * @param fs The "features status" node, set by RevisionAnalysis. + * @param ps The "prune status" node, set by RevisionAnalysis. + * @param at The WEKA attribute string. + * @param cdv Value for {@link #checkDiscreteValues}. 
+ **/ + public LearningClassifierExpression(ClassifierExpression l, ClassifierExpression ext, + InstanceCreationExpression p, Expression r, InstanceCreationExpression a, Name ln, + Block pb, Constant enc, InstanceCreationExpression tp, Expression e, Constant k, + LinkedList prms, FoldParser.SplitPolicy s, InstanceCreationExpression t, + Constant al, Constant pre, Constant pro, Constant pct, Constant ptt, Constant pt, + Integer ls, Integer fs, Integer ps, StringBuffer at, boolean cdv) { + super(-1, -1); + labeler = l; + extractor = ext; + usingClauses = extractor == null ? 0 : 1; + parser = p; + fromClauses = parser == null ? 0 : 1; + rounds = r; + learnerConstructor = a; + learnerName = ln; + learnerParameterBlock = pb; + featureEncoding = enc; + withClauses = learnerConstructor == null && learnerName == null ? 0 : 1; + testParser = tp; + testFromClauses = testParser == null ? 0 : 1; + evaluation = e; + evaluateClauses = evaluation == null ? 0 : 1; + K = k; + parameterSets = prms; + splitPolicy = s; + cvalClauses = K == null ? 0 : 1; + testingMetric = t; + testingMetricClauses = testingMetric == null ? 0 : 1; + alpha = al; + alphaClauses = al == null ? 0 : 1; + if (alpha == null) + alpha = new Constant("" + defaultAlpha); + preExtract = pre; + preExtractClauses = preExtract == null ? 0 : 1; + if (preExtract == null) + preExtract = new Constant(parser == null ? "false" : defaultPreExtract); + progressOutput = pro; + progressOutputClauses = progressOutput == null ? 0 : 1; + pruneCountType = pct; + pruneThresholdType = ptt; + pruneThreshold = pt; + learningStatus = ls; + featuresStatus = fs; + pruneStatus = ps; + attributeString = at; + checkDiscreteValues = cdv; + } + + /** + * Parser's unsupervised learning constructor. + * + * @param cl A list of clauses. + * @param line The line on which the source code represented by this node is found. 
+ * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + public LearningClassifierExpression(LinkedList cl, int line, int byteOffset) { + this(null, cl, line, byteOffset); + } + + /** + * Parser's supervised learning constructor. * - * @param t The type. - * @param a The argument node. - * @param r Could represent the number of training repetitions or the - * split policy. - **/ - public Clause(int t, ASTNode a, Expression r) { - type = t; - - if (t == CVAL) { - argument = a; - - if (r == null) splitPolicy = FoldParser.SplitPolicy.sequential; - else { - if (r instanceof Constant) { - String s = ((Constant) r).value; - if (s.startsWith("\"") && s.endsWith("\"") && s.length() >= 2) - s = s.substring(1, s.length() - 1); - - if (s.equals("random")) - splitPolicy = FoldParser.SplitPolicy.random; - else if (s.equals("sequential")) - splitPolicy = FoldParser.SplitPolicy.sequential; - else if (s.equals("kth")) - splitPolicy = FoldParser.SplitPolicy.kth; - else if (s.equals("manual")) - splitPolicy = FoldParser.SplitPolicy.manual; - else { - System.err.println("Error: '" + s + "' is not a split policy."); - System.exit(1); - } - } - } - } - else { - argument = a; - rounds = r; - } - } - - /** - * A constructor with 3 Constant parameters, used for the - * prune clause. + * @param l The expression representing the labeler classifier. + * @param cl A list of clauses. + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. 
+ **/ + public LearningClassifierExpression(ClassifierExpression l, LinkedList cl, int line, + int byteOffset) { + super(line, byteOffset); + + labeler = l; + extractor = null; + parser = null; + learnerConstructor = null; + checkDiscreteValues = false; + alpha = new Constant("" + defaultAlpha); + parameterSets = new LinkedList(); + + for (Iterator I = cl.iterator(); I.hasNext();) { + Clause c = I.next(); + + if (c.type == Clause.USING) { + extractor = (ClassifierExpression) c.argument; + ++usingClauses; + } else if (c.type == Clause.FROM) { + parser = (InstanceCreationExpression) c.argument; + rounds = c.rounds; + ++fromClauses; + } else if (c.type == Clause.WITH) { + if (c.learnerParameterBlock == null) + learnerConstructor = (InstanceCreationExpression) c.argument; + else { + learnerName = (Name) c.argument; + learnerParameterBlock = c.learnerParameterBlock; + } + + ++withClauses; + } else if (c.type == Clause.ENCODING) { + featureEncoding = (Constant) c.argument; + ++encodingClauses; + } else if (c.type == Clause.TESTFROM) { + testParser = (InstanceCreationExpression) c.argument; + ++testFromClauses; + } else if (c.type == Clause.EVALUATE) { + evaluation = (Expression) c.argument; + ++evaluateClauses; + } else if (c.type == Clause.CVAL) { + if (c.argument != null) + K = (Constant) c.argument; + else + // if there was a cval clause, but k was not given, set it to -1 so + // cross validation gets invoked anyway, this occurs legally in the + // case of manual separation + K = new Constant("-1"); + + // handle default action + if (c.splitPolicy != null) + splitPolicy = c.splitPolicy; + else + splitPolicy = FoldParser.SplitPolicy.sequential; + + ++cvalClauses; + } else if (c.type == Clause.TESTINGMETRIC) { + testingMetric = (InstanceCreationExpression) c.argument; + ++testingMetricClauses; + } else if (c.type == Clause.ALPHA) { + alpha = (Constant) c.argument; + ++alphaClauses; + } else if (c.type == Clause.PREEXTRACT) { + preExtract = (Constant) c.argument; + 
++preExtractClauses; + } else if (c.type == Clause.PROGRESSOUTPUT) { + progressOutput = (Constant) c.argument; + ++progressOutputClauses; + } else if (c.type == Clause.PRUNE) { + pruneCountType = (Constant) c.pruneCountType; + pruneThresholdType = (Constant) c.pruneThresholdType; + pruneThreshold = (Constant) c.pruneThreshold; + } + } + + if (preExtract == null) + preExtract = new Constant(parser == null ? "false" : defaultPreExtract); + } + + + /** + * Returns a hash code value for java hash structures. * - * @param t The type. - * @param x The first parameter. - * @param y The second parameter. - * @param z The third parameter. - **/ - public Clause(int t, Constant x, Constant y, Constant z) { - type = t; - - if (t == PRUNE) { - pruneCountType = x; - pruneThresholdType = y; - pruneThreshold = z; - } - } - - /** - * A constructor for a with clause with a parameter setting block. + * @return A hash code for this object. + **/ + public int hashCode() { + int result = labeler == null ? 0 : labeler.hashCode(); + result += extractor.hashCode(); + if (parser != null) + result += parser.hashCode(); + if (rounds != null) + result += rounds.hashCode(); + if (learnerConstructor != null) + result += learnerConstructor.hashCode(); + if (learnerName != null) + result += learnerName.hashCode(); + if (learnerParameterBlock != null) + result += learnerParameterBlock.hashCode(); + if (featureEncoding != null) + result += featureEncoding.hashCode(); + if (testParser != null) + result += testParser.hashCode(); + if (evaluation != null) + result += evaluation.hashCode(); + if (K != null) + result += K.hashCode(); + if (splitPolicy != null) + result += splitPolicy.hashCode(); + if (testingMetric != null) + result += testingMetric.hashCode(); + result += alpha.hashCode(); + result += preExtract.hashCode(); + if (progressOutput != null) + result += progressOutput.hashCode(); + return result; + } + + + /** + * Indicates whether some other object is "equal to" this one. 
* - * @param t The type. - * @param n The name of the learner used by this learning classifier. - * @param b The parameter setting block. - **/ - public Clause(int t, Name n, Block b) { - type = t; - argument = n; - learnerParameterBlock = b; - } - - /** - * This constructor is only called by {@link #clone()}. + * @return true iff this object is the same as the argument. + **/ + public boolean equals(Object o) { + if (!(o instanceof LearningClassifierExpression)) + return false; + LearningClassifierExpression c = (LearningClassifierExpression) o; + return (labeler == null ? c.labeler == null : labeler.equals(c.labeler)) + && extractor.equals(c.extractor) + && (parser == null ? c.parser == null : parser.equals(c.parser)) + && (rounds == null ? c.rounds == null : rounds.equals(c.rounds)) + && (learnerConstructor == null ? c.learnerConstructor == null : learnerConstructor + .equals(c.learnerConstructor)) + && (learnerName == null ? c.learnerName == null : learnerName.equals(c.learnerName)) + && (learnerParameterBlock == null ? c.learnerParameterBlock == null + : learnerParameterBlock.equals(c.learnerParameterBlock)) + && (featureEncoding == null ? c.featureEncoding == null : featureEncoding + .equals(c.featureEncoding)) + && (testParser == null ? c.testParser == null : testParser.equals(c.testParser)) + && (evaluation == null ? c.evaluation == null : evaluation.equals(c.evaluation)) + && (K == null ? c.K == null : K.equals(c.K)) + && (splitPolicy == null ? c.splitPolicy == null : splitPolicy.equals(c.splitPolicy)) + && (testingMetric == null ? c.testingMetric == null : testingMetric + .equals(c.testingMetric)) + && alpha.equals(c.alpha) + && preExtract.equals(c.preExtract) + && (progressOutput == null ? c.progressOutput == null : progressOutput + .equals(c.progressOutput)) + && (pruneCountType == null ? c.pruneCountType == null : pruneCountType + .equals(c.pruneCountType)) + && (pruneThresholdType == null ? 
c.pruneThresholdType == null : pruneThresholdType + .equals(c.pruneThresholdType)) + && (pruneThreshold == null ? c.pruneThreshold == null : pruneThreshold + .equals(c.pruneThreshold)); + } + + + /** + * Returns an iterator used to successively access the children of this node. * - * @param t The type of the clause. - * @param a The argument of the clause. - * @param r Represents the number of training repetitions. - * @param b A block of statements that set parameters of a learning - * algorithm. - * @param k The number of folds for cross validation. - * @param s The data splitting policy for cross validation. - * @param al The width of the confidence interval for cross validation - * output. - * @param p The frequency in examples of progress updates. - * @param pct The type of pruning. - * @param ptt The type of feature counting for pruning. - * @param pt The pruning threshold. - **/ - protected Clause(int t, ASTNode a, Expression r, Block b, Constant k, - FoldParser.SplitPolicy s, Constant al, Constant p, - Constant pct, Constant ptt, Constant pt) { - type = t; - argument = a; - rounds = r; - learnerParameterBlock = b; - K = k; - splitPolicy = s; - alpha = al; - progressOutput = p; - pruneCountType = pct; - pruneThresholdType = ptt; - pruneThreshold = pt; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. + * @return An iterator used to successively access the children of this node. 
+ **/ + public ASTNodeIterator iterator() { + LinkedList children = new LinkedList(); + + if (labeler != null) + children.add(labeler); + children.add(extractor); + if (parser != null) + children.add(parser); + if (rounds != null) + children.add(rounds); + if (learnerConstructor != null) + children.add(learnerConstructor); + if (learnerName != null) + children.add(learnerName); + if (learnerParameterBlock != null) + children.add(learnerParameterBlock); + if (featureEncoding != null) + children.add(featureEncoding); + if (testParser != null) + children.add(testParser); + if (evaluation != null) + children.add(evaluation); + if (K != null) + children.add(K); + if (testingMetric != null) + children.add(testingMetric); + children.add(alpha); + children.add(preExtract); + if (progressOutput != null) + children.add(progressOutput); + if (pruneCountType != null) + children.add(pruneCountType); + if (pruneThresholdType != null) + children.add(pruneThresholdType); + if (pruneThreshold != null) + children.add(pruneThreshold); + + ASTNodeIterator I = new ASTNodeIterator(); + I.children = (ASTNode[]) children.toArray(new ASTNode[children.size()]); + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. * * @return The clone node. - **/ - public Object clone() { - ASTNode a = argument == null ? null : (ASTNode) argument.clone(); - Expression r = rounds == null ? null : (Expression) rounds.clone(); - Block b = - learnerParameterBlock == null ? null - : (Block) learnerParameterBlock.clone(); - Constant k = K == null ? null : (Constant) K.clone(); - Constant al = alpha == null ? null : (Constant) alpha.clone(); - Constant p = - progressOutput == null ? null : (Constant) progressOutput.clone(); - Constant pct = - pruneCountType == null ? null : (Constant) pruneCountType.clone(); - Constant ptt = - pruneThresholdType == null ? 
null - : (Constant) pruneThresholdType.clone(); - Constant pt = - pruneThreshold == null ? null : (Constant) pruneThreshold.clone(); - return new Clause(type, a, r, b, k, splitPolicy, al, p, pct, ptt, pt); - } - - - /** - * Debugging utility method. + **/ + public Object clone() { + ClassifierExpression l = labeler == null ? null : (ClassifierExpression) labeler.clone(); + ClassifierExpression ext = (ClassifierExpression) extractor.clone(); + InstanceCreationExpression p = + parser == null ? null : (InstanceCreationExpression) parser.clone(); + Expression r = rounds == null ? null : (Expression) rounds.clone(); + InstanceCreationExpression a = + learnerConstructor == null ? null : (InstanceCreationExpression) learnerConstructor + .clone(); + Name ln = learnerName == null ? null : (Name) learnerName.clone(); + Block pb = learnerParameterBlock == null ? null : (Block) learnerParameterBlock.clone(); + Constant enc = featureEncoding == null ? null : (Constant) featureEncoding.clone(); + InstanceCreationExpression tp = + testParser == null ? null : (InstanceCreationExpression) testParser.clone(); + Expression e = evaluation == null ? null : (Expression) evaluation.clone(); + Constant k = K == null ? null : (Constant) K.clone(); + LinkedList prms = null; + if (parameterSets != null) { + prms = new LinkedList(); + for (Iterator I = parameterSets.iterator(); I.hasNext();) + prms.add(((ParameterSet) I.next()).clone()); + } + FoldParser.SplitPolicy s = splitPolicy; + InstanceCreationExpression t = + testingMetric == null ? null : (InstanceCreationExpression) testingMetric.clone(); + Constant al = (Constant) alpha.clone(); + Constant pre = (Constant) preExtract.clone(); + Constant pro = progressOutput == null ? null : (Constant) progressOutput.clone(); + Constant pct = pruneCountType == null ? null : (Constant) pruneCountType.clone(); + Constant ptt = pruneThresholdType == null ? null : (Constant) pruneThresholdType.clone(); + Constant pt = pruneThreshold == null ? 
null : (Constant) pruneThreshold.clone(); + + return new LearningClassifierExpression(l, ext, p, r, a, ln, pb, enc, tp, e, k, prms, s, t, + al, pre, pro, pct, ptt, pt, learningStatus, featuresStatus, pruneStatus, + new StringBuffer(attributeString.toString()), checkDiscreteValues); + } + + + /** + * Ensures that the correct run() method is called for this type of node. * - * @return A textual representation of this expression. - **/ - public String toString() { return typeNames[type] + " " + argument; } - } -} + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + if (parenthesized) + buffer.append("("); + buffer.append("learn"); + + if (labeler != null) { + buffer.append(" "); + labeler.write(buffer); + } + + buffer.append(" using "); + extractor.write(buffer); + + if (parser != null) { + buffer.append(" from "); + parser.write(buffer); + + if (rounds != null) { + buffer.append(" "); + rounds.write(buffer); + buffer.append(" rounds"); + } + } + + if (learnerConstructor != null) { + buffer.append(" with "); + learnerConstructor.write(buffer); + } else if (learnerName != null) { + buffer.append(" with "); + learnerName.write(buffer); + learnerParameterBlock.write(buffer); + } + + if (featureEncoding != null) { + buffer.append(" encoding "); + featureEncoding.write(buffer); + } + + if (evaluation != null) { + buffer.append(" evaluate "); + evaluation.write(buffer); + } + + if (pruneCountType != null) { + buffer.append(" prune "); + pruneCountType.write(buffer); + buffer.append(" "); + pruneThresholdType.write(buffer); + buffer.append(" "); + pruneThreshold.write(buffer); + } + + if (K != null) { + buffer.append(" cval "); + + if 
(splitPolicy != FoldParser.SplitPolicy.manual) { + K.write(buffer); + buffer.append(" "); + } + + buffer.append("\"" + splitPolicy + "\""); + + buffer.append(" alpha "); + alpha.write(buffer); + } + + if (testingMetric != null) { + buffer.append(" testingMetric "); + testingMetric.write(buffer); + } + + if (testParser != null) { + buffer.append(" testFrom "); + testParser.write(buffer); + } + + buffer.append(" preExtract "); + preExtract.write(buffer); + + if (progressOutput != null) { + buffer.append(" progressOutput "); + progressOutput.write(buffer); + } + + buffer.append(" end"); + if (parenthesized) + buffer.append(")"); + } + + + /** + * Creates a StringBuffer containing a shallow representation of this + * ASTNode. + * + * @return A StringBuffer containing a shallow text representation of the given + * node. + **/ + public StringBuffer shallow() { + StringBuffer buffer = new StringBuffer(); + if (comment != null) { + buffer.append(comment); + buffer.append(" "); + } + + returnType.write(buffer); + buffer.append(" "); + name.write(buffer); + buffer.append("("); + argument.write(buffer); + buffer.append(") "); + + if (singleExampleCache) + buffer.append("cached "); + + if (cacheIn != null) { + buffer.append("cachedin"); + + if (cacheIn.toString().equals(ClassifierAssignment.mapCache)) + buffer.append("map"); + else { + buffer.append(" "); + cacheIn.write(buffer); + } + + buffer.append(' '); + } + + buffer.append("<- learn"); + + if (labeler != null) + buffer.append(" " + labeler.name); + buffer.append(" using " + extractor.name); + + if (parser != null) { + buffer.append(" from "); + parser.write(buffer); + + if (rounds != null) { + buffer.append(" "); + rounds.write(buffer); + buffer.append(" rounds"); + } + } + + if (learnerConstructor != null) { + buffer.append(" with "); + learnerConstructor.write(buffer); + } else if (learnerName != null) { + buffer.append(" with "); + learnerName.write(buffer); + learnerParameterBlock.write(buffer); + } + + if 
(featureEncoding != null) { + buffer.append(" encoding "); + featureEncoding.write(buffer); + } + + if (evaluation != null) { + buffer.append(" evaluate "); + evaluation.write(buffer); + } + + if (pruneCountType != null) { + buffer.append(" prune "); + pruneCountType.write(buffer); + buffer.append(" "); + pruneThresholdType.write(buffer); + buffer.append(" "); + pruneThreshold.write(buffer); + } + + if (K != null) { + buffer.append(" cval "); + + if (splitPolicy != FoldParser.SplitPolicy.manual) { + K.write(buffer); + buffer.append(" "); + } + + buffer.append("\"" + splitPolicy + "\""); + + buffer.append(" alpha "); + alpha.write(buffer); + } + + if (testingMetric != null) { + buffer.append(" testingMetric "); + testingMetric.write(buffer); + } + + if (testParser != null) { + buffer.append(" testFrom "); + testParser.write(buffer); + } + + buffer.append(" preExtract "); + preExtract.write(buffer); + + if (progressOutput != null) { + buffer.append(" progressOutput "); + progressOutput.write(buffer); + } + + buffer.append(" end"); + return buffer; + } + + + /** + * This class represents a clause in a {@link LearningClassifierExpression}. Note that this + * class is not an {@link ASTNode} since it is only an intermediary used during parsing. + * + * @author Nick Rizzolo + **/ + public static class Clause { + /** Value of the type variable. */ + public static final int USING = 0; + /** Value of the type variable. */ + public static final int FROM = 1; + /** Value of the type variable. */ + public static final int WITH = 2; + /** Value of the type variable. */ + public static final int TESTFROM = 3; + /** Value of the type variable. */ + public static final int EVALUATE = 4; + /** Value of the type variable. 
*/ + public static final int CVAL = 5; + /** Value of the type variable */ + public static final int PREEXTRACT = 6; + /** Value of the type variable */ + public static final int PROGRESSOUTPUT = 7; + /** Value of the type variable */ + public static final int TESTINGMETRIC = 8; + /** Value of the type variable */ + public static final int ALPHA = 9; + /** Value of the type variable */ + public static final int PRUNE = 10; + /** Value of the type variable */ + public static final int ENCODING = 11; + /** String representations of the type names. */ + public static final String[] typeNames = {"using", "from", "with", "testFrom", "evaluate", + "cval", "preExtract", "progressOutput", "testingMetric", "alpha", "prune", + "encoding"}; + + + /** The type of the clause. */ + public int type; + /** The argument of the clause. */ + public ASTNode argument; + /** + * Represents the number training repetitions; used only by the from clause. + **/ + public Expression rounds; + /** + * A block of statements intended to be used to set learner parameters; used only by the + * with clause. + **/ + public Block learnerParameterBlock; + /** + * Dictates how cross-validation divides the training data; used only by the + * cval clause. + **/ + public Constant K; + /** Whether to use "global" or "perClass" feature pruning. */ + public Constant pruneCountType; + /** Whether to use "count" or "percent" counting for feature pruning. */ + public Constant pruneThresholdType; + /** The feature pruning threshold. */ + public Constant pruneThreshold; + /** + * Dictates how the training data will be split into subsets for use by cross validation. + **/ + public FoldParser.SplitPolicy splitPolicy; + /** + * Determines the parameter with which the confidence interval is calculated. Takes the + * value .05 if the user does not specify otherwise. + **/ + public Constant alpha; + /** + * Determines how often to give the user status output during training. 
+ **/ + public Constant progressOutput; + + + /** + * Initializing constructor. + * + * @param t The type. + * @param a The argument node. + **/ + public Clause(int t, ASTNode a) { + this(t, a, (Constant) null); + } + + /** + * Full constructor. + * + * @param t The type. + * @param a The argument node. + * @param r Could represent the number of training repetitions or the split policy. + **/ + public Clause(int t, ASTNode a, Expression r) { + type = t; + + if (t == CVAL) { + argument = a; + + if (r == null) + splitPolicy = FoldParser.SplitPolicy.sequential; + else { + if (r instanceof Constant) { + String s = ((Constant) r).value; + if (s.startsWith("\"") && s.endsWith("\"") && s.length() >= 2) + s = s.substring(1, s.length() - 1); + + if (s.equals("random")) + splitPolicy = FoldParser.SplitPolicy.random; + else if (s.equals("sequential")) + splitPolicy = FoldParser.SplitPolicy.sequential; + else if (s.equals("kth")) + splitPolicy = FoldParser.SplitPolicy.kth; + else if (s.equals("manual")) + splitPolicy = FoldParser.SplitPolicy.manual; + else { + System.err.println("Error: '" + s + "' is not a split policy."); + System.exit(1); + } + } + } + } else { + argument = a; + rounds = r; + } + } + + /** + * A constructor with 3 Constant parameters, used for the prune clause. + * + * @param t The type. + * @param x The first parameter. + * @param y The second parameter. + * @param z The third parameter. + **/ + public Clause(int t, Constant x, Constant y, Constant z) { + type = t; + + if (t == PRUNE) { + pruneCountType = x; + pruneThresholdType = y; + pruneThreshold = z; + } + } + + /** + * A constructor for a with clause with a parameter setting block. + * + * @param t The type. + * @param n The name of the learner used by this learning classifier. + * @param b The parameter setting block. + **/ + public Clause(int t, Name n, Block b) { + type = t; + argument = n; + learnerParameterBlock = b; + } + + /** + * This constructor is only called by {@link #clone()}. 
+ * + * @param t The type of the clause. + * @param a The argument of the clause. + * @param r Represents the number of training repetitions. + * @param b A block of statements that set parameters of a learning algorithm. + * @param k The number of folds for cross validation. + * @param s The data splitting policy for cross validation. + * @param al The width of the confidence interval for cross validation output. + * @param p The frequency in examples of progress updates. + * @param pct The type of pruning. + * @param ptt The type of feature counting for pruning. + * @param pt The pruning threshold. + **/ + protected Clause(int t, ASTNode a, Expression r, Block b, Constant k, + FoldParser.SplitPolicy s, Constant al, Constant p, Constant pct, Constant ptt, + Constant pt) { + type = t; + argument = a; + rounds = r; + learnerParameterBlock = b; + K = k; + splitPolicy = s; + alpha = al; + progressOutput = p; + pruneCountType = pct; + pruneThresholdType = ptt; + pruneThreshold = pt; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member + * data objects as well. + * + * @return The clone node. + **/ + public Object clone() { + ASTNode a = argument == null ? null : (ASTNode) argument.clone(); + Expression r = rounds == null ? null : (Expression) rounds.clone(); + Block b = learnerParameterBlock == null ? null : (Block) learnerParameterBlock.clone(); + Constant k = K == null ? null : (Constant) K.clone(); + Constant al = alpha == null ? null : (Constant) alpha.clone(); + Constant p = progressOutput == null ? null : (Constant) progressOutput.clone(); + Constant pct = pruneCountType == null ? null : (Constant) pruneCountType.clone(); + Constant ptt = + pruneThresholdType == null ? null : (Constant) pruneThresholdType.clone(); + Constant pt = pruneThreshold == null ? null : (Constant) pruneThreshold.clone(); + return new Clause(type, a, r, b, k, splitPolicy, al, p, pct, ptt, pt); + } + + + /** + * Debugging utility method. 
+ * + * @return A textual representation of this expression. + **/ + public String toString() { + return typeNames[type] + " " + argument; + } + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/List.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/List.java index 67331c2c..771688d8 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/List.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/List.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -17,254 +14,269 @@ /** - * Currently, this is just a wrapper class for LinkedList. The - * code that uses it looks a little cleaner when casts are all taken care of - * automatically. - * - * @author Nick Rizzolo + * Currently, this is just a wrapper class for LinkedList. The code that uses it looks + * a little cleaner when casts are all taken care of automatically. + * + * @author Nick Rizzolo **/ -abstract public class List extends ASTNode -{ - /** (¬ø) The list being wrapped. */ - protected LinkedList list; - /** - * The characters appearing in between elements of the list in its string - * representation. - **/ - protected String separator; - - - /** - * Full constructor. - * - * @param line The line on which the source code represented by this - * node is found. 
- * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - * @param s The list's element separator. - **/ - public List(int line, int byteOffset, String s) { - super(line, byteOffset); - list = new LinkedList(); - separator = s; - } - - - /** - * Returns the size of the list. - * - * @return The number of elements currently in the list. - **/ - public int size() { return list.size(); } - - - /** - * Returns the separating characters. - * - * @return The value of separator. - **/ - public String getSeparator() { return separator; } - - - /** Sorts the list according to their natural ordering. */ - public void sort() { Collections.sort(list); } - - - /** - * Sorts the list according to the order induced by the specified - * comparator. - * - * @param c A comparator that determines the relative ordering of two - * elements in the list. - **/ - public void sort(Comparator c) { Collections.sort(list, c); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - protected void writeBuffer(StringBuffer buffer, String separate) { - ASTNodeIterator I = iterator(); - if (!I.hasNext()) return; - - I.next().write(buffer); - while (I.hasNext()) { - buffer.append(separate); - I.next().write(buffer); - } - } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { writeBuffer(buffer, separator); } - - - /** - * Determines whether this list is equivalent to another object. - * - * @param o The other object. - * @return true iff this list is equivalent to o. 
- **/ - public boolean equals(Object o) { - if (o == null || !o.getClass().equals(getClass())) return false; - List list = (List) o; - if (size() != list.size()) return false; - NodeListIterator I1 = new NodeListIterator(); - NodeListIterator I2 = list.new NodeListIterator(); - while (I1.hasNext()) - if (!I1.next().equals(I2.next())) return false; - return true; - } - - - /** A hash code based on the hash codes of the elements of the list. */ - public int hashCode() { - int result = 53; - for (NodeListIterator I = new NodeListIterator(); I.hasNext(); ) { - Object element = I.next(); - result = 31 * result + (element == null ? 7 : element.hashCode()); - } - return result; - } - - - /** - * Used to iterate though the children of a list of AST nodes. The entire - * interface of java.util.ListIterator is exposed through this - * class. - * - * @author Nick Rizzolo - **/ - public class NodeListIterator extends ASTNodeIterator - { - /** An iterator into list. */ - protected ListIterator I; - - - /** Initializes I. */ - public NodeListIterator() { I = list.listIterator(); } - - +abstract public class List extends ASTNode { + /** (¬ø) The list being wrapped. */ + protected LinkedList list; /** - * Inserts the specified node into the list. The element is inserted - * immediately before the next element that would be returned by - * next(), if any, and after the next element that would be - * returned by previous(), if any. (If the list contains no - * elements, the new element becomes the sole element on the list.) The - * new element is inserted before the implicit cursor: a subsequent call - * to next() would be unaffected, and a subsequent call to - * previous() would return the new element. (This call - * increases by one the value that would be returned by a call to - * nextIndex or previousIndex.) - * - * @param n The node to add. + * The characters appearing in between elements of the list in its string representation. 
**/ - public void add(ASTNode n) { I.add(n); } + protected String separator; /** - * Returns true if this list iterator has more elements when - * traversing the list in the forward direction. - * - * @return true if this list iterator has more elements when - * traversing the list in the forward direction. + * Full constructor. + * + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + * @param s The list's element separator. **/ - public boolean hasNext() { return I.hasNext(); } + public List(int line, int byteOffset, String s) { + super(line, byteOffset); + list = new LinkedList(); + separator = s; + } /** - * Returns true if this list iterator has more elements when - * traversing the list in the reverse direction. - * - * @return true if this list iterator has more elements when - * traversing the list in the reverse direction. + * Returns the size of the list. + * + * @return The number of elements currently in the list. **/ - public boolean hasPrevious() { return I.hasPrevious(); } + public int size() { + return list.size(); + } /** - * Returns the next AST node in the list. This method may be called - * repeatedly to iterate through the list, or intermixed with calls to - * previous() to go back and forth. (Note that alternating - * calls to next() and previous() will return - * the same element repeatedly.) - * - * @return The next AST node in the list. + * Returns the separating characters. + * + * @return The value of separator. **/ - public ASTNode next() { return (ASTNode) I.next(); } + public String getSeparator() { + return separator; + } + + + /** Sorts the list according to their natural ordering. */ + public void sort() { + Collections.sort(list); + } /** - * Returns the index of the node that would be returned by a subsequent - * call to next(). 
(Returns list size if the list iterator - * is at the end of the list.) - * - * @return The index of the element that would be returned by a - * subsequent call to next(), or list size if list - * iterator is at end of list. + * Sorts the list according to the order induced by the specified comparator. + * + * @param c A comparator that determines the relative ordering of two elements in the list. **/ - public int nextIndex() { return I.nextIndex(); } + public void sort(Comparator c) { + Collections.sort(list, c); + } /** - * Returns the previous element in the list. This method may be called - * repeatedly to iterate through the list backwards, or intermixed with - * calls to next to go back and forth. (Note that alternating calls to - * next and previous will return the same element repeatedly.) - * - * @return The previous AST node in the list. + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. **/ - public ASTNode previous() { return (ASTNode) I.previous(); } + protected void writeBuffer(StringBuffer buffer, String separate) { + ASTNodeIterator I = iterator(); + if (!I.hasNext()) + return; + + I.next().write(buffer); + while (I.hasNext()) { + buffer.append(separate); + I.next().write(buffer); + } + } /** - * Returns the index of the node that would be returned by a subsequent - * call to previous(). (Returns -1 if the list iterator is - * at the beginning of the list.) - * - * @return The index of the element that would be returned by a - * subsequent call to previous(), or -1 if list - * iterator is at the beginning of the list. + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. 
**/ - public int previousIndex() { return I.previousIndex(); } + public void write(StringBuffer buffer) { + writeBuffer(buffer, separator); + } /** - * Removes from the list the last element that was returned by - * next() or previous. This call can only be - * made once per call to next() or previous. - * It can be made only if add(ASTNode) has not been called - * after the last call to next() or previous. + * Determines whether this list is equivalent to another object. + * + * @param o The other object. + * @return true iff this list is equivalent to o. **/ - public void remove() { I.remove(); } + public boolean equals(Object o) { + if (o == null || !o.getClass().equals(getClass())) + return false; + List list = (List) o; + if (size() != list.size()) + return false; + NodeListIterator I1 = new NodeListIterator(); + NodeListIterator I2 = list.new NodeListIterator(); + while (I1.hasNext()) + if (!I1.next().equals(I2.next())) + return false; + return true; + } - /** Restarts the iterator. */ - public void reset() { I = list.listIterator(); } + /** A hash code based on the hash codes of the elements of the list. */ + public int hashCode() { + int result = 53; + for (NodeListIterator I = new NodeListIterator(); I.hasNext();) { + Object element = I.next(); + result = 31 * result + (element == null ? 7 : element.hashCode()); + } + return result; + } /** - * Replaces the last element returned by next() or - * previous() with the specified element. This call can be - * made only if neither remove() nor - * add(ASTNode) have been called after the last call to - * next() or previous(). - * - * @param n The element with which to replace the last element returned - * by next() or previous(). + * Used to iterate though the children of a list of AST nodes. The entire interface of + * java.util.ListIterator is exposed through this class. 
+ * + * @author Nick Rizzolo **/ - public void set(ASTNode n) { I.set(n); } - } + public class NodeListIterator extends ASTNodeIterator { + /** An iterator into list. */ + protected ListIterator I; + + + /** Initializes I. */ + public NodeListIterator() { + I = list.listIterator(); + } + + + /** + * Inserts the specified node into the list. The element is inserted immediately before the + * next element that would be returned by next(), if any, and after the next + * element that would be returned by previous(), if any. (If the list contains + * no elements, the new element becomes the sole element on the list.) The new element is + * inserted before the implicit cursor: a subsequent call to next() would be + * unaffected, and a subsequent call to previous() would return the new + * element. (This call increases by one the value that would be returned by a call to + * nextIndex or previousIndex.) + * + * @param n The node to add. + **/ + public void add(ASTNode n) { + I.add(n); + } + + + /** + * Returns true if this list iterator has more elements when traversing the + * list in the forward direction. + * + * @return true if this list iterator has more elements when traversing the + * list in the forward direction. + **/ + public boolean hasNext() { + return I.hasNext(); + } + + + /** + * Returns true if this list iterator has more elements when traversing the + * list in the reverse direction. + * + * @return true if this list iterator has more elements when traversing the + * list in the reverse direction. + **/ + public boolean hasPrevious() { + return I.hasPrevious(); + } + + + /** + * Returns the next AST node in the list. This method may be called repeatedly to iterate + * through the list, or intermixed with calls to previous() to go back and + * forth. (Note that alternating calls to next() and previous() + * will return the same element repeatedly.) + * + * @return The next AST node in the list. 
+ **/ + public ASTNode next() { + return (ASTNode) I.next(); + } + + + /** + * Returns the index of the node that would be returned by a subsequent call to + * next(). (Returns list size if the list iterator is at the end of the list.) + * + * @return The index of the element that would be returned by a subsequent call to + * next(), or list size if list iterator is at end of list. + **/ + public int nextIndex() { + return I.nextIndex(); + } + + + /** + * Returns the previous element in the list. This method may be called repeatedly to iterate + * through the list backwards, or intermixed with calls to next to go back and forth. (Note + * that alternating calls to next and previous will return the same element repeatedly.) + * + * @return The previous AST node in the list. + **/ + public ASTNode previous() { + return (ASTNode) I.previous(); + } + + + /** + * Returns the index of the node that would be returned by a subsequent call to + * previous(). (Returns -1 if the list iterator is at the beginning of the + * list.) + * + * @return The index of the element that would be returned by a subsequent call to + * previous(), or -1 if list iterator is at the beginning of the list. + **/ + public int previousIndex() { + return I.previousIndex(); + } + + + /** + * Removes from the list the last element that was returned by next() or + * previous. This call can only be made once per call to next() or + * previous. It can be made only if add(ASTNode) has not been + * called after the last call to next() or previous. + **/ + public void remove() { + I.remove(); + } + + + /** Restarts the iterator. */ + public void reset() { + I = list.listIterator(); + } + + + /** + * Replaces the last element returned by next() or previous() with + * the specified element. This call can be made only if neither remove() nor + * add(ASTNode) have been called after the last call to next() or + * previous(). 
+ * + * @param n The element with which to replace the last element returned by + * next() or previous(). + **/ + public void set(ASTNode n) { + I.set(n); + } + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/MethodInvocation.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/MethodInvocation.java index 28bb6957..c9aa1907 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/MethodInvocation.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/MethodInvocation.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,230 +11,227 @@ import edu.illinois.cs.cogcomp.lbjava.Pass; import edu.illinois.cs.cogcomp.lbjava.frontend.TokenValue; - +import edu.illinois.cs.cogcomp.lbjava.SemanticAnalysis; /** - * This class represents a method call. - * - * @author Nick Rizzolo + * This class represents a method call. + * + * @author Nick Rizzolo **/ -public class MethodInvocation extends StatementExpression -{ - /** - * (ø) This expression evaluates to the object whose method will be - * called. - **/ - public Expression parentObject; - /** (¬ø) The name of the method to be invoked. */ - public Name name; - /** (¬ø) The argument expressions passed to the method. 
*/ - public ExpressionList arguments; - /** - * Filled in by the SemanticAnalysis pass, this variable is - * set to true iff this invocation represents a classifier - * invocation. - * - * @see SemanticAnalysis - **/ - public boolean isClassifierInvocation; - /** - * Filled in by the SemanticAnalysis pass, this variable is - * set to true iff this invocation is the argument of a - * learning classifier expression's evaluate clause. - * - * @see SemanticAnalysis - **/ - public boolean isEvaluateArgument; - /** - * The SemanticAnalysis pass will let this - * MethodInvocation know if it is the immediate - * value child of a SenseStatement by setting - * this flag. - * - * @see SemanticAnalysis - **/ - public boolean isSensedValue; - - - /** - * Initializing constructor. Line and byte offset information is taken - * from the representation of the name. - * - * @param n The name of the method being invoked. - **/ - public MethodInvocation(Name n) { this(n, new ExpressionList()); } - - /** - * Initializing constructor. Line and byte offset information is taken - * from the representation of the name. - * - * @param n The name of the method being invoked. - * @param a The argument expressions passed to the method. - **/ - public MethodInvocation(Name n, ExpressionList a) { - this(null, n, a, n.line, n.byteOffset); - } - - /** - * Parser's constructor. Line and byte offset information is taken from - * the representation of the name. - * - * @param p Represents the object whose method is being invoked. - * @param n Token representing the name of the method being invoked. - **/ - public MethodInvocation(Expression p, TokenValue n) { - this(p, n, new ExpressionList()); - } - - /** - * Parser's constructor. Line and byte offset information is taken from - * the representation of the name. - * - * @param p Represents the object whose method is being invoked. - * @param n Token representing the name of the method being invoked. 
- * @param a The argument expressions passed to the method. - **/ - public MethodInvocation(Expression p, TokenValue n, ExpressionList a) { - this(p, new Name(n), a, n.line, n.byteOffset); - } - - /** - * Full constructor. - * - * @param p Represents the object whose method is being invoked. - * @param n The name of the method being invoked. - * @param a The argument expressions passed to the method. - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public MethodInvocation(Expression p, Name n, ExpressionList a, int line, - int byteOffset) { - super(line, byteOffset); - parentObject = p; - name = n; - arguments = a; - isClassifierInvocation = isEvaluateArgument = isSensedValue = false; - } - - - /** - * Returns a set of Arguments storing the name and type of - * each variable that is a subexpression of this expression. This method - * cannot be run before SemanticAnalysis runs. - **/ - public HashSet getVariableTypes() { - HashSet result = new HashSet(); - if (parentObject != null) result.addAll(parentObject.getVariableTypes()); - result.addAll(name.getVariableTypes(true)); - result.addAll(arguments.getVariableTypes()); - return result; - } - - - /** - * Determines if there are any quantified variables in this expression. - * This method cannot be run before SemanticAnalysis runs. - **/ - public boolean containsQuantifiedVariable() { - return parentObject != null && parentObject.containsQuantifiedVariable() - || name.containsQuantifiedVariable(true) - || arguments.containsQuantifiedVariable(); - } - - - /** Sets the isSensedValue flag. */ - public void senseValueChild() { isSensedValue = true; } - - - /** - * Returns a hash code value for java hash structures. - * - * @return A hash code for this object. 
- **/ - public int hashCode() { - int result = 31 * name.hashCode() + 17 * arguments.hashCode(); - if (parentObject != null) result += 7 * parentObject.hashCode(); - return result; - } - - - /** - * Indicates whether some other object is "equal to" this one. - * - * @return true iff this object is the same as the argument. - **/ - public boolean equals(Object o) { - if (!(o instanceof MethodInvocation)) return false; - MethodInvocation i = (MethodInvocation) o; - return (parentObject == null ? i.parentObject == null - : parentObject.equals(i.parentObject)) - && name.equals(i.name) && arguments.equals(i.arguments); - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(parentObject == null ? 2 : 3); - if (parentObject != null) I.children[0] = parentObject; - I.children[I.children.length - 2] = name; - I.children[I.children.length - 1] = arguments; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return - new MethodInvocation( - (parentObject == null) ? null : (Expression) parentObject.clone(), - (Name) name.clone(), (ExpressionList) arguments.clone(), -1, -1); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. 
- **/ - public void write(StringBuffer buffer) { - if (parenthesized) buffer.append("("); - - if (parentObject != null) { - parentObject.write(buffer); - buffer.append("."); +public class MethodInvocation extends StatementExpression { + /** + * (ø) This expression evaluates to the object whose method will be called. + **/ + public Expression parentObject; + /** (¬ø) The name of the method to be invoked. */ + public Name name; + /** (¬ø) The argument expressions passed to the method. */ + public ExpressionList arguments; + /** + * Filled in by the SemanticAnalysis pass, this variable is set to + * true iff this invocation represents a classifier invocation. + * + * @see SemanticAnalysis + **/ + public boolean isClassifierInvocation; + /** + * Filled in by the SemanticAnalysis pass, this variable is set to + * true iff this invocation is the argument of a learning classifier expression's + * evaluate clause. + * + * @see SemanticAnalysis + **/ + public boolean isEvaluateArgument; + /** + * The SemanticAnalysis pass will let this MethodInvocation know if it + * is the immediate value child of a SenseStatement by setting this + * flag. + * + * @see SemanticAnalysis + **/ + public boolean isSensedValue; + + + /** + * Initializing constructor. Line and byte offset information is taken from the representation + * of the name. + * + * @param n The name of the method being invoked. + **/ + public MethodInvocation(Name n) { + this(n, new ExpressionList()); } - name.write(buffer); - buffer.append("("); - arguments.write(buffer); - buffer.append(")"); + /** + * Initializing constructor. Line and byte offset information is taken from the representation + * of the name. + * + * @param n The name of the method being invoked. + * @param a The argument expressions passed to the method. + **/ + public MethodInvocation(Name n, ExpressionList a) { + this(null, n, a, n.line, n.byteOffset); + } - if (parenthesized) buffer.append(")"); - } -} + /** + * Parser's constructor. 
Line and byte offset information is taken from the representation of + * the name. + * + * @param p Represents the object whose method is being invoked. + * @param n Token representing the name of the method being invoked. + **/ + public MethodInvocation(Expression p, TokenValue n) { + this(p, n, new ExpressionList()); + } + + /** + * Parser's constructor. Line and byte offset information is taken from the representation of + * the name. + * + * @param p Represents the object whose method is being invoked. + * @param n Token representing the name of the method being invoked. + * @param a The argument expressions passed to the method. + **/ + public MethodInvocation(Expression p, TokenValue n, ExpressionList a) { + this(p, new Name(n), a, n.line, n.byteOffset); + } + + /** + * Full constructor. + * + * @param p Represents the object whose method is being invoked. + * @param n The name of the method being invoked. + * @param a The argument expressions passed to the method. + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + public MethodInvocation(Expression p, Name n, ExpressionList a, int line, int byteOffset) { + super(line, byteOffset); + parentObject = p; + name = n; + arguments = a; + isClassifierInvocation = isEvaluateArgument = isSensedValue = false; + } + + + /** + * Returns a set of Arguments storing the name and type of each variable that is a + * subexpression of this expression. This method cannot be run before + * SemanticAnalysis runs. 
+ **/ + public HashSet getVariableTypes() { + HashSet result = new HashSet(); + if (parentObject != null) + result.addAll(parentObject.getVariableTypes()); + result.addAll(name.getVariableTypes(true)); + result.addAll(arguments.getVariableTypes()); + return result; + } + + + /** + * Determines if there are any quantified variables in this expression. This method cannot be + * run before SemanticAnalysis runs. + **/ + public boolean containsQuantifiedVariable() { + return parentObject != null && parentObject.containsQuantifiedVariable() + || name.containsQuantifiedVariable(true) || arguments.containsQuantifiedVariable(); + } + + + /** Sets the isSensedValue flag. */ + public void senseValueChild() { + isSensedValue = true; + } + + /** + * Returns a hash code value for java hash structures. + * + * @return A hash code for this object. + **/ + public int hashCode() { + int result = 31 * name.hashCode() + 17 * arguments.hashCode(); + if (parentObject != null) + result += 7 * parentObject.hashCode(); + return result; + } + + + /** + * Indicates whether some other object is "equal to" this one. + * + * @return true iff this object is the same as the argument. + **/ + public boolean equals(Object o) { + if (!(o instanceof MethodInvocation)) + return false; + MethodInvocation i = (MethodInvocation) o; + return (parentObject == null ? i.parentObject == null : parentObject.equals(i.parentObject)) + && name.equals(i.name) && arguments.equals(i.arguments); + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(parentObject == null ? 
2 : 3); + if (parentObject != null) + I.children[0] = parentObject; + I.children[I.children.length - 2] = name; + I.children[I.children.length - 1] = arguments; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new MethodInvocation((parentObject == null) ? null + : (Expression) parentObject.clone(), (Name) name.clone(), + (ExpressionList) arguments.clone(), -1, -1); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + if (parenthesized) + buffer.append("("); + + if (parentObject != null) { + parentObject.write(buffer); + buffer.append("."); + } + + name.write(buffer); + buffer.append("("); + arguments.write(buffer); + buffer.append(")"); + + if (parenthesized) + buffer.append(")"); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Name.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Name.java index c778a855..def779ea 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Name.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Name.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -17,237 +14,253 @@ /** - * This class represents a scalar variable. - * - * @author Nick Rizzolo + * This class represents a scalar variable. + * + * @author Nick Rizzolo **/ -public class Name extends Expression -{ - /** - * (¬ø) These strings appeared with dots between them to form - * the name in the source. - **/ - public String[] name; - /** - * The number of matched brackets appearing after a single identifier; - * supports variable declarations. - **/ - public int dimensions; - - - /** - * Takes a fully specified name (eg java.lang.String) as input. - * - * @param n A fully specified name. - **/ - public Name(String n) { this(n, -1, -1); } - - /** - * Takes a fully specified name (eg java.lang.String) as input. - * - * @param n A fully specified name. - **/ - public Name(String n, int line, int byteOffset) { - super(line, byteOffset); - name = n.split("\\."); - dimensions = 0; - } - - /** - * Should only be called by the clone() method. - * - * @param n The value of the name variable. - **/ - protected Name(String[] n) { this(n, -1, -1); } - - /** - * Should only be called by the clone() method. - * - * @param n The value of the name variable. - **/ - protected Name(String[] n, int line, int byteOffset) { - super(line, byteOffset); - name = (String[]) n.clone(); - dimensions = 0; - } - - /** - * Parser's constructor. - * - * @param token The parser's token for the identifier. - **/ - public Name(TokenValue token) { - super(token.line, token.byteOffset); - name = new String[1]; - name[0] = token.toString(); - dimensions = 0; - } - - /** - * Parser's constructor. - * - * @param n A name that needs another identifier added to it. 
- * @param token The parser's token for the identifier. - **/ - public Name(Name n, TokenValue token) { - super(n.line, n.byteOffset); - name = new String[n.name.length + 1]; - for (int i = 0; i < n.name.length; ++i) name[i] = n.name[i]; - name[n.name.length] = token.toString(); - } - - - /** - * Returns the length of the name array. - * - * @return The length of the name array. - **/ - public int length() { return name.length; } - - - /** - * Returns a new Name object that is the same as this - * Name object, except the last identifier has been removed. - **/ - public Name cutLast() { - String[] n = new String[name.length - 1]; - for (int i = 0; i < n.length; ++i) n[i] = name[i]; - return new Name(n); - } - - - /** - * Returns a set of Arguments storing the name and type of - * each variable that is a subexpression of this expression. This method - * cannot be run before SemanticAnalysis runs. - **/ - public HashSet getVariableTypes() { - HashSet result = new HashSet(); - - Type type = null; - if (name.length == 1) type = typeCache; - else type = symbolTable.get(name[0]); - - if (type != null - && (type instanceof ArrayType || type instanceof ReferenceType - || type instanceof PrimitiveType)) - result.add(new Argument(type, name[0])); - - return result; - } - - - /** - * Returns a set of Arguments storing the name and type of - * each variable that is a subexpression of this expression. This method - * cannot be run before SemanticAnalysis runs. - * - * @param b Flag set if this name is the name of an invoked method. - **/ - public HashSet getVariableTypes(boolean b) { - if (!b || name.length > 1) return getVariableTypes(); - return new HashSet(); - } - - - /** - * Determines if there are any quantified variables in this expression. - * This method cannot be run before SemanticAnalysis runs. 
- **/ - public boolean containsQuantifiedVariable() { - HashSet types = getVariableTypes(); - if (types.size() == 0) return false; - return - ((Argument) types.iterator().next()).getType().quantifierArgumentType; - } - - - /** - * Determines if there are any quantified variables in this expression. - * This method cannot be run before SemanticAnalysis runs. - * - * @param b Flag set if this name is the name of an invoked method. - **/ - public boolean containsQuantifiedVariable(boolean b) { - if (!b || name.length > 1) return containsQuantifiedVariable(); - return false; - } - - - /** - * Returns a hash code value for java hash structures. - * - * @return A hash code for this object. - **/ - public int hashCode() { - int code = 0; - for (int i = 0; i < name.length; ++i) code += name[i].hashCode(); - return code; - } - - - /** - * Indicates whether some other object is "equal to" this one. - * - * @return true iff this object is the same as the argument. - **/ - public boolean equals(Object o) { - if (!(o instanceof Name)) return false; - Name n = (Name) o; - if (n.name.length != name.length) return false; - for (int i = 0; i < name.length; ++i) - if (!name[i].equals(n.name[i])) return false; - return true; - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { return new ASTNodeIterator(0); } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { return new Name(name, line, byteOffset); } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. 
- **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - if (parenthesized) buffer.append("("); - - if (name.length > 0) { - buffer.append(name[0]); - for (int i = 1; i < name.length; ++i) { - buffer.append("."); - buffer.append(name[i]); - } - } - - if (parenthesized) buffer.append(")"); - } -} +public class Name extends Expression { + /** + * (¬ø) These strings appeared with dots between them to form the name in the source. + **/ + public String[] name; + /** + * The number of matched brackets appearing after a single identifier; supports variable + * declarations. + **/ + public int dimensions; + + + /** + * Takes a fully specified name (eg java.lang.String) as input. + * + * @param n A fully specified name. + **/ + public Name(String n) { + this(n, -1, -1); + } + + /** + * Takes a fully specified name (eg java.lang.String) as input. + * + * @param n A fully specified name. + **/ + public Name(String n, int line, int byteOffset) { + super(line, byteOffset); + name = n.split("\\."); + dimensions = 0; + } + + /** + * Should only be called by the clone() method. + * + * @param n The value of the name variable. + **/ + protected Name(String[] n) { + this(n, -1, -1); + } + + /** + * Should only be called by the clone() method. + * + * @param n The value of the name variable. + **/ + protected Name(String[] n, int line, int byteOffset) { + super(line, byteOffset); + name = (String[]) n.clone(); + dimensions = 0; + } + + /** + * Parser's constructor. + * + * @param token The parser's token for the identifier. + **/ + public Name(TokenValue token) { + super(token.line, token.byteOffset); + name = new String[1]; + name[0] = token.toString(); + dimensions = 0; + } + + /** + * Parser's constructor. 
+ * + * @param n A name that needs another identifier added to it. + * @param token The parser's token for the identifier. + **/ + public Name(Name n, TokenValue token) { + super(n.line, n.byteOffset); + name = new String[n.name.length + 1]; + for (int i = 0; i < n.name.length; ++i) + name[i] = n.name[i]; + name[n.name.length] = token.toString(); + } + + + /** + * Returns the length of the name array. + * + * @return The length of the name array. + **/ + public int length() { + return name.length; + } + + + /** + * Returns a new Name object that is the same as this Name object, + * except the last identifier has been removed. + **/ + public Name cutLast() { + String[] n = new String[name.length - 1]; + for (int i = 0; i < n.length; ++i) + n[i] = name[i]; + return new Name(n); + } + + + /** + * Returns a set of Arguments storing the name and type of each variable that is a + * subexpression of this expression. This method cannot be run before + * SemanticAnalysis runs. + **/ + public HashSet getVariableTypes() { + HashSet result = new HashSet(); + + Type type = null; + if (name.length == 1) + type = typeCache; + else + type = symbolTable.get(name[0]); + + if (type != null + && (type instanceof ArrayType || type instanceof ReferenceType || type instanceof PrimitiveType)) + result.add(new Argument(type, name[0])); + + return result; + } + + /** + * Returns a set of Arguments storing the name and type of each variable that is a + * subexpression of this expression. This method cannot be run before + * SemanticAnalysis runs. + * + * @param b Flag set if this name is the name of an invoked method. + **/ + public HashSet getVariableTypes(boolean b) { + if (!b || name.length > 1) + return getVariableTypes(); + return new HashSet(); + } + + + /** + * Determines if there are any quantified variables in this expression. This method cannot be + * run before SemanticAnalysis runs. 
+ **/ + public boolean containsQuantifiedVariable() { + HashSet types = getVariableTypes(); + if (types.size() == 0) + return false; + return ((Argument) types.iterator().next()).getType().quantifierArgumentType; + } + + + /** + * Determines if there are any quantified variables in this expression. This method cannot be + * run before SemanticAnalysis runs. + * + * @param b Flag set if this name is the name of an invoked method. + **/ + public boolean containsQuantifiedVariable(boolean b) { + if (!b || name.length > 1) + return containsQuantifiedVariable(); + return false; + } + + + /** + * Returns a hash code value for java hash structures. + * + * @return A hash code for this object. + **/ + public int hashCode() { + int code = 0; + for (int i = 0; i < name.length; ++i) + code += name[i].hashCode(); + return code; + } + + + /** + * Indicates whether some other object is "equal to" this one. + * + * @return true iff this object is the same as the argument. + **/ + public boolean equals(Object o) { + if (!(o instanceof Name)) + return false; + Name n = (Name) o; + if (n.name.length != name.length) + return false; + for (int i = 0; i < name.length; ++i) + if (!name[i].equals(n.name[i])) + return false; + return true; + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + return new ASTNodeIterator(0); + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new Name(name, line, byteOffset); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. 
+ **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + if (parenthesized) + buffer.append("("); + + if (name.length > 0) { + buffer.append(name[0]); + for (int i = 1; i < name.length; ++i) { + buffer.append("."); + buffer.append(name[i]); + } + } + + if (parenthesized) + buffer.append(")"); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/NameList.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/NameList.java index 77a79a2f..e8f33938 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/NameList.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/NameList.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -16,130 +13,132 @@ /** - * Currently, this is just a wrapper class for LinkedList. The - * code that uses it looks a little cleaner when casts are all taken care of - * automatically. - * - * @author Nick Rizzolo + * Currently, this is just a wrapper class for LinkedList. The code that uses it looks + * a little cleaner when casts are all taken care of automatically. 
+ * + * @author Nick Rizzolo **/ -public class NameList extends List -{ - /** Default constructor. */ - public NameList() { super(-1, -1, ", "); } - - /** - * Initializing constructor. Requires its argument to be - * non-null. - * - * @param n A single Name with which to initialize this list. - **/ - public NameList(Name n) { - super(n.line, n.byteOffset, ", "); - list.add(n); - } - - - /** - * Adds another Name to the end of the list. - * - * @param n A reference to the Name to be added. - **/ - public void add(Name n) { list.add(n); } - - - /** - * Adds all the Names in another NameList to the - * end of this NameList. - * - * @param l The list to be added. - **/ - public void addAll(NameList l) { list.addAll(l.list); } - - - /** - * Transforms the list into an array of expressions. - * - * @return An array of constants containing references to every constant in - * the list. - **/ - public Name[] toArray() { - return (Name[]) list.toArray(new Name[list.size()]); - } - - - /** - * Returns an iterator used specifically to access the elements of this - * list. - * - * @return An iterator used specifically to access the elements of this - * list. - **/ - public NameListIterator listIterator() { return new NameListIterator(); } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { return listIterator(); } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - NameList clone = new NameList(); - for (Iterator i = list.iterator(); i.hasNext(); ) - clone.list.add(((Name) i.next()).clone()); - return clone; - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. 
- **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Used to iterate though the children of a list of AST nodes. The entire - * interface of java.util.ListIterator is exposed through this - * class. - * - * @author Nick Rizzolo - **/ - public class NameListIterator extends NodeListIterator - { +public class NameList extends List { + /** Default constructor. */ + public NameList() { + super(-1, -1, ", "); + } + /** - * Returns the next AST node in the list. This method may be called - * repeatedly to iterate through the list, or intermixed with calls to - * previous() to go back and forth. (Note that alternating - * calls to next() and previous() will return - * the same element repeatedly.) - * - * @return The next AST node in the list. + * Initializing constructor. Requires its argument to be non-null. + * + * @param n A single Name with which to initialize this list. **/ - public Name nextItem() { return (Name) I.next(); } + public NameList(Name n) { + super(n.line, n.byteOffset, ", "); + list.add(n); + } /** - * Returns the previous element in the list. This method may be called - * repeatedly to iterate through the list backwards, or intermixed with - * calls to next to go back and forth. (Note that alternating calls to - * next and previous will return the same element repeatedly.) - * - * @return The previous AST node in the list. + * Adds another Name to the end of the list. + * + * @param n A reference to the Name to be added. **/ - public Name previousItem() { return (Name) I.previous(); } - } -} + public void add(Name n) { + list.add(n); + } + + + /** + * Adds all the Names in another NameList to the end of this + * NameList. + * + * @param l The list to be added. + **/ + public void addAll(NameList l) { + list.addAll(l.list); + } + + + /** + * Transforms the list into an array of expressions. + * + * @return An array of constants containing references to every constant in the list. 
+ **/ + public Name[] toArray() { + return (Name[]) list.toArray(new Name[list.size()]); + } + + + /** + * Returns an iterator used specifically to access the elements of this list. + * + * @return An iterator used specifically to access the elements of this list. + **/ + public NameListIterator listIterator() { + return new NameListIterator(); + } + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + return listIterator(); + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + NameList clone = new NameList(); + for (Iterator i = list.iterator(); i.hasNext();) + clone.list.add(((Name) i.next()).clone()); + return clone; + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Used to iterate though the children of a list of AST nodes. The entire interface of + * java.util.ListIterator is exposed through this class. + * + * @author Nick Rizzolo + **/ + public class NameListIterator extends NodeListIterator { + /** + * Returns the next AST node in the list. This method may be called repeatedly to iterate + * through the list, or intermixed with calls to previous() to go back and + * forth. (Note that alternating calls to next() and previous() + * will return the same element repeatedly.) + * + * @return The next AST node in the list. + **/ + public Name nextItem() { + return (Name) I.next(); + } + + + /** + * Returns the previous element in the list. This method may be called repeatedly to iterate + * through the list backwards, or intermixed with calls to next to go back and forth. 
(Note + * that alternating calls to next and previous will return the same element repeatedly.) + * + * @return The previous AST node in the list. + **/ + public Name previousItem() { + return (Name) I.previous(); + } + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/NegatedConstraintExpression.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/NegatedConstraintExpression.java index dce6f18c..996ba755 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/NegatedConstraintExpression.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/NegatedConstraintExpression.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -15,91 +12,81 @@ /** - * Represents the negation of a constraint expression. - * - * @author Nick Rizzolo + * Represents the negation of a constraint expression. + * + * @author Nick Rizzolo **/ -public class NegatedConstraintExpression extends ConstraintExpression -{ - /** (¬ø) The constraint being negated. */ - public ConstraintExpression constraint; +public class NegatedConstraintExpression extends ConstraintExpression { + /** (¬ø) The constraint being negated. */ + public ConstraintExpression constraint; - /** - * Full constructor. - * - * @param line The line on which the source code represented by this - * node is found. 
- * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - * @param c The constraint being negated. - **/ - public NegatedConstraintExpression(int line, int byteOffset, - ConstraintExpression c) { - super(line, byteOffset); - constraint = c; - } + /** + * Full constructor. + * + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + * @param c The constraint being negated. + **/ + public NegatedConstraintExpression(int line, int byteOffset, ConstraintExpression c) { + super(line, byteOffset); + constraint = c; + } - /** - * Parser's constructor. Line and byte offset information is taken from - * the token. - * - * @param t The token providing line and byte offset information. - * @param c The constraint being negated. - **/ - public NegatedConstraintExpression(TokenValue t, ConstraintExpression c) { - this(t.line, t.byteOffset, c); - } + /** + * Parser's constructor. Line and byte offset information is taken from the token. + * + * @param t The token providing line and byte offset information. + * @param c The constraint being negated. + **/ + public NegatedConstraintExpression(TokenValue t, ConstraintExpression c) { + this(t.line, t.byteOffset, c); + } - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(1); - I.children[0] = constraint; - return I; - } + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. 
+ **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(1); + I.children[0] = constraint; + return I; + } - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return - new NegatedConstraintExpression( - -1, -1, (ConstraintExpression) constraint.clone()); - } + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new NegatedConstraintExpression(-1, -1, (ConstraintExpression) constraint.clone()); + } - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - buffer.append("!("); - constraint.write(buffer); - buffer.append(") "); - } + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. 
+ **/ + public void write(StringBuffer buffer) { + buffer.append("!("); + constraint.write(buffer); + buffer.append(") "); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/NormalizerType.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/NormalizerType.java index e3f9ed2e..63662f31 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/NormalizerType.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/NormalizerType.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,78 +11,85 @@ /** - * A normalizer type is simply a place holder indicating that the name it is - * associated with in the symbol table is a normalizer function. - * - * @author Nick Rizzolo + * A normalizer type is simply a place holder indicating that the name it is associated with in the + * symbol table is a normalizer function. + * + * @author Nick Rizzolo **/ -public class NormalizerType extends Type -{ - /** Default constructor. */ - public NormalizerType() { - super(-1, -1); - - try { myClass = Class.forName("edu.illinois.cs.cogcomp.lbjava.learn.Normalizer"); } - catch (Exception e) { - System.err.println("Class 'edu.illinois.cs.cogcomp.lbjava.learn.Normalizer' not found. " - + "Aborting."); - System.exit(1); +public class NormalizerType extends Type { + /** Default constructor. 
*/ + public NormalizerType() { + super(-1, -1); + + try { + myClass = Class.forName("edu.illinois.cs.cogcomp.lbjava.learn.Normalizer"); + } catch (Exception e) { + System.err + .println("Class 'edu.illinois.cs.cogcomp.lbjava.learn.Normalizer' not found. " + + "Aborting."); + System.exit(1); + } } - } - - - /** - * Any two NormalizerTypes are equivalent. - * - * @param o The object whose equality with this object needs to be tested. - * @return true if the two objects are equal, and - * false otherwise. - **/ - public boolean equals(Object o) { return o instanceof NormalizerType; } - - - /** - * Returns a constant, since all objects of this type are equal according - * to {@link #equals(Object)}. - **/ - public int hashCode() { return 17; } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { return new ASTNodeIterator(0); } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { return new NormalizerType(); } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { buffer.append("Normalizer { }"); } -} + + /** + * Any two NormalizerTypes are equivalent. + * + * @param o The object whose equality with this object needs to be tested. + * @return true if the two objects are equal, and false otherwise. 
+ **/ + public boolean equals(Object o) { + return o instanceof NormalizerType; + } + + + /** + * Returns a constant, since all objects of this type are equal according to + * {@link #equals(Object)}. + **/ + public int hashCode() { + return 17; + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + return new ASTNodeIterator(0); + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new NormalizerType(); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + buffer.append("Normalizer { }"); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Operator.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Operator.java index 5f04371c..ca5ab350 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Operator.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Operator.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,273 +11,267 @@ /** - * LBJava supports every Java operator. This class stores information about - * those operators, such as their symbols and precedences, as well as - * information about LBJava's new operators. The static constant fields defined - * in this class serve as indeces into its static array fields. - * - *

+ * The precedence values for Java operators were taken from Java-FAQ.com. + * + * @author Nick Rizzolo **/ -public class Operator extends ASTNode -{ - /** Value of the operation variable. */ - public static final int DOT = 0; - /** Value of the operation variable. */ - public static final int PRE_INCREMENT = 1; - /** Value of the operation variable. */ - public static final int POST_INCREMENT = 2; - /** Value of the operation variable. */ - public static final int PRE_DECREMENT = 3; - /** Value of the operation variable. */ - public static final int POST_DECREMENT = 4; - /** Value of the operation variable. */ - public static final int BITWISE_NOT = 5; - /** Value of the operation variable. */ - public static final int NOT = 6; - /** Value of the operation variable. */ - public static final int NEW = 7; - /** Value of the operation variable. */ - public static final int TIMES = 8; - /** Value of the operation variable. */ - public static final int DIVIDE = 9; - /** Value of the operation variable. */ - public static final int MOD = 10; - /** Value of the operation variable. */ - public static final int PLUS = 11; - /** Value of the operation variable. */ - public static final int MINUS = 12; - /** Value of the operation variable. */ - public static final int LEFT_SHIFT = 13; - /** Value of the operation variable. */ - public static final int SIGNED_RIGHT_SHIFT = 14; - /** Value of the operation variable. */ - public static final int UNSIGNED_RIGHT_SHIFT = 15; - /** Value of the operation variable. */ - public static final int LESS_THAN = 16; - /** Value of the operation variable. */ - public static final int LESS_THAN_OR_EQUAL = 17; - /** Value of the operation variable. */ - public static final int GREATER_THAN = 18; - /** Value of the operation variable. */ - public static final int GREATER_THAN_OR_EQUAL = 19; - /** Value of the operation variable. */ - public static final int INSTANCEOF = 20; - /** Value of the operation variable. 
*/ - public static final int EQUAL = 21; - /** Value of the operation variable. */ - public static final int NOT_EQUAL = 22; - /** Value of the operation variable. */ - public static final int BITWISE_AND = 23; - /** Value of the operation variable. */ - public static final int XOR = 24; - /** Value of the operation variable. */ - public static final int BITWISE_OR = 25; - /** Value of the operation variable. */ - public static final int AND = 26; - /** Value of the operation variable. */ - public static final int OR = 27; - /** Value of the operation variable. */ - public static final int CONDITIONAL = 28; - /** Value of the operation variable. */ - public static final int ASSIGN = 29; - /** Value of the operation variable. */ - public static final int MULTIPLY_ASSIGN = 30; - /** Value of the operation variable. */ - public static final int DIVIDE_ASSIGN = 31; - /** Value of the operation variable. */ - public static final int MOD_ASSIGN = 32; - /** Value of the operation variable. */ - public static final int PLUS_ASSIGN = 33; - /** Value of the operation variable. */ - public static final int MINUS_ASSIGN = 34; - /** Value of the operation variable. */ - public static final int LEFT_SHIFT_ASSIGN = 35; - /** Value of the operation variable. */ - public static final int SIGNED_RIGHT_SHIFT_ASSIGN = 36; - /** Value of the operation variable. */ - public static final int UNSIGNED_RIGHT_SHIFT_ASSIGN = 37; - /** Value of the operation variable. */ - public static final int AND_ASSIGN = 38; - /** Value of the operation variable. */ - public static final int OR_ASSIGN = 39; - /** Value of the operation variable. */ - public static final int XOR_ASSIGN = 40; - /** Value of the operation variable. */ - public static final int CONJUNCTION = 41; - /** Value of the operation variable. */ - public static final int ARROW = 42; - /** Value of the operation variable. */ - public static final int CONSTRAINT_EQUAL = 43; - /** Value of the operation variable. 
*/ - public static final int CONSTRAINT_NOT_EQUAL = 44; - /** Value of the operation variable. */ - public static final int LOGICAL_CONJUNCTION = 45; - /** Value of the operation variable. */ - public static final int LOGICAL_DISJUNCTION = 46; - /** Value of the operation variable. */ - public static final int IMPLICATION = 47; - /** Value of the operation variable. */ - public static final int DOUBLE_IMPLICATION = 48; +public class Operator extends ASTNode { + /** Value of the operation variable. */ + public static final int DOT = 0; + /** Value of the operation variable. */ + public static final int PRE_INCREMENT = 1; + /** Value of the operation variable. */ + public static final int POST_INCREMENT = 2; + /** Value of the operation variable. */ + public static final int PRE_DECREMENT = 3; + /** Value of the operation variable. */ + public static final int POST_DECREMENT = 4; + /** Value of the operation variable. */ + public static final int BITWISE_NOT = 5; + /** Value of the operation variable. */ + public static final int NOT = 6; + /** Value of the operation variable. */ + public static final int NEW = 7; + /** Value of the operation variable. */ + public static final int TIMES = 8; + /** Value of the operation variable. */ + public static final int DIVIDE = 9; + /** Value of the operation variable. */ + public static final int MOD = 10; + /** Value of the operation variable. */ + public static final int PLUS = 11; + /** Value of the operation variable. */ + public static final int MINUS = 12; + /** Value of the operation variable. */ + public static final int LEFT_SHIFT = 13; + /** Value of the operation variable. */ + public static final int SIGNED_RIGHT_SHIFT = 14; + /** Value of the operation variable. */ + public static final int UNSIGNED_RIGHT_SHIFT = 15; + /** Value of the operation variable. */ + public static final int LESS_THAN = 16; + /** Value of the operation variable. 
*/ + public static final int LESS_THAN_OR_EQUAL = 17; + /** Value of the operation variable. */ + public static final int GREATER_THAN = 18; + /** Value of the operation variable. */ + public static final int GREATER_THAN_OR_EQUAL = 19; + /** Value of the operation variable. */ + public static final int INSTANCEOF = 20; + /** Value of the operation variable. */ + public static final int EQUAL = 21; + /** Value of the operation variable. */ + public static final int NOT_EQUAL = 22; + /** Value of the operation variable. */ + public static final int BITWISE_AND = 23; + /** Value of the operation variable. */ + public static final int XOR = 24; + /** Value of the operation variable. */ + public static final int BITWISE_OR = 25; + /** Value of the operation variable. */ + public static final int AND = 26; + /** Value of the operation variable. */ + public static final int OR = 27; + /** Value of the operation variable. */ + public static final int CONDITIONAL = 28; + /** Value of the operation variable. */ + public static final int ASSIGN = 29; + /** Value of the operation variable. */ + public static final int MULTIPLY_ASSIGN = 30; + /** Value of the operation variable. */ + public static final int DIVIDE_ASSIGN = 31; + /** Value of the operation variable. */ + public static final int MOD_ASSIGN = 32; + /** Value of the operation variable. */ + public static final int PLUS_ASSIGN = 33; + /** Value of the operation variable. */ + public static final int MINUS_ASSIGN = 34; + /** Value of the operation variable. */ + public static final int LEFT_SHIFT_ASSIGN = 35; + /** Value of the operation variable. */ + public static final int SIGNED_RIGHT_SHIFT_ASSIGN = 36; + /** Value of the operation variable. */ + public static final int UNSIGNED_RIGHT_SHIFT_ASSIGN = 37; + /** Value of the operation variable. */ + public static final int AND_ASSIGN = 38; + /** Value of the operation variable. */ + public static final int OR_ASSIGN = 39; + /** Value of the operation variable. 
*/ + public static final int XOR_ASSIGN = 40; + /** Value of the operation variable. */ + public static final int CONJUNCTION = 41; + /** Value of the operation variable. */ + public static final int ARROW = 42; + /** Value of the operation variable. */ + public static final int CONSTRAINT_EQUAL = 43; + /** Value of the operation variable. */ + public static final int CONSTRAINT_NOT_EQUAL = 44; + /** Value of the operation variable. */ + public static final int LOGICAL_CONJUNCTION = 45; + /** Value of the operation variable. */ + public static final int LOGICAL_DISJUNCTION = 46; + /** Value of the operation variable. */ + public static final int IMPLICATION = 47; + /** Value of the operation variable. */ + public static final int DOUBLE_IMPLICATION = 48; - /** - * This array contains the text representations of every symbol that - * objects of this class can represent. - * - * { - * ".", "++", "++", "--", "--", "~", "!", "new", "*", "/", "%", "+", "-", - * "<<", ">>", ">>>", "<", "<=", ">", - * ">=", "instanceof", "==", "!=", "&", "^", "|", "&&", - * "||", "?", "=", "*=", "/=", "%=", "+=", "-=", "<<=", - * ">>=", ">>>=", "&=", "|=", "^=", "&&", - * "<-", "::", "!:", "/\\", "\\/", "=>", "<=>" - * } - **/ - private static final String[] symbols = - { - ".", "++", "++", "--", "--", "~", "!", "new", "*", "/", "%", "+", "-", - "<<", ">>", ">>>", "<", "<=", ">", ">=", "instanceof", "==", "!=", "&", - "^", "|", "&&", "||", "?", "=", "*=", "/=", "%=", "+=", "-=", "<<=", - ">>=", ">>>=", "&=", "|=", "^=", "&&", "<-", "::", "!:", "/\\", "\\/", - "=>", "<=>" - }; + /** + * This array contains the text representations of every symbol that objects of this class can + * represent. 
+ * { + * ".", "++", "++", "--", "--", "~", "!", "new", "*", "/", "%", "+", "-", + * "<<", ">>", ">>>", "<", "<=", ">", + * ">=", "instanceof", "==", "!=", "&", "^", "|", "&&", + * "||", "?", "=", "*=", "/=", "%=", "+=", "-=", "<<=", + * ">>=", ">>>=", "&=", "|=", "^=", "&&", + * "<-", "::", "!:", "/\\", "\\/", "=>", "<=>" + * } + **/ + private static final String[] symbols = {".", "++", "++", "--", "--", "~", "!", "new", "*", + "/", "%", "+", "-", "<<", ">>", ">>>", "<", "<=", ">", ">=", "instanceof", "==", "!=", + "&", "^", "|", "&&", "||", "?", "=", "*=", "/=", "%=", "+=", "-=", "<<=", ">>=", + ">>>=", "&=", "|=", "^=", "&&", "<-", "::", "!:", "/\\", "\\/", "=>", "<=>"}; - /** - * This array contains the precedences of every operator in the same order - * that they appear in the symbols array. - * = - * { - * 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, - * 8, 9, 10, 11, 12, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - * 15, 16, 1, 1, 2, 3, 4, 5 - * } - * . - *
- * A lower value represents a more tightly binding operator. - **/ - private static final int[] precedences = - { - 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 8, - 9, 10, 11, 12, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, - 16, 1, 1, 2, 3, 4, 5 - }; + /** + * This array contains the precedences of every operator in the same order that they appear in + * the symbols array. = + * { + * 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, + * 8, 9, 10, 11, 12, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + * 15, 16, 1, 1, 2, 3, 4, 5 + * } + * .
+ * A lower value represents a more tightly binding operator. + **/ + private static final int[] precedences = {1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5, 5, 5, 6, 6, + 6, 6, 6, 7, 7, 8, 9, 10, 11, 12, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 15, 16, 1, 1, 2, 3, 4, 5}; - /** - * Produces the name of an operator given its index. - * - * @param operation The index of the operation. (See the static member - * variables.) - * @return A String holding the name of the operator. - **/ - public static String operatorSymbol(int operation) { - return symbols[operation]; - } + /** + * Produces the name of an operator given its index. + * + * @param operation The index of the operation. (See the static member variables.) + * @return A String holding the name of the operator. + **/ + public static String operatorSymbol(int operation) { + return symbols[operation]; + } - /** - * Produces the precedence of an operator given its index. - * - * @param operation The index of the operation. - * @return The precedence of the corresponding operator. - **/ - public static int operatorPrecedence(int operation) { - return precedences[operation]; - } + /** + * Produces the precedence of an operator given its index. + * + * @param operation The index of the operation. + * @return The precedence of the corresponding operator. + **/ + public static int operatorPrecedence(int operation) { + return precedences[operation]; + } - /** The index of the operation represented by this Operator. */ - public int operation; + /** The index of the operation represented by this Operator. */ + public int operation; - /** - * Default constructor. Line and byte offset information, having not been - * specified, is set to -1. - * - * @param operation The index of the operation. - **/ - public Operator(int operation) { this(operation, -1, -1); } + /** + * Default constructor. Line and byte offset information, having not been specified, is set to + * -1. 
+ * + * @param operation The index of the operation. + **/ + public Operator(int operation) { + this(operation, -1, -1); + } - /** - * Full constructor. - * - * @param operation The index of the operation. - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public Operator(int operation, int line, int byteOffset) { - super(line, byteOffset); - this.operation = operation; - } + /** + * Full constructor. + * + * @param operation The index of the operation. + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + public Operator(int operation, int line, int byteOffset) { + super(line, byteOffset); + this.operation = operation; + } - /** - * Produces the precedence of this operator. - * - * @return The precedence of this operator. - **/ - public int getPrecedence() { return precedences[operation]; } + /** + * Produces the precedence of this operator. + * + * @return The precedence of this operator. + **/ + public int getPrecedence() { + return precedences[operation]; + } - /** - * Returns a hash code value for java hash structures. - * - * @return A hash code for this object. - **/ - public int hashCode() { return operation + 1; } + /** + * Returns a hash code value for java hash structures. + * + * @return A hash code for this object. + **/ + public int hashCode() { + return operation + 1; + } - /** - * Indicates whether some other object is "equal to" this one. - * - * @return true iff this object is the same as the argument. 
- **/ - public boolean equals(Object o) { - if (!(o instanceof Operator)) return false; - return ((Operator) o).operation == operation; - } + /** + * Indicates whether some other object is "equal to" this one. + * + * @return true iff this object is the same as the argument. + **/ + public boolean equals(Object o) { + if (!(o instanceof Operator)) + return false; + return ((Operator) o).operation == operation; + } - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { return new ASTNodeIterator(0); } + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + return new ASTNodeIterator(0); + } - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { return new Operator(operation); } + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new Operator(operation); + } - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. 
- * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - buffer.append(symbols[operation]); - } + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + buffer.append(symbols[operation]); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/PackageDeclaration.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/PackageDeclaration.java index 96f1afd2..f6a4c86f 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/PackageDeclaration.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/PackageDeclaration.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,67 +11,65 @@ /** - * Representation of an package declaration. - * - * @author Nick Rizzolo + * Representation of an package declaration. + * + * @author Nick Rizzolo **/ -public class PackageDeclaration extends Declaration -{ - /** - * Full constructor. - * - * @param n Reference to the object representing the package name. - * @param line The line on which the source code represented by this - * node is found. 
- * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public PackageDeclaration(Name n, int line, int byteOffset) { - super(n, line, byteOffset); - } +public class PackageDeclaration extends Declaration { + /** + * Full constructor. + * + * @param n Reference to the object representing the package name. + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + public PackageDeclaration(Name n, int line, int byteOffset) { + super(n, line, byteOffset); + } - /** - * Returns null, since this method should never be called on - * an object of this class. - * - * @return null - **/ - public Type getType() { return null; } + /** + * Returns null, since this method should never be called on an object of this + * class. + * + * @return null + **/ + public Type getType() { + return null; + } - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return new PackageDeclaration((Name) name.clone(), -1, -1); - } + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new PackageDeclaration((Name) name.clone(), -1, -1); + } - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. 
+ **/ + public void runPass(Pass pass) { + pass.run(this); + } - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - buffer.append("package "); - name.write(buffer); - buffer.append(";"); - } + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + buffer.append("package "); + name.write(buffer); + buffer.append(";"); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ParameterSet.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ParameterSet.java index 926528d3..626f9091 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ParameterSet.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ParameterSet.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -16,329 +13,313 @@ /** - * Represents a set of possible parameters, used when doing - * parameter-tuning. The list of possible values for a - * parameter is stored in {@link #parameterList} as an - * ExpressionList. - * - *

The set can be defined either as a comma-separated list - * of possible values, or it can be defined as a range in terms - * of a start value, end value, and increment factor, which - * can then be converted into the explicit list of possible values. - * - *

The LBJava syntax for defining the parameter set is to declare - * the possible values inside of double curly braces, either as - * an explicit list or as a range. The following two examples - * are equivalent: - *

    - *
  • {{2,3,4,5}}
  • - *
  • {{2->5:1}}
  • - *
- * - * @author Michael Paul + * Represents a set of possible parameters, used when doing parameter-tuning. The list of possible + * values for a parameter is stored in {@link #parameterList} as an ExpressionList. + * + *

+ * The set can be defined either as a comma-separated list of possible values, or it can be defined + * as a range in terms of a start value, end value, and increment factor, which can then be + * converted into the explicit list of possible values. + * + *

+ * The LBJava syntax for defining the parameter set is to declare the possible values inside of + * double curly braces, either as an explicit list or as a range. The following two examples are + * equivalent: + *

    + *
  • {{2,3,4,5}}
  • + *
  • {{2->5:1}}
  • + *
+ * + * @author Michael Paul **/ -public class ParameterSet extends Expression -{ - /** Remembers how many instances of this class have been instantiated. */ - public static int count; - - - /** The list of possible values for this parameter. */ - private ExpressionList parameterList; - /** - * The name of the parameter that will be printed in method signatures in - * the generated code. - **/ - private String parameterName; - - /** The start value for the range. */ - public Expression start; - /** The end value for the range. */ - public Expression end; - /** The factor to increment by. */ - public Expression increment; - /** The most specific type for the values in this set. */ - public Type type; - /** - * true iff this parameter set appears inside the - * rounds clause of a {@link LearningClassifierExpression}. - **/ - public boolean inRounds; - - - /** - * Initializing constructor. Sets the list of possible parameter values. - * - * @param list The list of possible values for the parameter - **/ - public ParameterSet(ExpressionList list) { this(-1, -1, list); } - - /** - * Full constructor. Sets the list of possible parameter values. - * - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - * @param list The list of possible values for the parameter. - **/ - public ParameterSet(int line, int byteOffset, ExpressionList list) { - super(line, byteOffset); - parameterList = list; - parameterName = "a" + count++; - } - - /** - * Initializing constructor. Sets the range parameters. - * - * @param s The start value. - * @param e The end value. - * @param i The increment factor. - **/ - public ParameterSet(Expression s, Expression e, Expression i) { - this(-1, -1, s, e, i); - } - - /** - * Full constructor. Sets the range parameters. 
- * - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - * @param s The start value. - * @param e The end value. - * @param i The increment factor. - **/ - public ParameterSet(int line, int byteOffset, Expression s, Expression e, - Expression i) { - super(line, byteOffset); - start = s; - end = e; - increment = i; - parameterName = "a" + count++; - } - - - /** Returns the value of {@link #parameterName}. */ - public String getParameterName() { return parameterName; } - - - /** true iff this parameter set was specified as a range. */ - public boolean isRange() { return start != null; } - - - /** Returns a list iterator over {@link #parameterList}. */ - public ExpressionList.ExpressionListIterator listIterator() { - return parameterList.listIterator(); - } - - - /** Returns the first element of the list. */ - public Expression getFirst() { - ExpressionList.ExpressionListIterator iterator = listIterator(); - return iterator.hasNext() ? iterator.nextItem() : null; - } - - - /** - * Converts this parameter set's {@link #start}, {@link #end}, and - * {@link #increment} expressions (which must represent {@link Constant}s - * of a {@link PrimitiveType} other than boolean) into an - * explicit list of values. The {@link #type} field must be set - * appropriately for before calling this method. 
- **/ - public void convertRange() { - if (start == null) - throw - new IllegalArgumentException( - "Can't call ParameterSet.convertRange() when start == null"); - if (type == null) - throw - new IllegalArgumentException( - "Can't call ParameterSet.convertRange() when type == null"); - if (!(type instanceof PrimitiveType)) - throw - new IllegalArgumentException( - "Can't call ParameterSet.convertRange() when type isn't " - + "primitive"); - PrimitiveType pt = (PrimitiveType) type; - if (pt.type == PrimitiveType.BOOLEAN) - throw - new IllegalArgumentException( - "Can't call ParameterSet.convertRange() when type is boolean"); - - parameterList = new ExpressionList(); - - if (pt.type == PrimitiveType.CHAR) { - int s = (int) ((Constant) start).value.charAt(1); - int e = (int) ((Constant) end).value.charAt(1); - int i = Integer.parseInt(((Constant) increment).value); - int m = i / Math.abs(i); - e *= m; - - for (int j = s; m*j <= e; j += i) - parameterList.add( - new Constant(this.line, this.byteOffset, "" + ((char) j))); +public class ParameterSet extends Expression { + /** Remembers how many instances of this class have been instantiated. */ + public static int count; + + + /** The list of possible values for this parameter. */ + private ExpressionList parameterList; + /** + * The name of the parameter that will be printed in method signatures in the generated code. + **/ + private String parameterName; + + /** The start value for the range. */ + public Expression start; + /** The end value for the range. */ + public Expression end; + /** The factor to increment by. */ + public Expression increment; + /** The most specific type for the values in this set. */ + public Type type; + /** + * true iff this parameter set appears inside the rounds clause of a + * {@link LearningClassifierExpression}. + **/ + public boolean inRounds; + + + /** + * Initializing constructor. Sets the list of possible parameter values. 
+ * + * @param list The list of possible values for the parameter + **/ + public ParameterSet(ExpressionList list) { + this(-1, -1, list); + } + + /** + * Full constructor. Sets the list of possible parameter values. + * + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + * @param list The list of possible values for the parameter. + **/ + public ParameterSet(int line, int byteOffset, ExpressionList list) { + super(line, byteOffset); + parameterList = list; + parameterName = "a" + count++; + } + + /** + * Initializing constructor. Sets the range parameters. + * + * @param s The start value. + * @param e The end value. + * @param i The increment factor. + **/ + public ParameterSet(Expression s, Expression e, Expression i) { + this(-1, -1, s, e, i); + } + + /** + * Full constructor. Sets the range parameters. + * + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + * @param s The start value. + * @param e The end value. + * @param i The increment factor. + **/ + public ParameterSet(int line, int byteOffset, Expression s, Expression e, Expression i) { + super(line, byteOffset); + start = s; + end = e; + increment = i; + parameterName = "a" + count++; + } + + + /** Returns the value of {@link #parameterName}. 
*/ + public String getParameterName() { + return parameterName; } - else { - double s = Double.parseDouble(((Constant) start).value); - double e = Double.parseDouble(((Constant) end).value); - double i = Double.parseDouble(((Constant) increment).value); - double m = i / Math.abs(i); - e *= m; - - for (double a = s; m*a <= e; a += i) { - String num = ""; - - switch (pt.type) { - case PrimitiveType.FLOAT: case PrimitiveType.DOUBLE: - num += a; - break; - case PrimitiveType.BYTE: case PrimitiveType.SHORT: - case PrimitiveType.INT: case PrimitiveType.LONG: - num += Math.round(a); - break; + + + /** true iff this parameter set was specified as a range. */ + public boolean isRange() { + return start != null; + } + + + /** Returns a list iterator over {@link #parameterList}. */ + public ExpressionList.ExpressionListIterator listIterator() { + return parameterList.listIterator(); + } + + + /** Returns the first element of the list. */ + public Expression getFirst() { + ExpressionList.ExpressionListIterator iterator = listIterator(); + return iterator.hasNext() ? iterator.nextItem() : null; + } + + + /** + * Converts this parameter set's {@link #start}, {@link #end}, and {@link #increment} + * expressions (which must represent {@link Constant}s of a {@link PrimitiveType} other than + * boolean) into an explicit list of values. The {@link #type} field must be set + * appropriately for before calling this method. 
+ **/ + public void convertRange() { + if (start == null) + throw new IllegalArgumentException( + "Can't call ParameterSet.convertRange() when start == null"); + if (type == null) + throw new IllegalArgumentException( + "Can't call ParameterSet.convertRange() when type == null"); + if (!(type instanceof PrimitiveType)) + throw new IllegalArgumentException( + "Can't call ParameterSet.convertRange() when type isn't " + "primitive"); + PrimitiveType pt = (PrimitiveType) type; + if (pt.type == PrimitiveType.BOOLEAN) + throw new IllegalArgumentException( + "Can't call ParameterSet.convertRange() when type is boolean"); + + parameterList = new ExpressionList(); + + if (pt.type == PrimitiveType.CHAR) { + int s = (int) ((Constant) start).value.charAt(1); + int e = (int) ((Constant) end).value.charAt(1); + int i = Integer.parseInt(((Constant) increment).value); + int m = i / Math.abs(i); + e *= m; + + for (int j = s; m * j <= e; j += i) + parameterList.add(new Constant(this.line, this.byteOffset, "" + ((char) j))); + } else { + double s = Double.parseDouble(((Constant) start).value); + double e = Double.parseDouble(((Constant) end).value); + double i = Double.parseDouble(((Constant) increment).value); + double m = i / Math.abs(i); + e *= m; + + for (double a = s; m * a <= e; a += i) { + String num = ""; + + switch (pt.type) { + case PrimitiveType.FLOAT: + case PrimitiveType.DOUBLE: + num += a; + break; + case PrimitiveType.BYTE: + case PrimitiveType.SHORT: + case PrimitiveType.INT: + case PrimitiveType.LONG: + num += Math.round(a); + break; + } + + parameterList.add(new Constant(this.line, this.byteOffset, num)); + } } - parameterList.add(new Constant(this.line, this.byteOffset, num)); - } + // If we didn't add the end value, add it + /* + * if (lastEntry != e) { Double num = new Double(e); list.add(new Constant(this.line, + * this.byteOffset, num.toString())); } + */ } - // If we didn't add the end value, add it - /* - if (lastEntry != e) { - Double num = new Double(e); - 
list.add(new Constant(this.line, this.byteOffset, num.toString())); + + /** + * Parses integers out of every constant in the set and returns them in a sorted array. If this + * parameter set was specified as a range, it is assumed that {@link #convertRange()} has + * already been called. + **/ + public int[] toSortedIntArray() { + int[] values = new int[parameterList.size()]; + ExpressionList.ExpressionListIterator I = listIterator(); + for (int i = 0; I.hasNext(); ++i) + values[i] = Integer.parseInt(((Constant) I.next()).value); + Arrays.sort(values); + return values; } - */ - } - - - /** - * Parses integers out of every constant in the set and returns them in a - * sorted array. If this parameter set was specified as a range, it is - * assumed that {@link #convertRange()} has already been called. - **/ - public int[] toSortedIntArray() { - int[] values = new int[parameterList.size()]; - ExpressionList.ExpressionListIterator I = listIterator(); - for (int i = 0; I.hasNext(); ++i) - values[i] = Integer.parseInt(((Constant) I.next()).value); - Arrays.sort(values); - return values; - } - - - /** - * Assuming that {@link #convertRange()} has already been called (if - * necessary) and that every expression in {@link #parameterList} is a - * {@link Constant}, this method produces an array of Strings - * containing the values of the constants. The return type of the method - * is Object[] so that its elements can be replaced by objects - * of other types, which is convenient during parameter tuning. - **/ - public Object[] toStringArray() { - Object[] values = new Object[parameterList.size()]; - ExpressionList.ExpressionListIterator I = listIterator(); - for (int i = 0; I.hasNext(); ++i) values[i] = ((Constant) I.next()).value; - return values; - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. 
- **/ - public ASTNodeIterator iterator() { - if (start != null) { - ASTNodeIterator result = new ASTNodeIterator(3); - result.children[0] = start; - result.children[1] = end; - result.children[2] = increment; - return result; + + + /** + * Assuming that {@link #convertRange()} has already been called (if necessary) and that every + * expression in {@link #parameterList} is a {@link Constant}, this method produces an array of + * Strings containing the values of the constants. The return type of the method is + * Object[] so that its elements can be replaced by objects of other types, which + * is convenient during parameter tuning. + **/ + public Object[] toStringArray() { + Object[] values = new Object[parameterList.size()]; + ExpressionList.ExpressionListIterator I = listIterator(); + for (int i = 0; I.hasNext(); ++i) + values[i] = ((Constant) I.next()).value; + return values; } - return parameterList.iterator(); - } - - - /** - * Two parameter sets are equivalent when their constituent expressions are - * the same. - * - * @return true iff this object is the same as the argument. - **/ - public boolean equals(Object o) { - if (!(o instanceof ParameterSet)) return false; - ParameterSet p = (ParameterSet) o; - return - start == null ? parameterList.equals(p.parameterList) - : start.equals(p.start) && end.equals(p.end) - && increment.equals(p.increment); - } - - - /** A hash code based on the hash codes of the constituent expressions. */ - public int hashCode() { - return - start == null ? parameterList.hashCode() - : 31 * start.hashCode() + 23 * end.hashCode() - + 17 * increment.hashCode(); - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. 
- **/ - public Object clone() { - ParameterSet c = - new ParameterSet(this.line, this.byteOffset, - (ExpressionList) parameterList.clone()); - c.start = start; - c.end = end; - c.increment = increment; - return c; - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - if (parenthesized) buffer.append("("); - buffer.append("{{ "); - if (start != null) { - start.write(buffer); - buffer.append(" -> "); - end.write(buffer); - buffer.append(" : "); - increment.write(buffer); + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + if (start != null) { + ASTNodeIterator result = new ASTNodeIterator(3); + result.children[0] = start; + result.children[1] = end; + result.children[2] = increment; + return result; + } + + return parameterList.iterator(); } - else parameterList.write(buffer); - buffer.append(" }}"); - if (parenthesized) buffer.append(")"); - } -} + + /** + * Two parameter sets are equivalent when their constituent expressions are the same. + * + * @return true iff this object is the same as the argument. + **/ + public boolean equals(Object o) { + if (!(o instanceof ParameterSet)) + return false; + ParameterSet p = (ParameterSet) o; + return start == null ? parameterList.equals(p.parameterList) : start.equals(p.start) + && end.equals(p.end) && increment.equals(p.increment); + } + + + /** A hash code based on the hash codes of the constituent expressions. 
*/ + public int hashCode() { + return start == null ? parameterList.hashCode() : 31 * start.hashCode() + 23 + * end.hashCode() + 17 * increment.hashCode(); + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + ParameterSet c = + new ParameterSet(this.line, this.byteOffset, (ExpressionList) parameterList.clone()); + c.start = start; + c.end = end; + c.increment = increment; + return c; + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + if (parenthesized) + buffer.append("("); + buffer.append("{{ "); + if (start != null) { + start.write(buffer); + buffer.append(" -> "); + end.write(buffer); + buffer.append(" : "); + increment.write(buffer); + } else + parameterList.write(buffer); + buffer.append(" }}"); + if (parenthesized) + buffer.append(")"); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/PrimitiveType.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/PrimitiveType.java index 8b11f1e5..939fc0b9 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/PrimitiveType.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/PrimitiveType.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. 
See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,181 +11,179 @@ /** - * Represents a primitive type, as in a declaration. In LBJava, the legal - * primitive types are boolean, char, byte, short, int, long, float, and - * double. - * - * @author Nick Rizzolo + * Represents a primitive type, as in a declaration. In LBJava, the legal primitive types are + * boolean, char, byte, short, int, long, float, and double. + * + * @author Nick Rizzolo **/ -public class PrimitiveType extends Type -{ - /** Value of the type variable. */ - public static final int BOOLEAN = 0; - /** Value of the type variable. */ - public static final int CHAR = 1; - /** Value of the type variable. */ - public static final int BYTE = 2; - /** Value of the type variable. */ - public static final int SHORT = 3; - /** Value of the type variable. */ - public static final int INT = 4; - /** Value of the type variable. */ - public static final int LONG = 5; - /** Value of the type variable. */ - public static final int FLOAT = 6; - /** Value of the type variable. */ - public static final int DOUBLE = 7; - - /** - * = - * { - * "boolean", "char", "byte", "short", "int", "long", "float", "double" - * } - * - **/ - private static final String[] typeNames = - { - "boolean", "char", "byte", "short", "int", "long", "float", "double" - }; - - /** - * Produces the name of the primitive type given its index. - * - * @param t The index of the type. (See the static member variables.) - * @return A String holding the name of the type. 
- **/ - public static String typeName(int t) { return typeNames[t]; } - - /** - * = - * { - * Boolean.TYPE, Character.TYPE, Byte.TYPE, Short.TYPE, Integer.TYPE, - * Long.TYPE, Float.TYPE, Double.TYPE - * } - * - **/ - private static final Class[] classes = - { - Boolean.TYPE, Character.TYPE, Byte.TYPE, Short.TYPE, Integer.TYPE, - Long.TYPE, Float.TYPE, Double.TYPE - }; - - - /** - * (¬ø) The index of the type represented by this - * PrimitiveType. - **/ - public int type; - - - /** - * Default constructor. Line and byte offset information, having not been - * supplied, is set to -1. - * - * @param t The index of the primitive type. - **/ - public PrimitiveType(int t) { this(t, -1, -1); } - - /** - * Full constructor. - * - * @param t The index of the primitive type. - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public PrimitiveType(int t, int line, int byteOffset) { - super(line, byteOffset); - type = t; - } - - - /** - * Returns an object representing the class that this type - * represents. - * - * @return An object representing the class that this type - * represents. - **/ - public Class typeClass() { return classes[type]; } - - - /** - * Determines whether this type represents a numerical value (including - * char), as opposed to a boolean or null. - * - * @return true iff this PrimitiveType represents - * a numerical type. - **/ - public boolean isNumber() { return type >= CHAR; } - - - /** - * Determines whether this type represents a whole number value (including - * char), as opposed to a floating point, a boolean, or - * null. - * - * @return true iff this PrimitiveType represents - * a whole number type. 
- **/ - public boolean isWholeNumber() { return type >= CHAR && type <= LONG; } - - - /** - * Two PrimitiveTypes are equivalent when their - * type member variables are the same. - * - * @param o The Object whose equality with this object needs - * to be tested. - * @return true if the two Objects are equal, and - * false otherwise. - **/ - public boolean equals(Object o) { - return o instanceof PrimitiveType - && type == ((PrimitiveType) o).type; - } - - - /** A hash code based on {@link #type}. */ - public int hashCode() { return type; } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { return new ASTNodeIterator(0); } - - - /** - * Creates a new object with the same primitive data. - * - * @return The clone node. - **/ - public Object clone() { return new PrimitiveType(type); } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { buffer.append(typeName(type)); } +public class PrimitiveType extends Type { + /** Value of the type variable. */ + public static final int BOOLEAN = 0; + /** Value of the type variable. */ + public static final int CHAR = 1; + /** Value of the type variable. */ + public static final int BYTE = 2; + /** Value of the type variable. */ + public static final int SHORT = 3; + /** Value of the type variable. */ + public static final int INT = 4; + /** Value of the type variable. 
*/ + public static final int LONG = 5; + /** Value of the type variable. */ + public static final int FLOAT = 6; + /** Value of the type variable. */ + public static final int DOUBLE = 7; + + /** + * = + * { + * "boolean", "char", "byte", "short", "int", "long", "float", "double" + * } + * + **/ + private static final String[] typeNames = {"boolean", "char", "byte", "short", "int", "long", + "float", "double"}; + + /** + * Produces the name of the primitive type given its index. + * + * @param t The index of the type. (See the static member variables.) + * @return A String holding the name of the type. + **/ + public static String typeName(int t) { + return typeNames[t]; + } + + /** + * = + * { + * Boolean.TYPE, Character.TYPE, Byte.TYPE, Short.TYPE, Integer.TYPE, + * Long.TYPE, Float.TYPE, Double.TYPE + * } + * + **/ + private static final Class[] classes = {Boolean.TYPE, Character.TYPE, Byte.TYPE, Short.TYPE, + Integer.TYPE, Long.TYPE, Float.TYPE, Double.TYPE}; + + + /** + * (¬ø) The index of the type represented by this PrimitiveType. + **/ + public int type; + + + /** + * Default constructor. Line and byte offset information, having not been supplied, is set to + * -1. + * + * @param t The index of the primitive type. + **/ + public PrimitiveType(int t) { + this(t, -1, -1); + } + + /** + * Full constructor. + * + * @param t The index of the primitive type. + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + public PrimitiveType(int t, int line, int byteOffset) { + super(line, byteOffset); + type = t; + } + + + /** + * Returns an object representing the class that this type represents. + * + * @return An object representing the class that this type represents. 
+ **/ + public Class typeClass() { + return classes[type]; + } + + + /** + * Determines whether this type represents a numerical value (including char), as + * opposed to a boolean or null. + * + * @return true iff this PrimitiveType represents a numerical type. + **/ + public boolean isNumber() { + return type >= CHAR; + } + + + /** + * Determines whether this type represents a whole number value (including char), + * as opposed to a floating point, a boolean, or null. + * + * @return true iff this PrimitiveType represents a whole number type. + **/ + public boolean isWholeNumber() { + return type >= CHAR && type <= LONG; + } + + + /** + * Two PrimitiveTypes are equivalent when their type member variables + * are the same. + * + * @param o The Object whose equality with this object needs to be tested. + * @return true if the two Objects are equal, and false + * otherwise. + **/ + public boolean equals(Object o) { + return o instanceof PrimitiveType && type == ((PrimitiveType) o).type; + } + + + /** A hash code based on {@link #type}. */ + public int hashCode() { + return type; + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + return new ASTNodeIterator(0); + } + + + /** + * Creates a new object with the same primitive data. + * + * @return The clone node. + **/ + public Object clone() { + return new PrimitiveType(type); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. 
+ **/ + public void write(StringBuffer buffer) { + buffer.append(typeName(type)); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/QuantifiedConstraintExpression.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/QuantifiedConstraintExpression.java index 39b8d38f..9d587361 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/QuantifiedConstraintExpression.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/QuantifiedConstraintExpression.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,81 +11,72 @@ /** - * A quantified constraint expression is a compact way to specify a - * constraint as a function of every object in a given collection. - * - * @author Nick Rizzolo + * A quantified constraint expression is a compact way to specify a constraint as a function of + * every object in a given collection. + * + * @author Nick Rizzolo **/ -public abstract class QuantifiedConstraintExpression - extends ConstraintExpression -{ - /** - * (¬ø) The variable specified by this argument is set to each - * of the objects from the collection in turn and used throughout the - * quantified constraint. - **/ - public Argument argument; - /** - * (¬ø) The objects to iterate through; it must evaluate to a - * Java Collection. - **/ - public Expression collection; - /** (¬ø) The quantified constraint. 
*/ - public ConstraintExpression constraint; - /** - * Filled in by SemanticAnalysis, this flag is set if - * collection contains any quantified variables. - **/ - public boolean collectionIsQuantified; +public abstract class QuantifiedConstraintExpression extends ConstraintExpression { + /** + * (¬ø) The variable specified by this argument is set to each of the objects from + * the collection in turn and used throughout the quantified constraint. + **/ + public Argument argument; + /** + * (¬ø) The objects to iterate through; it must evaluate to a Java + * Collection. + **/ + public Expression collection; + /** (¬ø) The quantified constraint. */ + public ConstraintExpression constraint; + /** + * Filled in by SemanticAnalysis, this flag is set if collection + * contains any quantified variables. + **/ + public boolean collectionIsQuantified; - /** - * Full constructor. - * - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - * @param a The quantification variable specification. - * @param c Evaluates to the collection of objects. - * @param co The quantified constraint. - **/ - public QuantifiedConstraintExpression(int line, int byteOffset, Argument a, - Expression c, ConstraintExpression co) - { - super(line, byteOffset); - argument = a; - collection = c; - constraint = co; - } + /** + * Full constructor. + * + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + * @param a The quantification variable specification. + * @param c Evaluates to the collection of objects. + * @param co The quantified constraint. 
+ **/ + public QuantifiedConstraintExpression(int line, int byteOffset, Argument a, Expression c, + ConstraintExpression co) { + super(line, byteOffset); + argument = a; + collection = c; + constraint = co; + } - /** - * Returns a set of Arguments storing the name and type of - * each variable that is a subexpression of this expression. This method - * cannot be run before SemanticAnalysis runs. - **/ - public HashSet getVariableTypes() { - HashSet result = collection.getVariableTypes(); - result.addAll(constraint.getVariableTypes()); - return result; - } + /** + * Returns a set of Arguments storing the name and type of each variable that is a + * subexpression of this expression. This method cannot be run before + * SemanticAnalysis runs. + **/ + public HashSet getVariableTypes() { + HashSet result = collection.getVariableTypes(); + result.addAll(constraint.getVariableTypes()); + return result; + } - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(3); - I.children[0] = argument; - I.children[1] = collection; - I.children[2] = constraint; - return I; - } + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. 
+ **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(3); + I.children[0] = argument; + I.children[1] = collection; + I.children[2] = constraint; + return I; + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ReferenceType.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ReferenceType.java index 54083244..3c092a21 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ReferenceType.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ReferenceType.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,117 +11,116 @@ /** - * Represents a type defined by a class. Note that in LBJava's - * parser, the nonterminal referenceType refers to both types - * defined by classes and array types, but this class represents - * only the former. - * - * @author Nick Rizzolo + * Represents a type defined by a class. Note that in LBJava's parser, the nonterminal + * referenceType refers to both types defined by classes and array types, + * but this class represents only the former. + * + * @author Nick Rizzolo **/ -public class ReferenceType extends Type -{ - /** - * (¬ø) The expression representing the name of the class that - * defines this type. - **/ - private Name name; - - - /** - * Initializing constructor. Line and byte offset information is taken - * from the expression. 
- * - * @param name The expression representing the name of the class that - * defines this type. - **/ - public ReferenceType(Name name) { - super(name.line, name.byteOffset); - this.name = name; - } - - - /** - * Returns the name of the class that defines this type. - * - * @return The contents of name. - **/ - public Name getName() { return name; } - - - /** - * Returns an object representing the class that this type - * represents. - * - * @return An object representing the class that this type - * represents. - **/ - public Class typeClass() { - if (myClass == null) myClass = AST.globalSymbolTable.classForName(name); - return myClass; - } - - - /** - * Two ReferenceTypes are equivalent when their associated - * Java classes, as computed by typeClass() are - * equivalent. - * - * @param t The Type whose equality with this object needs to - * be tested. - * @return true if the two Types are equal, and - * false otherwise. - **/ - public boolean equals(Object t) { - if (!(t instanceof ReferenceType)) return false; - ReferenceType r = (ReferenceType) t; - if (typeClass() != null) return typeClass().equals(r.typeClass()); - return name.equals(r.name); - } - - - /** A hash code based on the hash code of {@link #name}. */ - public int hashCode() { return name.hashCode(); } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(1); - I.children[0] = name; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { return new ReferenceType((Name) name.clone()); } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. 
- **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { name.write(buffer); } +public class ReferenceType extends Type { + /** + * (¬ø) The expression representing the name of the class that defines this type. + **/ + private Name name; + + + /** + * Initializing constructor. Line and byte offset information is taken from the expression. + * + * @param name The expression representing the name of the class that defines this type. + **/ + public ReferenceType(Name name) { + super(name.line, name.byteOffset); + this.name = name; + } + + + /** + * Returns the name of the class that defines this type. + * + * @return The contents of name. + **/ + public Name getName() { + return name; + } + + + /** + * Returns an object representing the class that this type represents. + * + * @return An object representing the class that this type represents. + **/ + public Class typeClass() { + if (myClass == null) + myClass = AST.globalSymbolTable.classForName(name); + return myClass; + } + + + /** + * Two ReferenceTypes are equivalent when their associated Java class + * es, as computed by typeClass() are equivalent. + * + * @param t The Type whose equality with this object needs to be tested. + * @return true if the two Types are equal, and false + * otherwise. + **/ + public boolean equals(Object t) { + if (!(t instanceof ReferenceType)) + return false; + ReferenceType r = (ReferenceType) t; + if (typeClass() != null) + return typeClass().equals(r.typeClass()); + return name.equals(r.name); + } + + + /** A hash code based on the hash code of {@link #name}. */ + public int hashCode() { + return name.hashCode(); + } + + + /** + * Returns an iterator used to successively access the children of this node. 
+ * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(1); + I.children[0] = name; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new ReferenceType((Name) name.clone()); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + name.write(buffer); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ReturnStatement.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ReturnStatement.java index 5a30844e..ef24e5f9 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ReturnStatement.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ReturnStatement.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,98 +11,91 @@ /** - * Represents a return statement. - * - * @author Nick Rizzolo + * Represents a return statement. + * + * @author Nick Rizzolo **/ -public class ReturnStatement extends Statement -{ - /** (¬ø) The expression representing the value to return. */ - public Expression expression; - - - /** - * Full constructor. - * - * @param e The expression representing the value to return, if - * any. - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public ReturnStatement(Expression e, int line, int byteOffset) { - super(line, byteOffset); - expression = e; - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(1); - I.children[0] = expression; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return new ReturnStatement((Expression) expression.clone(), -1, -1); - } - - - /** Returns a hash code for this {@link ASTNode}. */ - public int hashCode() { - return 31 * expression.hashCode(); - } - - - /** - * Distinguishes this {@link ASTNode} from other objects according to its - * contents recursively. - * - * @param o Another object. - * @return true iff this node is equal to o. 
- **/ - public boolean equals(Object o) { - if (!(o instanceof ReturnStatement)) return false; - ReturnStatement r = (ReturnStatement) o; - return expression.equals(r.expression); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - buffer.append("return "); - expression.write(buffer); - buffer.append(";"); - } +public class ReturnStatement extends Statement { + /** (¬ø) The expression representing the value to return. */ + public Expression expression; + + + /** + * Full constructor. + * + * @param e The expression representing the value to return, if any. + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + public ReturnStatement(Expression e, int line, int byteOffset) { + super(line, byteOffset); + expression = e; + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(1); + I.children[0] = expression; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new ReturnStatement((Expression) expression.clone(), -1, -1); + } + + + /** Returns a hash code for this {@link ASTNode}. 
*/ + public int hashCode() { + return 31 * expression.hashCode(); + } + + + /** + * Distinguishes this {@link ASTNode} from other objects according to its contents recursively. + * + * @param o Another object. + * @return true iff this node is equal to o. + **/ + public boolean equals(Object o) { + if (!(o instanceof ReturnStatement)) + return false; + ReturnStatement r = (ReturnStatement) o; + return expression.equals(r.expression); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + buffer.append("return "); + expression.write(buffer); + buffer.append(";"); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/SenseStatement.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/SenseStatement.java index 91240c4b..2a20c23a 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/SenseStatement.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/SenseStatement.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,171 +11,155 @@ /** - * Represents a feature sensing statement. - * - * @author Nick Rizzolo + * Represents a feature sensing statement. + * + * @author Nick Rizzolo **/ -public class SenseStatement extends Statement -{ - /** - * (ø) Represents the name of the feature being sensed (only used in - * generators). - **/ - public Expression name; - /** (¬ø) Represents the value of the feature being sensed. */ - public Expression value; - /** true iff this is a senseall statement. */ - public boolean senseall; - - - /** - * Initializing constructor. - * - * @param v The value of the feature being sensed. - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public SenseStatement(Expression v, int line, int byteOffset) { - this(null, v, false, line, byteOffset); - } - - /** - * Initializing constructor. - * - * @param v The value of the feature being sensed. - * @param all true iff this is a senseall - * statement. - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public SenseStatement(Expression v, boolean all, int line, int byteOffset) { - this(null, v, all, line, byteOffset); - } - - /** - * Initializing constructor. - * - * @param n The name of the feature being sensed. - * @param v The value of the feature being sensed. 
- * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public SenseStatement(Expression n, Expression v, int line, int byteOffset) - { - this(n, v, false, line, byteOffset); - } - - /** - * Full constructor. - * - * @param n The name of the feature being sensed. - * @param v The value of the feature being sensed. - * @param all true iff this is a senseall - * statement. - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public SenseStatement(Expression n, Expression v, boolean all, int line, - int byteOffset) { - super(line, byteOffset); - name = n; - value = v; - senseall = all; - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(name == null ? 1 : 2); - if (name != null) I.children[0] = name; - I.children[I.children.length - 1] = value; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return new SenseStatement( - (name == null ? null : (Expression) name.clone()), - (Expression) value.clone(), senseall, -1, -1); - } - - - /** Returns a hash code for this {@link ASTNode}. */ - public int hashCode() { - int result = (senseall ? 
1 : 3) + 31 * value.hashCode(); - if (name != null) result += name.hashCode(); - return result; - } - - - /** - * Distinguishes this {@link ASTNode} from other objects according to its - * contents recursively. - * - * @param o Another object. - * @return true iff this node is equal to o. - **/ - public boolean equals(Object o) { - if (!(o instanceof SenseStatement)) return false; - SenseStatement s = (SenseStatement) o; - return - senseall == s.senseall && value.equals(s.value) - && (name == null ? s.name == null : name.equals(s.name)); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - buffer.append("sense"); - if (senseall) buffer.append("all"); - buffer.append(" "); - - if (name != null) { - name.write(buffer); - buffer.append(" : "); +public class SenseStatement extends Statement { + /** + * (ø) Represents the name of the feature being sensed (only used in generators). + **/ + public Expression name; + /** (¬ø) Represents the value of the feature being sensed. */ + public Expression value; + /** true iff this is a senseall statement. */ + public boolean senseall; + + + /** + * Initializing constructor. + * + * @param v The value of the feature being sensed. + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. 
+ **/ + public SenseStatement(Expression v, int line, int byteOffset) { + this(null, v, false, line, byteOffset); } - value.write(buffer); - buffer.append(";"); - } -} + /** + * Initializing constructor. + * + * @param v The value of the feature being sensed. + * @param all true iff this is a senseall statement. + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + public SenseStatement(Expression v, boolean all, int line, int byteOffset) { + this(null, v, all, line, byteOffset); + } + + /** + * Initializing constructor. + * + * @param n The name of the feature being sensed. + * @param v The value of the feature being sensed. + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + public SenseStatement(Expression n, Expression v, int line, int byteOffset) { + this(n, v, false, line, byteOffset); + } + + /** + * Full constructor. + * + * @param n The name of the feature being sensed. + * @param v The value of the feature being sensed. + * @param all true iff this is a senseall statement. + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + public SenseStatement(Expression n, Expression v, boolean all, int line, int byteOffset) { + super(line, byteOffset); + name = n; + value = v; + senseall = all; + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. 
+ **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(name == null ? 1 : 2); + if (name != null) + I.children[0] = name; + I.children[I.children.length - 1] = value; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new SenseStatement((name == null ? null : (Expression) name.clone()), + (Expression) value.clone(), senseall, -1, -1); + } + + /** Returns a hash code for this {@link ASTNode}. */ + public int hashCode() { + int result = (senseall ? 1 : 3) + 31 * value.hashCode(); + if (name != null) + result += name.hashCode(); + return result; + } + + + /** + * Distinguishes this {@link ASTNode} from other objects according to its contents recursively. + * + * @param o Another object. + * @return true iff this node is equal to o. + **/ + public boolean equals(Object o) { + if (!(o instanceof SenseStatement)) + return false; + SenseStatement s = (SenseStatement) o; + return senseall == s.senseall && value.equals(s.value) + && (name == null ? s.name == null : name.equals(s.name)); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. 
+ **/ + public void write(StringBuffer buffer) { + buffer.append("sense"); + if (senseall) + buffer.append("all"); + buffer.append(" "); + + if (name != null) { + name.write(buffer); + buffer.append(" : "); + } + + value.write(buffer); + buffer.append(";"); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Statement.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Statement.java index a83cca32..014a15d6 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Statement.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Statement.java @@ -1,33 +1,28 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; /** - * Abstract class from which statements are derived. LBJava currently has only - * one type of statement: the assignment statement. - * - * @author Nick Rizzolo + * Abstract class from which statements are derived. LBJava currently has only one type of + * statement: the assignment statement. + * + * @author Nick Rizzolo **/ -public abstract class Statement extends ASTNode -{ - /** - * Default constructor. - * - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. 
- **/ - Statement(int line, int byteOffset) { super(line, byteOffset); } +public abstract class Statement extends ASTNode { + /** + * Default constructor. + * + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + Statement(int line, int byteOffset) { + super(line, byteOffset); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/StatementExpression.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/StatementExpression.java index aac23ab7..18c73777 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/StatementExpression.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/StatementExpression.java @@ -1,33 +1,27 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; /** - * Abstract class for representing expressions that can stand alone as a - * statement. - * - * @author Nick Rizzolo + * Abstract class for representing expressions that can stand alone as a statement. + * + * @author Nick Rizzolo **/ -public abstract class StatementExpression extends Expression -{ - /** - * Default constructor. - * - * @param line The line on which the source code represented by this - * node is found. 
- * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - StatementExpression(int line, int byteOffset) { super(line, byteOffset); } +public abstract class StatementExpression extends Expression { + /** + * Default constructor. + * + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + StatementExpression(int line, int byteOffset) { + super(line, byteOffset); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/StatementList.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/StatementList.java index 7af761cc..8956f984 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/StatementList.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/StatementList.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -16,147 +13,143 @@ /** - * Currently, this is just a wrapper class for LinkedList. The - * code that uses it looks a little cleaner when casts are all taken care of - * automatically. - * - * @author Nick Rizzolo + * Currently, this is just a wrapper class for LinkedList. 
The code that uses it looks + * a little cleaner when casts are all taken care of automatically. + * + * @author Nick Rizzolo **/ -public class StatementList extends List -{ - /** Default constructor. */ - public StatementList() { super(-1, -1, " "); } - - /** - * Initializing constructor. - * - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public StatementList(int line, int byteOffset) { - super(line, byteOffset, " "); - } - - /** - * Initializing constructor. Requires its argument to be - * non-null. - * - * @param s A single Statement with which to initialize this - * list. - **/ - public StatementList(Statement s) { - super(s.line, s.byteOffset, " "); - list.add(s); - } - - - /** - * Adds another Statement to the end of the list. - * - * @param s A reference to the Statement to be added. - **/ - public void add(Statement s) { list.add(s); } - - - /** - * Adds all the Statements in another - * StatementList to the end of this - * StatementList. - * - * @param s The list to be added. - **/ - public void addAll(StatementList s) { list.addAll(s.list); } - - - /** - * Transforms the list into an array of statements. - * - * @return An array of statements containing references to every statement - * in the list. - **/ - public Statement[] toArray() { - return (Statement[]) list.toArray(new Statement[list.size()]); - } - - - /** - * Returns an iterator used specifically to access the elements of this - * list. - * - * @return An iterator used specifically to access the elements of this - * list. - **/ - public StatementListIterator listIterator() { - return new StatementListIterator(); - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. 
- **/ - public ASTNodeIterator iterator() { return listIterator(); } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - StatementList clone = new StatementList(); - for (Iterator i = list.iterator(); i.hasNext(); ) - clone.list.add(((Statement) i.next()).clone()); - return clone; - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Used to iterate though the children of a list of AST nodes. The entire - * interface of java.util.ListIterator is exposed through this - * class. - * - * @author Nick Rizzolo - **/ - public class StatementListIterator extends NodeListIterator - { +public class StatementList extends List { + /** Default constructor. */ + public StatementList() { + super(-1, -1, " "); + } + + /** + * Initializing constructor. + * + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + public StatementList(int line, int byteOffset) { + super(line, byteOffset, " "); + } + /** - * Returns the next AST node in the list. This method may be called - * repeatedly to iterate through the list, or intermixed with calls to - * previous() to go back and forth. (Note that alternating - * calls to next() and previous() will return - * the same element repeatedly.) - * - * @return The next AST node in the list. + * Initializing constructor. Requires its argument to be non-null. + * + * @param s A single Statement with which to initialize this list. 
**/ - public Statement nextItem() { return (Statement) I.next(); } + public StatementList(Statement s) { + super(s.line, s.byteOffset, " "); + list.add(s); + } /** - * Returns the previous element in the list. This method may be called - * repeatedly to iterate through the list backwards, or intermixed with - * calls to next to go back and forth. (Note that alternating calls to - * next and previous will return the same element repeatedly.) - * - * @return The previous AST node in the list. + * Adds another Statement to the end of the list. + * + * @param s A reference to the Statement to be added. **/ - public Statement previousItem() { return (Statement) I.previous(); } - } -} + public void add(Statement s) { + list.add(s); + } + + + /** + * Adds all the Statements in another StatementList to the end of this + * StatementList. + * + * @param s The list to be added. + **/ + public void addAll(StatementList s) { + list.addAll(s.list); + } + + + /** + * Transforms the list into an array of statements. + * + * @return An array of statements containing references to every statement in the list. + **/ + public Statement[] toArray() { + return (Statement[]) list.toArray(new Statement[list.size()]); + } + + + /** + * Returns an iterator used specifically to access the elements of this list. + * + * @return An iterator used specifically to access the elements of this list. + **/ + public StatementListIterator listIterator() { + return new StatementListIterator(); + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + return listIterator(); + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. 
+ **/ + public Object clone() { + StatementList clone = new StatementList(); + for (Iterator i = list.iterator(); i.hasNext();) + clone.list.add(((Statement) i.next()).clone()); + return clone; + } + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Used to iterate though the children of a list of AST nodes. The entire interface of + * java.util.ListIterator is exposed through this class. + * + * @author Nick Rizzolo + **/ + public class StatementListIterator extends NodeListIterator { + /** + * Returns the next AST node in the list. This method may be called repeatedly to iterate + * through the list, or intermixed with calls to previous() to go back and + * forth. (Note that alternating calls to next() and previous() + * will return the same element repeatedly.) + * + * @return The next AST node in the list. + **/ + public Statement nextItem() { + return (Statement) I.next(); + } + + + /** + * Returns the previous element in the list. This method may be called repeatedly to iterate + * through the list backwards, or intermixed with calls to next to go back and forth. (Note + * that alternating calls to next and previous will return the same element repeatedly.) + * + * @return The previous AST node in the list. + **/ + public Statement previousItem() { + return (Statement) I.previous(); + } + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/SubscriptVariable.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/SubscriptVariable.java index 3520cdc2..42f6ff09 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/SubscriptVariable.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/SubscriptVariable.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. 
See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,105 +11,104 @@ /** - * This class represents an array access. - * - * @author Nick Rizzolo + * This class represents an array access. + * + * @author Nick Rizzolo **/ -public class SubscriptVariable extends VariableInstance -{ - /** (¬ø) The expression describing the array to be accessed. */ - public Expression array; - /** - * (¬ø) The expression whose evaluation will be used as the - * subscript. - **/ - public Expression subscript; - - - /** - * Initializing constructor. Line and byte offset information is taken - * from the first expression. - * - * @param ar The expression describing the array to be accessed. - * @param sub The subscript expression. - **/ - public SubscriptVariable(Expression ar, Expression sub) { - super(ar.line, ar.byteOffset); - array = ar; - subscript = sub; - } - - - /** - * Returns a hash code value for java hash structures. - * - * @return A hash code for this object. - **/ - public int hashCode() { return array.hashCode() + subscript.hashCode(); } - - - /** - * Indicates whether some other object is "equal to" this one. - * - * @return true iff this object is the same as the argument. - **/ - public boolean equals(Object o) { - if (!(o instanceof SubscriptVariable)) return false; - SubscriptVariable s = (SubscriptVariable) o; - return array.equals(s.array) && subscript.equals(s.subscript); - } - - - /** - * Returns an iterator used to successively access the children of this - * node. 
- * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(2); - I.children[0] = array; - I.children[1] = subscript; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return new SubscriptVariable((Expression) array.clone(), - (Expression) subscript.clone()); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - if (parenthesized) buffer.append("("); - array.write(buffer); - buffer.append("["); - subscript.write(buffer); - buffer.append("]"); - if (parenthesized) buffer.append(")"); - } +public class SubscriptVariable extends VariableInstance { + /** (¬ø) The expression describing the array to be accessed. */ + public Expression array; + /** + * (¬ø) The expression whose evaluation will be used as the subscript. + **/ + public Expression subscript; + + + /** + * Initializing constructor. Line and byte offset information is taken from the first + * expression. + * + * @param ar The expression describing the array to be accessed. + * @param sub The subscript expression. + **/ + public SubscriptVariable(Expression ar, Expression sub) { + super(ar.line, ar.byteOffset); + array = ar; + subscript = sub; + } + + + /** + * Returns a hash code value for java hash structures. + * + * @return A hash code for this object. 
+ **/ + public int hashCode() { + return array.hashCode() + subscript.hashCode(); + } + + + /** + * Indicates whether some other object is "equal to" this one. + * + * @return true iff this object is the same as the argument. + **/ + public boolean equals(Object o) { + if (!(o instanceof SubscriptVariable)) + return false; + SubscriptVariable s = (SubscriptVariable) o; + return array.equals(s.array) && subscript.equals(s.subscript); + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(2); + I.children[0] = array; + I.children[1] = subscript; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new SubscriptVariable((Expression) array.clone(), (Expression) subscript.clone()); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. 
+ **/ + public void write(StringBuffer buffer) { + if (parenthesized) + buffer.append("("); + array.write(buffer); + buffer.append("["); + subscript.write(buffer); + buffer.append("]"); + if (parenthesized) + buffer.append(")"); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/SwitchBlock.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/SwitchBlock.java index e29a23b7..e9ec2185 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/SwitchBlock.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/SwitchBlock.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,146 +11,135 @@ /** - * Represents the body of a switch statement. - * - * @author Nick Rizzolo + * Represents the body of a switch statement. + * + * @author Nick Rizzolo **/ -public class SwitchBlock extends ASTNode -{ - /** (¬ø) The list of labeled blocks of statements, if any. */ - public SwitchGroupList groups; - /** (¬ø) The trailing list of labels, if any. */ - public SwitchLabelList labels; - - - /** - * Initializing constructor. - * - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. 
- **/ - public SwitchBlock(int line, int byteOffset) { - this(new SwitchGroupList(), new SwitchLabelList(), line, byteOffset); - } - - /** - * Initializing constructor. Line and byte offset information are taken - * from the labels. - * - * @param l The list of labels. - **/ - public SwitchBlock(SwitchLabelList l) { - this(new SwitchGroupList(), l, l.line, l.byteOffset); - } - - /** - * Initializing constructor. Line and byte offset information are taken - * from the groups. - * - * @param g The list of groups. - **/ - public SwitchBlock(SwitchGroupList g) { this(g, new SwitchLabelList()); } - - /** - * Initializing constructor. Line and byte offset information are taken - * from the groups. - * - * @param g The list of groups. - * @param l The list of labels. - **/ - public SwitchBlock(SwitchGroupList g, SwitchLabelList l) { - this(g, l, g.line, g.byteOffset); - } - - /** - * Full constructor. - * - * @param g The list of groups. - * @param l The list of labels. - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public SwitchBlock(SwitchGroupList g, SwitchLabelList l, int line, - int byteOffset) { - super(line, byteOffset); - groups = g; - labels = l; - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(2); - I.children[0] = groups; - I.children[1] = labels; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. 
- **/ - public Object clone() { - return new SwitchBlock((SwitchGroupList) groups.clone(), - (SwitchLabelList) labels.clone()); - } - - - /** Returns a hash code for this {@link ASTNode}. */ - public int hashCode() { - return 31 * groups.hashCode() + 7 * labels.hashCode(); - } - - - /** - * Distinguishes this {@link ASTNode} from other objects according to its - * contents recursively. - * - * @param o Another object. - * @return true iff this node is equal to o. - **/ - public boolean equals(Object o) { - if (!(o instanceof SwitchBlock)) return false; - SwitchBlock s = (SwitchBlock) o; - return groups.equals(s.groups) && labels.equals(s.labels); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - groups.write(buffer); - buffer.append(" "); - labels.write(buffer); - } +public class SwitchBlock extends ASTNode { + /** (¬ø) The list of labeled blocks of statements, if any. */ + public SwitchGroupList groups; + /** (¬ø) The trailing list of labels, if any. */ + public SwitchLabelList labels; + + + /** + * Initializing constructor. + * + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + public SwitchBlock(int line, int byteOffset) { + this(new SwitchGroupList(), new SwitchLabelList(), line, byteOffset); + } + + /** + * Initializing constructor. Line and byte offset information are taken from the labels. + * + * @param l The list of labels. 
+ **/ + public SwitchBlock(SwitchLabelList l) { + this(new SwitchGroupList(), l, l.line, l.byteOffset); + } + + /** + * Initializing constructor. Line and byte offset information are taken from the groups. + * + * @param g The list of groups. + **/ + public SwitchBlock(SwitchGroupList g) { + this(g, new SwitchLabelList()); + } + + /** + * Initializing constructor. Line and byte offset information are taken from the groups. + * + * @param g The list of groups. + * @param l The list of labels. + **/ + public SwitchBlock(SwitchGroupList g, SwitchLabelList l) { + this(g, l, g.line, g.byteOffset); + } + + /** + * Full constructor. + * + * @param g The list of groups. + * @param l The list of labels. + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + public SwitchBlock(SwitchGroupList g, SwitchLabelList l, int line, int byteOffset) { + super(line, byteOffset); + groups = g; + labels = l; + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(2); + I.children[0] = groups; + I.children[1] = labels; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new SwitchBlock((SwitchGroupList) groups.clone(), (SwitchLabelList) labels.clone()); + } + + + /** Returns a hash code for this {@link ASTNode}. */ + public int hashCode() { + return 31 * groups.hashCode() + 7 * labels.hashCode(); + } + + + /** + * Distinguishes this {@link ASTNode} from other objects according to its contents recursively. + * + * @param o Another object. 
+ * @return true iff this node is equal to o. + **/ + public boolean equals(Object o) { + if (!(o instanceof SwitchBlock)) + return false; + SwitchBlock s = (SwitchBlock) o; + return groups.equals(s.groups) && labels.equals(s.labels); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + groups.write(buffer); + buffer.append(" "); + labels.write(buffer); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/SwitchGroup.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/SwitchGroup.java index 5133e88c..39859f9f 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/SwitchGroup.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/SwitchGroup.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,100 +11,93 @@ /** - * Represents a list of statements labeled by one or more - * SwitchLabels. 
- * - * @author Nick Rizzolo + * Represents a list of statements labeled by one or more SwitchLabels. + * + * @author Nick Rizzolo **/ -public class SwitchGroup extends ASTNode -{ - /** (¬ø) The list of labels labeling this group. */ - public SwitchLabelList labels; - /** (¬ø) The list of statements in the group. */ - public StatementList statements; - - - /** - * Full constructor. Line and byte offset information are taken from the - * labels. - * - * @param l The list of labels. - * @param s The list of statements. - **/ - public SwitchGroup(SwitchLabelList l, StatementList s) { - super(l.line, l.byteOffset); - labels = l; - statements = s; - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(2); - I.children[0] = labels; - I.children[1] = statements; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return new SwitchGroup((SwitchLabelList) labels.clone(), - (StatementList) statements.clone()); - } - - - /** Returns a hash code for this {@link ASTNode}. */ - public int hashCode() { - return 31 * labels.hashCode() + 17 * statements.hashCode(); - } - - - /** - * Distinguishes this {@link ASTNode} from other objects according to its - * contents recursively. - * - * @param o Another object. - * @return true iff this node is equal to o. - **/ - public boolean equals(Object o) { - if (!(o instanceof SwitchGroup)) return false; - SwitchGroup s = (SwitchGroup) o; - return labels.equals(s.labels) && statements.equals(s.statements); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. 
- **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - labels.write(buffer); - buffer.append(" "); - statements.write(buffer); - } +public class SwitchGroup extends ASTNode { + /** (¬ø) The list of labels labeling this group. */ + public SwitchLabelList labels; + /** (¬ø) The list of statements in the group. */ + public StatementList statements; + + + /** + * Full constructor. Line and byte offset information are taken from the labels. + * + * @param l The list of labels. + * @param s The list of statements. + **/ + public SwitchGroup(SwitchLabelList l, StatementList s) { + super(l.line, l.byteOffset); + labels = l; + statements = s; + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(2); + I.children[0] = labels; + I.children[1] = statements; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new SwitchGroup((SwitchLabelList) labels.clone(), (StatementList) statements.clone()); + } + + + /** Returns a hash code for this {@link ASTNode}. */ + public int hashCode() { + return 31 * labels.hashCode() + 17 * statements.hashCode(); + } + + + /** + * Distinguishes this {@link ASTNode} from other objects according to its contents recursively. + * + * @param o Another object. + * @return true iff this node is equal to o. 
+ **/ + public boolean equals(Object o) { + if (!(o instanceof SwitchGroup)) + return false; + SwitchGroup s = (SwitchGroup) o; + return labels.equals(s.labels) && statements.equals(s.statements); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + labels.write(buffer); + buffer.append(" "); + statements.write(buffer); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/SwitchGroupList.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/SwitchGroupList.java index 1303908f..fe74f348 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/SwitchGroupList.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/SwitchGroupList.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -16,134 +13,132 @@ /** - * Currently, this is just a wrapper class for LinkedList. The - * code that uses it looks a little cleaner when casts are all taken care of - * automatically. 
- * - * @author Nick Rizzolo + * Currently, this is just a wrapper class for LinkedList. The code that uses it looks + * a little cleaner when casts are all taken care of automatically. + * + * @author Nick Rizzolo **/ -public class SwitchGroupList extends List -{ - /** Default constructor. */ - public SwitchGroupList() { super(-1, -1, " "); } - - /** - * Initializing constructor. Requires its argument to be - * non-null. - * - * @param s A single SwitchGroup with which to initialize - * this list. - **/ - public SwitchGroupList(SwitchGroup s) { - super(s.line, s.byteOffset, " "); - list.add(s); - } - - - /** - * Adds another SwitchGroup to the end of the list. - * - * @param s A reference to the SwitchGroup to be added. - **/ - public void add(SwitchGroup s) { list.add(s); } - - - /** - * Adds all the SwitchGroups in another - * SwitchGroupList to the end of this - * SwitchGroupList. - * - * @param s The list to be added. - **/ - public void addAll(SwitchGroupList s) { list.addAll(s.list); } - - - /** - * Transforms the list into an array of statements. - * - * @return An array of statements containing references to every statement - * in the list. - **/ - public SwitchGroup[] toArray() { - return (SwitchGroup[]) list.toArray(new SwitchGroup[list.size()]); - } - - - /** - * Returns an iterator used specifically to access the elements of this - * list. - * - * @return An iterator used specifically to access the elements of this - * list. - **/ - public SwitchGroupListIterator listIterator() { - return new SwitchGroupListIterator(); - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { return listIterator(); } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. 
- **/ - public Object clone() { - SwitchGroupList clone = new SwitchGroupList(); - for (Iterator i = list.iterator(); i.hasNext(); ) - clone.list.add(((SwitchGroup) i.next()).clone()); - return clone; - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Used to iterate though the children of a list of AST nodes. The entire - * interface of java.util.ListIterator is exposed through this - * class. - * - * @author Nick Rizzolo - **/ - public class SwitchGroupListIterator extends NodeListIterator - { +public class SwitchGroupList extends List { + /** Default constructor. */ + public SwitchGroupList() { + super(-1, -1, " "); + } + /** - * Returns the next AST node in the list. This method may be called - * repeatedly to iterate through the list, or intermixed with calls to - * previous() to go back and forth. (Note that alternating - * calls to next() and previous() will return - * the same element repeatedly.) - * - * @return The next AST node in the list. + * Initializing constructor. Requires its argument to be non-null. + * + * @param s A single SwitchGroup with which to initialize this list. **/ - public SwitchGroup nextItem() { return (SwitchGroup) I.next(); } + public SwitchGroupList(SwitchGroup s) { + super(s.line, s.byteOffset, " "); + list.add(s); + } /** - * Returns the previous element in the list. This method may be called - * repeatedly to iterate through the list backwards, or intermixed with - * calls to next to go back and forth. (Note that alternating calls to - * next and previous will return the same element repeatedly.) - * - * @return The previous AST node in the list. + * Adds another SwitchGroup to the end of the list. + * + * @param s A reference to the SwitchGroup to be added. 
**/ - public SwitchGroup previousItem() { return (SwitchGroup) I.previous(); } - } -} + public void add(SwitchGroup s) { + list.add(s); + } + + + /** + * Adds all the SwitchGroups in another SwitchGroupList to the end of + * this SwitchGroupList. + * + * @param s The list to be added. + **/ + public void addAll(SwitchGroupList s) { + list.addAll(s.list); + } + + + /** + * Transforms the list into an array of statements. + * + * @return An array of statements containing references to every statement in the list. + **/ + public SwitchGroup[] toArray() { + return (SwitchGroup[]) list.toArray(new SwitchGroup[list.size()]); + } + + + /** + * Returns an iterator used specifically to access the elements of this list. + * + * @return An iterator used specifically to access the elements of this list. + **/ + public SwitchGroupListIterator listIterator() { + return new SwitchGroupListIterator(); + } + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + return listIterator(); + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + SwitchGroupList clone = new SwitchGroupList(); + for (Iterator i = list.iterator(); i.hasNext();) + clone.list.add(((SwitchGroup) i.next()).clone()); + return clone; + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Used to iterate though the children of a list of AST nodes. The entire interface of + * java.util.ListIterator is exposed through this class. 
+ * + * @author Nick Rizzolo + **/ + public class SwitchGroupListIterator extends NodeListIterator { + /** + * Returns the next AST node in the list. This method may be called repeatedly to iterate + * through the list, or intermixed with calls to previous() to go back and + * forth. (Note that alternating calls to next() and previous() + * will return the same element repeatedly.) + * + * @return The next AST node in the list. + **/ + public SwitchGroup nextItem() { + return (SwitchGroup) I.next(); + } + + + /** + * Returns the previous element in the list. This method may be called repeatedly to iterate + * through the list backwards, or intermixed with calls to next to go back and forth. (Note + * that alternating calls to next and previous will return the same element repeatedly.) + * + * @return The previous AST node in the list. + **/ + public SwitchGroup previousItem() { + return (SwitchGroup) I.previous(); + } + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/SwitchLabel.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/SwitchLabel.java index 5fb75857..3d92dafa 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/SwitchLabel.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/SwitchLabel.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,104 +11,99 @@ /** - * Represents a case or default label inside a switch block. - * - * @author Nick Rizzolo + * Represents a case or default label inside a switch block. + * + * @author Nick Rizzolo **/ -public class SwitchLabel extends ASTNode -{ - /** (ø) The expression representing the value to match, if any. */ - public Expression value; - - - /** - * Full constructor. - * - * @param v The expression representing the value to match, if - * any. - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public SwitchLabel(Expression v, int line, int byteOffset) { - super(line, byteOffset); - value = v; - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(value == null ? 0 : 1); - if (value != null) I.children[0] = value; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return new SwitchLabel(value == null ? null : (Expression) value.clone(), - -1, -1); - } - - - /** Returns a hash code for this {@link ASTNode}. 
*/ - public int hashCode() { - int result = 17; - if (value != null) result += 7 * value.hashCode(); - return result; - } - - - /** - * Distinguishes this {@link ASTNode} from other objects according to its - * contents recursively. - * - * @param o Another object. - * @return true iff this node is equal to o. - **/ - public boolean equals(Object o) { - if (!(o instanceof SwitchLabel)) return false; - SwitchLabel s = (SwitchLabel) o; - return value == null ? s.value == null : value.equals(s.value); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - if (value == null) buffer.append("default:"); - else { - buffer.append("case "); - value.write(buffer); - buffer.append(":"); +public class SwitchLabel extends ASTNode { + /** (ø) The expression representing the value to match, if any. */ + public Expression value; + + + /** + * Full constructor. + * + * @param v The expression representing the value to match, if any. + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + public SwitchLabel(Expression v, int line, int byteOffset) { + super(line, byteOffset); + value = v; } - } -} + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(value == null ? 
0 : 1); + if (value != null) + I.children[0] = value; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new SwitchLabel(value == null ? null : (Expression) value.clone(), -1, -1); + } + + + /** Returns a hash code for this {@link ASTNode}. */ + public int hashCode() { + int result = 17; + if (value != null) + result += 7 * value.hashCode(); + return result; + } + + + /** + * Distinguishes this {@link ASTNode} from other objects according to its contents recursively. + * + * @param o Another object. + * @return true iff this node is equal to o. + **/ + public boolean equals(Object o) { + if (!(o instanceof SwitchLabel)) + return false; + SwitchLabel s = (SwitchLabel) o; + return value == null ? s.value == null : value.equals(s.value); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. 
+ **/ + public void write(StringBuffer buffer) { + if (value == null) + buffer.append("default:"); + else { + buffer.append("case "); + value.write(buffer); + buffer.append(":"); + } + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/SwitchLabelList.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/SwitchLabelList.java index 576f7567..aed24f27 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/SwitchLabelList.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/SwitchLabelList.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -16,134 +13,132 @@ /** - * Currently, this is just a wrapper class for LinkedList. The - * code that uses it looks a little cleaner when casts are all taken care of - * automatically. - * - * @author Nick Rizzolo + * Currently, this is just a wrapper class for LinkedList. The code that uses it looks + * a little cleaner when casts are all taken care of automatically. + * + * @author Nick Rizzolo **/ -public class SwitchLabelList extends List -{ - /** Default constructor. */ - public SwitchLabelList() { super(-1, -1, " "); } - - /** - * Initializing constructor. Requires its argument to be - * non-null. - * - * @param s A single SwitchLabel with which to initialize - * this list. 
- **/ - public SwitchLabelList(SwitchLabel s) { - super(s.line, s.byteOffset, " "); - list.add(s); - } - - - /** - * Adds another SwitchLabel to the end of the list. - * - * @param s A reference to the SwitchLabel to be added. - **/ - public void add(SwitchLabel s) { list.add(s); } - - - /** - * Adds all the SwitchLabels in another - * SwitchLabelList to the end of this - * SwitchLabelList. - * - * @param s The list to be added. - **/ - public void addAll(SwitchLabelList s) { list.addAll(s.list); } - - - /** - * Transforms the list into an array of statements. - * - * @return An array of statements containing references to every statement - * in the list. - **/ - public SwitchLabel[] toArray() { - return (SwitchLabel[]) list.toArray(new SwitchLabel[list.size()]); - } - - - /** - * Returns an iterator used specifically to access the elements of this - * list. - * - * @return An iterator used specifically to access the elements of this - * list. - **/ - public SwitchLabelListIterator listIterator() { - return new SwitchLabelListIterator(); - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { return listIterator(); } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - SwitchLabelList clone = new SwitchLabelList(); - for (Iterator i = list.iterator(); i.hasNext(); ) - clone.list.add(((SwitchLabel) i.next()).clone()); - return clone; - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Used to iterate though the children of a list of AST nodes. 
The entire - * interface of java.util.ListIterator is exposed through this - * class. - * - * @author Nick Rizzolo - **/ - public class SwitchLabelListIterator extends NodeListIterator - { +public class SwitchLabelList extends List { + /** Default constructor. */ + public SwitchLabelList() { + super(-1, -1, " "); + } + /** - * Returns the next AST node in the list. This method may be called - * repeatedly to iterate through the list, or intermixed with calls to - * previous() to go back and forth. (Note that alternating - * calls to next() and previous() will return - * the same element repeatedly.) - * - * @return The next AST node in the list. + * Initializing constructor. Requires its argument to be non-null. + * + * @param s A single SwitchLabel with which to initialize this list. **/ - public SwitchLabel nextItem() { return (SwitchLabel) I.next(); } + public SwitchLabelList(SwitchLabel s) { + super(s.line, s.byteOffset, " "); + list.add(s); + } /** - * Returns the previous element in the list. This method may be called - * repeatedly to iterate through the list backwards, or intermixed with - * calls to next to go back and forth. (Note that alternating calls to - * next and previous will return the same element repeatedly.) - * - * @return The previous AST node in the list. + * Adds another SwitchLabel to the end of the list. + * + * @param s A reference to the SwitchLabel to be added. **/ - public SwitchLabel previousItem() { return (SwitchLabel) I.previous(); } - } -} + public void add(SwitchLabel s) { + list.add(s); + } + + + /** + * Adds all the SwitchLabels in another SwitchLabelList to the end of + * this SwitchLabelList. + * + * @param s The list to be added. + **/ + public void addAll(SwitchLabelList s) { + list.addAll(s.list); + } + + + /** + * Transforms the list into an array of statements. + * + * @return An array of statements containing references to every statement in the list. 
+ **/ + public SwitchLabel[] toArray() { + return (SwitchLabel[]) list.toArray(new SwitchLabel[list.size()]); + } + + + /** + * Returns an iterator used specifically to access the elements of this list. + * + * @return An iterator used specifically to access the elements of this list. + **/ + public SwitchLabelListIterator listIterator() { + return new SwitchLabelListIterator(); + } + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + return listIterator(); + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + SwitchLabelList clone = new SwitchLabelList(); + for (Iterator i = list.iterator(); i.hasNext();) + clone.list.add(((SwitchLabel) i.next()).clone()); + return clone; + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Used to iterate though the children of a list of AST nodes. The entire interface of + * java.util.ListIterator is exposed through this class. + * + * @author Nick Rizzolo + **/ + public class SwitchLabelListIterator extends NodeListIterator { + /** + * Returns the next AST node in the list. This method may be called repeatedly to iterate + * through the list, or intermixed with calls to previous() to go back and + * forth. (Note that alternating calls to next() and previous() + * will return the same element repeatedly.) + * + * @return The next AST node in the list. + **/ + public SwitchLabel nextItem() { + return (SwitchLabel) I.next(); + } + + + /** + * Returns the previous element in the list. 
This method may be called repeatedly to iterate + * through the list backwards, or intermixed with calls to next to go back and forth. (Note + * that alternating calls to next and previous will return the same element repeatedly.) + * + * @return The previous AST node in the list. + **/ + public SwitchLabel previousItem() { + return (SwitchLabel) I.previous(); + } + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/SwitchStatement.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/SwitchStatement.java index 7c77d632..825e8ce9 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/SwitchStatement.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/SwitchStatement.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,110 +11,103 @@ /** - * Represents a switch statement. - * - * @author Nick Rizzolo + * Represents a switch statement. + * + * @author Nick Rizzolo **/ -public class SwitchStatement extends Statement -{ - /** - * (¬ø) The expression determining which statements to execute. - **/ - public Expression expression; - /** - * (¬ø) The various code blocks that are executed depending on - * the expression's value. - **/ - public SwitchBlock block; - - - /** - * Full constructor. - * - * @param e The expression determining which code to execute. - * @param b The code to execute. 
- * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public SwitchStatement(Expression e, SwitchBlock b, int line, - int byteOffset) { - super(line, byteOffset); - expression = e; - block = b; - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(2); - I.children[0] = expression; - I.children[1] = block; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return new SwitchStatement((Expression) expression.clone(), - (SwitchBlock) block.clone(), -1, -1); - } - - - /** Returns a hash code for this {@link ASTNode}. */ - public int hashCode() { - return 31 * expression.hashCode() + 17 * block.hashCode(); - } - - - /** - * Distinguishes this {@link ASTNode} from other objects according to its - * contents recursively. - * - * @param o Another object. - * @return true iff this node is equal to o. - **/ - public boolean equals(Object o) { - if (!(o instanceof SwitchStatement)) return false; - SwitchStatement s = (SwitchStatement) o; - return expression.equals(s.expression) && block.equals(s.block); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. 
- * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - buffer.append("switch ("); - expression.write(buffer); - buffer.append(") "); - block.write(buffer); - } +public class SwitchStatement extends Statement { + /** + * (¬ø) The expression determining which statements to execute. + **/ + public Expression expression; + /** + * (¬ø) The various code blocks that are executed depending on the expression's + * value. + **/ + public SwitchBlock block; + + + /** + * Full constructor. + * + * @param e The expression determining which code to execute. + * @param b The code to execute. + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + public SwitchStatement(Expression e, SwitchBlock b, int line, int byteOffset) { + super(line, byteOffset); + expression = e; + block = b; + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(2); + I.children[0] = expression; + I.children[1] = block; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new SwitchStatement((Expression) expression.clone(), (SwitchBlock) block.clone(), + -1, -1); + } + + + /** Returns a hash code for this {@link ASTNode}. */ + public int hashCode() { + return 31 * expression.hashCode() + 17 * block.hashCode(); + } + + + /** + * Distinguishes this {@link ASTNode} from other objects according to its contents recursively. + * + * @param o Another object. + * @return true iff this node is equal to o. 
+ **/ + public boolean equals(Object o) { + if (!(o instanceof SwitchStatement)) + return false; + SwitchStatement s = (SwitchStatement) o; + return expression.equals(s.expression) && block.equals(s.block); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + buffer.append("switch ("); + expression.write(buffer); + buffer.append(") "); + block.write(buffer); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/SymbolTable.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/SymbolTable.java index bbc2f3a1..5fd75251 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/SymbolTable.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/SymbolTable.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -28,370 +25,385 @@ /** - * A symbol table is simply a HashMap associating names with - * their types. 
This class also assumes responsibility for determining type - * information for externally defined names. + * A symbol table is simply a HashMap associating names with their types. This class + * also assumes responsibility for determining type information for externally defined names. * - *

The global symbol table also keeps track of package and - * import declarations so that when a name cannot be found - * otherwise, it is searched for in the imported packages. + *

+ * The global symbol table also keeps track of package and import + * declarations so that when a name cannot be found otherwise, it is searched for in the imported + * packages. * * @author Nick Rizzolo **/ -public class SymbolTable -{ - /** - * Given the name of a class, which may or may not be fully qualified, this - * method returns the absolute path to the file with the same name and the - * given extension. This method was written for use by the LBJava compiler - * and its generated code. The user should not need to call it. - * - * @param name The name of the class. - * @param extension The extension of the file to search for. - * @param path Paths in which to search for name.extension. - * @return An object representing the file or null if it can't - * be located. - **/ - protected static File findFile(String name, String extension, String path) { - String fqName = name.replace('.', File.separatorChar) + "." + extension; - String[] paths = path.split("\\" + File.pathSeparator); - - for (int i = 0; i < paths.length; ++i) { - File file = new File(paths[i] + File.separator + fqName); - if (file.exists()) return file; - } - - return null; - } - - - /** - * The parent of this symbol table, or null if there is none. - **/ - private SymbolTable parent; - /** The children of this symbol table. */ - private LinkedList children; - /** Associates variable names with their types. */ - private HashMap table; - /** Associates externally defined names with their types. */ - private HashMap external; - /** The name representing the package of this source. */ - private String sourcePackage; - /** The list of names representing packages that have been imported. */ - private HashSet imported; - - - /** Initializes the member variables. */ - public SymbolTable() { this(null); } - - /** - * Initializes the member variables. - * - * @param p The parent of this table. 
- **/ - public SymbolTable(SymbolTable p) { - parent = p; - table = new HashMap(); - children = new LinkedList(); - sourcePackage = ""; - - if (parent == null) { - external = new HashMap(); - imported = new HashSet(); - } - else parent.addChild(this); - } - - - /** Retrieves the parent of this table. */ - public SymbolTable getParent() { return parent; } - - - /** - * Adds a child to this table. - * - * @param s The child to add. - **/ - public void addChild(SymbolTable s) { children.add(s); } - - - /** - * Adds a new entry to the table. - * - * @param a An argument containing the name its associated type. - * @return The type previously associated with the given name or - * null if no type was previously associated with it. - **/ - public Type put(Argument a) { return put(a.getName(), a.getType()); } - - - /** - * Adds a new entry to the table. - * - * @param name The name to add to the table. - * @param type The type to associate with the given name. - * @return The type previously associated with the given name or - * null if no type was previously associated with it. - **/ - public Type put(ClassifierName name, Type type) { - return put(name.toString(), type); - } - - - /** - * Adds a new entry to the table. - * - * @param name The name to add to the table. - * @param type The type to associate with the given name. - * @return The type previously associated with the given name or - * null if no type was previously associated with it. - **/ - public Type put(Name name, Type type) { return put(name.toString(), type); } - - - /** - * Adds a new entry to the table. - * - * @param name The name to add to the table. - * @param type The type to associate with the given name. - * @return The type previously associated with the given name or - * null if no type was previously associated with it. 
- **/ - public Type put(String name, Type type) { - Type result = (Type) table.get(name); - table.put(name, type); - return result; - } - - - /** - * Retrieves the type associated with the given name. If this table does - * not contain the name, imported packages are searched. - * - * @param name The name to retrieve type information for. - * @return The type associated with the given name or null if - * no type information could be found. - **/ - public Type get(ClassifierName name) { +public class SymbolTable { + /** + * Given the name of a class, which may or may not be fully qualified, this method returns the + * absolute path to the file with the same name and the given extension. This method was written + * for use by the LBJava compiler and its generated code. The user should not need to call it. + * + * @param name The name of the class. + * @param extension The extension of the file to search for. + * @param path Paths in which to search for name.extension. + * @return An object representing the file or null if it can't be located. + **/ + protected static File findFile(String name, String extension, String path) { + String fqName = name.replace('.', File.separatorChar) + "." + extension; + String[] paths = path.split("\\" + File.pathSeparator); + + for (int i = 0; i < paths.length; ++i) { + File file = new File(paths[i] + File.separator + fqName); + if (file.exists()) + return file; + } + + return null; + } + + + /** + * The parent of this symbol table, or null if there is none. + **/ + private SymbolTable parent; + /** The children of this symbol table. */ + private LinkedList children; + /** Associates variable names with their types. */ + private HashMap table; + /** Associates externally defined names with their types. */ + private HashMap external; + /** The name representing the package of this source. */ + private String sourcePackage; + /** The list of names representing packages that have been imported. 
*/ + private HashSet imported; + + + /** Initializes the member variables. */ + public SymbolTable() { + this(null); + } + + /** + * Initializes the member variables. + * + * @param p The parent of this table. + **/ + public SymbolTable(SymbolTable p) { + parent = p; + table = new HashMap(); + children = new LinkedList(); + sourcePackage = ""; + + if (parent == null) { + external = new HashMap(); + imported = new HashSet(); + } else + parent.addChild(this); + } + + + /** Retrieves the parent of this table. */ + public SymbolTable getParent() { + return parent; + } + + + /** + * Adds a child to this table. + * + * @param s The child to add. + **/ + public void addChild(SymbolTable s) { + children.add(s); + } + + + /** + * Adds a new entry to the table. + * + * @param a An argument containing the name its associated type. + * @return The type previously associated with the given name or null if no type + * was previously associated with it. + **/ + public Type put(Argument a) { + return put(a.getName(), a.getType()); + } + + + /** + * Adds a new entry to the table. + * + * @param name The name to add to the table. + * @param type The type to associate with the given name. + * @return The type previously associated with the given name or null if no type + * was previously associated with it. + **/ + public Type put(ClassifierName name, Type type) { + return put(name.toString(), type); + } + + + /** + * Adds a new entry to the table. + * + * @param name The name to add to the table. + * @param type The type to associate with the given name. + * @return The type previously associated with the given name or null if no type + * was previously associated with it. + **/ + public Type put(Name name, Type type) { + return put(name.toString(), type); + } + + + /** + * Adds a new entry to the table. + * + * @param name The name to add to the table. + * @param type The type to associate with the given name. 
+ * @return The type previously associated with the given name or null if no type + * was previously associated with it. + **/ + public Type put(String name, Type type) { + Type result = (Type) table.get(name); + table.put(name, type); + return result; + } + + + /** + * Retrieves the type associated with the given name. If this table does not contain the name, + * imported packages are searched. + * + * @param name The name to retrieve type information for. + * @return The type associated with the given name or null if no type information + * could be found. + **/ + public Type get(ClassifierName name) { String nameStr = name.referent.toString(); Type classifierType = get(nameStr); try { Class className = classForName(name); name.isField = className != null && className.getField(nameStr) != null; + } catch (NoSuchFieldException e) { } - catch (NoSuchFieldException e) {} return classifierType; - } - - - /** - * Retrieves the type associated with the given name. If this table does - * not contain the name, imported packages are searched. - * - * @param name The name to retrieve type information for. - * @return The type associated with the given name or null if - * no type information could be found. - **/ - public Type get(Name name) { return get(name.toString()); } - - - /** - * Retrieves the type associated with the given name. If this table does - * not contain the name, imported packages are searched. - * - * @param name The name to retrieve type information for. - * @return The type associated with the given name or null if - * no type information could be found. 
- **/ - public Type get(String name) { - if (localContainsKey(name)) return (Type) table.get(name); - if (parent != null) return parent.get(name); - if (external.containsKey(name)) return (Type) external.get(name); - - Type result = null; - Class c = classForName(name); - - if (c != null) { - if (ParameterizedConstraint.class.isAssignableFrom(c)) { - ParameterizedConstraint constraint = null; - try { constraint = (ParameterizedConstraint) c.newInstance(); } - catch (Exception e) { - System.err.println("Can't instantiate parameterized constraint '" - + c + "'. Make sure there is a public, no argument " - + "constructor defined:"); - e.printStackTrace(); - System.exit(1); - } - - result = - new ConstraintType(Type.parseType(constraint.getInputType())); - } - else if (Classifier.class.isAssignableFrom(c)) { - Classifier classifier = null; - try { classifier = (Classifier) c.newInstance(); } - catch (Exception e) { - System.err.println("Can't instantiate classifier '" + c - + "'. Make sure there is a public, no argument constructor " - + "defined:"); - e.printStackTrace(); - System.exit(1); - } - - result = - new ClassifierType( - Type.parseType(classifier.getInputType()), - new ClassifierReturnType( - classifier.getOutputType(), - new ConstantList(classifier.allowableValues())), - Learner.class.isAssignableFrom(c)); - } - else if (Inference.class.isAssignableFrom(c)) { - Inference inference = null; - try { inference = (Inference) c.newInstance(); } - catch (Exception e) { - System.err.println("Can't instantiate inference '" + c - + "'. 
Make sure there is a public, no argument constructor " - + "defined:"); - e.printStackTrace(); - System.exit(1); - } - - Type headType = new ReferenceType(new Name(inference.getHeadType())); - String[] headFinderTypeStrings = inference.getHeadFinderTypes(); - Type[] headFinderTypes = new Type[headFinderTypeStrings.length]; - for (int i = 0; i < headFinderTypeStrings.length; ++i) - headFinderTypes[i] = - new ReferenceType(new Name(headFinderTypeStrings[i])); - - result = new InferenceType(headType, headFinderTypes); - } - else if (Normalizer.class.isAssignableFrom(c)) { - try { c.newInstance(); } - catch (Exception e) { - System.err.println("Can't instantiate normalizer '" + c - + "'. Make sure there is a public, no argument constructor " - + "defined:"); - e.printStackTrace(); - System.exit(1); - } - - result = new NormalizerType(); - } - } - - external.put(name, result); - - return result; - } - - - /** - * Attempts to locate the named class in the current package and any - * imported packages. If the corresponding Java source file is found and - * either the class file does not exist or its time of last modification is - * earlier than the java file's, it is recompiled. If no class with the - * specified name is found, null is returned. - * - * @param name The name of the class to search for. - * @return The Class object representing that class. - **/ - public Class classForName(ClassifierName name) { - return classForName(name.referent); - } - - - /** - * Attempts to locate the named class in the current package and any - * imported packages. If the corresponding Java source file is found and - * either the class file does not exist or its time of last modification is - * earlier than the java file's, it is recompiled. If no class with the - * specified name is found, null is returned. - * - * @param name The name of the class to search for. - * @return The Class object representing that class. 
- **/ - public Class classForName(String name) { - return classForName(new Name(name)); - } - - - /** - * Attempts to locate the named class in the current package and any - * imported packages. If the corresponding Java source file is found and - * either the class file does not exist or its time of last modification is - * earlier than the java file's, it is recompiled. If no class with the - * specified name is found, null is returned. - * - * @param name The name of the class to search for. - * @return The Class object representing that class. - **/ - public Class classForName(Name name) { - if (parent != null) return parent.classForName(name); - - Class result = null; - - LinkedList prefixes = new LinkedList(); - prefixes.add(""); - if (sourcePackage.length() != 0) prefixes.add(sourcePackage + "."); - prefixes.add("java.lang."); - - for (Iterator I = imported.iterator(); I.hasNext(); ) { - String s = (String) I.next(); - if (s.endsWith(".*")) prefixes.add(s.substring(0, s.length() - 1)); - else if (s.endsWith("." + name.name[0])) - prefixes.add(s.substring(0, s.length() - name.name[0].length())); + } + + + /** + * Retrieves the type associated with the given name. If this table does not contain the name, + * imported packages are searched. + * + * @param name The name to retrieve type information for. + * @return The type associated with the given name or null if no type information + * could be found. + **/ + public Type get(Name name) { + return get(name.toString()); + } + + + /** + * Retrieves the type associated with the given name. If this table does not contain the name, + * imported packages are searched. + * + * @param name The name to retrieve type information for. + * @return The type associated with the given name or null if no type information + * could be found. 
+ **/ + public Type get(String name) { + if (localContainsKey(name)) + return (Type) table.get(name); + if (parent != null) + return parent.get(name); + if (external.containsKey(name)) + return (Type) external.get(name); + + Type result = null; + Class c = classForName(name); + + if (c != null) { + if (ParameterizedConstraint.class.isAssignableFrom(c)) { + ParameterizedConstraint constraint = null; + try { + constraint = (ParameterizedConstraint) c.newInstance(); + } catch (Exception e) { + System.err.println("Can't instantiate parameterized constraint '" + c + + "'. Make sure there is a public, no argument " + + "constructor defined:"); + e.printStackTrace(); + System.exit(1); + } + + result = new ConstraintType(Type.parseType(constraint.getInputType())); + } else if (Classifier.class.isAssignableFrom(c)) { + Classifier classifier = null; + try { + classifier = (Classifier) c.newInstance(); + } catch (Exception e) { + System.err.println("Can't instantiate classifier '" + c + + "'. Make sure there is a public, no argument constructor " + + "defined:"); + e.printStackTrace(); + System.exit(1); + } + + result = + new ClassifierType(Type.parseType(classifier.getInputType()), + new ClassifierReturnType(classifier.getOutputType(), + new ConstantList(classifier.allowableValues())), + Learner.class.isAssignableFrom(c)); + } else if (Inference.class.isAssignableFrom(c)) { + Inference inference = null; + try { + inference = (Inference) c.newInstance(); + } catch (Exception e) { + System.err.println("Can't instantiate inference '" + c + + "'. 
Make sure there is a public, no argument constructor " + + "defined:"); + e.printStackTrace(); + System.exit(1); + } + + Type headType = new ReferenceType(new Name(inference.getHeadType())); + String[] headFinderTypeStrings = inference.getHeadFinderTypes(); + Type[] headFinderTypes = new Type[headFinderTypeStrings.length]; + for (int i = 0; i < headFinderTypeStrings.length; ++i) + headFinderTypes[i] = new ReferenceType(new Name(headFinderTypeStrings[i])); + + result = new InferenceType(headType, headFinderTypes); + } else if (Normalizer.class.isAssignableFrom(c)) { + try { + c.newInstance(); + } catch (Exception e) { + System.err.println("Can't instantiate normalizer '" + c + + "'. Make sure there is a public, no argument constructor " + + "defined:"); + e.printStackTrace(); + System.exit(1); + } + + result = new NormalizerType(); + } + } + + external.put(name, result); + + return result; + } + + + /** + * Attempts to locate the named class in the current package and any imported packages. If the + * corresponding Java source file is found and either the class file does not exist or its time + * of last modification is earlier than the java file's, it is recompiled. If no class with the + * specified name is found, null is returned. + * + * @param name The name of the class to search for. + * @return The Class object representing that class. + **/ + public Class classForName(ClassifierName name) { + return classForName(name.referent); + } + + + /** + * Attempts to locate the named class in the current package and any imported packages. If the + * corresponding Java source file is found and either the class file does not exist or its time + * of last modification is earlier than the java file's, it is recompiled. If no class with the + * specified name is found, null is returned. + * + * @param name The name of the class to search for. + * @return The Class object representing that class. 
+ **/ + public Class classForName(String name) { + return classForName(new Name(name)); + } + + + /** + * Attempts to locate the named class in the current package and any imported packages. If the + * corresponding Java source file is found and either the class file does not exist or its time + * of last modification is earlier than the java file's, it is recompiled. If no class with the + * specified name is found, null is returned. + * + * @param name The name of the class to search for. + * @return The Class object representing that class. + **/ + public Class classForName(Name name) { + if (parent != null) + return parent.classForName(name); + + Class result = null; + + LinkedList prefixes = new LinkedList(); + prefixes.add(""); + if (sourcePackage.length() != 0) + prefixes.add(sourcePackage + "."); + prefixes.add("java.lang."); + + for (Iterator I = imported.iterator(); I.hasNext();) { + String s = (String) I.next(); + if (s.endsWith(".*")) + prefixes.add(s.substring(0, s.length() - 1)); + else if (s.endsWith("." 
+ name.name[0])) + prefixes.add(s.substring(0, s.length() - name.name[0].length())); // Try to see if this is static field in one of the imported classes try { Class importedClass = Class.forName(s); Field field = importedClass.getField(name.toString()); - if (field != null) result = importedClass; + if (field != null) + result = importedClass; + } catch (Exception e) { } - catch (Exception e) { } - } + } - for (Iterator I = prefixes.iterator(); I.hasNext() && result == null; ) { - String prefix = (String) I.next(); - String fqName = prefix + name; - File javaFile = findFile(fqName, "java", Main.sourcePath); + for (Iterator I = prefixes.iterator(); I.hasNext() && result == null;) { + String prefix = (String) I.next(); + String fqName = prefix + name; + File javaFile = findFile(fqName, "java", Main.sourcePath); - if (javaFile != null) { - File classFile = findFile(fqName, "class", Main.classPath); + if (javaFile != null) { + File classFile = findFile(fqName, "class", Main.classPath); - if ((classFile == null - || javaFile.lastModified() > classFile.lastModified()) - && Train.runJavac(javaFile.toString())) - System.exit(1); - } + if ((classFile == null || javaFile.lastModified() > classFile.lastModified()) + && Train.runJavac(javaFile.toString())) + System.exit(1); + } - try { result = Class.forName(fqName); } - catch (Exception e) { } - catch (NoClassDefFoundError e) { } + try { + result = Class.forName(fqName); + } catch (Exception e) { + } catch (NoClassDefFoundError e) { + } - for (int i = 0; i < name.name.length - 1 && result == null; ++i) { - fqName = prefix + name.name[0]; - for (int j = 1; j <= i; ++j) fqName += "." + name.name[j]; - javaFile = findFile(fqName, "java", Main.sourcePath); + for (int i = 0; i < name.name.length - 1 && result == null; ++i) { + fqName = prefix + name.name[0]; + for (int j = 1; j <= i; ++j) + fqName += "." 
+ name.name[j]; + javaFile = findFile(fqName, "java", Main.sourcePath); - if (javaFile != null) { - File classFile = findFile(fqName, "class", Main.classPath); + if (javaFile != null) { + File classFile = findFile(fqName, "class", Main.classPath); - if ((classFile == null - || javaFile.lastModified() > classFile.lastModified()) - && Train.runJavac(javaFile.toString())) - System.exit(1); - } + if ((classFile == null || javaFile.lastModified() > classFile.lastModified()) + && Train.runJavac(javaFile.toString())) + System.exit(1); + } - for (int j = i + 1; j < name.name.length; ++j) - fqName += "$" + name.name[j]; + for (int j = i + 1; j < name.name.length; ++j) + fqName += "$" + name.name[j]; - try { result = Class.forName(fqName); } - catch (Exception e) { } - catch (NoClassDefFoundError e) { } - } + try { + result = Class.forName(fqName); + } catch (Exception e) { + } catch (NoClassDefFoundError e) { + } + } // Try to see if this is static field in one of the imported classes (prefixes) - if (prefix.isEmpty()) continue; + if (prefix.isEmpty()) + continue; try { String prefixPath = prefix.replace('.', File.separatorChar); String[] paths = Main.sourcePath.split("\\" + File.pathSeparator); @@ -401,9 +413,11 @@ else if (s.endsWith("." + name.name[0])) importedClass = Class.forName(prefix.substring(0, prefix.length() - 1)); } catch (ClassNotFoundException e) { for (String path : paths) { - if (!new File(path).isDirectory()) continue; + if (!new File(path).isDirectory()) + continue; File dir = new File(path + File.separator + prefixPath); - if (!dir.isDirectory()) continue; + if (!dir.isDirectory()) + continue; for (String nFile : dir.list()) { String className = (prefix + nFile).replace(".class", ""); importedClass = Class.forName(className); @@ -413,206 +427,209 @@ else if (s.endsWith("." 
+ name.name[0])) } } - if (importedClass == null) continue; + if (importedClass == null) + continue; Field field = importedClass.getField(name.toString()); - if (field != null) result = importedClass; + if (field != null) + result = importedClass; + } catch (Exception e) { } - catch (Exception e) { } } - return result; - } - - - /** - * Determines whether the specified name has been used as a key in this - * table or any of its parents. - * - * @param key The name. - * @return true iff key is already a key in this - * table or any of its parents. - **/ - public boolean containsKey(ClassifierName key) { - return containsKey(key.toString()); - } - - - /** - * Determines whether the specified name has been used as a key in this - * table or any of its parents. - * - * @param key The name. - * @return true iff key is already a key in this - * table or any of its parents. - **/ - public boolean containsKey(Name key) { return containsKey(key.toString()); } - - - /** - * Determines whether the specified name has been used as a key in this - * table or any of its parents. - * - * @param key The name. - * @return true iff key is already a key in this - * table or any of its parents. - **/ - public boolean containsKey(String key) { - if (!table.containsKey(key)) { - if (parent != null) return parent.containsKey(key); - return false; - } - - return true; - } - - - /** - * Determines whether the specified name has been used as a key in this - * table. - * - * @param key The name. - * @return true iff key is already a key in this - * table. - **/ - public boolean localContainsKey(ClassifierName key) { - return localContainsKey(key.toString()); - } - - - /** - * Determines whether the specified name has been used as a key in this - * table. - * - * @param key The name. - * @return true iff key is already a key in this - * table. 
- **/ - public boolean localContainsKey(Name key) { - return localContainsKey(key.toString()); - } - - - /** - * Determines whether the specified name has been used as a key in this - * table. - * - * @param key The name. - * @return true iff key is already a key in this - * table. - **/ - public boolean localContainsKey(String key) { - return table.containsKey(key); - } - - - /** - * Adds a name to the list of imported names in the top level table. - * - * @param name The name of a new imported package. - **/ - public void addImported(String name) { - if (parent == null) imported.add(name); - else parent.addImported(name); - } - - - /** - * Returns the size of the list of imported items. - * - * @return The size of the list of imported items. - **/ - public int importedSize() { - if (parent == null) return imported.size(); - return parent.importedSize(); - } - - - /** - * Sets the package name in the top level table. - * - * @param name The package name. - **/ - public void setPackage(String name) { - if (parent == null) sourcePackage = name; - else parent.setPackage(name); - } - - - /** - * Gets the package name. - * - * @return The package name. - **/ - public String getPackage() { - if (parent == null) return sourcePackage; - return parent.getPackage(); - } - - - /** - * Generates package and import statements from - * the names in the member variable imported. - * - * @param out The stream to write to. - **/ - public void generateHeader(java.io.PrintStream out) { - if (parent != null) { - parent.generateHeader(out); - return; - } - - if (sourcePackage.length() != 0) - out.println("package " + sourcePackage + ";\n"); - - String[] names = (String[]) imported.toArray(new String[0]); - Arrays.sort(names); - for (int i = 0; i < names.length; ++i) - out.println("import " + names[i] + ";"); - } - - - /** Returns the names of the symbols in this (local) table. 
*/ - public String[] getSymbols() { - return (String[]) table.keySet().toArray(new String[0]); - } - - - /** - * Prints this table and all its children recursively to - * STDOUT. - **/ - public void print() { print(""); } - - - /** - * Prints this table and all its children recursively to - * STDOUT. - * - * @param indent The level of indentation. - **/ - public void print(String indent) { - if (parent == null) { - if (sourcePackage.length() == 0) - System.out.println("Package: " + sourcePackage); - System.out.println("Imported:"); - for (Iterator I = imported.iterator(); I.hasNext(); ) - System.out.println(" " + I.next()); - } - - System.out.println(indent + "Symbols:"); - String[] symbols = (String[]) table.keySet().toArray(new String[0]); - Arrays.sort(symbols); - for (int i = 0; i < symbols.length; ++i) - System.out.println(indent + " " + symbols[i] + " -> " - + table.get(symbols[i])); - - if (children.size() > 0) { - for (Iterator I = children.iterator(); I.hasNext(); ) { - System.out.println(); - ((SymbolTable) I.next()).print(indent + " "); - } - } - } -} + return result; + } + + + /** + * Determines whether the specified name has been used as a key in this table or any of its + * parents. + * + * @param key The name. + * @return true iff key is already a key in this table or any of its + * parents. + **/ + public boolean containsKey(ClassifierName key) { + return containsKey(key.toString()); + } + + + /** + * Determines whether the specified name has been used as a key in this table or any of its + * parents. + * + * @param key The name. + * @return true iff key is already a key in this table or any of its + * parents. + **/ + public boolean containsKey(Name key) { + return containsKey(key.toString()); + } + + + /** + * Determines whether the specified name has been used as a key in this table or any of its + * parents. + * + * @param key The name. + * @return true iff key is already a key in this table or any of its + * parents. 
+ **/ + public boolean containsKey(String key) { + if (!table.containsKey(key)) { + if (parent != null) + return parent.containsKey(key); + return false; + } + + return true; + } + + + /** + * Determines whether the specified name has been used as a key in this table. + * + * @param key The name. + * @return true iff key is already a key in this table. + **/ + public boolean localContainsKey(ClassifierName key) { + return localContainsKey(key.toString()); + } + + + /** + * Determines whether the specified name has been used as a key in this table. + * + * @param key The name. + * @return true iff key is already a key in this table. + **/ + public boolean localContainsKey(Name key) { + return localContainsKey(key.toString()); + } + + + /** + * Determines whether the specified name has been used as a key in this table. + * + * @param key The name. + * @return true iff key is already a key in this table. + **/ + public boolean localContainsKey(String key) { + return table.containsKey(key); + } + + + /** + * Adds a name to the list of imported names in the top level table. + * + * @param name The name of a new imported package. + **/ + public void addImported(String name) { + if (parent == null) + imported.add(name); + else + parent.addImported(name); + } + + + /** + * Returns the size of the list of imported items. + * + * @return The size of the list of imported items. + **/ + public int importedSize() { + if (parent == null) + return imported.size(); + return parent.importedSize(); + } + + + /** + * Sets the package name in the top level table. + * + * @param name The package name. + **/ + public void setPackage(String name) { + if (parent == null) + sourcePackage = name; + else + parent.setPackage(name); + } + + + /** + * Gets the package name. + * + * @return The package name. 
+ **/ + public String getPackage() { + if (parent == null) + return sourcePackage; + return parent.getPackage(); + } + + + /** + * Generates package and import statements from the names in the + * member variable imported. + * + * @param out The stream to write to. + **/ + public void generateHeader(java.io.PrintStream out) { + if (parent != null) { + parent.generateHeader(out); + return; + } + + if (sourcePackage.length() != 0) + out.println("package " + sourcePackage + ";\n"); + + String[] names = (String[]) imported.toArray(new String[0]); + Arrays.sort(names); + for (int i = 0; i < names.length; ++i) + out.println("import " + names[i] + ";"); + } + + + /** Returns the names of the symbols in this (local) table. */ + public String[] getSymbols() { + return (String[]) table.keySet().toArray(new String[0]); + } + + + /** + * Prints this table and all its children recursively to STDOUT. + **/ + public void print() { + print(""); + } + + + /** + * Prints this table and all its children recursively to STDOUT. + * + * @param indent The level of indentation. 
+ **/ + public void print(String indent) { + if (parent == null) { + if (sourcePackage.length() == 0) + System.out.println("Package: " + sourcePackage); + System.out.println("Imported:"); + for (Iterator I = imported.iterator(); I.hasNext();) + System.out.println(" " + I.next()); + } + System.out.println(indent + "Symbols:"); + String[] symbols = (String[]) table.keySet().toArray(new String[0]); + Arrays.sort(symbols); + for (int i = 0; i < symbols.length; ++i) + System.out.println(indent + " " + symbols[i] + " -> " + table.get(symbols[i])); + + if (children.size() > 0) { + for (Iterator I = children.iterator(); I.hasNext();) { + System.out.println(); + ((SymbolTable) I.next()).print(indent + " "); + } + } + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/SynchronizedStatement.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/SynchronizedStatement.java index 062abd18..2f2f2f3d 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/SynchronizedStatement.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/SynchronizedStatement.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,105 +11,97 @@ /** - * Represents a synchronized statement. - * - * @author Nick Rizzolo + * Represents a synchronized statement. 
+ * + * @author Nick Rizzolo **/ -public class SynchronizedStatement extends Statement -{ - /** (¬ø) The expression representing the data to be protected. */ - public Expression data; - /** (¬ø) The code to execute while the data is protected. */ - public Block block; - - - /** - * Full constructor. - * - * @param d The expression representing the data to be protected. - * @param b The code to execute while the data is protected. - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public SynchronizedStatement(Expression d, Block b, int line, - int byteOffset) { - super(line, byteOffset); - data = d; - block = b; - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(2); - I.children[0] = data; - I.children[1] = block; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return new SynchronizedStatement((Expression) data.clone(), - (Block) block.clone(), -1, -1); - } - - - /** Returns a hash code for this {@link ASTNode}. */ - public int hashCode() { - return 31 * data.hashCode() + 7 * block.hashCode(); - } - - - /** - * Distinguishes this {@link ASTNode} from other objects according to its - * contents recursively. - * - * @param o Another object. - * @return true iff this node is equal to o. 
- **/ - public boolean equals(Object o) { - if (!(o instanceof SynchronizedStatement)) return false; - SynchronizedStatement s = (SynchronizedStatement) o; - return data.equals(s.data) && block.equals(s.block); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - buffer.append("synchronized ("); - data.write(buffer); - buffer.append(") "); - block.write(buffer); - } +public class SynchronizedStatement extends Statement { + /** (¬ø) The expression representing the data to be protected. */ + public Expression data; + /** (¬ø) The code to execute while the data is protected. */ + public Block block; + + + /** + * Full constructor. + * + * @param d The expression representing the data to be protected. + * @param b The code to execute while the data is protected. + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + public SynchronizedStatement(Expression d, Block b, int line, int byteOffset) { + super(line, byteOffset); + data = d; + block = b; + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. 
+ **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(2); + I.children[0] = data; + I.children[1] = block; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new SynchronizedStatement((Expression) data.clone(), (Block) block.clone(), -1, -1); + } + + + /** Returns a hash code for this {@link ASTNode}. */ + public int hashCode() { + return 31 * data.hashCode() + 7 * block.hashCode(); + } + + + /** + * Distinguishes this {@link ASTNode} from other objects according to its contents recursively. + * + * @param o Another object. + * @return true iff this node is equal to o. + **/ + public boolean equals(Object o) { + if (!(o instanceof SynchronizedStatement)) + return false; + SynchronizedStatement s = (SynchronizedStatement) o; + return data.equals(s.data) && block.equals(s.block); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. 
+ **/ + public void write(StringBuffer buffer) { + buffer.append("synchronized ("); + data.write(buffer); + buffer.append(") "); + block.write(buffer); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ThrowStatement.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ThrowStatement.java index a0cf7d48..877b3c1c 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ThrowStatement.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/ThrowStatement.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,97 +11,91 @@ /** - * Represents a throw statement. - * - * @author Nick Rizzolo + * Represents a throw statement. + * + * @author Nick Rizzolo **/ -public class ThrowStatement extends Statement -{ - /** (¬ø) The expression representing the exception to throw. */ - public Expression exception; - - - /** - * Full constructor. - * - * @param e The expression representing the exception to throw. - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. 
- **/ - public ThrowStatement(Expression e, int line, int byteOffset) { - super(line, byteOffset); - exception = e; - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(1); - I.children[0] = exception; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return new ThrowStatement((Expression) exception.clone(), -1, -1); - } - - - /** Returns a hash code for this {@link ASTNode}. */ - public int hashCode() { - return 31 * exception.hashCode(); - } - - - /** - * Distinguishes this {@link ASTNode} from other objects according to its - * contents recursively. - * - * @param o Another object. - * @return true iff this node is equal to o. - **/ - public boolean equals(Object o) { - if (!(o instanceof ThrowStatement)) return false; - ThrowStatement t = (ThrowStatement) o; - return exception.equals(t.exception); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - buffer.append("throw "); - exception.write(buffer); - buffer.append(";"); - } +public class ThrowStatement extends Statement { + /** (¬ø) The expression representing the exception to throw. */ + public Expression exception; + + + /** + * Full constructor. 
+ * + * @param e The expression representing the exception to throw. + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + public ThrowStatement(Expression e, int line, int byteOffset) { + super(line, byteOffset); + exception = e; + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(1); + I.children[0] = exception; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new ThrowStatement((Expression) exception.clone(), -1, -1); + } + + + /** Returns a hash code for this {@link ASTNode}. */ + public int hashCode() { + return 31 * exception.hashCode(); + } + + + /** + * Distinguishes this {@link ASTNode} from other objects according to its contents recursively. + * + * @param o Another object. + * @return true iff this node is equal to o. + **/ + public boolean equals(Object o) { + if (!(o instanceof ThrowStatement)) + return false; + ThrowStatement t = (ThrowStatement) o; + return exception.equals(t.exception); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. 
+ **/ + public void write(StringBuffer buffer) { + buffer.append("throw "); + exception.write(buffer); + buffer.append(";"); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/TryStatement.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/TryStatement.java index 40ad1ebc..46f6e2dd 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/TryStatement.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/TryStatement.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,155 +11,144 @@ /** - * Represents a try statement. - * - * @author Nick Rizzolo + * Represents a try statement. + * + * @author Nick Rizzolo **/ -public class TryStatement extends Statement -{ - /** (¬ø) The code to look for exceptions in. */ - public Block block; - /** (¬ø) A list of clauses for catching exceptions, if any. */ - public CatchList catchList; - /** (ø) The block of the "finally" clause, if any. */ - public Block finallyBlock; - - - /** - * Initializing constructor. - * - * @param b The code to look for exceptions in. - * @param l The list of CatchClauses. - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. 
- **/ - public TryStatement(Block b, CatchList l, int line, int byteOffset) { - this(b, l, null, line, byteOffset); - } - - /** - * Initializing constructor. - * - * @param b The code to look for exceptions in. - * @param f The finally block. - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public TryStatement(Block b, Block f, int line, int byteOffset) { - this(b, new CatchList(), f, line, byteOffset); - } - - /** - * Full constructor. - * - * @param b The code to look for exceptions in. - * @param l The list of CatchClauses. - * @param f The finally block. - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public TryStatement(Block b, CatchList l, Block f, int line, int byteOffset) - { - super(line, byteOffset); - block = b; - catchList = l; - finallyBlock = f; - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(finallyBlock == null ? 2 : 3); - I.children[0] = block; - I.children[1] = catchList; - if (finallyBlock != null) I.children[2] = finallyBlock; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return new TryStatement( - (Block) block.clone(), - (CatchList) catchList.clone(), - (finallyBlock == null ? null : (Block) finallyBlock.clone()), -1, -1); - } - - - /** Returns a hash code for this {@link ASTNode}. 
*/ - public int hashCode() { - int result = 31 * block.hashCode() + 17 * catchList.hashCode(); - if (finallyBlock != null) result += 7 * finallyBlock.hashCode(); - return result; - } - - - /** - * Distinguishes this {@link ASTNode} from other objects according to its - * contents recursively. - * - * @param o Another object. - * @return true iff this node is equal to o. - **/ - public boolean equals(Object o) { - if (!(o instanceof TryStatement)) return false; - TryStatement t = (TryStatement) o; - return - block.equals(t.block) && catchList.equals(t.catchList) - && (finallyBlock == null ? t.finallyBlock == null - : finallyBlock.equals(t.finallyBlock)); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - buffer.append("try "); - block.write(buffer); - - if (catchList.size() > 0) { - buffer.append(" "); - catchList.write(buffer); +public class TryStatement extends Statement { + /** (¬ø) The code to look for exceptions in. */ + public Block block; + /** (¬ø) A list of clauses for catching exceptions, if any. */ + public CatchList catchList; + /** (ø) The block of the "finally" clause, if any. */ + public Block finallyBlock; + + + /** + * Initializing constructor. + * + * @param b The code to look for exceptions in. + * @param l The list of CatchClauses. + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. 
+ **/ + public TryStatement(Block b, CatchList l, int line, int byteOffset) { + this(b, l, null, line, byteOffset); } - if (finallyBlock != null) { - buffer.append(" "); - finallyBlock.write(buffer); + /** + * Initializing constructor. + * + * @param b The code to look for exceptions in. + * @param f The finally block. + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + public TryStatement(Block b, Block f, int line, int byteOffset) { + this(b, new CatchList(), f, line, byteOffset); } - } -} + /** + * Full constructor. + * + * @param b The code to look for exceptions in. + * @param l The list of CatchClauses. + * @param f The finally block. + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + public TryStatement(Block b, CatchList l, Block f, int line, int byteOffset) { + super(line, byteOffset); + block = b; + catchList = l; + finallyBlock = f; + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(finallyBlock == null ? 2 : 3); + I.children[0] = block; + I.children[1] = catchList; + if (finallyBlock != null) + I.children[2] = finallyBlock; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new TryStatement((Block) block.clone(), (CatchList) catchList.clone(), + (finallyBlock == null ? 
null : (Block) finallyBlock.clone()), -1, -1); + } + + + /** Returns a hash code for this {@link ASTNode}. */ + public int hashCode() { + int result = 31 * block.hashCode() + 17 * catchList.hashCode(); + if (finallyBlock != null) + result += 7 * finallyBlock.hashCode(); + return result; + } + + + /** + * Distinguishes this {@link ASTNode} from other objects according to its contents recursively. + * + * @param o Another object. + * @return true iff this node is equal to o. + **/ + public boolean equals(Object o) { + if (!(o instanceof TryStatement)) + return false; + TryStatement t = (TryStatement) o; + return block.equals(t.block) + && catchList.equals(t.catchList) + && (finallyBlock == null ? t.finallyBlock == null : finallyBlock + .equals(t.finallyBlock)); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + buffer.append("try "); + block.write(buffer); + + if (catchList.size() > 0) { + buffer.append(" "); + catchList.write(buffer); + } + + if (finallyBlock != null) { + buffer.append(" "); + finallyBlock.write(buffer); + } + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Type.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Type.java index 5a0126a8..e980a682 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Type.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/Type.java @@ -1,140 +1,148 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. 
- * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; /** - * Abstract class representing the type of a variable or the return type of a - * method. - * - * @author Nick Rizzolo + * Abstract class representing the type of a variable or the return type of a method. + * + * @author Nick Rizzolo **/ -public abstract class Type extends ASTNode -{ - /** - * This method takes a Java Class object and generates an - * LBJava Type that represents the same type. - * - * @param c The Class to "translate". - * @return The Type object that represents the same type as - * the argument. - **/ - public static Type parseType(Class c) { return parseType(c.getName()); } - - - /** - * This method takes a Java type encoding and generates an LBJava - * Type that represents the same type. - * - * @param encoding The encoding to decode. - * @return The Type object that represents the same type as - * the argument. 
- **/ - public static Type parseType(String encoding) { - int dimensions = 0; - while (encoding.charAt(dimensions) == '[') ++dimensions; - - if (dimensions == 0) { - if (encoding.equals("boolean")) - return new PrimitiveType(PrimitiveType.BOOLEAN); - else if (encoding.equals("byte")) - return new PrimitiveType(PrimitiveType.BYTE); - else if (encoding.equals("char")) - return new PrimitiveType(PrimitiveType.CHAR); - else if (encoding.equals("double")) - return new PrimitiveType(PrimitiveType.DOUBLE); - else if (encoding.equals("float")) - return new PrimitiveType(PrimitiveType.FLOAT); - else if (encoding.equals("int")) - return new PrimitiveType(PrimitiveType.INT); - else if (encoding.equals("long")) - return new PrimitiveType(PrimitiveType.LONG); - else if (encoding.equals("short")) - return new PrimitiveType(PrimitiveType.SHORT); +public abstract class Type extends ASTNode { + /** + * This method takes a Java Class object and generates an LBJava Type + * that represents the same type. + * + * @param c The Class to "translate". + * @return The Type object that represents the same type as the argument. 
+ **/ + public static Type parseType(Class c) { + return parseType(c.getName()); } - Type result = null; - if (dimensions > 0) { - switch (encoding.charAt(dimensions)) { - case 'Z': result = new PrimitiveType(PrimitiveType.BOOLEAN); break; - case 'B': result = new PrimitiveType(PrimitiveType.BYTE); break; - case 'C': result = new PrimitiveType(PrimitiveType.CHAR); break; - case 'D': result = new PrimitiveType(PrimitiveType.DOUBLE); break; - case 'F': result = new PrimitiveType(PrimitiveType.FLOAT); break; - case 'I': result = new PrimitiveType(PrimitiveType.INT); break; - case 'J': result = new PrimitiveType(PrimitiveType.LONG); break; - case 'S': result = new PrimitiveType(PrimitiveType.SHORT); break; - case 'L': break; - default: - System.err.println("ERROR: Can't parse type '" + encoding + "'"); - new Exception().printStackTrace(); - System.exit(1); - } - } - if (result == null) { - String referenceString; - if (dimensions == 0) referenceString = encoding; - else - referenceString = - encoding.substring(dimensions + 1, encoding.length() - 1); + /** + * This method takes a Java type encoding and generates an LBJava Type that + * represents the same type. + * + * @param encoding The encoding to decode. + * @return The Type object that represents the same type as the argument. 
+ **/ + public static Type parseType(String encoding) { + int dimensions = 0; + while (encoding.charAt(dimensions) == '[') + ++dimensions; + + if (dimensions == 0) { + if (encoding.equals("boolean")) + return new PrimitiveType(PrimitiveType.BOOLEAN); + else if (encoding.equals("byte")) + return new PrimitiveType(PrimitiveType.BYTE); + else if (encoding.equals("char")) + return new PrimitiveType(PrimitiveType.CHAR); + else if (encoding.equals("double")) + return new PrimitiveType(PrimitiveType.DOUBLE); + else if (encoding.equals("float")) + return new PrimitiveType(PrimitiveType.FLOAT); + else if (encoding.equals("int")) + return new PrimitiveType(PrimitiveType.INT); + else if (encoding.equals("long")) + return new PrimitiveType(PrimitiveType.LONG); + else if (encoding.equals("short")) + return new PrimitiveType(PrimitiveType.SHORT); + } + + Type result = null; + if (dimensions > 0) { + switch (encoding.charAt(dimensions)) { + case 'Z': + result = new PrimitiveType(PrimitiveType.BOOLEAN); + break; + case 'B': + result = new PrimitiveType(PrimitiveType.BYTE); + break; + case 'C': + result = new PrimitiveType(PrimitiveType.CHAR); + break; + case 'D': + result = new PrimitiveType(PrimitiveType.DOUBLE); + break; + case 'F': + result = new PrimitiveType(PrimitiveType.FLOAT); + break; + case 'I': + result = new PrimitiveType(PrimitiveType.INT); + break; + case 'J': + result = new PrimitiveType(PrimitiveType.LONG); + break; + case 'S': + result = new PrimitiveType(PrimitiveType.SHORT); + break; + case 'L': + break; + default: + System.err.println("ERROR: Can't parse type '" + encoding + "'"); + new Exception().printStackTrace(); + System.exit(1); + } + } + + if (result == null) { + String referenceString; + if (dimensions == 0) + referenceString = encoding; + else + referenceString = encoding.substring(dimensions + 1, encoding.length() - 1); + + for (int i = referenceString.indexOf('$'); i != -1; i = referenceString.indexOf('$')) + referenceString = + 
referenceString.substring(0, i) + "." + referenceString.substring(i + 1); + + result = new ReferenceType(new Name(referenceString)); + } + + for (int i = 0; i < dimensions; ++i) + result = new ArrayType(result); + return result; + } - for (int i = referenceString.indexOf('$'); i != -1; - i = referenceString.indexOf('$')) - referenceString = referenceString.substring(0, i) + "." - + referenceString.substring(i + 1); - result = new ReferenceType(new Name(referenceString)); + /** + * Set true by SemanticAnalysis iff this type will be used to + * represent the argument of a QuantifiedConstraintExpression. + **/ + public boolean quantifierArgumentType; + /** + * Java's Class object defining the class that this Type represents. + **/ + protected Class myClass = null; + + + /** + * Default constructor. + * + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + protected Type(int line, int byteOffset) { + super(line, byteOffset); + quantifierArgumentType = false; } - for (int i = 0; i < dimensions; ++i) result = new ArrayType(result); - return result; - } - - - /** - * Set true by SemanticAnalysis iff this type - * will be used to represent the argument of a - * QuantifiedConstraintExpression. - **/ - public boolean quantifierArgumentType; - /** - * Java's Class object defining the class that this - * Type represents. - **/ - protected Class myClass = null; - - - /** - * Default constructor. - * - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. 
- **/ - protected Type(int line, int byteOffset) { - super(line, byteOffset); - quantifierArgumentType = false; - } - - - /** - * Returns an object representing the class that this type - * represents. - * - * @return An object representing the class that this type - * represents. - **/ - public Class typeClass() { return myClass; } -} + /** + * Returns an object representing the class that this type represents. + * + * @return An object representing the class that this type represents. + **/ + public Class typeClass() { + return myClass; + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/UnaryExpression.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/UnaryExpression.java index 733275e5..598ff9b1 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/UnaryExpression.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/UnaryExpression.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,103 +11,100 @@ /** - * This class represents an expression involving a unary operator. - * - * @author Nick Rizzolo + * This class represents an expression involving a unary operator. + * + * @author Nick Rizzolo **/ -public class UnaryExpression extends Expression -{ - /** (¬ø) Representation of the unary operator. */ - public Operator operation; - /** (¬ø) The expression on which the unary operator operates. 
*/ - public Expression subexpression; - - - /** - * Initializing constructor. Line and byte offset information is taken - * from the unary operator's representation. - * - * @param op Representation of the unary operator. - * @param sub The expression on which the unary operator operates. - **/ - public UnaryExpression(Operator op, Expression sub) { - super(op.line, op.byteOffset); - operation = op; - subexpression = sub; - } - - - /** - * Returns a hash code value for java hash structures. - * - * @return A hash code for this object. - **/ - public int hashCode() { - return operation.hashCode() + subexpression.hashCode(); - } - - - /** - * Indicates whether some other object is "equal to" this one. - * - * @return true iff this object is the same as the argument. - **/ - public boolean equals(Object o) { - if (!(o instanceof UnaryExpression)) return false; - UnaryExpression u = (UnaryExpression) o; - return operation.equals(u.operation) - && subexpression.equals(u.subexpression); - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(2); - I.children[0] = operation; - I.children[1] = subexpression; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return new UnaryExpression((Operator) operation.clone(), - (Expression) subexpression.clone()); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. 
The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - if (parenthesized) buffer.append("("); - operation.write(buffer); - subexpression.write(buffer); - if (parenthesized) buffer.append(")"); - } +public class UnaryExpression extends Expression { + /** (¬ø) Representation of the unary operator. */ + public Operator operation; + /** (¬ø) The expression on which the unary operator operates. */ + public Expression subexpression; + + + /** + * Initializing constructor. Line and byte offset information is taken from the unary operator's + * representation. + * + * @param op Representation of the unary operator. + * @param sub The expression on which the unary operator operates. + **/ + public UnaryExpression(Operator op, Expression sub) { + super(op.line, op.byteOffset); + operation = op; + subexpression = sub; + } + + + /** + * Returns a hash code value for java hash structures. + * + * @return A hash code for this object. + **/ + public int hashCode() { + return operation.hashCode() + subexpression.hashCode(); + } + + + /** + * Indicates whether some other object is "equal to" this one. + * + * @return true iff this object is the same as the argument. + **/ + public boolean equals(Object o) { + if (!(o instanceof UnaryExpression)) + return false; + UnaryExpression u = (UnaryExpression) o; + return operation.equals(u.operation) && subexpression.equals(u.subexpression); + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(2); + I.children[0] = operation; + I.children[1] = subexpression; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. 
+ * + * @return The clone node. + **/ + public Object clone() { + return new UnaryExpression((Operator) operation.clone(), (Expression) subexpression.clone()); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + if (parenthesized) + buffer.append("("); + operation.write(buffer); + subexpression.write(buffer); + if (parenthesized) + buffer.append(")"); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/UniversalQuantifierExpression.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/UniversalQuantifierExpression.java index 4f423f5c..da6d9867 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/UniversalQuantifierExpression.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/UniversalQuantifierExpression.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -15,94 +12,78 @@ /** - * A universal quantifier has the form: - *

- * forall argument in (expression) - * constraint-expression - *
- * where expression must evaluate to a - * Collection, and the universal quantifier expression is - * sastisfied iff constraint-expression is satisfied for - * all settings of argument taken from the - * Collection. - * - * @author Nick Rizzolo + * A universal quantifier has the form:
forall argument in + * (expression) constraint-expression
 where + * expression must evaluate to a Collection, and the universal + * quantifier expression is satisfied iff constraint-expression is satisfied + * for all settings of argument taken from the Collection . + * + * @author Nick Rizzolo **/ -public class UniversalQuantifierExpression - extends QuantifiedConstraintExpression -{ - /** - * Full constructor. - * - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - * @param a The quantification variable specification. - * @param c Evaluates to the collection of objects. - * @param co The quantified constraint. - **/ - public UniversalQuantifierExpression(int line, int byteOffset, Argument a, - Expression c, ConstraintExpression co) - { - super(line, byteOffset, a, c, co); - } +public class UniversalQuantifierExpression extends QuantifiedConstraintExpression { + /** + * Full constructor. + * + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + * @param a The quantification variable specification. + * @param c Evaluates to the collection of objects. + * @param co The quantified constraint. + **/ + public UniversalQuantifierExpression(int line, int byteOffset, Argument a, Expression c, + ConstraintExpression co) { + super(line, byteOffset, a, c, co); + } - /** - * Parser's constructor. Line and byte offset information are taken from - * the token. - * - * @param t The token containing line and byte offset information. - * @param a The quantification variable specification. - * @param c Evaluates to the collection of objects. - * @param co The quantified constraint. 
- **/ - public UniversalQuantifierExpression(TokenValue t, Argument a, - Expression c, ConstraintExpression co) - { - this(t.line, t.byteOffset, a, c, co); - } + /** + * Parser's constructor. Line and byte offset information are taken from the token. + * + * @param t The token containing line and byte offset information. + * @param a The quantification variable specification. + * @param c Evaluates to the collection of objects. + * @param co The quantified constraint. + **/ + public UniversalQuantifierExpression(TokenValue t, Argument a, Expression c, + ConstraintExpression co) { + this(t.line, t.byteOffset, a, c, co); + } - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return - new UniversalQuantifierExpression( - -1, -1, (Argument) argument.clone(), - (Expression) collection.clone(), - (ConstraintExpression) constraint.clone()); - } + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new UniversalQuantifierExpression(-1, -1, (Argument) argument.clone(), + (Expression) collection.clone(), (ConstraintExpression) constraint.clone()); + } - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. 
- **/ - public void write(StringBuffer buffer) { - buffer.append("forall "); - argument.write(buffer); - buffer.append(" in ("); - collection.write(buffer); - buffer.append(") "); - constraint.write(buffer); - } + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + buffer.append("forall "); + argument.write(buffer); + buffer.append(" in ("); + collection.write(buffer); + buffer.append(") "); + constraint.write(buffer); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/VariableDeclaration.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/VariableDeclaration.java index f486a74d..f6934271 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/VariableDeclaration.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/VariableDeclaration.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,185 +11,179 @@ /** - * Represents a local variable declaration. - * - * @author Nick Rizzolo + * Represents a local variable declaration. + * + * @author Nick Rizzolo **/ -public class VariableDeclaration extends Statement -{ - /** Whether or not the argument was modified as final. 
*/ - public boolean isFinal; - /** (¬ø) The type of the declared variable. */ - public Type type; - /** (¬ø) The names of variables declared in this statement. */ - public NameList names; - /** - * (¬ø) The initializing expressions for the declared variables, - * null being an allowable value. - **/ - public ExpressionList initializers; - - - /** - * Parser's constructor, leaving the type to be filled in later. - * - * @param n The name of the declared variable. - **/ - public VariableDeclaration(Name n) { this(n, null); } - - /** - * Parser's constructor, leaving the type to be filled in later. - * - * @param n The name of the declared variable. - * @param i The initializing expression for the declared variable. - **/ - public VariableDeclaration(Name n, Expression i) { - super(-1, -1); // Line and byte offset information will be filled in - // later. - names = new NameList(n); - initializers = new ExpressionList(i); - type = null; - isFinal = false; - } - - /** - * Full constructor. - * - * @param t The type of the declared variables. - * @param n The names of the declared variables. - * @param i The initializing expressions for the declared variables. - * @param f Whether or not the variables were declared as final. - **/ - public VariableDeclaration(Type t, NameList n, ExpressionList i, boolean f) - { - super(t.line, t.byteOffset); - type = t; - names = n; - initializers = i; - isFinal = f; - } - - - /** - * Adds the declarations in the specified declaration statement to the - * declarations in this statement. - * - * @param v The variables to be added. - **/ - public void addVariables(VariableDeclaration v) { - names.addAll(v.names); - initializers.addAll(v.initializers); - } - - - /** - * Setting this declaration statement's type also sets its line and byte - * offset information. - * - * @param t The new type for this variable declaration statement. 
- **/ - public void setType(Type t) { - type = t; - line = type.line; - byteOffset = type.byteOffset; - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(3); - I.children[0] = type; - I.children[1] = names; - I.children[2] = initializers; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. - **/ - public Object clone() { - return - new VariableDeclaration((Type) type.clone(), (NameList) names.clone(), - (ExpressionList) initializers.clone(), isFinal); - } - - - /** Returns a hash code for this {@link ASTNode}. */ - public int hashCode() { - return - (isFinal ? 1 : 3) + 7 * type.hashCode() + 17 * names.hashCode() - + 31 * initializers.hashCode(); - } - - - /** - * Distinguishes this {@link ASTNode} from other objects according to its - * contents recursively. - * - * @param o Another object. - * @return true iff this node is equal to o. - **/ - public boolean equals(Object o) { - if (!(o instanceof VariableDeclaration)) return false; - VariableDeclaration v = (VariableDeclaration) o; - return - isFinal == v.isFinal && type.equals(v.type) && names.equals(v.names) - && initializers.equals(v.initializers); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. 
- **/ - public void write(StringBuffer buffer) { - if (isFinal) buffer.append("final "); - type.write(buffer); - - ASTNodeIterator N = names.iterator(); - ExpressionList.ExpressionListIterator I = initializers.listIterator(); - buffer.append(" "); - N.next().write(buffer); - - Expression i = I.nextItem(); - if (i != null) { - buffer.append(" = "); - i.write(buffer); +public class VariableDeclaration extends Statement { + /** Whether or not the argument was modified as final. */ + public boolean isFinal; + /** (¬ø) The type of the declared variable. */ + public Type type; + /** (¬ø) The names of variables declared in this statement. */ + public NameList names; + /** + * (¬ø) The initializing expressions for the declared variables, null + * being an allowable value. + **/ + public ExpressionList initializers; + + + /** + * Parser's constructor, leaving the type to be filled in later. + * + * @param n The name of the declared variable. + **/ + public VariableDeclaration(Name n) { + this(n, null); } - while (N.hasNext()) { - buffer.append(", "); - N.next().write(buffer); - i = I.nextItem(); - if (i != null) { - buffer.append(" = "); - i.write(buffer); - } + /** + * Parser's constructor, leaving the type to be filled in later. + * + * @param n The name of the declared variable. + * @param i The initializing expression for the declared variable. + **/ + public VariableDeclaration(Name n, Expression i) { + super(-1, -1); // Line and byte offset information will be filled in + // later. + names = new NameList(n); + initializers = new ExpressionList(i); + type = null; + isFinal = false; + } + + /** + * Full constructor. + * + * @param t The type of the declared variables. + * @param n The names of the declared variables. + * @param i The initializing expressions for the declared variables. + * @param f Whether or not the variables were declared as final. 
+ **/ + public VariableDeclaration(Type t, NameList n, ExpressionList i, boolean f) { + super(t.line, t.byteOffset); + type = t; + names = n; + initializers = i; + isFinal = f; + } + + + /** + * Adds the declarations in the specified declaration statement to the declarations in this + * statement. + * + * @param v The variables to be added. + **/ + public void addVariables(VariableDeclaration v) { + names.addAll(v.names); + initializers.addAll(v.initializers); } - buffer.append(";"); - } -} + /** + * Setting this declaration statement's type also sets its line and byte offset information. + * + * @param t The new type for this variable declaration statement. + **/ + public void setType(Type t) { + type = t; + line = type.line; + byteOffset = type.byteOffset; + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(3); + I.children[0] = type; + I.children[1] = names; + I.children[2] = initializers; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new VariableDeclaration((Type) type.clone(), (NameList) names.clone(), + (ExpressionList) initializers.clone(), isFinal); + } + + + /** Returns a hash code for this {@link ASTNode}. */ + public int hashCode() { + return (isFinal ? 1 : 3) + 7 * type.hashCode() + 17 * names.hashCode() + 31 + * initializers.hashCode(); + } + + + /** + * Distinguishes this {@link ASTNode} from other objects according to its contents recursively. + * + * @param o Another object. + * @return true iff this node is equal to o. 
+ **/ + public boolean equals(Object o) { + if (!(o instanceof VariableDeclaration)) + return false; + VariableDeclaration v = (VariableDeclaration) o; + return isFinal == v.isFinal && type.equals(v.type) && names.equals(v.names) + && initializers.equals(v.initializers); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + if (isFinal) + buffer.append("final "); + type.write(buffer); + + ASTNodeIterator N = names.iterator(); + ExpressionList.ExpressionListIterator I = initializers.listIterator(); + buffer.append(" "); + N.next().write(buffer); + + Expression i = I.nextItem(); + if (i != null) { + buffer.append(" = "); + i.write(buffer); + } + + while (N.hasNext()) { + buffer.append(", "); + N.next().write(buffer); + i = I.nextItem(); + if (i != null) { + buffer.append(" = "); + i.write(buffer); + } + } + + buffer.append(";"); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/VariableInstance.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/VariableInstance.java index 8cb86765..290d8fc6 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/VariableInstance.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/VariableInstance.java @@ -1,32 +1,27 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. 
See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; /** - * Abstract class representing either a scalar or a subscript variable. - * - * @author Nick Rizzolo + * Abstract class representing either a scalar or a subscript variable. + * + * @author Nick Rizzolo **/ -public abstract class VariableInstance extends Expression -{ - /** - * Default constructor. - * - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - VariableInstance(int line, int byteOffset) { super(line, byteOffset); } +public abstract class VariableInstance extends Expression { + /** + * Default constructor. + * + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. + **/ + VariableInstance(int line, int byteOffset) { + super(line, byteOffset); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/WhileStatement.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/WhileStatement.java index 7ddec51d..aa55145d 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/WhileStatement.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/IR/WhileStatement.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. 
- * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.IR; @@ -14,107 +11,99 @@ /** - * Represents a while loop. - * - * @author Nick Rizzolo + * Represents a while loop. + * + * @author Nick Rizzolo **/ -public class WhileStatement extends Statement -{ - /** - * (¬ø) The expression representing the loop's terminating - * condition. - **/ - public Expression condition; - /** (¬ø) The body of the loop. */ - public Statement body; - - - /** - * Full constructor. - * - * @param c The terminating condition. - * @param b The body of the loop. - * @param line The line on which the source code represented by this - * node is found. - * @param byteOffset The byte offset from the beginning of the source file - * at which the source code represented by this node is - * found. - **/ - public WhileStatement(Expression c, Statement b, int line, int byteOffset) { - super(line, byteOffset); - condition = c; - body = b; - } - - - /** - * Returns an iterator used to successively access the children of this - * node. - * - * @return An iterator used to successively access the children of this - * node. - **/ - public ASTNodeIterator iterator() { - ASTNodeIterator I = new ASTNodeIterator(2); - I.children[0] = condition; - I.children[1] = body; - return I; - } - - - /** - * Creates a new object with the same primitive data, and recursively - * creates new member data objects as well. - * - * @return The clone node. 
- **/ - public Object clone() { - return new WhileStatement((Expression) condition.clone(), - (Statement) body.clone(), -1, -1); - } - - - /** Returns a hash code for this {@link ASTNode}. */ - public int hashCode() { - return 31 * condition.hashCode() + 17 * body.hashCode(); - } - - - /** - * Distinguishes this {@link ASTNode} from other objects according to its - * contents recursively. - * - * @param o Another object. - * @return true iff this node is equal to o. - **/ - public boolean equals(Object o) { - if (!(o instanceof WhileStatement)) return false; - WhileStatement w = (WhileStatement) o; - return condition.equals(w.condition) && body.equals(w.body); - } - - - /** - * Ensures that the correct run() method is called for this - * type of node. - * - * @param pass The pass whose run() method should be called. - **/ - public void runPass(Pass pass) { pass.run(this); } - - - /** - * Writes a string representation of this ASTNode to the - * specified buffer. The representation written is parsable by the LBJava - * compiler, but not very readable. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - buffer.append("while ("); - condition.write(buffer); - buffer.append(") "); - body.write(buffer); - } +public class WhileStatement extends Statement { + /** + * (¬ø) The expression representing the loop's terminating condition. + **/ + public Expression condition; + /** (¬ø) The body of the loop. */ + public Statement body; + + + /** + * Full constructor. + * + * @param c The terminating condition. + * @param b The body of the loop. + * @param line The line on which the source code represented by this node is found. + * @param byteOffset The byte offset from the beginning of the source file at which the source + * code represented by this node is found. 
+ **/ + public WhileStatement(Expression c, Statement b, int line, int byteOffset) { + super(line, byteOffset); + condition = c; + body = b; + } + + + /** + * Returns an iterator used to successively access the children of this node. + * + * @return An iterator used to successively access the children of this node. + **/ + public ASTNodeIterator iterator() { + ASTNodeIterator I = new ASTNodeIterator(2); + I.children[0] = condition; + I.children[1] = body; + return I; + } + + + /** + * Creates a new object with the same primitive data, and recursively creates new member data + * objects as well. + * + * @return The clone node. + **/ + public Object clone() { + return new WhileStatement((Expression) condition.clone(), (Statement) body.clone(), -1, -1); + } + + + /** Returns a hash code for this {@link ASTNode}. */ + public int hashCode() { + return 31 * condition.hashCode() + 17 * body.hashCode(); + } + + + /** + * Distinguishes this {@link ASTNode} from other objects according to its contents recursively. + * + * @param o Another object. + * @return true iff this node is equal to o. + **/ + public boolean equals(Object o) { + if (!(o instanceof WhileStatement)) + return false; + WhileStatement w = (WhileStatement) o; + return condition.equals(w.condition) && body.equals(w.body); + } + + + /** + * Ensures that the correct run() method is called for this type of node. + * + * @param pass The pass whose run() method should be called. + **/ + public void runPass(Pass pass) { + pass.run(this); + } + + + /** + * Writes a string representation of this ASTNode to the specified buffer. The + * representation written is parsable by the LBJava compiler, but not very readable. + * + * @param buffer The buffer to write to. 
+ **/ + public void write(StringBuffer buffer) { + buffer.append("while ("); + condition.write(buffer); + buffer.append(") "); + body.write(buffer); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/Main.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/Main.java index dc715b74..c6390ac0 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/Main.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/Main.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava; @@ -28,562 +25,544 @@ /** - * LBJava's command line interface. Passing a source file to this class will - * invoke LBJava's frontend and optimization passes, resulting in the execution - * of the source file's code including creation of java files that implement - * the source file's semantics.

- * - * LBJava stands for - * Learning Based - * Java. LBJava is a language for building systems - * that learn.

- * - *

- *
Usage:
- *
- * java edu.illinois.cs.cogcomp.lbjava.Main [options] <source file> - *
- *
where [options] is one or more of the following:
- *
- * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - *
-c - * Compile only: This option tells LBJava to translate the given - * source to Java, but not to compile the generated Java - * sources or do any training. - *
-d <directory> - * Any class files generated during compilation will be written - * in the specified directory, just like javac's - * -d command line parameter. - *
-j <a> - * Sends the contents of <a> to - * javac as command line arguments while - * compiling. Don't forget to put quotes around - * <a> if there is more than one such - * argument or if the argument has a parameter. - *
-s Print the names of all declarations and quit.
-t <n> - * Enables default progress output during training. A message - * is printed every <n> examples while - * training any classifier whose learn expression - * doesn't contain a progressOutput clause. - *
-v Prints the version number and exits.
-w - * Disables the output of warning messages. Currenlty, there - * are only two types of warnings. A warning is reported if a - * constraint declaration does not contain any constraint - * statements, and a warning is reported if a learner's type is - * less specific than the declared type of the classifier it's - * being used in. - *
-x - * Clean: Delete all files that would otherwise be generated. - * No code is generated and no training takes place. - *
- * -generatedsourcepath <directory>
- * -gsp <directory> - *
- * LBJava will potentially generate many Java source files. Use - * this option to have LBJava write them to the specified - * directory instead of the current directory. - * <directory> must already exist. Note - * that LBJava will also compile these files which can result in - * even more class files than there were sources. Those class - * files will also be written in <directory> - * unless the -d command line parameter is - * utilized as well. - *
-sourcepath <path> - * If the LBJava source depends on classes whose source files - * cannot be found on the user's classpath, specify the - * directories where they can be found using this parameter. - * It works just like javac's - * -sourcepath command line parameter. - *
--parserDebugDebug: Debug output for parse phase only.
--lexerOutputLexer output: Print lexical token stream and quit.
--parserOutputParser output: Print the parsed AST and quit.
--semanticOutput - * Semantic analysis output: Print semantic analysis - * information and quit. - *
- *
- *
- *
- *
- * - * @author Nick Rizzolo + * LBJava's command line interface. Passing a source file to this class will invoke LBJava's + * frontend and optimization passes, resulting in the execution of the source file's code including + * creation of java files that implement the source file's semantics. + *

+ * + * LBJava stands for Learning Based Java. LBJava is a language for building + * systems that learn. + *

+ * + *

+ *
Usage:
+ *
+ * java edu.illinois.cs.cogcomp.lbjava.Main [options] <source file> + *
+ *
where [options] is one or more of the following:
+ *
+ * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
-c + * Compile only: This option tells LBJava to translate the given source to Java, but not to compile + * the generated Java sources or do any training.
-d <directory> + * Any class files generated during compilation will be written in the specified directory, just + * like javac's -d command line parameter.
-j <a> + * Sends the contents of <a> to javac as command line arguments + * while compiling. Don't forget to put quotes around <a> if there is more than + * one such argument or if the argument has a parameter.
-sPrint the names of all declarations and quit.
-t <n> + * Enables default progress output during training. A message is printed every + * <n> examples while training any classifier whose learn expression + * doesn't contain a progressOutput clause.
-vPrints the version number and exits.
-w + * Disables the output of warning messages. Currenlty, there are only two types of warnings. A + * warning is reported if a constraint declaration does not contain any constraint statements, and a + * warning is reported if a learner's type is less specific than the declared type of the classifier + * it's being used in.
-x + * Clean: Delete all files that would otherwise be generated. No code is generated and no training + * takes place.
+ * -generatedsourcepath <directory>
+ * -gsp <directory>
+ * LBJava will potentially generate many Java source files. Use this option to have LBJava write + * them to the specified directory instead of the current directory. <directory> + * must already exist. Note that LBJava will also compile these files which can result in even more + * class files than there were sources. Those class files will also be written in + * <directory> unless the -d command line parameter is utilized as + * well.
-sourcepath <path> + * If the LBJava source depends on classes whose source files cannot be found on the user's + * classpath, specify the directories where they can be found using this parameter. It works just + * like javac's -sourcepath command line parameter.
--parserDebugDebug: Debug output for parse phase only.
--lexerOutputLexer output: Print lexical token stream and quit.
--parserOutputParser output: Print the parsed AST and quit.
--semanticOutput + * Semantic analysis output: Print semantic analysis information and quit.
+ *
+ *
+ *
+ *
+ * + * @author Nick Rizzolo **/ -public class Main -{ - /** - * This flag is set to true if token printing is enabled on - * the command line. Tokens are the output from the scanner. - **/ - private static boolean printTokens = false; - /** - * This flag is set to true if AST printing is enabled on the - * command line. The AST is the output from the parser. - **/ - private static boolean printAST = false; - /** - * This flag is set to true if semantic analysis output is - * enabled on the command line. - **/ - private static boolean printSemantic = false; - /** - * This flag is set to true if revision analysis output is - * enabled on the command line. - **/ - private static boolean printRevisions = false; - /** - * This flag is set to true if the output of parser debugging - * information is enabled on the command line. - **/ - private static boolean parserDebug = false; - /** - * Set to the granularity at which progress messages should be printed - * during training. - **/ - private static int trainingOutput = 0; - /** - * This flag is set to true if cleaning has been enabled on - * the command line. - **/ - public static boolean clean = false; - /** - * This flag is set to true if the user has requested that the - * source only be compiled to Java. - **/ - private static boolean compileOnly = false; - /** This flag is set if concurrent training has been enabled. */ - public static boolean concurrentTraining = false; - /** This flag is set if warnings have been disabled on the command line. */ - public static boolean warningsDisabled = false; - /** This flag is set if symbol printing is enabled on the command line. */ - public static boolean printSymbols = false; - /** The relative path to the LBJava source file. */ - public static String sourceDirectory; - /** The name of the LBJava source file as specified on the command line. */ - public static String sourceFilename; - /** The source file's name without the .lbj extension. 
*/ - public static String sourceFileBase; - /** - * Holds command line arguments to be sent to javac when - * compiling. - **/ - public static String javacArguments = ""; - /** - * A list of names of files generated by the compiler, created as they are - * generated. - **/ - public static HashSet fileNames; - /** - * The directory in which Javac will place class files (with subdirectories - * mimicing the package name included). - **/ - public static String classDirectory; - /** - * The directory in which class files should be written, not including the - * subdirectory structure that mimics the package. - **/ - public static String classPackageDirectory; - /** The directory in which to search for source files. */ - public static String classPath = System.getProperty("java.class.path"); - /** The directory in which to search for source files. */ - public static String sourcePath = System.getProperty("java.class.path"); - /** - * The directory in which to write generated Java source files (with - * subdirectories mimicing the package name included). - **/ - public static String generatedSourceDirectory; - /** The passes that will be executed. */ - private static LinkedList passes; - - - /** - * The main compiler driver. This method parses command line options and - * then calls all of LBJava's components. - * - * @param args The user's command line arguments are found here. - * @exception Exception An exception is thrown when any error occurs. - **/ - public static void main(String[] args) throws Exception { - AST ast = null; - try { ast = frontend(ProcessCommandLine(args)); } - catch (Exception e) { - if ("version".equals(e.getMessage())) System.exit(0); - - if (e.getMessage() == null - || !e.getMessage().equals("Incorrect arguments")) - throw e; - System.exit(1); - } +public class Main { + /** + * This flag is set to true if token printing is enabled on the command line. + * Tokens are the output from the scanner. 
+ **/ + private static boolean printTokens = false; + /** + * This flag is set to true if AST printing is enabled on the command line. The AST + * is the output from the parser. + **/ + private static boolean printAST = false; + /** + * This flag is set to true if semantic analysis output is enabled on the command + * line. + **/ + private static boolean printSemantic = false; + /** + * This flag is set to true if revision analysis output is enabled on the command + * line. + **/ + private static boolean printRevisions = false; + /** + * This flag is set to true if the output of parser debugging information is + * enabled on the command line. + **/ + private static boolean parserDebug = false; + /** + * Set to the granularity at which progress messages should be printed during training. + **/ + private static int trainingOutput = 0; + /** + * This flag is set to true if cleaning has been enabled on the command line. + **/ + public static boolean clean = false; + /** + * This flag is set to true if the user has requested that the source only be + * compiled to Java. + **/ + private static boolean compileOnly = false; + /** This flag is set if concurrent training has been enabled. */ + public static boolean concurrentTraining = false; + /** This flag is set if warnings have been disabled on the command line. */ + public static boolean warningsDisabled = false; + /** This flag is set if symbol printing is enabled on the command line. */ + public static boolean printSymbols = false; + /** The relative path to the LBJava source file. */ + public static String sourceDirectory; + /** The name of the LBJava source file as specified on the command line. */ + public static String sourceFilename; + /** The source file's name without the .lbj extension. */ + public static String sourceFileBase; + /** + * Holds command line arguments to be sent to javac when compiling. 
+ **/ + public static String javacArguments = ""; + /** + * A list of names of files generated by the compiler, created as they are generated. + **/ + public static HashSet fileNames; + /** + * The directory in which Javac will place class files (with subdirectories mimicing the package + * name included). + **/ + public static String classDirectory; + /** + * The directory in which class files should be written, not including the subdirectory + * structure that mimics the package. + **/ + public static String classPackageDirectory; + /** The directory in which to search for source files. */ + public static String classPath = System.getProperty("java.class.path"); + /** The directory in which to search for source files. */ + public static String sourcePath = System.getProperty("java.class.path"); + /** + * The directory in which to write generated Java source files (with subdirectories mimicing the + * package name included). + **/ + public static String generatedSourceDirectory; + /** The passes that will be executed. */ + private static LinkedList passes; + + + /** + * The main compiler driver. This method parses command line options and then calls all of + * LBJava's components. + * + * @param args The user's command line arguments are found here. + * @exception Exception An exception is thrown when any error occurs. + **/ + public static void main(String[] args) throws Exception { + AST ast = null; + try { + ast = frontend(ProcessCommandLine(args)); + } catch (Exception e) { + if ("version".equals(e.getMessage())) + System.exit(0); + + if (e.getMessage() == null || !e.getMessage().equals("Incorrect arguments")) + throw e; + System.exit(1); + } - if (ast == null) return; - // Happens if --lexerOutput, --parserOutput, or --semanticOutput is - // enabled. + if (ast == null) + return; + + // Happens if --lexerOutput, --parserOutput, or --semanticOutput is + // enabled. 
+ + fileNames = new HashSet(); + passes = new LinkedList(); + runSemanticAnalysis(ast); + + if (clean) + passes.add(new Clean(ast)); + else { + passes.add(new ClassifierCSE(ast)); + passes.add(new RevisionAnalysis(ast)); + passes.add(new TranslateToJava(ast)); + if (!compileOnly) + passes.add(new Train(ast, trainingOutput)); + } - fileNames = new HashSet(); - passes = new LinkedList(); - runSemanticAnalysis(ast); + for (Iterator I = passes.iterator(); I.hasNext() && !Pass.fatalError;) { + I.next().run(); + Pass.printErrorsAndWarnings(); + } - if (clean) passes.add(new Clean(ast)); - else { - passes.add(new ClassifierCSE(ast)); - passes.add(new RevisionAnalysis(ast)); - passes.add(new TranslateToJava(ast)); - if (!compileOnly) passes.add(new Train(ast, trainingOutput)); + if (Pass.fatalError) + System.exit(1); } - for (Iterator I = passes.iterator(); I.hasNext() && !Pass.fatalError; ) { - I.next().run(); - Pass.printErrorsAndWarnings(); - } - if (Pass.fatalError) System.exit(1); - } - - - /** - * Runs the semantic analysis pass on the specified AST, then prints errors - * and warnings if they exist, and finally sets the - * {@link #generatedSourceDirectory} and {@link #classDirectory} variables. - * - * @param ast The AST. 
- **/ - public static void runSemanticAnalysis(AST ast) { - new SemanticAnalysis(ast).run(); - Pass.printErrorsAndWarnings(); - - if (generatedSourceDirectory != null) { - if (AST.globalSymbolTable.getPackage().length() != 0) - generatedSourceDirectory += - File.separator + AST.globalSymbolTable.getPackage() - .replace('.', File.separatorChar); - } - else if (sourceDirectory != null) - generatedSourceDirectory = sourceDirectory; - - if (classPackageDirectory != null - && AST.globalSymbolTable.getPackage().length() != 0) - classDirectory = - classPackageDirectory + File.separator - + AST.globalSymbolTable.getPackage().replace('.', File.separatorChar); - else classDirectory = classPackageDirectory; - } - - - /** - * Sets all the internal flags that correspond to the specified command - * line parameters, and checks the command line for errors. - * - * @param args The user's command line arguments are found here. - * @exception Exception An exception is thrown if an error is found. - * @return A stream for the input source file. - **/ - private static FileInputStream ProcessCommandLine(String[] args) - throws Exception { - if (args.length < 1) { - PrintUsage(); - throw new Exception("Incorrect arguments"); + /** + * Runs the semantic analysis pass on the specified AST, then prints errors and warnings if they + * exist, and finally sets the {@link #generatedSourceDirectory} and {@link #classDirectory} + * variables. + * + * @param ast The AST. 
+ **/ + public static void runSemanticAnalysis(AST ast) { + new SemanticAnalysis(ast).run(); + Pass.printErrorsAndWarnings(); + + if (generatedSourceDirectory != null) { + if (AST.globalSymbolTable.getPackage().length() != 0) + generatedSourceDirectory += + File.separator + + AST.globalSymbolTable.getPackage().replace('.', + File.separatorChar); + } else if (sourceDirectory != null) + generatedSourceDirectory = sourceDirectory; + + if (classPackageDirectory != null && AST.globalSymbolTable.getPackage().length() != 0) + classDirectory = + classPackageDirectory + File.separator + + AST.globalSymbolTable.getPackage().replace('.', File.separatorChar); + else + classDirectory = classPackageDirectory; } - boolean printVersion = false; - int index; - for (index = 0; index < args.length - 1; ++index) { - if (args[index].equals("-t")) { - try { - trainingOutput = Integer.parseInt(args[++index]); - if (trainingOutput < 0) throw new Exception(); + + /** + * Sets all the internal flags that correspond to the specified command line parameters, and + * checks the command line for errors. + * + * @param args The user's command line arguments are found here. + * @exception Exception An exception is thrown if an error is found. + * @return A stream for the input source file. 
+ **/ + private static FileInputStream ProcessCommandLine(String[] args) throws Exception { + if (args.length < 1) { + PrintUsage(); + throw new Exception("Incorrect arguments"); } - catch (Exception e) { - PrintUsage(); - throw - new Exception("The -t argument must be followed by a " - + "non-negative integer."); + + boolean printVersion = false; + int index; + for (index = 0; index < args.length - 1; ++index) { + if (args[index].equals("-t")) { + try { + trainingOutput = Integer.parseInt(args[++index]); + if (trainingOutput < 0) + throw new Exception(); + } catch (Exception e) { + PrintUsage(); + throw new Exception("The -t argument must be followed by a " + + "non-negative integer."); + } + } else if (args[index].equals("-c")) + compileOnly = true; + else if (args[index].equals("-d")) + classPackageDirectory = args[++index]; + else if (args[index].equals("-j")) + javacArguments += " " + args[++index]; + else if (args[index].equals("-p")) + concurrentTraining = true; + else if (args[index].equals("-s")) + printSymbols = true; + else if (args[index].equals("-v")) + printVersion = true; + else if (args[index].equals("-w")) + warningsDisabled = true; + else if (args[index].equals("-x")) + clean = true; + else if (args[index].equals("-generatedsourcepath") || args[index].equals("-gsp")) + generatedSourceDirectory = args[++index]; + else if (args[index].equals("-sourcepath")) + sourcePath = args[++index]; + else if (args[index].equals("--parserDebug")) + parserDebug = true; + else if (args[index].equals("--lexerOutput")) + printTokens = true; + else if (args[index].equals("--parserOutput")) + printAST = true; + else if (args[index].equals("--revisionOutput")) + printRevisions = true; + else if (args[index].equals("--semanticOutput")) + printSemantic = true; + else { + PrintUsage(); + throw new Exception("Unrecognized parameter: " + args[index]); + } } - } - else if (args[index].equals("-c")) compileOnly = true; - else if (args[index].equals("-d")) - 
classPackageDirectory = args[++index]; - else if (args[index].equals("-j")) - javacArguments += " " + args[++index]; - else if (args[index].equals("-p")) concurrentTraining = true; - else if (args[index].equals("-s")) printSymbols = true; - else if (args[index].equals("-v")) printVersion = true; - else if (args[index].equals("-w")) warningsDisabled = true; - else if (args[index].equals("-x")) clean = true; - else if (args[index].equals("-generatedsourcepath") - || args[index].equals("-gsp")) - generatedSourceDirectory = args[++index]; - else if (args[index].equals("-sourcepath")) sourcePath = args[++index]; - else if (args[index].equals("--parserDebug")) parserDebug = true; - else if (args[index].equals("--lexerOutput")) printTokens = true; - else if (args[index].equals("--parserOutput")) printAST = true; - else if (args[index].equals("--revisionOutput")) printRevisions = true; - else if (args[index].equals("--semanticOutput")) printSemantic = true; - else { - PrintUsage(); - throw new Exception("Unrecognized parameter: " + args[index]); - } - } - if (printVersion || args.length == 1 && args[0].equals("-v")) { - System.out.println("Learning Based Java (LBJava)"); - System.out.println( - "Copyright (C) 2011, Nicholas D. Rizzolo and Dan Roth."); - System.out.println("Cognitive Computation Group"); - System.out.println("University of Illinois at Urbana-Champaign"); - System.out.println("http://cogcomp.cs.illinois.edu/"); - throw new Exception("version"); - } + if (printVersion || args.length == 1 && args[0].equals("-v")) { + System.out.println("Learning Based Java (LBJava)"); + System.out.println("Copyright (C) 2011, Nicholas D. 
Rizzolo and Dan Roth."); + System.out.println("Cognitive Computation Group"); + System.out.println("University of Illinois at Urbana-Champaign"); + System.out.println("http://cogcomp.cs.illinois.edu/"); + throw new Exception("version"); + } - if (javacArguments.indexOf("-d ") != -1 - || javacArguments.indexOf("-sourcepath ") != -1 - || javacArguments.indexOf("-classpath ") != -1 - || javacArguments.indexOf("-cp ") != -1) - throw new Exception( - "None of the options '-d', '-sourcepath', or '-classpath' should " - + "be specified inside LBJ's '-j' option. Instead, specify '-d' " - + "and '-sourcepath' directly as options to LBJ, and specify " - + "-classpath to the JVM when executing LBJ."); - - if (clean - && (compileOnly || printTokens || printAST || printSemantic - || trainingOutput != 0)) { - System.err.println( - "The -x flag supercedes all other flags except --parserDebug and " - + "the path related flags."); - compileOnly = printTokens = printAST = printSemantic = false; - trainingOutput = 0; - } + if (javacArguments.indexOf("-d ") != -1 || javacArguments.indexOf("-sourcepath ") != -1 + || javacArguments.indexOf("-classpath ") != -1 + || javacArguments.indexOf("-cp ") != -1) + throw new Exception("None of the options '-d', '-sourcepath', or '-classpath' should " + + "be specified inside LBJ's '-j' option. 
Instead, specify '-d' " + + "and '-sourcepath' directly as options to LBJ, and specify " + + "-classpath to the JVM when executing LBJ."); + + if (clean + && (compileOnly || printTokens || printAST || printSemantic || trainingOutput != 0)) { + System.err.println("The -x flag supercedes all other flags except --parserDebug and " + + "the path related flags."); + compileOnly = printTokens = printAST = printSemantic = false; + trainingOutput = 0; + } - if (index >= args.length){ - PrintUsage(); - throw new Exception("Error: No input filename specified."); - } + if (index >= args.length) { + PrintUsage(); + throw new Exception("Error: No input filename specified."); + } - String file = args[index]; - if (!(file.length() > 4 && file.endsWith(".lbj"))) { - PrintUsage(); - throw new Exception("Source file name must end with \".lbj\"."); - } + String file = args[index]; + if (!(file.length() > 4 && file.endsWith(".lbj"))) { + PrintUsage(); + throw new Exception("Source file name must end with \".lbj\"."); + } - int lastSlash = file.lastIndexOf(File.separatorChar); - if (lastSlash != -1) { - sourceDirectory = file.substring(0, lastSlash); - sourceFilename = file.substring(lastSlash + 1); - } - else sourceFilename = file; + int lastSlash = file.lastIndexOf(File.separatorChar); + if (lastSlash != -1) { + sourceDirectory = file.substring(0, lastSlash); + sourceFilename = file.substring(lastSlash + 1); + } else + sourceFilename = file; - sourceFileBase = sourceFilename.substring(0, sourceFilename.length() - 4); + sourceFileBase = sourceFilename.substring(0, sourceFilename.length() - 4); - FileInputStream instream; - try { instream = new FileInputStream(file); } - catch (FileNotFoundException e) { - System.err.println("Error: Unable to open input file " + file + ": " - + e.getMessage()); - throw e; - } + FileInputStream instream; + try { + instream = new FileInputStream(file); + } catch (FileNotFoundException e) { + System.err.println("Error: Unable to open input file " + file + 
": " + e.getMessage()); + throw e; + } - return instream; - } - - - /** - * This method scans and then parses the input. - * - * @param in A stream for the source input file. - * @exception Exception Thrown when any error occurs. - * @return The AST that results from parsing. - **/ - private static AST frontend(FileInputStream in) throws Exception { - Yylex scanner = new Yylex(in); - scanner.sourceFilename = sourceFilename; - if (printTokens) { - dumpTokenStream(scanner); - return null; + return instream; } - AST ast = null; - parser LBJavaparser = new parser(scanner); - if (parserDebug) ast = (AST) LBJavaparser.debug_parse().value; - else ast = (AST) LBJavaparser.parse().value; - if (ast == null) - throw new InternalError("Parser returned null abstract syntax tree."); + /** + * This method scans and then parses the input. + * + * @param in A stream for the source input file. + * @exception Exception Thrown when any error occurs. + * @return The AST that results from parsing. + **/ + private static AST frontend(FileInputStream in) throws Exception { + Yylex scanner = new Yylex(in); + scanner.sourceFilename = sourceFilename; + if (printTokens) { + dumpTokenStream(scanner); + return null; + } - AST result = ast; - if (printAST) { - new PrintAST(ast).run(); - result = null; - } + AST ast = null; + parser LBJavaparser = new parser(scanner); + if (parserDebug) + ast = (AST) LBJavaparser.debug_parse().value; + else + ast = (AST) LBJavaparser.parse().value; - if (printSemantic) { - runSemanticAnalysis(ast); - System.out.println("\nGlobal symbol table:"); - System.out.println("--------------------"); - ast.symbolTable.print(); + if (ast == null) + throw new InternalError("Parser returned null abstract syntax tree."); - System.out.println("\nDependor graph:"); - System.out.println("--------------------"); - SemanticAnalysis.printDependorGraph(); + AST result = ast; + if (printAST) { + new PrintAST(ast).run(); + result = null; + } - System.out.println("\nInvoked graph:"); - 
System.out.println("--------------------"); - SemanticAnalysis.printInvokedGraph(); - System.out.println(); + if (printSemantic) { + runSemanticAnalysis(ast); + System.out.println("\nGlobal symbol table:"); + System.out.println("--------------------"); + ast.symbolTable.print(); - result = null; - } + System.out.println("\nDependor graph:"); + System.out.println("--------------------"); + SemanticAnalysis.printDependorGraph(); + + System.out.println("\nInvoked graph:"); + System.out.println("--------------------"); + SemanticAnalysis.printInvokedGraph(); + System.out.println(); - if (printSymbols) { - new DeclarationNames(ast).run(); - result = null; + result = null; + } + + if (printSymbols) { + new DeclarationNames(ast).run(); + result = null; + } + + if (printRevisions) { + if (!printSemantic) + runSemanticAnalysis(ast); + new RevisionAnalysis(ast).run(); + System.out.println("\nRevision statuses:"); + System.out.println("--------------------"); + RevisionAnalysis.printRevisionStatus(); + result = null; + } + + return result; } - if (printRevisions) { - if (!printSemantic) runSemanticAnalysis(ast); - new RevisionAnalysis(ast).run(); - System.out.println("\nRevision statuses:"); - System.out.println("--------------------"); - RevisionAnalysis.printRevisionStatus(); - result = null; + + /** + * Dump the token stream produced by the given scanner to standard output. Returns when the + * end-of-file token is returned from the scanner, or if an exception is thrown by the scanner's + * next_token() method. + * + *

+ * Tokens are output as follows: the name of the token (as provided by the array + * symNames.nameTable[]), followed by a tab, followed by the token's semantic value (see + * TokenValue.toString()), followed by a tab, followed by the line and the byte offset in the + * file where the token began (the last two separated by a colon). + * + *

+ * Error tokens are printed specially; an error message is printed with only the line number + * listed. + * + * @param scanner A reference to the JLex generated scanner object. + */ + private static void dumpTokenStream(Yylex scanner) { + Symbol t; + TokenValue tValue; + + while (true) { + try { + t = scanner.next_token(); + } catch (IOException e) { + System.err.println(e); + return; + } + + tValue = (TokenValue) t.value; + switch (t.sym) { + case sym.EOF: + return; + case sym.error: + System.out.println("Scanner returned error token at " + tValue.line); + break; + default: + System.out.println(SymbolNames.nameTable[t.sym] + "\t" + tValue + "\t" + + (tValue.line + 1) + ":" + tValue.byteOffset); + } + } } - return result; - } - - - /** - * Dump the token stream produced by the given scanner to standard output. - * Returns when the end-of-file token is returned from the scanner, or if an - * exception is thrown by the scanner's next_token() method. - * - *

Tokens are output as follows: the name of the token (as provided by - * the array symNames.nameTable[]), followed by a tab, followed by the - * token's semantic value (see TokenValue.toString()), followed by a tab, - * followed by the line and the byte offset in the file where the token - * began (the last two separated by a colon). - * - *

Error tokens are printed specially; an error message is printed with - * only the line number listed. - * - * @param scanner A reference to the JLex generated scanner object. - */ - private static void dumpTokenStream(Yylex scanner) { - Symbol t; - TokenValue tValue; - - while (true) { - try { t = scanner.next_token(); } - catch (IOException e) { - System.err.println(e); - return; - } - - tValue = (TokenValue)t.value; - switch (t.sym) { - case sym.EOF: return; - case sym.error: - System.out.println("Scanner returned error token at " - + tValue.line); - break; - default: - System.out.println(SymbolNames.nameTable[t.sym] + "\t" + tValue - + "\t" + (tValue.line + 1) + ":" - + tValue.byteOffset); - } + + /** + * Print a usage message. This method is called when the user's command line cannot be + * interpreted. + **/ + public static void PrintUsage() { + System.err + .print("Usage: java edu.illinois.cs.cogcomp.lbjava.Main [options] \n" + + " where [options] is one or more of the following:\n" + + " -c Compile to Java only\n" + + " -d

Write generated class files to \n" + + " -j Send the specified arguments to javac\n" + // + " -p Train in parallel\n" + + " -s Print the names of all declarations and quit\n" + + " -t Enables default progress output during training\n" + + " -v Print the version number and quit\n" + + " -w Disables the output of warning messages\n" + + " -x Delete all files that would have been generated\n\n" + + + " -generatedsourcepath \n" + " -gsp \n" + + " Write generated Java source files to \n" + + " -sourcepath \n" + + " Search for Java source files in \n\n" + + + " --parserDebug Debug output for parse phase only\n" + + " --lexerOutput Print lexical token stream and quit\n" + + " --parserOutput Print the parsed AST and quit\n" + + " --revisionOutput Print revision analysis information and quit\n" + + " --semanticOutput Print semantic analysis information and quit\n"); } - } - - - /** - * Print a usage message. This method is called when the user's command - * line cannot be interpreted. - **/ - public static void PrintUsage() { - System.err.print( - "Usage: java edu.illinois.cs.cogcomp.lbjava.Main [options] \n" -+ " where [options] is one or more of the following:\n" -+ " -c Compile to Java only\n" -+ " -d Write generated class files to \n" -+ " -j Send the specified arguments to javac\n" -//+ " -p Train in parallel\n" -+ " -s Print the names of all declarations and quit\n" -+ " -t Enables default progress output during training\n" -+ " -v Print the version number and quit\n" -+ " -w Disables the output of warning messages\n" -+ " -x Delete all files that would have been generated\n\n" - -+ " -generatedsourcepath \n" -+ " -gsp \n" -+ " Write generated Java source files to \n" -+ " -sourcepath \n" -+ " Search for Java source files in \n\n" - -+ " --parserDebug Debug output for parse phase only\n" -+ " --lexerOutput Print lexical token stream and quit\n" -+ " --parserOutput Print the parsed AST and quit\n" -+ " --revisionOutput Print revision analysis information and quit\n" -+ 
" --semanticOutput Print semantic analysis information and quit\n"); - } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/Pass.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/Pass.java index 0c9968a0..9ca2fac6 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/Pass.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/Pass.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava; @@ -104,1267 +101,1261 @@ /** - * Abstract class from which all of LBJava's analysis and optimization passes - * are derived. - * - * @author Nick Rizzolo + * Abstract class from which all of LBJava's analysis and optimization passes are derived. + * + * @author Nick Rizzolo **/ -public abstract class Pass -{ - /** - * This flag gets set if an error occurs that should cause the LBJava - * compiler to stop executing after this pass finishes. - **/ - public static boolean fatalError = false; - /** - * Errors and warnings are collected here so they can be printed in order - * later. - **/ - private static Map> errorsAndWarnings = new HashMap>(); - /** - * Errors of the form "Cannot locate class ..." are only reported once; - * this set remembers which have already been reported. - **/ - private static HashSet missingClassErrors = new HashSet(); - /** - * A global flag controlling whether or not errors and warnings can - * currently be added. 
When this flag is false, the - * {@link #reportError(int,String)} and {@link #reportWarning(int,String)} - * methods become no-ops. - **/ - public static boolean canAddErrorsAndWarnings = true; - - - /** - * This method prints the given error message and sets the - * fatalError variable. - * - * @param line The line on which the error was recognized. - * @param message The error message. - **/ - public static void reportError(int line, String message) { - if (!canAddErrorsAndWarnings) return; - if (message.startsWith("Cannot locate class")) { - int start = message.indexOf('\'') + 1; - int end = message.lastIndexOf('\''); - String missingClass = message.substring(start, end); - if (missingClassErrors.contains(missingClass)) return; - missingClassErrors.add(missingClass); - } - - String error = "Error on line " + (line + 1) + ":\n"; - - String[] words = message.split("\\s+"); - for (int i = 0; i < words.length; ) { - String s = " "; - if (words[i].length() + 2 > 78) s += " " + words[i++]; - else - for (; i < words.length && s.length() + words[i].length() <= 78; ++i) - s += " " + words[i]; - error += s + "\n"; - } - - addErrorOrWarning(new Integer(line), error); - fatalError = true; - } - - - /** - * This method simply prints the given warning message. - * - * @param line The line on which the warning was recognized. - * @param message The warning message. - **/ - public static void reportWarning(int line, String message) { - if (!canAddErrorsAndWarnings || Main.clean || Main.warningsDisabled) - return; - - String warning = "Warning on line " + (line + 1) + ":\n"; - - String[] words = message.split("\\s+"); - for (int i = 0; i < words.length; ) { - String s = " "; - if (words[i].length() + 2 > 78) s += " " + words[i++]; - else - for (; i < words.length && s.length() + words[i].length() < 78; ++i) - s += " " + words[i]; - warning += s + "\n"; - } - - addErrorOrWarning(new Integer(line), warning); - } - - - /** Prints the errors and warnings to STDERR sorted by line. 
*/ - public static void printErrorsAndWarnings() { - Map.Entry[] entries = - (Map.Entry[]) errorsAndWarnings.entrySet().toArray(new Map.Entry[0]); - Arrays.sort(entries, - new Comparator() { - public int compare(Object o1, Object o2) { - Map.Entry e1 = (Map.Entry) o1; - Map.Entry e2 = (Map.Entry) o2; - return ((Integer) e1.getKey()).compareTo((Integer) e2.getKey()); - } - }); - - for (int i = 0; i < entries.length; ++i) - for (Iterator I = ((LinkedList) entries[i].getValue()).iterator(); - I.hasNext(); ) - System.err.print(I.next()); - - errorsAndWarnings.clear(); - } - - - /** - * Adds a new entry into the {@link #errorsAndWarnings} multi-map. - * - * @param key The key. - * @param value The value to be associated with key, which - * will be added to a list of values associated with - * key. - **/ - private static void addErrorOrWarning(int key, String value) { - List values = errorsAndWarnings.get(key); - if (values == null) - errorsAndWarnings.put(key, values = new LinkedList()); - values.add(value); - } - - - /** - * A reference to the root node of the AST over which this pass will - * operate. - **/ - protected ASTNode root; - /** - * Stores the same thing as root, but this variable is - * declared as AST. - **/ - protected AST ast; - - - /** Default constructor. */ - public Pass() { root = ast = null; } - - /** - * Initializing constructor. This constructor initializes - * root. - * - * @param r The reference with which root will be - * initialized. - **/ - public Pass(ASTNode r) { setRoot(r); } - - - /** - * Sets the root member variable. - * - * @param r The reference with which root will be set. - **/ - public void setRoot(ASTNode r) { - root = r; - if (r instanceof AST) ast = (AST) r; - else ast = null; - } - - - /** - * The main interface: call this method to apply the pass to the AST. This - * method simply calls the recursive helper method. It uses the Visitor - * pattern to ensure that the correct recursive helper method is called. 
- * That way, the user can define a run() method with any - * ASTNode argument type in a class that extends - * Pass, and those methods will be called at the appropriate - * times during the traversal of the AST. - **/ - public void run() { root.runPass(this); } - - - /** - * This method supports derived passes that continue to descend down the - * AST after operating on a particular type of node. - * - * @param node The node on whose children the pass should be run. - **/ - public void runOnChildren(ASTNode node) { - for (ASTNodeIterator I = node.iterator(); I.hasNext(); ) { - ASTNode n = I.next(); - if (n != null) n.runPass(this); - } - } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(AST node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(PackageDeclaration node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. 
If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(ImportDeclaration node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(BinaryExpression node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(InstanceCreationExpression node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(ParameterSet node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. 
- * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(InstanceofExpression node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(ArrayCreationExpression node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(ArrayInitializer node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(Conditional node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). 
Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(LearningClassifierExpression node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(CastExpression node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(IncrementExpression node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. 
- **/ - public void run(Assignment node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(Constant node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(UnaryExpression node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(Name node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. 
- * - * @param node A reference to the node currently being processed. - **/ - public void run(FieldAccess node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(SubscriptVariable node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(Argument node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(Operator node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. 
If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(NameList node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(ConstantList node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(StatementList node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(ImportList node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. 
- * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(DeclarationList node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(ExpressionList node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(ClassifierType node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(ConstraintType node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). 
Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(InferenceType node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(NormalizerType node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(ReferenceType node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. 
- **/ - public void run(ArrayType node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(PrimitiveType node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(ClassifierReturnType node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(ClassifierExpressionList node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. 
- * - * @param node A reference to the node currently being processed. - **/ - public void run(ClassifierAssignment node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(ClassifierName node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(ClassifierCastExpression node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(Conjunction node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. 
If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(CodedClassifier node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(CompositeGenerator node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(InferenceInvocation node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(VariableDeclaration node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. 
- * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(EmptyStatement node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(LabeledStatement node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(IfStatement node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(SwitchStatement node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). 
Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(SwitchBlock node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(SwitchGroupList node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(SwitchGroup node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. 
- **/ - public void run(SwitchLabelList node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(SwitchLabel node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(DoStatement node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(WhileStatement node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. 
- * - * @param node A reference to the node currently being processed. - **/ - public void run(ForStatement node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(ExpressionStatement node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(ContinueStatement node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(ReturnStatement node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. 
If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(SenseStatement node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(ThrowStatement node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(SynchronizedStatement node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(TryStatement node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. 
- * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(CatchList node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(Block node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(CatchClause node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(AssertStatement node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). 
Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(BreakStatement node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(MethodInvocation node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(AtLeastQuantifierExpression node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. 
- **/ - public void run(AtMostQuantifierExpression node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(BinaryConstraintExpression node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(ConstraintDeclaration node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(ConstraintEqualityExpression node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. 
If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(ConstraintInvocation node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(ConstraintStatementExpression node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(ExistentialQuantifierExpression node) { - runOnChildren(node); - } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(InferenceDeclaration node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. 
- * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(InferenceDeclaration.HeadFinder node) { - runOnChildren(node); - } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(InferenceDeclaration.NormalizerDeclaration node) { - runOnChildren(node); - } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. - **/ - public void run(NegatedConstraintExpression node) { runOnChildren(node); } - - - /** - * One of the recursive "helper" methods for run(). Simply in - * charge of delegating its work to the children of the node passed to it. - * Derived Passes will override this method when there is - * something useful to be done for the given node in that pass. If there - * isn't, it won't be overriden, and execution will continue to traverse - * the AST. - * - * @param node A reference to the node currently being processed. 
- **/ - public void run(UniversalQuantifierExpression node) { runOnChildren(node); } +public abstract class Pass { + /** + * This flag gets set if an error occurs that should cause the LBJava compiler to stop executing + * after this pass finishes. + **/ + public static boolean fatalError = false; + /** + * Errors and warnings are collected here so they can be printed in order later. + **/ + private static Map> errorsAndWarnings = + new HashMap>(); + /** + * Errors of the form "Cannot locate class ..." are only reported once; this set remembers which + * have already been reported. + **/ + private static HashSet missingClassErrors = new HashSet(); + /** + * A global flag controlling whether or not errors and warnings can currently be added. When + * this flag is false, the {@link #reportError(int,String)} and + * {@link #reportWarning(int,String)} methods become no-ops. + **/ + public static boolean canAddErrorsAndWarnings = true; + + + /** + * This method prints the given error message and sets the fatalError variable. + * + * @param line The line on which the error was recognized. + * @param message The error message. 
+ **/ + public static void reportError(int line, String message) { + if (!canAddErrorsAndWarnings) + return; + if (message.startsWith("Cannot locate class")) { + int start = message.indexOf('\'') + 1; + int end = message.lastIndexOf('\''); + String missingClass = message.substring(start, end); + if (missingClassErrors.contains(missingClass)) + return; + missingClassErrors.add(missingClass); + } + + String error = "Error on line " + (line + 1) + ":\n"; + + String[] words = message.split("\\s+"); + for (int i = 0; i < words.length;) { + String s = " "; + if (words[i].length() + 2 > 78) + s += " " + words[i++]; + else + for (; i < words.length && s.length() + words[i].length() <= 78; ++i) + s += " " + words[i]; + error += s + "\n"; + } + + addErrorOrWarning(new Integer(line), error); + fatalError = true; + } + + + /** + * This method simply prints the given warning message. + * + * @param line The line on which the warning was recognized. + * @param message The warning message. + **/ + public static void reportWarning(int line, String message) { + if (!canAddErrorsAndWarnings || Main.clean || Main.warningsDisabled) + return; + + String warning = "Warning on line " + (line + 1) + ":\n"; + + String[] words = message.split("\\s+"); + for (int i = 0; i < words.length;) { + String s = " "; + if (words[i].length() + 2 > 78) + s += " " + words[i++]; + else + for (; i < words.length && s.length() + words[i].length() < 78; ++i) + s += " " + words[i]; + warning += s + "\n"; + } + + addErrorOrWarning(new Integer(line), warning); + } + + + /** Prints the errors and warnings to STDERR sorted by line. 
*/ + public static void printErrorsAndWarnings() { + Map.Entry[] entries = (Map.Entry[]) errorsAndWarnings.entrySet().toArray(new Map.Entry[0]); + Arrays.sort(entries, new Comparator() { + public int compare(Object o1, Object o2) { + Map.Entry e1 = (Map.Entry) o1; + Map.Entry e2 = (Map.Entry) o2; + return ((Integer) e1.getKey()).compareTo((Integer) e2.getKey()); + } + }); + + for (int i = 0; i < entries.length; ++i) + for (Iterator I = ((LinkedList) entries[i].getValue()).iterator(); I.hasNext();) + System.err.print(I.next()); + + errorsAndWarnings.clear(); + } + + + /** + * Adds a new entry into the {@link #errorsAndWarnings} multi-map. + * + * @param key The key. + * @param value The value to be associated with key, which will be added to a list + * of values associated with key. + **/ + private static void addErrorOrWarning(int key, String value) { + List values = errorsAndWarnings.get(key); + if (values == null) + errorsAndWarnings.put(key, values = new LinkedList()); + values.add(value); + } + + + /** + * A reference to the root node of the AST over which this pass will operate. + **/ + protected ASTNode root; + /** + * Stores the same thing as root, but this variable is declared as AST + * . + **/ + protected AST ast; + + + /** Default constructor. */ + public Pass() { + root = ast = null; + } + + /** + * Initializing constructor. This constructor initializes root. + * + * @param r The reference with which root will be initialized. + **/ + public Pass(ASTNode r) { + setRoot(r); + } + + + /** + * Sets the root member variable. + * + * @param r The reference with which root will be set. + **/ + public void setRoot(ASTNode r) { + root = r; + if (r instanceof AST) + ast = (AST) r; + else + ast = null; + } + + + /** + * The main interface: call this method to apply the pass to the AST. This method simply calls + * the recursive helper method. It uses the Visitor pattern to ensure that the correct recursive + * helper method is called. 
That way, the user can define a run() method with any + * ASTNode argument type in a class that extends Pass, and those + * methods will be called at the appropriate times during the traversal of the AST. + **/ + public void run() { + root.runPass(this); + } + + + /** + * This method supports derived passes that continue to descend down the AST after operating on + * a particular type of node. + * + * @param node The node on whose children the pass should be run. + **/ + public void runOnChildren(ASTNode node) { + for (ASTNodeIterator I = node.iterator(); I.hasNext();) { + ASTNode n = I.next(); + if (n != null) + n.runPass(this); + } + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(AST node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(PackageDeclaration node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. 
If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(ImportDeclaration node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(BinaryExpression node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(InstanceCreationExpression node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(ParameterSet node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. 
Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(InstanceofExpression node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(ArrayCreationExpression node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(ArrayInitializer node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(Conditional node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). 
Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(LearningClassifierExpression node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(CastExpression node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(IncrementExpression node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. 
+ **/ + public void run(Assignment node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(Constant node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(UnaryExpression node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(Name node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. 
+ * + * @param node A reference to the node currently being processed. + **/ + public void run(FieldAccess node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(SubscriptVariable node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(Argument node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(Operator node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. 
If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(NameList node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(ConstantList node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(StatementList node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(ImportList node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. 
Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(DeclarationList node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(ExpressionList node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(ClassifierType node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(ConstraintType node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). 
Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(InferenceType node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(NormalizerType node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(ReferenceType node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. 
+ **/ + public void run(ArrayType node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(PrimitiveType node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(ClassifierReturnType node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(ClassifierExpressionList node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. 
+ * + * @param node A reference to the node currently being processed. + **/ + public void run(ClassifierAssignment node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(ClassifierName node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(ClassifierCastExpression node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(Conjunction node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. 
If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(CodedClassifier node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(CompositeGenerator node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(InferenceInvocation node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(VariableDeclaration node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. 
Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(EmptyStatement node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(LabeledStatement node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(IfStatement node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(SwitchStatement node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). 
Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(SwitchBlock node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(SwitchGroupList node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(SwitchGroup node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. 
+ **/ + public void run(SwitchLabelList node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(SwitchLabel node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(DoStatement node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(WhileStatement node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. 
+ * + * @param node A reference to the node currently being processed. + **/ + public void run(ForStatement node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(ExpressionStatement node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(ContinueStatement node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(ReturnStatement node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. 
If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(SenseStatement node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(ThrowStatement node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(SynchronizedStatement node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(TryStatement node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. 
Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(CatchList node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(Block node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(CatchClause node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(AssertStatement node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). 
Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(BreakStatement node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(MethodInvocation node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(AtLeastQuantifierExpression node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. 
+ **/ + public void run(AtMostQuantifierExpression node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(BinaryConstraintExpression node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(ConstraintDeclaration node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(ConstraintEqualityExpression node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. 
+ * + * @param node A reference to the node currently being processed. + **/ + public void run(ConstraintInvocation node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(ConstraintStatementExpression node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(ExistentialQuantifierExpression node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(InferenceDeclaration node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. 
If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(InferenceDeclaration.HeadFinder node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(InferenceDeclaration.NormalizerDeclaration node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. + **/ + public void run(NegatedConstraintExpression node) { + runOnChildren(node); + } + + + /** + * One of the recursive "helper" methods for run(). Simply in charge of delegating + * its work to the children of the node passed to it. Derived Passes will override + * this method when there is something useful to be done for the given node in that pass. If + * there isn't, it won't be overriden, and execution will continue to traverse the AST. + * + * @param node A reference to the node currently being processed. 
+ **/ + public void run(UniversalQuantifierExpression node) { + runOnChildren(node); + } } diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/PrintAST.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/PrintAST.java index 8d65ae9c..17787a05 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/PrintAST.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/PrintAST.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava; @@ -92,892 +89,928 @@ /** - * The PrintAST pass simply prints a text representation of the - * parsed AST to standard output. - * - * @author Nick Rizzolo + * The PrintAST pass simply prints a text representation of the parsed AST to standard + * output. + * + * @author Nick Rizzolo **/ -public class PrintAST extends Pass -{ - /** The current amount of indentation to print. */ - private int indent; - - - /** - * Instantiates a pass that runs on an entire AST. - * - * @param ast The program to run this pass on. - **/ - public PrintAST(AST ast) { super(ast); } - - - /** - * Prints the given text preceded by the amount of indentation called for - * in the indent member variable and followed by a new line. - * - * @param text The text to print. 
- **/ - public void indentedPrintln(String text) { - for (int i = 0; i < indent; ++i) System.out.print(" "); - System.out.println(text); - } - - - /** - * Prints the given text preceeded by the amount of indentation called for - * in the indent member variable and followed by the line - * number and byte offset information for the specified - * ASTNode and a new line. - * - * @param text The text to print. - * @param node The node for which to print line and byte offset - * information. - **/ - public void indentedPrintln(String text, ASTNode node) { - for (int i = 0; i < indent; ++i) System.out.print(" "); - System.out.println(text + " (" + (node.line + 1) + ", " + node.byteOffset - + ")"); - } - - - /** - * The default routine for printing a non-terminal AST node is to first - * print the name of the AST node's class with line and byte offset - * information, and then recursively print its children at indentation - * level one higher. - * - * @param text The text to print. - * @param node The node for which to print line and byte offset - * information. - **/ - public void nonTerminal(String text, ASTNode node) { - indentedPrintln(text, node); - ++indent; - runOnChildren(node); - --indent; - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(AST node) { - indent = 0; - nonTerminal("AST", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(PackageDeclaration node) { - nonTerminal("PackageDeclaration", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(ImportDeclaration node) { - nonTerminal("ImportDeclaration", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param n The node to process. 
- **/ - public void run(Name n) { - indentedPrintln("Name", n); - ++indent; - indentedPrintln("name: " + n); - indentedPrintln("dimensions: " + n.dimensions); - --indent; - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(BinaryExpression node) { - nonTerminal("BinaryExpression", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(InstanceCreationExpression node) { - nonTerminal("InstanceCreationExpression", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(InstanceofExpression node) { - nonTerminal("InstanceofExpression", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(ArrayCreationExpression node) { - indentedPrintln("ArrayCreationExpression", node); - ++indent; - indentedPrintln("dimensions: " + node.dimensions); - runOnChildren(node); - --indent; - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(ArrayInitializer node) { - nonTerminal("ArrayInitializer", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(Conditional node) { nonTerminal("Conditional", node); } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(LearningClassifierExpression node) { - nonTerminal("LearningClassifierExpression", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(CastExpression node) { - nonTerminal("CastExpression", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. 
- * - * @param node The node to process. - **/ - public void run(IncrementExpression node) { - nonTerminal("IncrementExpression", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(Assignment node) { nonTerminal("Assignment", node); } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(Constant node) { - indentedPrintln("Constant", node); - ++indent; - indentedPrintln("value: " + node.value); - --indent; - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(UnaryExpression node) { - nonTerminal("UnaryExpression", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(ParameterSet node) { - nonTerminal("ParameterSet", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(FieldAccess node) { - indentedPrintln("FieldAccess", node); - ++indent; - indentedPrintln("name: " + node.name); - runOnChildren(node); - --indent; - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(SubscriptVariable node) { - nonTerminal("SubscriptVariable", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(Argument node) { - indentedPrintln("Argument", node); - ++indent; - if (node.getFinal()) indentedPrintln("final"); - indentedPrintln("name: " + node.getName()); - runOnChildren(node); - --indent; - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. 
- **/ - public void run(Operator node) { indentedPrintln(node.toString(), node); } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(NameList node) { nonTerminal("NameList", node); } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(ConstantList node) { nonTerminal("ConstantList", node); } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(StatementList node) { nonTerminal("StatementList", node); } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(ExpressionList node) { - nonTerminal("ExpressionList", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(ClassifierType node) { - nonTerminal("ClassifierType", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(ReferenceType node) { nonTerminal("ReferenceType", node); } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(ArrayType node) { nonTerminal("ArrayType", node); } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(PrimitiveType node) { - indentedPrintln("PrimitiveType", node); - ++indent; - indentedPrintln("type: " + node); - --indent; - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. 
- **/ - public void run(ClassifierReturnType node) { - indentedPrintln("ClassifierReturnType", node); - ++indent; - indentedPrintln("type: " + node.getTypeName()); - runOnChildren(node); - --indent; - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(ClassifierAssignment node) { - indentedPrintln("ClassifierAssignment", node); - ++indent; - indentedPrintln("comment: " + node.comment); - indentedPrintln("name: " + node.name); - runOnChildren(node); - --indent; - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(VariableDeclaration node) { - indentedPrintln("VariableDeclaration", node); - ++indent; - if (node.isFinal) indentedPrintln("final"); - node.type.runPass(this); - node.names.runPass(this); - - indentedPrintln("ExpressionList", node.initializers); - ++indent; - ExpressionList.ExpressionListIterator I = - node.initializers.listIterator(); - while (I.hasNext()) { - Expression i = I.nextItem(); - if (i == null) indentedPrintln("null"); - else i.runPass(this); - } - - indent -= 2; - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(EmptyStatement node) { - nonTerminal("EmptyStatement", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(LabeledStatement node) { - indentedPrintln("LabeledStatement", node); - ++indent; - indentedPrintln("label: " + node.label); - runOnChildren(node); - --indent; - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(IfStatement node) { nonTerminal("IfStatement", node); } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. 
- **/ - public void run(SwitchStatement node) { - nonTerminal("SwitchStatement", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(SwitchBlock node) { nonTerminal("SwitchBlock", node); } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(SwitchGroupList node) { - nonTerminal("SwitchGroupList", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(SwitchGroup node) { nonTerminal("SwitchGroup", node); } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(SwitchLabelList node) { - nonTerminal("SwitchLabelList", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(SwitchLabel node) { nonTerminal("SwitchLabel", node); } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(DoStatement node) { nonTerminal("DoStatement", node); } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(WhileStatement node) { - nonTerminal("WhileStatement", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(ForStatement node) { nonTerminal("ForStatement", node); } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(ExpressionStatement node) { - nonTerminal("ExpressionStatement", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. 
- **/ - public void run(ContinueStatement node) { - indentedPrintln("ContinueStatement", node); - ++indent; - indentedPrintln("label: " + node.label); - --indent; - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(ReturnStatement node) { - nonTerminal("ReturnStatement", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(SenseStatement node) { - nonTerminal("SenseStatement", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(ThrowStatement node) { - nonTerminal("ThrowStatement", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(SynchronizedStatement node) { - nonTerminal("SynchronizedStatement", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(TryStatement node) { nonTerminal("TryStatement", node); } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(CatchList node) { nonTerminal("CatchList", node); } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(Block node) { nonTerminal("Block", node); } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(CatchClause node) { nonTerminal("CatchClause", node); } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(AssertStatement node) { - nonTerminal("AssertStatement", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. 
- * - * @param node The node to process. - **/ - public void run(BreakStatement node) { - indentedPrintln("BreakStatement", node); - ++indent; - indentedPrintln("label: " + node.label); - --indent; - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(MethodInvocation node) { - nonTerminal("MethodInvocation", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(DeclarationList node) { - nonTerminal("DeclarationList", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(ClassifierCastExpression node) { - nonTerminal("ClassifierCastExpression", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(ClassifierExpressionList node) { - nonTerminal("ClassifierExpressionList", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(ClassifierName node) { - nonTerminal("ClassifierName", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(CodedClassifier node) { - nonTerminal("CodedClassifier", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(CompositeGenerator node) { - nonTerminal("CompositeGenerator", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(Conjunction node) { - nonTerminal("Conjunction", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. 
- **/ - public void run(ImportList node) { - nonTerminal("ImportList", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(AtLeastQuantifierExpression node) { - nonTerminal("AtLeastQuantifierExpression", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(AtMostQuantifierExpression node) { - nonTerminal("AtMostQuantifierExpression", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(BinaryConstraintExpression node) { - nonTerminal("BinaryConstraintExpression", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(ConstraintDeclaration node) { - indentedPrintln("ConstraintDeclaration", node); - ++indent; - indentedPrintln("comment: " + node.comment); - indentedPrintln("name: " + node.name); - runOnChildren(node); - --indent; - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(ConstraintEqualityExpression node) { - nonTerminal("ConstraintEqualityExpression", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(ConstraintInvocation node) { - nonTerminal("ConstraintInvcation", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(ConstraintStatementExpression node) { - nonTerminal("ConstraintStatementExpression", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. 
- **/ - public void run(ExistentialQuantifierExpression node) { - nonTerminal("ExistentialQuantifierExpression", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(InferenceDeclaration node) { - indentedPrintln("InferenceDeclaration", node); - ++indent; - indentedPrintln("comment: " + node.comment); - indentedPrintln("name: " + node.name); - runOnChildren(node); - --indent; - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(InferenceDeclaration.HeadFinder node) { - nonTerminal("InferenceDeclaration.HeadFinder", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(InferenceDeclaration.NormalizerDeclaration node) { - nonTerminal("InferenceDeclaration.NormalizerDeclaration", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(InferenceInvocation node) { - nonTerminal("InferenceInvocation", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(NegatedConstraintExpression node) { - nonTerminal("NegatedConstraintExpression", node); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param node The node to process. - **/ - public void run(UniversalQuantifierExpression node) { - nonTerminal("UniversalQuantifierExpression", node); - } -} +public class PrintAST extends Pass { + /** The current amount of indentation to print. */ + private int indent; + + + /** + * Instantiates a pass that runs on an entire AST. + * + * @param ast The program to run this pass on. 
+ **/ + public PrintAST(AST ast) { + super(ast); + } + + + /** + * Prints the given text preceded by the amount of indentation called for in the + * indent member variable and followed by a new line. + * + * @param text The text to print. + **/ + public void indentedPrintln(String text) { + for (int i = 0; i < indent; ++i) + System.out.print(" "); + System.out.println(text); + } + + + /** + * Prints the given text preceeded by the amount of indentation called for in the + * indent member variable and followed by the line number and byte offset + * information for the specified ASTNode and a new line. + * + * @param text The text to print. + * @param node The node for which to print line and byte offset information. + **/ + public void indentedPrintln(String text, ASTNode node) { + for (int i = 0; i < indent; ++i) + System.out.print(" "); + System.out.println(text + " (" + (node.line + 1) + ", " + node.byteOffset + ")"); + } + + + /** + * The default routine for printing a non-terminal AST node is to first print the name of the + * AST node's class with line and byte offset information, and then recursively print its + * children at indentation level one higher. + * + * @param text The text to print. + * @param node The node for which to print line and byte offset information. + **/ + public void nonTerminal(String text, ASTNode node) { + indentedPrintln(text, node); + ++indent; + runOnChildren(node); + --indent; + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(AST node) { + indent = 0; + nonTerminal("AST", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(PackageDeclaration node) { + nonTerminal("PackageDeclaration", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. 
+ **/ + public void run(ImportDeclaration node) { + nonTerminal("ImportDeclaration", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param n The node to process. + **/ + public void run(Name n) { + indentedPrintln("Name", n); + ++indent; + indentedPrintln("name: " + n); + indentedPrintln("dimensions: " + n.dimensions); + --indent; + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(BinaryExpression node) { + nonTerminal("BinaryExpression", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(InstanceCreationExpression node) { + nonTerminal("InstanceCreationExpression", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(InstanceofExpression node) { + nonTerminal("InstanceofExpression", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(ArrayCreationExpression node) { + indentedPrintln("ArrayCreationExpression", node); + ++indent; + indentedPrintln("dimensions: " + node.dimensions); + runOnChildren(node); + --indent; + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(ArrayInitializer node) { + nonTerminal("ArrayInitializer", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(Conditional node) { + nonTerminal("Conditional", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(LearningClassifierExpression node) { + nonTerminal("LearningClassifierExpression", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. 
+ * + * @param node The node to process. + **/ + public void run(CastExpression node) { + nonTerminal("CastExpression", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(IncrementExpression node) { + nonTerminal("IncrementExpression", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(Assignment node) { + nonTerminal("Assignment", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(Constant node) { + indentedPrintln("Constant", node); + ++indent; + indentedPrintln("value: " + node.value); + --indent; + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(UnaryExpression node) { + nonTerminal("UnaryExpression", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(ParameterSet node) { + nonTerminal("ParameterSet", node); + } + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(FieldAccess node) { + indentedPrintln("FieldAccess", node); + ++indent; + indentedPrintln("name: " + node.name); + runOnChildren(node); + --indent; + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(SubscriptVariable node) { + nonTerminal("SubscriptVariable", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. 
+ **/ + public void run(Argument node) { + indentedPrintln("Argument", node); + ++indent; + if (node.getFinal()) + indentedPrintln("final"); + indentedPrintln("name: " + node.getName()); + runOnChildren(node); + --indent; + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(Operator node) { + indentedPrintln(node.toString(), node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(NameList node) { + nonTerminal("NameList", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(ConstantList node) { + nonTerminal("ConstantList", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(StatementList node) { + nonTerminal("StatementList", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(ExpressionList node) { + nonTerminal("ExpressionList", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(ClassifierType node) { + nonTerminal("ClassifierType", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(ReferenceType node) { + nonTerminal("ReferenceType", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(ArrayType node) { + nonTerminal("ArrayType", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. 
+ **/ + public void run(PrimitiveType node) { + indentedPrintln("PrimitiveType", node); + ++indent; + indentedPrintln("type: " + node); + --indent; + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(ClassifierReturnType node) { + indentedPrintln("ClassifierReturnType", node); + ++indent; + indentedPrintln("type: " + node.getTypeName()); + runOnChildren(node); + --indent; + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(ClassifierAssignment node) { + indentedPrintln("ClassifierAssignment", node); + ++indent; + indentedPrintln("comment: " + node.comment); + indentedPrintln("name: " + node.name); + runOnChildren(node); + --indent; + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(VariableDeclaration node) { + indentedPrintln("VariableDeclaration", node); + ++indent; + if (node.isFinal) + indentedPrintln("final"); + node.type.runPass(this); + node.names.runPass(this); + + indentedPrintln("ExpressionList", node.initializers); + ++indent; + ExpressionList.ExpressionListIterator I = node.initializers.listIterator(); + while (I.hasNext()) { + Expression i = I.nextItem(); + if (i == null) + indentedPrintln("null"); + else + i.runPass(this); + } + + indent -= 2; + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(EmptyStatement node) { + nonTerminal("EmptyStatement", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(LabeledStatement node) { + indentedPrintln("LabeledStatement", node); + ++indent; + indentedPrintln("label: " + node.label); + runOnChildren(node); + --indent; + } + + + /** + * Runs this pass on all nodes of the indicated type. 
+ * + * @param node The node to process. + **/ + public void run(IfStatement node) { + nonTerminal("IfStatement", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(SwitchStatement node) { + nonTerminal("SwitchStatement", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(SwitchBlock node) { + nonTerminal("SwitchBlock", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(SwitchGroupList node) { + nonTerminal("SwitchGroupList", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(SwitchGroup node) { + nonTerminal("SwitchGroup", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(SwitchLabelList node) { + nonTerminal("SwitchLabelList", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(SwitchLabel node) { + nonTerminal("SwitchLabel", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(DoStatement node) { + nonTerminal("DoStatement", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(WhileStatement node) { + nonTerminal("WhileStatement", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(ForStatement node) { + nonTerminal("ForStatement", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. 
+ **/ + public void run(ExpressionStatement node) { + nonTerminal("ExpressionStatement", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(ContinueStatement node) { + indentedPrintln("ContinueStatement", node); + ++indent; + indentedPrintln("label: " + node.label); + --indent; + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(ReturnStatement node) { + nonTerminal("ReturnStatement", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(SenseStatement node) { + nonTerminal("SenseStatement", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(ThrowStatement node) { + nonTerminal("ThrowStatement", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(SynchronizedStatement node) { + nonTerminal("SynchronizedStatement", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(TryStatement node) { + nonTerminal("TryStatement", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(CatchList node) { + nonTerminal("CatchList", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(Block node) { + nonTerminal("Block", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(CatchClause node) { + nonTerminal("CatchClause", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. 
+ * + * @param node The node to process. + **/ + public void run(AssertStatement node) { + nonTerminal("AssertStatement", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(BreakStatement node) { + indentedPrintln("BreakStatement", node); + ++indent; + indentedPrintln("label: " + node.label); + --indent; + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(MethodInvocation node) { + nonTerminal("MethodInvocation", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(DeclarationList node) { + nonTerminal("DeclarationList", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(ClassifierCastExpression node) { + nonTerminal("ClassifierCastExpression", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(ClassifierExpressionList node) { + nonTerminal("ClassifierExpressionList", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(ClassifierName node) { + nonTerminal("ClassifierName", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(CodedClassifier node) { + nonTerminal("CodedClassifier", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(CompositeGenerator node) { + nonTerminal("CompositeGenerator", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. 
+ **/ + public void run(Conjunction node) { + nonTerminal("Conjunction", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(ImportList node) { + nonTerminal("ImportList", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(AtLeastQuantifierExpression node) { + nonTerminal("AtLeastQuantifierExpression", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(AtMostQuantifierExpression node) { + nonTerminal("AtMostQuantifierExpression", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(BinaryConstraintExpression node) { + nonTerminal("BinaryConstraintExpression", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(ConstraintDeclaration node) { + indentedPrintln("ConstraintDeclaration", node); + ++indent; + indentedPrintln("comment: " + node.comment); + indentedPrintln("name: " + node.name); + runOnChildren(node); + --indent; + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(ConstraintEqualityExpression node) { + nonTerminal("ConstraintEqualityExpression", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(ConstraintInvocation node) { + nonTerminal("ConstraintInvocation", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. 
+ **/ + public void run(ConstraintStatementExpression node) { + nonTerminal("ConstraintStatementExpression", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(ExistentialQuantifierExpression node) { + nonTerminal("ExistentialQuantifierExpression", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(InferenceDeclaration node) { + indentedPrintln("InferenceDeclaration", node); + ++indent; + indentedPrintln("comment: " + node.comment); + indentedPrintln("name: " + node.name); + runOnChildren(node); + --indent; + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(InferenceDeclaration.HeadFinder node) { + nonTerminal("InferenceDeclaration.HeadFinder", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(InferenceDeclaration.NormalizerDeclaration node) { + nonTerminal("InferenceDeclaration.NormalizerDeclaration", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(InferenceInvocation node) { + nonTerminal("InferenceInvocation", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. + **/ + public void run(NegatedConstraintExpression node) { + nonTerminal("NegatedConstraintExpression", node); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param node The node to process. 
+ **/ + public void run(UniversalQuantifierExpression node) { + nonTerminal("UniversalQuantifierExpression", node); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/RevisionAnalysis.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/RevisionAnalysis.java index dbc4b5fc..7dce64d8 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/RevisionAnalysis.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/RevisionAnalysis.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava; @@ -44,572 +41,531 @@ /** * To be run after SemanticAnalysis, this pass determines which - * CodeGenerators need to have their code generated and which - * classifiers need to be trained based on the revisions made to the LBJava - * source file. + * CodeGenerators need to have their code generated and which classifiers need to be + * trained based on the revisions made to the LBJava source file. * - *

A hard coded classifier, a constraint, or an inference named - * foo needs its code regenerated iff at least one of the - * following is true: + *

+ * A hard coded classifier, a constraint, or an inference named foo needs its code + * regenerated iff at least one of the following is true: *

    - *
  • The file foo.java does not exist. - *
  • - * Using the comments at the top of foo.java, it is - * determined that the code specifying foo has been revised. + *
  • The file foo.java does not exist. + *
  • + * Using the comments at the top of foo.java, it is determined that the code specifying + * foo has been revised. *
- * If the comments at the top of foo.java do not exist, or if - * they don't have the expected form, the file will not be overwritten and an - * error will be generated. + * If the comments at the top of foo.java do not exist, or if they don't have the + * expected form, the file will not be overwritten and an error will be generated. * - *

All CodeGenerators are also labeled as either "affected" - * (by a revision) or "unaffected". An CodeGenerator named - * foo is labeled "affected" iff at least one of the following - * is true: + *

+ * All CodeGenerators are also labeled as either "affected" (by a revision) or + * "unaffected". An CodeGenerator named foo is labeled "affected" iff at + * least one of the following is true: *

    - *
  • - * foo is a hard coded classifier, a constraint, or an - * inference and either: - *
      - *
    • its code needed to be regenerated as described above or - *
    • it invokes another "affected" CodeGenerator. - *
    - *
  • - * foo is a learning classifier and at least one of its - * label or extractor classifiers is "affected". + *
  • + * foo is a hard coded classifier, a constraint, or an inference and either: + *
      + *
    • its code needed to be regenerated as described above or + *
    • it invokes another "affected" CodeGenerator. + *
    + *
  • + * foo is a learning classifier and at least one of its label or extractor classifiers + * is "affected". *
* - *

A learning classifier named foo needs to have its code - * regenerated and retrained iff at least one of the following is true: + *

+ * A learning classifier named foo needs to have its code regenerated and retrained iff + * at least one of the following is true: *

    - *
  • The file foo.java does not exist. - *
  • - * Using the comments at the top of foo.java, it is - * determined that the code specifying foo has been revised. - *
  • At least one of its label or extractor classifiers is "affected". + *
  • The file foo.java does not exist. + *
  • + * Using the comments at the top of foo.java, it is determined that the code specifying + * foo has been revised. + *
  • At least one of its label or extractor classifiers is "affected". *
* - * @see SemanticAnalysis + * @see SemanticAnalysis * @author Nick Rizzolo **/ -public class RevisionAnalysis extends Pass -{ - /** Constant representing the "unaffected" revision status. */ - public static final Integer UNAFFECTED = new Integer(0); - /** Constant representing the "affected" revision status. */ - public static final Integer AFFECTED = new Integer(1); - /** Constant representing the "revised" revision status. */ - public static final Integer REVISED = new Integer(2); - /** The names of the three revision states. */ - public static final String[] statusNames = - { "unaffected", "affected", "revised" }; - - /** - * Keeps track of the names of classifiers whose revision status has been - * resolved. - **/ - public static HashMap revisionStatus; - /** - * Set to true iff no code has changed since the compiler was - * last run. - **/ - public static boolean noChanges; - - - /** - * Prints the contents of {@link #revisionStatus} to STDOUT. - **/ - public static void printRevisionStatus() { - if (revisionStatus == null) { - System.out.println("No revision statuses."); - return; - } - - for (Iterator I = revisionStatus.entrySet().iterator(); I.hasNext(); ) { - Map.Entry e = (Map.Entry) I.next(); - String name = (String) e.getKey(); - Integer status = (Integer) e.getValue(); - System.out.println(name + ": " + statusToString(status)); - - Object classifierExpression = - SemanticAnalysis.representationTable.get(name); - if (classifierExpression instanceof LearningClassifierExpression) { - LearningClassifierExpression lce = - (LearningClassifierExpression) classifierExpression; - System.out.println( - " features: " + statusToString(lce.featuresStatus)); - System.out.println(" pruning: " + statusToString(lce.pruneStatus)); - System.out.println( - " learning: " + statusToString(lce.learningStatus)); - System.out.println( - " only code generation: " + lce.onlyCodeGeneration); - } - } - } - - - /** - * Returns the name of a revision status, or "no status" if - * 
the status is null. - **/ - public static String statusToString(Integer status) { - if (status == null) return "no status"; - return statusNames[status.intValue()]; - } - - - /** - * Read the second line from the specified classifier's generated code. - * - * @param name The name of the classifier. - * @param line The line number at which the classifier whose source we're - * reading is declared in its LBJava source file. - * @return The second line from the classifier's generated code without the - * opening comment marker (//), or null if the - * generated code doesn't exist or the file doesn't appear to be - * generated code. - **/ - private static String readSecondLine(String name, int line) { - name += ".java"; - if (Main.generatedSourceDirectory != null) - name = Main.generatedSourceDirectory + File.separator + name; - - File javaSource = new File(name); - if (!javaSource.exists()) return null; - - BufferedReader in = null; - try { in = new BufferedReader(new FileReader(javaSource)); } - catch (Exception e) { - System.err.println("Can't open '" + name + "' for input: " + e); - System.exit(1); - } - - String line1 = ""; - String line2 = ""; - try { - line1 = in.readLine(); - line2 = in.readLine(); - } - catch (Exception e) { - System.err.println("Can't read from '" + name + "': " + e); - System.exit(1); - } - - try { in.close(); } - catch (Exception e) { - System.err.println("Can't close file '" + name + "': " + e); - System.exit(1); - } - - if (line1 == null || line2 == null || !line2.startsWith("// ") - || !TranslateToJava.disclaimer.equals(line1)) { - reportError(line, - "The file '" + name + "' does not appear to have been generated by " - + "LBJava, but LBJava needs to overwrite it. 
Either remove the file, " - + "or change the name of the classifier in '" + Main.sourceFilename - + "'."); - return null; - } - - return line2.substring(3); - } - - - /** - * This method reads the comments at the top of the file containing the - * code corresponding to the specified code generating node to determine if - * the LBJava source describing that code generator has been modified since - * the LBJava compiler was last executed. - * - * @param node The code generating node. - * @param convert Whether or not the code is converted to hexadecimal - * compressed format. - * @return true iff the associated Java file did not exist or - * it contained the expected comments and those comments indicate - * that a revision has taken place. - **/ - private static boolean codeRevision(CodeGenerator node, boolean convert) { - String name = node.getName(); - String line2 = readSecondLine(name, node.getLine()); - if (line2 == null) return true; - String expected = null; - - if (convert) { - PrintStream converter = null; - ByteArrayOutputStream converted = new ByteArrayOutputStream(); - try { - converter = new PrintStream( - new GZIPOutputStream( - new HexOutputStream(converted))); - } - catch (Exception e) { - System.err.println("Could not create converter stream."); - System.exit(1); - } - - converter.print(node.shallow().toString()); - converter.close(); - - expected = converted.toString(); - } - else expected = node.shallow().toString(); - - return !line2.equals(expected); - } - - - /** - * Recursively propagates the information about which nodes are "affected". - * - * @param name The name of an affected node. 
- **/ - private static void propagateAffected(String name) { - boolean isCompositeGenerator = - SemanticAnalysis.representationTable.get(name) - instanceof CompositeGenerator; - boolean isRevised = revisionStatus.get(name) == REVISED; - - HashSet dependors = (HashSet) SemanticAnalysis.dependorGraph.get(name); - - assert dependors != null : "null entry in dependorGraph for " + name; - - for (Iterator I = dependors.iterator(); I.hasNext(); ) { - String dependor = (String) I.next(); - - if (SemanticAnalysis.representationTable.get(dependor) - instanceof LearningClassifierExpression) { - LearningClassifierExpression lce = - (LearningClassifierExpression) - SemanticAnalysis.representationTable.get(dependor); - - if (lce.featuresStatus == null || lce.featuresStatus != REVISED) - lce.featuresStatus = AFFECTED; - if (lce.pruneStatus == null || lce.pruneStatus != REVISED) - lce.pruneStatus = AFFECTED; - if (lce.learningStatus == null || lce.learningStatus != REVISED) - lce.learningStatus = AFFECTED; - lce.startingRound = 1; - } - - if (!revisionStatus.containsKey(dependor)) { - if (isCompositeGenerator && isRevised - && SemanticAnalysis.representationTable.get(dependor) - instanceof LearningClassifierExpression) - revisionStatus.put(dependor, REVISED); - else revisionStatus.put(dependor, AFFECTED); - propagateAffected((String) dependor); - } - } - } - - - /** - * Instantiates a pass that runs on an entire AST. - * - * @param ast The program to run this pass on. - **/ - public RevisionAnalysis(AST ast) { - super(ast); - revisionStatus = new HashMap(); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param list The node to process. 
- **/ - public void run(DeclarationList list) { - noChanges = true; - if (list.size() == 0) return; - - runOnChildren(list); - - noChanges = revisionStatus.size() == 0; - String[] revised = - (String[]) revisionStatus.keySet().toArray(new String[0]); - for (int i = 0; i < revised.length; ++i) - propagateAffected(revised[i]); - - for (Iterator I = SemanticAnalysis.dependorGraph.keySet().iterator(); - I.hasNext(); ) { - Object name = I.next(); - if (!revisionStatus.containsKey(name)) { - revisionStatus.put(name, UNAFFECTED); - - if (SemanticAnalysis.representationTable.get(name) - instanceof LearningClassifierExpression) { - LearningClassifierExpression lce = - (LearningClassifierExpression) - SemanticAnalysis.representationTable.get(name); - - if (lce.featuresStatus == null) lce.featuresStatus = UNAFFECTED; - if (lce.pruneStatus == null) lce.pruneStatus = UNAFFECTED; - if (lce.learningStatus == null) lce.learningStatus = UNAFFECTED; - } - } - } - } - - - /** - * Parses a learning classifier expression out of an encoded string using - * the automatically generated scanner and parser. - * - * @param s The string out of which the learning classifier expression - * will be parsed. - * @return The parsed learning classifier expression. 
- **/ - private static LearningClassifierExpression parseLCE(String s) { - Reader reader = null; - try { - reader = - new BufferedReader( - new InputStreamReader( - new GZIPInputStream( - new HexStringInputStream(s)))); - } - catch (Exception e) { - System.err.println( - "LBJava ERROR: Can't instantiate string parser for LCE:"); - e.printStackTrace(); - System.exit(1); - } - - AST ast = null; - try { ast = (AST) new parser(new Yylex(reader)).parse().value; } - catch (Exception e) { - System.err.println("LBJava ERROR: Can't parse LCE from string:"); - e.printStackTrace(); - System.exit(1); - } - - SemanticAnalysis.runAndRestore(ast); - ClassifierAssignment ca = - (ClassifierAssignment) ast.declarations.iterator().next(); - return (LearningClassifierExpression) ca.expression; - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param lce The node to process. - **/ - public void run(LearningClassifierExpression lce) { - runOnChildren(lce); - String lceName = lce.name.toString(); - - String line2 = readSecondLine(lce.getName(), lce.getLine()); - if (line2 == null || line2.length() == 0 || line2.equals("rebuild")) { - revisionStatus.put(lceName, REVISED); - lce.featuresStatus = lce.pruneStatus = lce.learningStatus = AFFECTED; - return; - } - - LearningClassifierExpression oldLCE = parseLCE(line2); - String exFilePath = lceName + ".ex"; - if (Main.generatedSourceDirectory != null) - exFilePath = - Main.generatedSourceDirectory + File.separator + exFilePath; - String lexFilePath = lceName + ".lex"; - String lcFilePath = lceName + ".lc"; - if (Main.classDirectory != null) { - String prefix = Main.classDirectory + File.separator; - lexFilePath = prefix + lexFilePath; - lcFilePath = prefix + lcFilePath; - } - File exFile = new File(exFilePath); - File lexFile = new File(lexFilePath); - File lcFile = new File(lcFilePath); - - boolean preExtractToDisk = - lce.preExtract.value.startsWith("\"disk") - || lce.preExtract.value.equals("true") - || 
lce.preExtract.value.equals("\"true\""); - boolean previousPreExtractToDisk = - oldLCE.preExtract.value.startsWith("\"disk") - || oldLCE.preExtract.value.equals("true") - || oldLCE.preExtract.value.equals("\"true\""); - - if (!oldLCE.returnType.equals(lce.returnType) - || !oldLCE.name.equals(lce.name) - || !oldLCE.argument.equals(lce.argument) - || (oldLCE.labeler == null - ? lce.labeler != null - : lce.labeler == null - || !oldLCE.labeler.name.equals(lce.labeler.name)) - || !oldLCE.extractor.name.equals(lce.extractor.name) - || (oldLCE.parser == null ? lce.parser != null - : !oldLCE.parser.equals(lce.parser)) - || (oldLCE.featureEncoding == null - ? lce.featureEncoding != null - : lce.featureEncoding == null - || !oldLCE.featureEncoding.value - .equals(lce.featureEncoding.value)) - || preExtractToDisk && !previousPreExtractToDisk - || (preExtractToDisk ? !exFile.exists() : !lcFile.exists()) - || !lexFile.exists()) { - revisionStatus.put(lceName, REVISED); - lce.featuresStatus = lce.pruneStatus = lce.learningStatus = AFFECTED; - return; - } - - if ((oldLCE.pruneCountType == null - ? lce.pruneCountType != null - : !oldLCE.pruneCountType.equals(lce.pruneCountType)) - || (oldLCE.pruneThresholdType == null - ? lce.pruneThresholdType != null - : !oldLCE.pruneThresholdType.equals(lce.pruneThresholdType)) - || (oldLCE.pruneThreshold == null - ? lce.pruneThreshold != null - : !oldLCE.pruneThreshold.equals(lce.pruneThreshold))) { - lce.featuresStatus = preExtractToDisk ? UNAFFECTED : REVISED; - lce.pruneStatus = REVISED; - lce.learningStatus = AFFECTED; - lce.previousPruneCountType = oldLCE.pruneCountType; - revisionStatus.put(lceName, AFFECTED); - return; - } - - if ((oldLCE.learnerName == null - ? lce.learnerName != null - : !oldLCE.learnerName.equals(lce.learnerName)) - || (oldLCE.learnerConstructor == null - ? lce.learnerConstructor != null - : !oldLCE.learnerConstructor.equals(lce.learnerConstructor)) - || (oldLCE.learnerParameterBlock == null - ? 
lce.learnerParameterBlock != null - : !oldLCE.learnerParameterBlock.toString() - .equals(lce.learnerParameterBlock.toString())) - || (oldLCE.K == null ? lce.K != null : !oldLCE.K.equals(lce.K)) - || oldLCE.splitPolicy != lce.splitPolicy - || (oldLCE.testingMetric == null - ? lce.testingMetric != null - : !oldLCE.testingMetric.equals(lce.testingMetric)) - || !oldLCE.alpha.equals(lce.alpha) - || !lcFile.exists()) { - lce.featuresStatus = lce.pruneStatus = - preExtractToDisk ? UNAFFECTED : REVISED; - lce.learningStatus = REVISED; - revisionStatus.put(lceName, AFFECTED); - return; - } - - if (oldLCE.rounds == null ? lce.rounds != null - : !oldLCE.rounds.equals(lce.rounds)) { - lce.featuresStatus = lce.pruneStatus = - preExtractToDisk ? UNAFFECTED : REVISED; - lce.learningStatus = REVISED; - revisionStatus.put(lceName, AFFECTED); - - if (lce.K == null && lce.parameterSets.size() == 0 - && lce.rounds instanceof Constant - && oldLCE.rounds instanceof Constant) { - int rounds = - lce.rounds == null - ? 1 : Integer.parseInt(((Constant) lce.rounds).value); - int oldRounds = - oldLCE.rounds == null - ? 1 : Integer.parseInt(((Constant) oldLCE.rounds).value); - if (rounds > oldRounds) lce.startingRound = oldRounds + 1; - } - - return; - } - - lce.onlyCodeGeneration = - (oldLCE.comment == null ? lce.comment != null - : !oldLCE.comment.equals(lce.comment)) - || (oldLCE.cacheIn == null ? lce.cacheIn != null - : !oldLCE.cacheIn.equals(lce.cacheIn)) - || oldLCE.singleExampleCache != lce.singleExampleCache - || (oldLCE.evaluation == null - ? lce.evaluation != null - : !oldLCE.evaluation.equals(lce.evaluation)); - if (lce.onlyCodeGeneration) { - revisionStatus.put(lceName, REVISED); - lce.featuresStatus = lce.pruneStatus = lce.learningStatus = UNAFFECTED; - } - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param cn The node to process. 
- **/ - public void run(ClassifierName cn) { - if (cn.referent == cn.name) return; - if (codeRevision(cn, false)) - revisionStatus.put(cn.name.toString(), REVISED); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param cc The node to process. - **/ - public void run(CodedClassifier cc) { - if (codeRevision(cc, true)) - revisionStatus.put(cc.name.toString(), REVISED); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param cg The node to process. - **/ - public void run(CompositeGenerator cg) { - runOnChildren(cg); - if (codeRevision(cg, true)) - revisionStatus.put(cg.name.toString(), REVISED); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param c The node to process. - **/ - public void run(Conjunction c) { - runOnChildren(c); - if (codeRevision(c, false)) - revisionStatus.put(c.name.toString(), REVISED); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param i The node to process. - **/ - public void run(InferenceInvocation i) { - if (codeRevision(i, false)) - revisionStatus.put(i.name.toString(), REVISED); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param cd The node to process. - **/ - public void run(ConstraintDeclaration cd) { - if (codeRevision(cd, true)) - revisionStatus.put(cd.name.toString(), REVISED); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param id The node to process. - **/ - public void run(InferenceDeclaration id) { - if (codeRevision(id, true)) - revisionStatus.put(id.name.toString(), REVISED); - run(id.constraint); - } -} \ No newline at end of file +public class RevisionAnalysis extends Pass { + /** Constant representing the "unaffected" revision status. */ + public static final Integer UNAFFECTED = new Integer(0); + /** Constant representing the "affected" revision status. 
*/ + public static final Integer AFFECTED = new Integer(1); + /** Constant representing the "revised" revision status. */ + public static final Integer REVISED = new Integer(2); + /** The names of the three revision states. */ + public static final String[] statusNames = {"unaffected", "affected", "revised"}; + + /** + * Keeps track of the names of classifiers whose revision status has been resolved. + **/ + public static HashMap revisionStatus; + /** + * Set to true iff no code has changed since the compiler was last run. + **/ + public static boolean noChanges; + + + /** + * Prints the contents of {@link #revisionStatus} to STDOUT. + **/ + public static void printRevisionStatus() { + if (revisionStatus == null) { + System.out.println("No revision statuses."); + return; + } + + for (Iterator I = revisionStatus.entrySet().iterator(); I.hasNext();) { + Map.Entry e = (Map.Entry) I.next(); + String name = (String) e.getKey(); + Integer status = (Integer) e.getValue(); + System.out.println(name + ": " + statusToString(status)); + + Object classifierExpression = SemanticAnalysis.representationTable.get(name); + if (classifierExpression instanceof LearningClassifierExpression) { + LearningClassifierExpression lce = + (LearningClassifierExpression) classifierExpression; + System.out.println(" features: " + statusToString(lce.featuresStatus)); + System.out.println(" pruning: " + statusToString(lce.pruneStatus)); + System.out.println(" learning: " + statusToString(lce.learningStatus)); + System.out.println(" only code generation: " + lce.onlyCodeGeneration); + } + } + } + + + /** + * Returns the name of a revision status, or "no status" if the status is + * null. + **/ + public static String statusToString(Integer status) { + if (status == null) + return "no status"; + return statusNames[status.intValue()]; + } + + + /** + * Read the second line from the specified classifier's generated code. + * + * @param name The name of the classifier. 
+ * @param line The line number at which the classifier whose source we're reading is declared in + * its LBJava source file. + * @return The second line from the classifier's generated code without the opening comment + * marker (//), or null if the generated code doesn't exist or the file + * doesn't appear to be generated code. + **/ + private static String readSecondLine(String name, int line) { + name += ".java"; + if (Main.generatedSourceDirectory != null) + name = Main.generatedSourceDirectory + File.separator + name; + + File javaSource = new File(name); + if (!javaSource.exists()) + return null; + + BufferedReader in = null; + try { + in = new BufferedReader(new FileReader(javaSource)); + } catch (Exception e) { + System.err.println("Can't open '" + name + "' for input: " + e); + System.exit(1); + } + + String line1 = ""; + String line2 = ""; + try { + line1 = in.readLine(); + line2 = in.readLine(); + } catch (Exception e) { + System.err.println("Can't read from '" + name + "': " + e); + System.exit(1); + } + + try { + in.close(); + } catch (Exception e) { + System.err.println("Can't close file '" + name + "': " + e); + System.exit(1); + } + + if (line1 == null || line2 == null || !line2.startsWith("// ") + || !TranslateToJava.disclaimer.equals(line1)) { + reportError(line, "The file '" + name + "' does not appear to have been generated by " + + "LBJava, but LBJava needs to overwrite it. Either remove the file, " + + "or change the name of the classifier in '" + Main.sourceFilename + "'."); + return null; + } + + return line2.substring(3); + } + + + /** + * This method reads the comments at the top of the file containing the code corresponding to + * the specified code generating node to determine if the LBJava source describing that code + * generator has been modified since the LBJava compiler was last executed. + * + * @param node The code generating node. + * @param convert Whether or not the code is converted to hexadecimal compressed format. 
+ * @return true iff the associated Java file did not exist or it contained the + * expected comments and those comments indicate that a revision has taken place. + **/ + private static boolean codeRevision(CodeGenerator node, boolean convert) { + String name = node.getName(); + String line2 = readSecondLine(name, node.getLine()); + if (line2 == null) + return true; + String expected = null; + + if (convert) { + PrintStream converter = null; + ByteArrayOutputStream converted = new ByteArrayOutputStream(); + try { + converter = new PrintStream(new GZIPOutputStream(new HexOutputStream(converted))); + } catch (Exception e) { + System.err.println("Could not create converter stream."); + System.exit(1); + } + + converter.print(node.shallow().toString()); + converter.close(); + + expected = converted.toString(); + } else + expected = node.shallow().toString(); + + return !line2.equals(expected); + } + + + /** + * Recursively propagates the information about which nodes are "affected". + * + * @param name The name of an affected node. 
+ **/ + private static void propagateAffected(String name) { + boolean isCompositeGenerator = + SemanticAnalysis.representationTable.get(name) instanceof CompositeGenerator; + boolean isRevised = revisionStatus.get(name) == REVISED; + + HashSet dependors = (HashSet) SemanticAnalysis.dependorGraph.get(name); + + assert dependors != null : "null entry in dependorGraph for " + name; + + for (Iterator I = dependors.iterator(); I.hasNext();) { + String dependor = (String) I.next(); + + if (SemanticAnalysis.representationTable.get(dependor) instanceof LearningClassifierExpression) { + LearningClassifierExpression lce = + (LearningClassifierExpression) SemanticAnalysis.representationTable + .get(dependor); + + if (lce.featuresStatus == null || lce.featuresStatus != REVISED) + lce.featuresStatus = AFFECTED; + if (lce.pruneStatus == null || lce.pruneStatus != REVISED) + lce.pruneStatus = AFFECTED; + if (lce.learningStatus == null || lce.learningStatus != REVISED) + lce.learningStatus = AFFECTED; + lce.startingRound = 1; + } + + if (!revisionStatus.containsKey(dependor)) { + if (isCompositeGenerator + && isRevised + && SemanticAnalysis.representationTable.get(dependor) instanceof LearningClassifierExpression) + revisionStatus.put(dependor, REVISED); + else + revisionStatus.put(dependor, AFFECTED); + propagateAffected((String) dependor); + } + } + } + + + /** + * Instantiates a pass that runs on an entire AST. + * + * @param ast The program to run this pass on. + **/ + public RevisionAnalysis(AST ast) { + super(ast); + revisionStatus = new HashMap(); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param list The node to process. 
+ **/ + public void run(DeclarationList list) { + noChanges = true; + if (list.size() == 0) + return; + + runOnChildren(list); + + noChanges = revisionStatus.size() == 0; + String[] revised = (String[]) revisionStatus.keySet().toArray(new String[0]); + for (int i = 0; i < revised.length; ++i) + propagateAffected(revised[i]); + + for (Iterator I = SemanticAnalysis.dependorGraph.keySet().iterator(); I.hasNext();) { + Object name = I.next(); + if (!revisionStatus.containsKey(name)) { + revisionStatus.put(name, UNAFFECTED); + + if (SemanticAnalysis.representationTable.get(name) instanceof LearningClassifierExpression) { + LearningClassifierExpression lce = + (LearningClassifierExpression) SemanticAnalysis.representationTable + .get(name); + + if (lce.featuresStatus == null) + lce.featuresStatus = UNAFFECTED; + if (lce.pruneStatus == null) + lce.pruneStatus = UNAFFECTED; + if (lce.learningStatus == null) + lce.learningStatus = UNAFFECTED; + } + } + } + } + + + /** + * Parses a learning classifier expression out of an encoded string using the automatically + * generated scanner and parser. + * + * @param s The string out of which the learning classifier expression will be parsed. + * @return The parsed learning classifier expression. 
+ **/ + private static LearningClassifierExpression parseLCE(String s) { + Reader reader = null; + try { + reader = + new BufferedReader(new InputStreamReader(new GZIPInputStream( + new HexStringInputStream(s)))); + } catch (Exception e) { + System.err.println("LBJava ERROR: Can't instantiate string parser for LCE:"); + e.printStackTrace(); + System.exit(1); + } + + AST ast = null; + try { + ast = (AST) new parser(new Yylex(reader)).parse().value; + } catch (Exception e) { + System.err.println("LBJava ERROR: Can't parse LCE from string:"); + e.printStackTrace(); + System.exit(1); + } + + SemanticAnalysis.runAndRestore(ast); + ClassifierAssignment ca = (ClassifierAssignment) ast.declarations.iterator().next(); + return (LearningClassifierExpression) ca.expression; + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param lce The node to process. + **/ + public void run(LearningClassifierExpression lce) { + runOnChildren(lce); + String lceName = lce.name.toString(); + + String line2 = readSecondLine(lce.getName(), lce.getLine()); + if (line2 == null || line2.length() == 0 || line2.equals("rebuild")) { + revisionStatus.put(lceName, REVISED); + lce.featuresStatus = lce.pruneStatus = lce.learningStatus = AFFECTED; + return; + } + + LearningClassifierExpression oldLCE = parseLCE(line2); + String exFilePath = lceName + ".ex"; + if (Main.generatedSourceDirectory != null) + exFilePath = Main.generatedSourceDirectory + File.separator + exFilePath; + String lexFilePath = lceName + ".lex"; + String lcFilePath = lceName + ".lc"; + if (Main.classDirectory != null) { + String prefix = Main.classDirectory + File.separator; + lexFilePath = prefix + lexFilePath; + lcFilePath = prefix + lcFilePath; + } + File exFile = new File(exFilePath); + File lexFile = new File(lexFilePath); + File lcFile = new File(lcFilePath); + + boolean preExtractToDisk = + lce.preExtract.value.startsWith("\"disk") || lce.preExtract.value.equals("true") + || 
lce.preExtract.value.equals("\"true\""); + boolean previousPreExtractToDisk = + oldLCE.preExtract.value.startsWith("\"disk") + || oldLCE.preExtract.value.equals("true") + || oldLCE.preExtract.value.equals("\"true\""); + + if (!oldLCE.returnType.equals(lce.returnType) + || !oldLCE.name.equals(lce.name) + || !oldLCE.argument.equals(lce.argument) + || (oldLCE.labeler == null ? lce.labeler != null : lce.labeler == null + || !oldLCE.labeler.name.equals(lce.labeler.name)) + || !oldLCE.extractor.name.equals(lce.extractor.name) + || (oldLCE.parser == null ? lce.parser != null : !oldLCE.parser.equals(lce.parser)) + || (oldLCE.featureEncoding == null ? lce.featureEncoding != null + : lce.featureEncoding == null + || !oldLCE.featureEncoding.value.equals(lce.featureEncoding.value)) + || preExtractToDisk && !previousPreExtractToDisk + || (preExtractToDisk ? !exFile.exists() : !lcFile.exists()) || !lexFile.exists()) { + revisionStatus.put(lceName, REVISED); + lce.featuresStatus = lce.pruneStatus = lce.learningStatus = AFFECTED; + return; + } + + if ((oldLCE.pruneCountType == null ? lce.pruneCountType != null : !oldLCE.pruneCountType + .equals(lce.pruneCountType)) + || (oldLCE.pruneThresholdType == null ? lce.pruneThresholdType != null + : !oldLCE.pruneThresholdType.equals(lce.pruneThresholdType)) + || (oldLCE.pruneThreshold == null ? lce.pruneThreshold != null + : !oldLCE.pruneThreshold.equals(lce.pruneThreshold))) { + lce.featuresStatus = preExtractToDisk ? UNAFFECTED : REVISED; + lce.pruneStatus = REVISED; + lce.learningStatus = AFFECTED; + lce.previousPruneCountType = oldLCE.pruneCountType; + revisionStatus.put(lceName, AFFECTED); + return; + } + + if ((oldLCE.learnerName == null ? lce.learnerName != null : !oldLCE.learnerName + .equals(lce.learnerName)) + || (oldLCE.learnerConstructor == null ? lce.learnerConstructor != null + : !oldLCE.learnerConstructor.equals(lce.learnerConstructor)) + || (oldLCE.learnerParameterBlock == null ? 
lce.learnerParameterBlock != null + : !oldLCE.learnerParameterBlock.toString().equals( + lce.learnerParameterBlock.toString())) + || (oldLCE.K == null ? lce.K != null : !oldLCE.K.equals(lce.K)) + || oldLCE.splitPolicy != lce.splitPolicy + || (oldLCE.testingMetric == null ? lce.testingMetric != null + : !oldLCE.testingMetric.equals(lce.testingMetric)) + || !oldLCE.alpha.equals(lce.alpha) || !lcFile.exists()) { + lce.featuresStatus = lce.pruneStatus = preExtractToDisk ? UNAFFECTED : REVISED; + lce.learningStatus = REVISED; + revisionStatus.put(lceName, AFFECTED); + return; + } + + if (oldLCE.rounds == null ? lce.rounds != null : !oldLCE.rounds.equals(lce.rounds)) { + lce.featuresStatus = lce.pruneStatus = preExtractToDisk ? UNAFFECTED : REVISED; + lce.learningStatus = REVISED; + revisionStatus.put(lceName, AFFECTED); + + if (lce.K == null && lce.parameterSets.size() == 0 && lce.rounds instanceof Constant + && oldLCE.rounds instanceof Constant) { + int rounds = + lce.rounds == null ? 1 : Integer.parseInt(((Constant) lce.rounds).value); + int oldRounds = + oldLCE.rounds == null ? 1 : Integer + .parseInt(((Constant) oldLCE.rounds).value); + if (rounds > oldRounds) + lce.startingRound = oldRounds + 1; + } + + return; + } + + lce.onlyCodeGeneration = + (oldLCE.comment == null ? lce.comment != null : !oldLCE.comment.equals(lce.comment)) + || (oldLCE.cacheIn == null ? lce.cacheIn != null : !oldLCE.cacheIn + .equals(lce.cacheIn)) + || oldLCE.singleExampleCache != lce.singleExampleCache + || (oldLCE.evaluation == null ? lce.evaluation != null : !oldLCE.evaluation + .equals(lce.evaluation)); + if (lce.onlyCodeGeneration) { + revisionStatus.put(lceName, REVISED); + lce.featuresStatus = lce.pruneStatus = lce.learningStatus = UNAFFECTED; + } + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param cn The node to process. 
+ **/ + public void run(ClassifierName cn) { + if (cn.referent == cn.name) + return; + if (codeRevision(cn, false)) + revisionStatus.put(cn.name.toString(), REVISED); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param cc The node to process. + **/ + public void run(CodedClassifier cc) { + if (codeRevision(cc, true)) + revisionStatus.put(cc.name.toString(), REVISED); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param cg The node to process. + **/ + public void run(CompositeGenerator cg) { + runOnChildren(cg); + if (codeRevision(cg, true)) + revisionStatus.put(cg.name.toString(), REVISED); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param c The node to process. + **/ + public void run(Conjunction c) { + runOnChildren(c); + if (codeRevision(c, false)) + revisionStatus.put(c.name.toString(), REVISED); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param i The node to process. + **/ + public void run(InferenceInvocation i) { + if (codeRevision(i, false)) + revisionStatus.put(i.name.toString(), REVISED); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param cd The node to process. + **/ + public void run(ConstraintDeclaration cd) { + if (codeRevision(cd, true)) + revisionStatus.put(cd.name.toString(), REVISED); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param id The node to process. 
+ **/ + public void run(InferenceDeclaration id) { + if (codeRevision(id, true)) + revisionStatus.put(id.name.toString(), REVISED); + run(id.constraint); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/SemanticAnalysis.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/SemanticAnalysis.java index 18f5d645..9a5bf245 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/SemanticAnalysis.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/SemanticAnalysis.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava; @@ -84,2621 +81,2407 @@ /** - * The SemanticAnalysis pass builds useful tables, computes - * classifier types and other useful information, and generally checks that - * things appear only where they are expected. More specifically, the - * following data is arranged: + * The SemanticAnalysis pass builds useful tables, computes classifier types and other + * useful information, and generally checks that things appear only where they are expected. 
More + * specifically, the following data is arranged: * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * + *
A1 - * The global symbol table is built. It stores information about - * classifier, constraint, and inference declarations as well as - * symbols local to method bodies. - *
A2 - * The classifier representation table is built. It stores references - * to internal representations of source code implementing classifiers - * indexed by the classifiers' names. - *
A3 - * Names for every {@link ClassifierExpression} are computed. - *
A4 - * Type information is computed for classifiers, - * {@link InstanceCreationExpression}s creating outer classes, and - * {@link Name}s known to refer to classifiers, the latter two only to - * support the semantic checks performed over the various classifier - * specification syntaxes. - *
A5 - * Method invocations that are actually classifier invocations are - * marked as such. - *
A6 - * If a sense statement with a single argument appears in - * a generator, the argument expression is moved from the - * {@link SenseStatement#value value} to the - * {@link SenseStatement#name name} variable in the - * {@link SenseStatement} object, and the - * {@link SenseStatement#value value} variable gets a new - * {@link Constant} representing "true" if the generator - * is discrete and "1" if the generator is real. - *
A7 - * If there are any for, if, - * while, or do statements that contain a - * single statement in their body, that statement is wrapped in a - * {@link Block}. - *
A8 - * The dependor graph, linking the names of {@link CodeGenerator}s with - * the names of other {@link CodeGenerator}s that depend on them, is - * built for use by {@link RevisionAnalysis}. - *
A9 - * The invoked graph, linking the names of {@link CodeGenerator}s with - * the names of other {@link CodeGenerator}s that are invoked by them, - * is built for use by {@link TranslateToJava}. - *
A10 - * If a method of the unique instance of a learning classifier is - * invoked using the learning classifier's name, code must be inserted - * to create an instance of that classifier ahead of time and then to - * call the method on that instance. - *
A11 - * If a {@link LearningClassifierExpression} does not have a - * with clause, the default learning algorithm is - * substituted. - *
A12 - * Flags are set in each {@link ConstraintEqualityExpression} - * indicating if its subexpressions are learner invocations. - *
A13 - * {@link Name}s and every {@link ASTNode} that represents a new local - * scope gets a link to the symbol table representing its scope. - *
A14 - * {@link Argument} types in arguments of quantifier expressions are - * marked as such. - *
A15 - * Quantified {@link ConstraintEqualityExpression}s, - * {@link ConstraintInvocation}s, and - * {@link QuantifiedConstraintExpression}s are marked as such. - *
A16 - * If a {@link InferenceDeclaration} does not have a with - * clause, the default inference algorithm is substituted. - *
A17 - * When a {@link ClassifierName} is not alone on the right hand side of - * a {@link ClassifierAssignment}, its {@link ClassifierName#name name} - * is set equal to its {@link ClassifierName#referent referent}. - *
A18 - * The {@link ClassifierExpression#cacheIn} member variable is set when - * the containing {@link ClassifierAssignment} had a - * cached or cachedin modifier. - *
A19 - * The {@link ClassifierExpression#comment} field of each top level - * classifier expression is set to the comment of the containing - * {@link ClassifierAssignment}. - *
A20 - * When a with clause is specified with an - * {@link InstanceCreationExpression} as an argument, - * {@link LearningClassifierExpression#learnerName} is set to the name - * of the class instantiated. - *
A21 - * The value of {@link ClassifierAssignment#singleExampleCache} is - * propagated from {@link ClassifierAssignment}s to - * {@link ClassifierExpression}s. - *
+ * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * *
A1 + * The global symbol table is built. It stores information about classifier, constraint, and + * inference declarations as well as symbols local to method bodies.
A2 + * The classifier representation table is built. It stores references to internal representations of + * source code implementing classifiers indexed by the classifiers' names.
A3 + * Names for every {@link ClassifierExpression} are computed.
A4 + * Type information is computed for classifiers, {@link InstanceCreationExpression}s creating outer + * classes, and {@link Name}s known to refer to classifiers, the latter two only to support the + * semantic checks performed over the various classifier specification syntaxes.
A5 + * Method invocations that are actually classifier invocations are marked as such.
A6 + * If a sense statement with a single argument appears in a generator, the argument + * expression is moved from the {@link SenseStatement#value value} to the + * {@link SenseStatement#name name} variable in the {@link SenseStatement} object, and the + * {@link SenseStatement#value value} variable gets a new {@link Constant} representing + * "true" if the generator is discrete and "1" if the generator is real.
A7 + * If there are any for, if, while, or do + * statements that contain a single statement in their body, that statement is wrapped in a + * {@link Block}.
A8 + * The dependor graph, linking the names of {@link CodeGenerator}s with the names of other + * {@link CodeGenerator}s that depend on them, is built for use by {@link RevisionAnalysis}.
A9 + * The invoked graph, linking the names of {@link CodeGenerator}s with the names of other + * {@link CodeGenerator}s that are invoked by them, is built for use by {@link TranslateToJava}.
A10 + * If a method of the unique instance of a learning classifier is invoked using the learning + * classifier's name, code must be inserted to create an instance of that classifier ahead of time + * and then to call the method on that instance.
A11 + * If a {@link LearningClassifierExpression} does not have a with clause, the default + * learning algorithm is substituted.
A12 + * Flags are set in each {@link ConstraintEqualityExpression} indicating if its subexpressions are + * learner invocations.
A13 + * {@link Name}s and every {@link ASTNode} that represents a new local scope gets a link to the + * symbol table representing its scope.
A14 + * {@link Argument} types in arguments of quantifier expressions are marked as such.
A15 + * Quantified {@link ConstraintEqualityExpression}s, {@link ConstraintInvocation}s, and + * {@link QuantifiedConstraintExpression}s are marked as such.
A16 + * If a {@link InferenceDeclaration} does not have a with clause, the default inference + * algorithm is substituted.
A17 + * When a {@link ClassifierName} is not alone on the right hand side of a + * {@link ClassifierAssignment}, its {@link ClassifierName#name name} is set equal to its + * {@link ClassifierName#referent referent}.
A18 + * The {@link ClassifierExpression#cacheIn} member variable is set when the containing + * {@link ClassifierAssignment} had a cached or cachedin modifier.
A19 + * The {@link ClassifierExpression#comment} field of each top level classifier expression is set to + * the comment of the containing {@link ClassifierAssignment}.
A20 + * When a with clause is specified with an {@link InstanceCreationExpression} as an + * argument, {@link LearningClassifierExpression#learnerName} is set to the name of the class + * instantiated.
A21 + * The value of {@link ClassifierAssignment#singleExampleCache} is propagated from + * {@link ClassifierAssignment}s to {@link ClassifierExpression}s.
* - *

And the following conditions are checked for: + *

+ * And the following conditions are checked for: * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * + *
B1No named classifier is defined more than once.
B2 - * Classifier and constraint invocations can only contain a single - * argument. - *
B3 - * The output type of every classifier expression is checked for - * appropriateness in its context. - *
B4 - * The input type of a {@link ClassifierName} is checked for - * appropriateness in its context. - *
B5 - * The {@link InstanceCreationExpression} in the from - * clause of a {@link LearningClassifierExpression} instantiates a - * {@link Parser}. - *
B6 - * The {@link InstanceCreationExpression} in the with - * clause of a {@link LearningClassifierExpression} instantiates a - * {@link Learner}. - *
B7 - * The {@link Learner} specified in a - * {@link LearningClassifierExpression} must have input type - * assignable from the learning classifier expression's input type. - *
B8 - * Classifiers with feature type discrete, - * real, or arrays of those may be invoked as if they were - * methods in any context. - *
B9 - * Any classifier other than one of feature return type - * mixed% may be invoked as a method when that invocation - * is the value argument of a sense statement inside a - * generator of the same basic type (discrete or - * real). Generators may not be invoked in any other - * context. Array producing classifiers may also be invoked as the - * only argument of a sense statement inside another array - * producing classifier of the same basic type. - *
B10 - * sense statements may only appear in classifiers that - * are generators or that return arrays. - *
B11 - * The expression : expression form of the sense - * statement may only appear in a generator. - *
B12 - * return statements may not appear in classifiers that - * are generators or that return arrays or in constraints. - *
B13 - * Every {@link ReferenceType} must successfully locate the Java - * Class object for the type it refers to. - *
B14 - * The only "mixed" classifier return type is mixed%. - *
B15 - * A {@link CodedClassifier} may not be declared as - * mixed%. - *
B16 - * There can be no more than one with clause in a - * {@link LearningClassifierExpression}. - *
B17 - * There can be no more than one from clause in a - * {@link LearningClassifierExpression}. - *
B18 - * There must be exactly one using clause in a - * {@link LearningClassifierExpression}. - *
B19 - * Constraint statements may only appear in constraint declarations. - *
B20 - * Constraint declarations must contain at least one constraint - * statement. - *
B21 Names in classifier expressions must refer to classifiers.
B22 - * The name to the left of the parentheses in an - * {@link InferenceInvocation} must refer to an inference. - *
B23 - * The name inside the parentheses of an - * {@link InferenceInvocation} must refer to a discrete learner. - *
B24 - * The input type of the classifier inside the parentheses of an - * {@link InferenceInvocation} is checked for appropriateness in its - * context. - *
B25 - * The inference of an {@link InferenceInvocation} must contain an - * {@link InferenceDeclaration.HeadFinder} whose input type is - * the same as the input type of the {@link InferenceInvocation}'s - * argument learner. - *
B26 - * Only constraints can be invoked with the @ operator - * in a constraint statement. - *
B27 - * The left hand side of the normalizedby operator must be - * the name of a {@link Learner}. - *
B28 - * The right hand side of the normalizedby operator must - * instantiate a {@link Normalizer}. - *
B29 - * An {@link InferenceDeclaration} must contain at least one head - * finder method. - *
B30 - * An {@link InferenceDeclaration} must contain exactly one - * subjectto clause. - *
B31 - * An {@link InferenceDeclaration} may contain no more than one - * with clause. - *
B32 - * The {@link InstanceCreationExpression} in the with - * clause of an {@link InferenceDeclaration} instantiates a - * {@link Inference}. - *
B33 - * An inference may not be invoked anywhere other than classifier - * expression context. - *
B34 - * Constraint expressions are only allowed to appear as part of their - * own separate expression statement. (The only other place that the - * parser will allow them is in the head of a for loop.) - *
B35 - * The value supplied before the rounds keyword in a - * {@link LearningClassifierExpression}'s from clause must - * be an integer. - *
B36 - * The cachedin and cached keywords can be - * used to cache the value(s) produced by classifiers returning either - * a single feature or an array of features in a member variable of a - * user's class or a WeakHashMap respectively. The values - * of features produced by generators and conjunctions cannot be cached - * in this way. - *
B37 - * There can be no more than one evaluate clause in a - * {@link LearningClassifierExpression}. - *
B38 - * In the body of a coded classifier, a method invocation with no - * parent object is assumed to be a classifier invocation. As such, - * that classifier's definition must be accessible in one form or - * another. - *
B39 - * LBJava must be properly configured to use the selected - * inference algorithm. - *
B40 - * The value supplied after the cval keyword in a - * {@link LearningClassifierExpression} must be an integer. - *
B41 - * The value supplied after preExtract must be a Boolean - * or one of ("none"|"disk"|"diskZip"|"memory"|"memoryZip"). - *
B42 - * The value supplied after progressOutput must be an - * integer. - *
B43 - * The value supplied after the alpha keyword in a - * {@link LearningClassifierExpression} must be a double. - *
B44 - * The input to any classifier must have either type - * {@link ReferenceType} or type {@link ArrayType}. - *
B45 - * The alpha keyword should not be used if the - * cval keyword is not being used. - *
B46 - * The testingMetric keyword should not be used if both - * the cval and testFrom keywords are not - * present. - *
B47 - * There can be no more than one cval clause in a - * {@link LearningClassifierExpression}. - *
B48 - * There can be no more than one testingMetric clause in a - * {@link LearningClassifierExpression}. - *
B49 - * There can be no more than one alpha clause in a - * {@link LearningClassifierExpression}. - *
B50 - * The {@link InstanceCreationExpression} in the testFrom - * clause of a {@link LearningClassifierExpression} instantiates a - * {@link Parser}. - *
B51 - * There can be no more than one testFrom clause in a - * {@link LearningClassifierExpression}. - *
B52 - * Parameter tuning can only be performed if either a cval - * clause or testFrom clause is supplied. - *
B53 - * A parameter set must include only simple constant expressions. - *
B54 - * A parameter range must be defined such that the enumerated list - * of values is finite. - *
B55 - * A parameter range must be defined with numeric values. - *
B56 - * A parameter set must be defined within a - * {@link LearningClassifierExpression}. - *
B57 - * There can be no more than one preExtract clause in a - * {@link LearningClassifierExpression}. - *
B58 - * The prune clause must be of the form - * prune a b x where a is one of - * ("global"|"perClass"), b is one of - * ("count"|"percent"), and x is numeric. - *
B59 - * The prune threshold must be an integer when using the 'count' type - * or a real number in [0,1] when using the 'percent' type. - *
B60 - * Feature pre-extraction should not be explicitly be explicitly - * enabled when there is no "from" clause (and thus no parser). - *
B61 - * The conjunction of a classifier with itself is not allowed if the - * classifier has return type discrete or - * real. - *
B62 - * There can be no more than one encoding clause in a - * {@link LearningClassifierExpression}. - *
+ * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * *
B1No named classifier is defined more than once.
B2 + * Classifier and constraint invocations can only contain a single argument.
B3 + * The output type of every classifier expression is checked for appropriateness in its context.
B4 + * The input type of a {@link ClassifierName} is checked for appropriateness in its context.
B5 + * The {@link InstanceCreationExpression} in the from clause of a + * {@link LearningClassifierExpression} instantiates a {@link Parser}.
B6 + * The {@link InstanceCreationExpression} in the with clause of a + * {@link LearningClassifierExpression} instantiates a {@link Learner}.
B7 + * The {@link Learner} specified in a {@link LearningClassifierExpression} must have input type + * assignable from the learning classifier expression's input type.
B8 + * Classifiers with feature type discrete, real, or arrays of those may be + * invoked as if they were methods in any context.
B9 + * Any classifier other than one of feature return type mixed% may be invoked as a + * method when that invocation is the value argument of a sense statement inside a + * generator of the same basic type (discrete or real). Generators may not + * be invoked in any other context. Array producing classifiers may also be invoked as the only + * argument of a sense statement inside another array producing classifier of the same + * basic type.
B10 + * sense statements may only appear in classifiers that are generators or that return + * arrays.
B11 + * The expression : expression form of the sense statement may only appear in a + * generator.
B12 + * return statements may not appear in classifiers that are generators or that return + * arrays or in constraints.
B13 + * Every {@link ReferenceType} must successfully locate the Java Class object for the + * type it refers to.
B14 + * The only "mixed" classifier return type is mixed%.
B15 + * A {@link CodedClassifier} may not be declared as mixed%.
B16 + * There can be no more than one with clause in a {@link LearningClassifierExpression}. + *
B17 + * There can be no more than one from clause in a {@link LearningClassifierExpression}. + *
B18 + * There must be exactly one using clause in a {@link LearningClassifierExpression}.
B19 + * Constraint statements may only appear in constraint declarations.
B20 + * Constraint declarations must contain at least one constraint statement.
B21Names in classifier expressions must refer to classifiers.
B22 + * The name to the left of the parentheses in an {@link InferenceInvocation} must refer to an + * inference.
B23 + * The name inside the parentheses of an {@link InferenceInvocation} must refer to a discrete + * learner.
B24 + * The input type of the classifier inside the parentheses of an {@link InferenceInvocation} is + * checked for appropriateness in its context.
B25 + * The inference of an {@link InferenceInvocation} must contain an + * {@link InferenceDeclaration.HeadFinder} whose input type is the same as the input type of the + * {@link InferenceInvocation}'s argument learner.
B26 + * Only constraints can be invoked with the @ operator in a constraint statement.
B27 + * The left hand side of the normalizedby operator must be the name of a + * {@link Learner}.
B28 + * The right hand side of the normalizedby operator must instantiate a + * {@link Normalizer}.
B29 + * An {@link InferenceDeclaration} must contain at least one head finder method.
B30 + * An {@link InferenceDeclaration} must contain exactly one subjectto clause.
B31 + * An {@link InferenceDeclaration} may contain no more than one with clause.
B32
+ * The {@link InstanceCreationExpression} in the with clause of an
+ * {@link InferenceDeclaration} instantiates an {@link Inference}.
B33 + * An inference may not be invoked anywhere other than classifier expression context.
B34 + * Constraint expressions are only allowed to appear as part of their own separate expression + * statement. (The only other place that the parser will allow them is in the head of a + * for loop.)
B35 + * The value supplied before the rounds keyword in a + * {@link LearningClassifierExpression}'s from clause must be an integer.
B36 + * The cachedin and cached keywords can be used to cache the value(s) + * produced by classifiers returning either a single feature or an array of features in a member + * variable of a user's class or a WeakHashMap respectively. The values of features + * produced by generators and conjunctions cannot be cached in this way.
B37 + * There can be no more than one evaluate clause in a + * {@link LearningClassifierExpression}.
B38 + * In the body of a coded classifier, a method invocation with no parent object is assumed to be a + * classifier invocation. As such, that classifier's definition must be accessible in one form or + * another.
B39 + * LBJava must be properly configured to use the selected inference algorithm.
B40 + * The value supplied after the cval keyword in a {@link LearningClassifierExpression} + * must be an integer.
B41 + * The value supplied after preExtract must be a Boolean or one of + * ("none"|"disk"|"diskZip"|"memory"|"memoryZip").
B42 + * The value supplied after progressOutput must be an integer.
B43 + * The value supplied after the alpha keyword in a {@link LearningClassifierExpression} + * must be a double.
B44 + * The input to any classifier must have either type {@link ReferenceType} or type {@link ArrayType} + * .
B45 + * The alpha keyword should not be used if the cval keyword is not being + * used.
B46 + * The testingMetric keyword should not be used if both the cval and + * testFrom keywords are not present.
B47 + * There can be no more than one cval clause in a {@link LearningClassifierExpression}. + *
B48 + * There can be no more than one testingMetric clause in a + * {@link LearningClassifierExpression}.
B49 + * There can be no more than one alpha clause in a {@link LearningClassifierExpression} + * .
B50 + * The {@link InstanceCreationExpression} in the testFrom clause of a + * {@link LearningClassifierExpression} instantiates a {@link Parser}.
B51 + * There can be no more than one testFrom clause in a + * {@link LearningClassifierExpression}.
B52 + * Parameter tuning can only be performed if either a cval clause or + * testFrom clause is supplied.
B53 + * A parameter set must include only simple constant expressions.
B54 + * A parameter range must be defined such that the enumerated list of values is finite.
B55 + * A parameter range must be defined with numeric values.
B56 + * A parameter set must be defined within a {@link LearningClassifierExpression}.
B57 + * There can be no more than one preExtract clause in a + * {@link LearningClassifierExpression}.
B58 + * The prune clause must be of the form prune a b x where a + * is one of ("global"|"perClass"), b is one of ("count"|"percent"), and x + * is numeric.
B59 + * The prune threshold must be an integer when using the 'count' type or a real number in [0,1] when + * using the 'percent' type.
B60
+ * Feature pre-extraction should not be explicitly enabled when there is no "from"
+ * clause (and thus no parser).
B61 + * The conjunction of a classifier with itself is not allowed if the classifier has return type + * discrete or real.
B62 + * There can be no more than one encoding clause in a + * {@link LearningClassifierExpression}.
* - * @see RevisionAnalysis - * @see Parser - * @see Learner - * @see Normalizer - * @see Inference + * @see RevisionAnalysis + * @see Parser + * @see Learner + * @see Normalizer + * @see Inference * @author Nick Rizzolo **/ -public class SemanticAnalysis extends Pass -{ - // Static variables. - /** - * The keys of this map are the names of {@link CodeGenerator}s; the values - * are HashSets of names of other locally defined - * {@link CodeGenerator}s that depend on the {@link CodeGenerator} named by - * the associated key. The dependor graph has an entry for every - * {@link CodeGenerator} in the source. - **/ - public static HashMap> dependorGraph; - /** - * The keys of this map are the names of {@link CodeGenerator}s; the values - * are HashSets of names of other (not necessarily locally - * defined) {@link CodeGenerator}s that are invoked within the - * {@link CodeGenerator} named by the associated key. The invoked graph - * does not necessarily have an entry for every {@link CodeGenerator} in - * the source. - **/ - public static HashMap> invokedGraph; - /** - * The keys of this map are the names of {@link Classifier}s; the values - * are {@link ASTNode}s representing the source code implementations of the - * associated {@link Classifier}s. This table has an entry for every - * {@link Classifier} in the source. - **/ - public static HashMap representationTable; - - - // Utility methods. - /** - * Running an instance of this pass overwrites the static member variables; - * use this method to run an instance of this pass and then restore the - * static member variables to their states before the pass was run. - * - * @param ast An abstract syntax tree to run semantic analysis on. 
- **/ - public static void runAndRestore(AST ast) { - HashMap> dg = dependorGraph, ig = invokedGraph; - HashMap rt = representationTable; - Pass.canAddErrorsAndWarnings = false; - new SemanticAnalysis(ast).run(); - Pass.canAddErrorsAndWarnings = true; - dependorGraph = dg; - invokedGraph = ig; - representationTable = rt; - } - - - /** - * Adds an edge from dependency to dependor in the {@link #dependorGraph}. - * If the dependor is null, no new list item is added, but the - * HashSet associated with the dependency is still created if - * it didn't already exist. - * - * @param dependency The name of the node depended on. - * @param dependor The name of the node doing the depending. - **/ - public static void addDependor(String dependency, String dependor) { - HashSet dependors = dependorGraph.get(dependency); - - if (dependors == null) { - dependors = new HashSet(); - dependorGraph.put(dependency, dependors); - } - - if (dependor != null) dependors.add(dependor); - } - - - /** - * Use this method to determine if one {@link CodeGenerator} depends on - * another either directly or indirectly. - * - * @param c1 One {@link CodeGenerator}. - * @param c2 The other {@link CodeGenerator}. - * @return true iff c1 depends on - * c2. - **/ - public static boolean isDependentOn(String c1, String c2) { - LinkedList queue = new LinkedList(); - queue.add(c2); - - HashSet visited = new HashSet(); - - while (queue.size() > 0) { - String c = queue.removeFirst(); - if (c.equals(c1)) return true; - - visited.add(c); - for (Iterator I = dependorGraph.get(c).iterator(); - I.hasNext(); ) { - c = I.next(); - if (!visited.contains(c)) queue.add(c); - } - } - - return false; - } - - - /** - * Adds an edge from invoker to invokee in the {@link #invokedGraph}. - * - * @param invoker The name of the node doing the invoking. - * @param invokee The name of the invoked node. 
- **/ - private static void addInvokee(String invoker, String invokee) { - HashSet invokees = invokedGraph.get(invoker); - - if (invokees == null) { - invokees = new HashSet(); - invokedGraph.put(invoker, invokees); - } - - invokees.add(invokee); - } - - - /** - * Prints the contents of {@link #dependorGraph} to STDOUT in - * a readable form. - **/ - public static void printDependorGraph() { printGraph(dependorGraph); } - - - /** - * Prints the contents of {@link #invokedGraph} to STDOUT in a - * readable form. - **/ - public static void printInvokedGraph() { printGraph(invokedGraph); } - - - /** - * Prints the contents of the specified graph to STDOUT in a - * readable form. - * - * @param graph The graph to print as a map of collections. - **/ - private static void printGraph(HashMap> graph) { - String[] keys = graph.keySet().toArray(new String[0]); - Arrays.sort(keys); - for (int i = 0; i < keys.length; ++i) { - System.out.print(keys[i] + " ->"); - String[] edges = - (String[]) graph.get(keys[i]).toArray(new String[0]); - for (int j = 0; j < edges.length; ++j) - System.out.print(" " + edges[j]); - System.out.println(); - } - } - - - /** - * Calls the Class#isAssignableFrom(Class) method after making - * sure that both classes involved aren't null. The assumption made when - * calling this method is that if either argument class is - * null, an error has already been generated with respect to - * it. - * - * @param c1 Class 1. - * @param c2 Class 2. - * @return true iff either class is null or c1 is assignable - * from c2. - **/ - @SuppressWarnings({ "unchecked", "rawtypes" }) - private static boolean isAssignableFrom(Class c1, Class c2) { - return c1 == null || c2 == null || c1.isAssignableFrom(c2); - } - - - /** - * Called when analyzing the feature types for use by a WEKA classifier. - * Writes the necessary attribute information from a - * ClassifierReturnType to lce.attributeString. - * - *

lce.attributeString takes the form of a colon-separated - * list of attribute specifications, each of which are formated in the - * following way: - * "type_name(_value-list)". - * - *

value-list takes the same format as it would in an lbj - * source file. i.e. {"value1","value2",...} - * - *

type can take the values str (string - * attributes), nom (nominal attributes), or num - * (numerical attributes). - * - *

The first attribute in this string is, by convention, considered to - * be the class attribute. - **/ - public void wekaIze(int line, ClassifierReturnType RT, Name name) { - String typeName = RT.getTypeName(); - if (!typeName.equals("discrete") && !typeName.equals("real")) - reportError(line, "Classifiers with return type " + typeName - + " are not usable with WEKA learning algorithms"); - - // String attribute case - if (typeName.equals("discrete")) { - if (RT.values.size() == 0) { - lceInQuestion.attributeString.append("str_"); - lceInQuestion.attributeString.append(name.toString()); - lceInQuestion.attributeString.append(':'); - } - // Nominal attribute case - else { - lceInQuestion.attributeString.append("nom_"); - lceInQuestion.attributeString.append(name); - lceInQuestion.attributeString.append('_'); - - Constant[] constantList = RT.values.toArray(); - - for (int i = 0; i < constantList.length; ++i) { - String value = constantList[i].value; - - if (value.length() > 1 && value.charAt(0) == '"' - && value.charAt(value.length() - 1) == '"') - value = value.substring(1, value.length() - 1); - - lceInQuestion.attributeString.append(value); - lceInQuestion.attributeString.append(','); - } - - lceInQuestion.attributeString - .deleteCharAt(lceInQuestion.attributeString.length() - 1); - lceInQuestion.attributeString.append(':'); - } - } - // Numerical attribute case - else { - lceInQuestion.attributeString.append("num_"); - lceInQuestion.attributeString.append(name); - lceInQuestion.attributeString.append(':'); - } - } - - - /** - * Creates a new anonymous classifier name. - * - * @param lastName The last part of the classifier's name as determined by - * its parent's name. - * @return The created name. 
- **/ - public Name anonymousClassifier(String lastName) { // A3 - int index = lastName.indexOf('$'); - if (lastName.indexOf('$', index + 1) >= 0) return new Name(lastName); - return - new Name(lastName.substring(0, index) + "$" - + lastName.substring(index)); - } - - - // Member variables. - /** - * Lets AST children know about the code producing node they are contained - * in. - **/ - private CodeGenerator currentCG; - /** - * Lets AST children know the return type of the - * {@link ClassifierAssignment} they are contained in. - **/ - private ClassifierReturnType currentRT; - /** - * Used when analyzing constraint declarations to determine if a constraint - * statement appears within them. - **/ - private boolean containsConstraintStatement; - /** Lets all nodes know what symbol table represents their scope. */ - private SymbolTable currentSymbolTable; - /** - * Lets AST nodes know how deeply nested inside - * {@link QuantifiedConstraintExpression}s they are. - **/ - private int quantifierNesting; - /** - * A flag which indicates whether or not the compiler is in the process of - * gathering attribute information for a WEKA learning algorithm. - **/ - private boolean attributeAnalysis = false; - /** - * A reference to the LearningClassifierExpression which is - * currently under analysis. - **/ - private LearningClassifierExpression lceInQuestion; - - - /** Default constructor. */ - public SemanticAnalysis() { } - - /** - * Instantiates a pass that runs on an entire {@link AST}. - * - * @param ast The program to run this pass on. - **/ - public SemanticAnalysis(AST ast) { super(ast); } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param ast The node to process. 
- **/ - public void run(AST ast) { - currentSymbolTable = ast.symbolTable; - - if (ast.symbolTable.importedSize() == 0) { // A1 - ast.symbolTable.addImported("edu.illinois.cs.cogcomp.lbjava.classify.*"); - ast.symbolTable.addImported("edu.illinois.cs.cogcomp.lbjava.learn.*"); - ast.symbolTable.addImported("edu.illinois.cs.cogcomp.lbjava.parse.*"); - ast.symbolTable.addImported("edu.illinois.cs.cogcomp.lbjava.io.IOUtilities"); - ast.symbolTable.addImported("edu.illinois.cs.cogcomp.lbjava.infer.*"); - } - - dependorGraph = new HashMap>(); - invokedGraph = new HashMap>(); - representationTable = new HashMap(); - quantifierNesting = 0; - - runOnChildren(ast); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param decl The node to process. - **/ - public void run(PackageDeclaration decl) { - ast.symbolTable.setPackage(decl.name.toString()); // A1 - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param decl The node to process. - **/ - public void run(ImportDeclaration decl) { - ast.symbolTable.addImported(decl.name.toString()); // A1 - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param list The node to process. - **/ - public void run(DeclarationList list) { - if (list.size() == 0) return; - - for (DeclarationList.DeclarationListIterator I = list.listIterator(); - I.hasNext(); ) { - Declaration d = I.nextItem(); - if (ast.symbolTable.containsKey(d.name)) // B1 - reportError(d.line, - "A declaration named '" + d.name + "' already exists."); - ast.symbolTable.put(d.name, d.getType()); // A1 - } - - currentCG = null; - runOnChildren(list); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param ca The node to process. 
- **/ - public void run(ClassifierAssignment ca) { - Type inputType = ca.argument.getType(); - if (!(inputType instanceof ReferenceType // B44 - || inputType instanceof ArrayType)) - reportError(ca.line, - "The input to a classifier must be a single object reference."); - - ca.expression.name = (Name) ca.name.clone(); // A3 - - ca.expression.returnType = (ClassifierReturnType) ca.returnType.clone(); - // B3 - ca.expression.argument = (Argument) ca.argument.clone(); // A4 - - ca.expression.singleExampleCache = ca.singleExampleCache; // A21 - - if (ca.cacheIn != null) { - // B36 - if (ca.returnType.type == ClassifierReturnType.DISCRETE_GENERATOR - || ca.returnType.type == ClassifierReturnType.REAL_GENERATOR - || ca.returnType.type == ClassifierReturnType.MIXED_GENERATOR) - reportError(ca.line, - "Generators' outputs cannot be cached (in a member " - + "variable or otherwise)."); - if (ca.expression instanceof Conjunction) - reportError(ca.line, - "Conjunctive classifiers' outputs cannot be cached (in a " - + "member variable or otherwise)."); - - ca.expression.setCacheIn(ca.cacheIn); // A18 - } - - currentRT = (ClassifierReturnType) ca.returnType.clone(); // A4 - currentSymbolTable = ca.symbolTable = new SymbolTable(currentSymbolTable); - // A13 - runOnChildren(ca); - currentSymbolTable = currentSymbolTable.getParent(); - ca.expression.returnType = (ClassifierReturnType) ca.returnType.clone(); - - ca.expression.comment = ca.comment; // A19 - representationTable.put(ca.name.toString(), ca.expression); // A2 - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param cce The node to process. 
- **/ - public void run(ClassifierCastExpression cce) { - if (!cce.castType.isContainableIn(cce.returnType)) // B3 - reportError(cce.line, - "Found classifier expression of return type '" + cce.castType - + "' when '" + cce.returnType + "' was expected."); - - cce.expression.name = (Name) cce.name.clone(); // A3 - - cce.expression.returnType = (ClassifierReturnType) cce.castType.clone(); - // B3 - cce.expression.argument = (Argument) cce.argument.clone(); // A4 - - cce.expression.singleExampleCache = cce.singleExampleCache; // A21 - - ClassifierReturnType saveRT = currentRT; - currentRT = (ClassifierReturnType) cce.castType.clone(); // A4 - boolean saveAttributeAnalysis = attributeAnalysis; - attributeAnalysis = false; - - runOnChildren(cce); - - attributeAnalysis = saveAttributeAnalysis; - currentRT = saveRT; - - representationTable.put(cce.name.toString(), cce); // A2 - cce.expression.returnType = (ClassifierReturnType) cce.castType.clone(); - - if (attributeAnalysis) wekaIze(cce.line, cce.returnType, cce.name); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param cn The node to process. 
- **/ - public void run(ClassifierName cn) { - if (cn.name.toString().indexOf("$$") != -1) // A3 A17 - cn.name = cn.referent; - else addDependor(cn.name.toString(), null); // A8 - - Type t = ast.symbolTable.get(cn); - if (!(t instanceof ClassifierType)) { // B21 - reportError(cn.line, "'" + cn + "' is not known to be a classifier."); - cn.returnType = null; - return; - } - - ClassifierType type = (ClassifierType) t; - - Type input = type.getInput(); - if (!isAssignableFrom(input.typeClass(), - cn.argument.getType().typeClass())) // B4 - reportError(cn.line, - "Classifier '" + cn + "' has input type '" + input + "' when '" - + cn.argument.getType() + "' was expected."); - - ClassifierReturnType output = type.getOutput(); - if (!output.isContainableIn(cn.returnType)) // B3 - reportError(cn.line, - "Classifier '" + cn + "' has return type '" + output + "' when '" - + cn.returnType + "' was expected."); - else cn.returnType = output; // A4 - - if (attributeAnalysis) wekaIze(cn.line, cn.returnType, cn.name); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param cc The node to process. - **/ - public void run(CodedClassifier cc) { - addDependor(cc.name.toString(), null); // A8 - - cc.returnType = (ClassifierReturnType) currentRT.clone(); // A4 - - if (cc.returnType.type == ClassifierReturnType.MIXED_GENERATOR) // B15 - reportError(cc.line, - "A coded classifier may not have return type 'mixed%'."); - - // A13 - currentSymbolTable = cc.symbolTable = cc.body.symbolTable = - new SymbolTable(currentSymbolTable); - - CodeGenerator saveCG = currentCG; - currentCG = cc; - run(cc.argument); // A1 - runOnChildren(cc); - currentCG = saveCG; - - representationTable.put(cc.name.toString(), cc); // A2 - currentSymbolTable = currentSymbolTable.getParent(); - - if (attributeAnalysis) wekaIze(cc.line, cc.returnType, cc.name); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param cg The node to process. 
- **/ - public void run(CompositeGenerator cg) { - addDependor(cg.name.toString(), null); // A8 - - int i = 0; - for (ClassifierExpressionList.ClassifierExpressionListIterator I = - cg.components.listIterator(); - I.hasNext(); ) { - ClassifierExpression e = I.nextItem(); - - e.name = anonymousClassifier(cg.name + "$" + i++); // A3 - e.returnType = - new ClassifierReturnType(ClassifierReturnType.MIXED_GENERATOR); // B3 - e.argument = (Argument) cg.argument.clone(); // A4 - e.singleExampleCache = cg.singleExampleCache; // A21 - - e.runPass(this); - - addDependor(e.name.toString(), cg.name.toString()); // A8 - } - - String cgReturnType = null; - ConstantList values = null; - for (ClassifierExpressionList.ClassifierExpressionListIterator I = - cg.components.listIterator(); - I.hasNext(); ) { // A4 - ClassifierExpression component = I.nextItem(); - - if (component.returnType == null) return; - String componentReturnType = component.returnType.toString(); - if (cgReturnType == null) { - cgReturnType = componentReturnType; - values = component.returnType.values; - } - else { - if (cgReturnType.startsWith("discrete") - && !componentReturnType.startsWith("discrete") - || cgReturnType.startsWith("real") - && !componentReturnType.startsWith("real")) - cgReturnType = "mixed"; - if (values.size() > 0 && !values.equals(component.returnType.values)) - values = new ConstantList(); - } - } - - assert cgReturnType != null : "Empty component list"; - - // A4 - ClassifierReturnType output = null; - if (cgReturnType.startsWith("discrete")) - output = - new ClassifierReturnType(ClassifierReturnType.DISCRETE_GENERATOR, - values); - else if (cgReturnType.startsWith("real")) - output = new ClassifierReturnType(ClassifierReturnType.REAL_GENERATOR); - else - output = new ClassifierReturnType(ClassifierReturnType.MIXED_GENERATOR); - - if (!output.isContainableIn(cg.returnType)) // B3 - reportError(cg.line, - "Found a classifier expression of return type '" + output - + "' when '" + 
cg.returnType + "' was expected."); - else cg.returnType = output; - - representationTable.put(cg.name.toString(), cg); // A2 - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param c The node to process. - **/ - @SuppressWarnings("rawtypes") - public void run(Conjunction c) { - addDependor(c.name.toString(), null); // A8 - - c.left.name = anonymousClassifier(c.name + "$0"); // A3 - c.left.returnType = - new ClassifierReturnType(ClassifierReturnType.MIXED_GENERATOR); // B3 - c.left.argument = (Argument) c.argument.clone(); // A4 - c.left.singleExampleCache = c.singleExampleCache; // A21 - - c.right.name = anonymousClassifier(c.name + "$1"); // A3 - c.right.returnType = - new ClassifierReturnType(ClassifierReturnType.MIXED_GENERATOR); // B3 - c.right.argument = (Argument) c.argument.clone(); // A4 - c.right.singleExampleCache = c.singleExampleCache; // A21 - - boolean saveAttributeAnalysis = attributeAnalysis; - attributeAnalysis = false; - - runOnChildren(c); - - attributeAnalysis = saveAttributeAnalysis; - - if (c.left.returnType == null || c.right.returnType == null) return; - - addDependor(c.left.name.toString(), c.name.toString()); // A8 - addDependor(c.right.name.toString(), c.name.toString()); // A8 - - // A4 - Type inputType = c.right.argument.getType(); - Class inputRight = inputType.typeClass(); - Type leftType = c.left.argument.getType(); - Class inputLeft = leftType.typeClass(); - if (!isAssignableFrom(inputLeft, inputRight)) inputType = leftType; - - c.argument = - new Argument(inputType, c.argument.getName(), c.argument.getFinal()); - - ConstantList valuesLeft = c.left.returnType.values; - ConstantList valuesRight = c.right.returnType.values; - ConstantList values = new ConstantList(); - if (valuesLeft.size() > 0 && valuesRight.size() > 0) - for (ConstantList.ConstantListIterator I = valuesLeft.listIterator(); - I.hasNext(); ) { - Constant valueLeft = I.nextItem(); - for (ConstantList.ConstantListIterator J = 
valuesRight.listIterator(); - J.hasNext(); ) - values.add( - new Constant(valueLeft.noQuotes() + "&" - + J.nextItem().noQuotes())); - } - - int rt1 = c.left.returnType.type; - int rt2 = c.right.returnType.type; - if (rt2 < rt1) { - int temp = rt1; - rt1 = rt2; - rt2 = temp; - } - - ClassifierReturnType output = null; - switch (10 * rt1 + rt2) { - case 0: - output = - new ClassifierReturnType(ClassifierReturnType.DISCRETE, values); - break; - - case 11: - output = new ClassifierReturnType(ClassifierReturnType.REAL); - break; - - case 3: case 33: - output = - new ClassifierReturnType(ClassifierReturnType.DISCRETE_ARRAY, - values); - break; - - case 14: case 44: - output = new ClassifierReturnType(ClassifierReturnType.REAL_ARRAY); - break; - - case 6: case 36: case 66: - output = - new ClassifierReturnType(ClassifierReturnType.DISCRETE_GENERATOR, - values); - break; - - case 1: case 4: case 7: case 13: case 16: case 17: case 34: case 37: - case 46: case 47: case 67: case 77: - output = - new ClassifierReturnType(ClassifierReturnType.REAL_GENERATOR); - break; - - case 8: case 18: case 38: case 48: case 68: case 78: case 88: - output = - new ClassifierReturnType(ClassifierReturnType.MIXED_GENERATOR); - break; - } - - assert output != null - : "Unexpected conjunction types: " - + ClassifierReturnType.typeName(rt1) + ", " - + ClassifierReturnType.typeName(rt2); - - if (!output.isContainableIn(c.returnType)) // B3 - reportError(c.line, - "Found a classifier expression of return type '" + output - + "' when '" + c.returnType + "' was expected."); - else if ((output.type == ClassifierReturnType.DISCRETE - || output.type == ClassifierReturnType.REAL) - && c.left.equals(c.right)) // B61 - reportError(c.line, - "A classifier cannot be conjuncted with itself unless it returns " - + "multiple features."); - else c.returnType = output; - - representationTable.put(c.name.toString(), c); // A2 - - if (attributeAnalysis) wekaIze(c.line, c.returnType, c.name); - } - - - /** - * Runs 
this pass on all nodes of the indicated type. - * - * @param ii The node to process. - **/ - public void run(InferenceInvocation ii) { - // A8 - addDependor(ii.name.toString(), null); - addDependor(ii.inference.toString(), ii.name.toString()); - addDependor(ii.classifier.toString(), ii.name.toString()); - - runOnChildren(ii); - - if (!(ii.inference.typeCache instanceof InferenceType)) { // B22 - reportError(ii.inference.line, - "'" + ii.inference + "' is not known to be a inference."); - return; - } - - if (!(ii.classifier.typeCache instanceof ClassifierType)) { // B23 - reportError(ii.classifier.line, - "'" + ii.classifier + "' is not known to be a learner."); - return; - } - - ClassifierType argumentType = (ClassifierType) ii.classifier.typeCache; - ClassifierReturnType output = argumentType.getOutput(); - if (output.type != ClassifierReturnType.DISCRETE - || !argumentType.isLearner()) // B23 - reportError(ii.classifier.line, - "'" + ii.classifier + "' is not a discrete learner."); - - Type input = argumentType.getInput(); - if (!isAssignableFrom(input.typeClass(), - ii.argument.getType().typeClass())) // B24 - reportError(ii.line, - "Classifier '" + ii + "' has input type '" + input + "' when '" - + ii.argument.getType() + "' was expected."); - - if (!output.isContainableIn(ii.returnType)) // B3 - reportError(ii.line, - "Classifier '" + ii + "' has return type '" + output + "' when '" - + ii.returnType + "' was expected."); - else ii.returnType = output; // A4 - - InferenceType type = (InferenceType) ii.inference.typeCache; - boolean found = false; - for (int i = 0; i < type.getFindersLength() && !found; ++i) - found = type.getFinderType(i).equals(input); - - if (!found) // B25 - reportError(ii.line, - "Inference '" + ii.inference + "' does not contain a head finder " - + " method for class '" + input + "'."); - - representationTable.put(ii.name.toString(), ii); // A2 - - if (attributeAnalysis) wekaIze(ii.line, ii.returnType, ii.name); - } - - - /** - * Runs 
this pass on all nodes of the indicated type. - * - * @param lce The node to process. - **/ - @SuppressWarnings("rawtypes") - public void run(LearningClassifierExpression lce) { - String lceName = lce.name.toString(); - addDependor(lceName, null); // A8 - - // Setting up signatures for labeler and extractor. - int i = 0; - if (lce.labeler != null) { - lce.labeler.name = anonymousClassifier(lceName + "$" + i++); // A3 - lce.labeler.returnType = - new ClassifierReturnType(ClassifierReturnType.MIXED_GENERATOR); // B3 - lce.labeler.argument = (Argument) lce.argument.clone(); // A4 - lce.labeler.singleExampleCache = lce.singleExampleCache; // A21 - } - - lce.extractor.name = anonymousClassifier(lceName + "$" + i); // A3 - lce.extractor.returnType = - new ClassifierReturnType(ClassifierReturnType.MIXED_GENERATOR); // B3 - lce.extractor.argument = (Argument) lce.argument.clone(); // A4 - lce.extractor.singleExampleCache = lce.singleExampleCache; // A21 - - // Making sure an appropriate quantity of each clause is present. 
- if (lce.usingClauses != 1) { // B18 - reportError(lce.line, - "A learning classifier expression must contain exactly one 'using' " - + "clause."); - return; - } - - if (lce.fromClauses > 1) { // B17 - reportError(lce.line, - "A learning classifier expression can have no more than one 'from' " - + "clause."); - return; - } - - if (lce.withClauses > 1) { // B16 - reportError(lce.line, - "A learning classifier expression can have no more than one 'with' " - + "clause."); - return; - } - - if (lce.encodingClauses > 1) { // B62 - reportError(lce.line, - "A learning classifier expression can have no more than one " - + "'encoding' clause."); - return; - } - - if (lce.testFromClauses > 1) { // B51 - reportError(lce.line, - "A learning classifier expression can have no more than one " - + "'testFrom' clause."); - return; - } - - if (lce.evaluateClauses > 1) { // B37 - reportError(lce.line, - "A learning classifier expression can have no more than one " - + "'evaluate' clause."); - } - - if (lce.cvalClauses == 0) { - if (lce.alphaClauses > 0) { // B45 - reportError(lce.line, - "The alpha keyword is meaningful only if the cval keyword is " - + "also being used, and should not be used otherwise."); - } - - if (lce.testFromClauses == 0 && lce.testingMetric != null) { // B46 - reportError(lce.testingMetric.line, - "The 'testingMetric' keyword is meaningful only if one of 'cval' " - + "or 'testFrom' is also present, and should not be used " - + "otherwise."); - } - } - - if (lce.cvalClauses > 1) { // B47 - reportError(lce.line, - "A learning classifier expression can have no more than one 'cval'" - + " clause."); - } - - if (lce.testingMetricClauses > 1) { // B48 - reportError(lce.line, - "A learning classifier expression can have no more than one " - + "'testingMetric' clause."); - } - - if (lce.alphaClauses > 1) { // B49 - reportError(lce.line, - "A learning classifier expression can have no more than one 'alpha'" - + " clause."); - } - - if (lce.preExtractClauses > 1) { // 
B57 - reportError(lce.line, - "A learning classifier expression can have no more than one " - + "'preExtract' clause."); - } - - if (lce.evaluation != null && lce.evaluation instanceof MethodInvocation) - ((MethodInvocation) lce.evaluation).isEvaluateArgument = true; - if (lce.rounds != null && lce.rounds instanceof ParameterSet) - ((ParameterSet) lce.rounds).inRounds = true; - - // Make sure we have a learning algorithm. - if (lce.learnerName == null) { - if (lce.learnerConstructor == null) { // A11 - if (lce.returnType.toString().charAt(0) == 'd') - lce.learnerConstructor = - LearningClassifierExpression.defaultDiscreteLearner; - else - lce.learnerConstructor = - LearningClassifierExpression.defaultRealLearner; - //lce.learnerConstructor.runPass(this); - } - - lce.learnerName = lce.learnerConstructor.name; // A20 - } - - //lce.learnerName.runPass(this); - - boolean weka = false; - weka = lce.learnerName.equals(new Name("WekaWrapper")) - || lce.learnerName.equals(new Name("edu.illinois.cs.cogcomp.lbjava.learn.WekaWrapper")); - - boolean saveAttributeAnalysis = attributeAnalysis; - LearningClassifierExpression saveLCE = null; - - // Weka specific pre-processing. - if (weka) { - attributeAnalysis = true; - lce.attributeString = new StringBuffer(); - - // Identify which learning classifier expression we are gathering - // feature information for. - saveLCE = lceInQuestion; - lceInQuestion = lce; - } - - CodeGenerator saveCG = currentCG; - currentCG = lce; - runOnChildren(lce); - currentCG = saveCG; - - // Weka specific post-processing. 
- if (weka) { - attributeAnalysis = saveAttributeAnalysis; - lceInQuestion = saveLCE; - if (lce.attributeString.length() != 0) - lce.attributeString.deleteCharAt(lce.attributeString.length() - 1); - - if (lce.learnerParameterBlock != null) - lce.learnerParameterBlock.statementList().add( - new ExpressionStatement( - new Assignment( - new Operator(Operator.ASSIGN), - new Name("attributeString"), - new Constant('"' + lce.attributeString.toString() - + '"')))); - } - - if (lce.labeler != null) - addDependor(lce.labeler.name.toString(), lceName); // A8 - addDependor(lce.extractor.name.toString(), lceName); // A8 - - // Check the "rounds" clause (if any) for semantic errors. - if (lce.rounds != null) { - if (lce.rounds instanceof Constant) { - try { Integer.parseInt(((Constant) lce.rounds).value); } - catch (Exception e) { // B35 - reportError(lce.rounds.line, - "The value supplied before 'rounds' must be an integer."); - } - } - else if (!(lce.rounds instanceof ParameterSet)) { - reportError(lce.rounds.line, - "The value supplied before 'rounds' must be an integer."); - } - } - - // Check CV clauses for appropriate argument types. - if (lce.K != null) { - try { Integer.parseInt(lce.K.value); } - catch (Exception e) { // B40 - reportError(lce.K.line, - "The value supplied after 'cval' must be an integer."); - } - } - - if (lce.alpha != null) { - try { Double.parseDouble(lce.alpha.value); } - catch (Exception e) { // B43 - reportError(lce.alpha.line, - "The value supplied after 'alpha' must be an double."); - } - } - - // Check "preExtract" clause for appropriate argument type. 
- if (!(lce.preExtract.value.equals("\"none\"") - || lce.preExtract.value.equals("\"disk\"") - || lce.preExtract.value.equals("\"diskZip\"") - || lce.preExtract.value.equals("\"memory\"") - || lce.preExtract.value.equals("\"memoryZip\"") - || lce.preExtract.value.equals("\"true\"") - || lce.preExtract.value.equals("\"false\"") - || lce.preExtract.value.equals("true") - || lce.preExtract.value.equals("false"))) { // B41 - reportError(lce.preExtract.line, - "The value supplied after 'preExtract' must be a boolean or one of " - + "(\"none\"|\"disk\"|\"diskZip\"|\"memory\"|\"memoryZip\")."); - } - - // Check that pre-extraction has not been enabled without a from clause. - if (!(lce.preExtract.value.equals("\"none\"") - || lce.preExtract.value.equals("\"false\"") - || lce.preExtract.value.equals("false")) - && lce.parser == null) { // B60 - reportWarning(lce.preExtract.line, - "Feature pre-extraction will be disabled since there is no " - + "\"from\" clause."); - lce.preExtract = new Constant("false"); - } - - // Check "progressOutput" clause for appropriate argument type. - if (lce.progressOutput != null) { - try { Integer.parseInt(lce.progressOutput.value); } - catch (Exception e) { // B42 - reportError(lce.progressOutput.line, - "The value supplied after 'progressOutput' must be an integer."); - } - } - - // Check "prune" clause for appropriate argument types. - // Only certain keywords are legal. 
- if (lce.pruneCountType != null) { // B58 - if (!(lce.pruneCountType.value.equals("\"global\"") - || lce.pruneCountType.value.equals("\"perClass\"")) - || !(lce.pruneThresholdType.value.equals("\"count\"") - || lce.pruneThresholdType.value.equals("\"percent\""))) { - reportError(lce.pruneCountType.line, - "The prune clause must take the form " - + "'prune (\"global\"|\"perClass\") (\"count\"|\"percent\") X' " - + "where X is numeric."); - } - - if (lce.preExtract.value.equals("\"none\"") - || lce.preExtract.value.equals("\"false\"") - || lce.preExtract.value.equals("false")) { - reportError(lce.preExtract.line, - "Feature pruning cannot be performed unless pre-extraction is " - + "enabled."); - } - } - - // The theshold must have the right type for the given keywords. - if (lce.pruneThresholdType != null) { // B59 - if (lce.pruneThresholdType.value.equals("\"percent\"")) { - try { - double p = Double.parseDouble(lce.pruneThreshold.value); - if (p < 0 || p > 1) throw new Exception(); - } - catch (Exception e) { - reportError(lce.pruneThresholdType.line, - "The prune threshold must be a real number in [0,1] when using " - + "the 'percent' type."); - } - } - else { - try { Integer.parseInt(lce.pruneThreshold.value); } - catch (Exception e) { - reportError(lce.pruneThresholdType.line, - "The prune threshold must be an integer when using the 'count' " - + "type."); - } - } - } - - // Pruning implies pre-extraction. - if (lce.pruneCountType != null - && (lce.preExtract == null || lce.preExtract.value.equals("\"none\"") - || lce.preExtract.value.equals("\"false\"") - || lce.preExtract.value.equals("false"))) { - lce.preExtract = - new Constant(LearningClassifierExpression.defaultPreExtract); - reportWarning(lce.pruneCountType.line, - "Pruning cannot be performed without pre-extraction. Setting " - + "'preExtract " + lce.preExtract + "'."); - } - - // Check "from" clause for appropriate argument type. 
- if (lce.parser != null) { // B5 - if (!(lce.parser.typeCache instanceof ReferenceType)) - reportError(lce.parser.line, - "The 'from' clause of a learning classifier expression must " - + "instantiate a edu.illinois.cs.cogcomp.lbjava.parse.Parser."); - else { - Class iceClass = lce.parser.typeCache.typeClass(); - if (!isAssignableFrom(Parser.class, iceClass)) - reportError(lce.parser.line, - "The 'from' clause of a learning classifier expression must " - + "instantiate a edu.illinois.cs.cogcomp.lbjava.parse.Parser."); - } - } - - // Check "with" clause for appropriate argument types. - Type input = lce.argument.getType(); - Class inputClass = input.typeClass(); - ClassifierReturnType output = null; - - // Check that the specified algorithm accepts our input. - if (!(lce.learnerName.typeCache instanceof ClassifierType) - || !((ClassifierType) lce.learnerName.typeCache).isLearner()) { // B6 - reportError(lce.learnerName.line, - "The 'with' clause of a learning classifier expression must " - + "instantiate a edu.illinois.cs.cogcomp.lbjava.learn.Learner."); - } - else { - Class iceClass = AST.globalSymbolTable.classForName(lce.learnerName); - - if (iceClass != null) { - if (!isAssignableFrom(Learner.class, iceClass)) // B6 - reportError(lce.learnerName.line, - "The 'with' clause of a learning classifier expression must " - + "instantiate a edu.illinois.cs.cogcomp.lbjava.learn.Learner."); - else { // A4 - ClassifierType learnerType = - (ClassifierType) lce.learnerName.typeCache; - Type learnerInputType = learnerType.getInput(); - if (!isAssignableFrom(learnerInputType.typeClass(), inputClass)) - reportError(lce.learnerName.line, // B7 - "A learning classifier with input type '" + input - + "' cannot use a Learner with input type '" - + learnerInputType + "'."); - - output = learnerType.getOutput(); - } - } - } - - // Check that the specified algorithm can produce our output. 
- if (output != null && !output.isContainableIn(lce.returnType)) { // B3 - if (output.toString().charAt(0) != 'd' - || lce.returnType.toString().charAt(0) != 'd') - reportError(lce.line, - "Learner " + lce.learnerName + " returns '" + output - + "' which conflicts with the declared return type '" - + lce.returnType + "'."); - else { - lce.checkDiscreteValues = true; - reportWarning(lce.line, - "Learner " + lce.learnerName + " returns '" + output - + "' which may conflict with the declared return type '" - + lce.returnType + "'. A run-time error will be reported if a " - + "conflict is detected."); - } - } - else lce.returnType = output; - - if (output != null && lce.labeler != null - && lce.labeler.returnType != null - && !lce.labeler.returnType.isContainableIn(output)) { // B3 - if (output.toString().charAt(0) == 'd' - && lce.labeler.returnType.toString().charAt(0) == 'd') - reportWarning(lce.line, - "The labeler for learner " + lceName + " may return more labels " - + "than the learner is designed to deal with. A run-time error " - + "will be reported if a conflict is detected."); - else - reportWarning(lce.line, - "The labeler for learner " + lceName + " may return labels that " - + "the learner is not designed to deal with. A run-time error " - + "will be reported if a conflict is detected."); - } - - // Check "testFrom" clause for appropriate argument type. 
- if (lce.testParser != null) { // B50 - if (!(lce.testParser.typeCache instanceof ReferenceType)) - reportError(lce.testParser.line, - "The 'testFrom' clause of a learning classifier expression must " - + "instantiate a edu.illinois.cs.cogcomp.lbjava.parse.Parser."); - else { - Class iceClass = lce.testParser.typeCache.typeClass(); - if (!isAssignableFrom(Parser.class, iceClass)) - reportError(lce.testParser.line, - "The 'testFrom' clause of a learning classifier expression must" - + " instantiate a edu.illinois.cs.cogcomp.lbjava.parse.Parser."); - } - } - - representationTable.put(lceName, lce); // A2 - - if (attributeAnalysis) wekaIze(lce.line, lce.returnType, lce.name); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param p The node to process. - **/ - public void run(ParameterSet p) { - if (!(currentCG instanceof LearningClassifierExpression)) { // B56 - reportError(p.line, - "Parameter sets cannot be defined outside of a " - + "LearningClassifierExpression."); - return; - } - else { - LearningClassifierExpression lce = - (LearningClassifierExpression) currentCG; - if (lce.K == null && lce.testParser == null) { // B52 - reportError(lce.line, - "Parameter tuning can only be performed if either a 'cval' " - + "clause or a 'testFrom' clause is supplied."); - return; - } - - if (!p.inRounds) lce.parameterSets.add(p); - } - - runOnChildren(p); - - // Make sure the values in the parameter set specification make sense. - ExpressionList.ExpressionListIterator PI = null; - - if (p.isRange()) { - ExpressionList rangeList = new ExpressionList(); - rangeList.add(p.start); - rangeList.add(p.end); - rangeList.add(p.increment); - - PI = rangeList.listIterator(); - } - else { - PI = p.listIterator(); - } - - for (int PIindex = 0; PI.hasNext(); ++PIindex) { - Expression pe = PI.nextItem(); - - // Replace unary negation expressions with negative constants. 
- if (pe instanceof UnaryExpression) { - if (p.inRounds) { - reportError(p.line, - "The number of rounds must be a positive integer."); - return; - } - - UnaryExpression upe = (UnaryExpression) pe; - if (upe.operation.operation != Operator.MINUS) { // B53 - reportError(p.line, "A parameter set must include only literals."); - return; - } - - Expression subpe = upe.subexpression; - if (!(subpe instanceof Constant)) { // B53 - reportError(p.line, "A parameter set must include only literals."); - return; - } - - pe = - new Constant(pe.line, pe.byteOffset, - "-" + ((Constant) subpe).value); - run((Constant) pe); - - if (p.isRange()) { - switch (PIindex) { - case 0: p.start = pe; break; - case 1: p.end = pe; break; - case 2: p.increment = pe; break; - } - } - else PI.set(pe); - } - else if (!(pe instanceof Constant)) { // B53 - reportError(p.line, - "A parameter set must include only simple constant expressions."); - return; - } - else if (p.inRounds) { - try { Integer.parseInt(((Constant) pe).value); } - catch (Exception ex) { // B35 - reportError(p.line, - "The number of rounds must be a positive integer."); - return; - } - } - - // Determine the type of the parameter set. 
- if (!pe.typeCache.typeClass().equals(String.class) - && !(pe.typeCache instanceof PrimitiveType)) { - reportError(p.line, - "Parameter sets must include only primitive constants or " - + "strings."); - return; - } - else if (p.isRange() - && (!(pe.typeCache instanceof PrimitiveType) - || ((PrimitiveType) pe.typeCache).type - == PrimitiveType.BOOLEAN)) { - reportError(p.line, - "Parameter set ranges must involve primitive values that aren't" - + "booleans."); - return; - } - - else if (p.type == null) p.type = (Type) pe.typeCache.clone(); - - else if (p.type.typeClass().equals(String.class) - != pe.typeCache.typeClass().equals(String.class)) { - reportError(p.line, - "Strings cannot appear in a parameter set with any other type of " - + "value."); - return; - } - else if (p.type instanceof PrimitiveType) { - PrimitiveType pt = (PrimitiveType) p.type; - PrimitiveType pet = (PrimitiveType) pe.typeCache; - if ((pt.type == PrimitiveType.BOOLEAN) - != (pet.type == PrimitiveType.BOOLEAN)) { - reportError(p.line, - "booleans cannot appear in a parameter set with any other type " - + "of value."); - return; - } - - if (p.isRange() && PIindex == 2) { - if (pt.type == PrimitiveType.CHAR) { - if (!pet.isWholeNumber()) { - reportError(p.line, - "The increment of a character parameter set should be an " - + "integer."); - return; - } - } - else pt.type = Math.max(pt.type, pet.type); - } - else pt.type = Math.max(pt.type, pet.type); - } - } - - // If a range, make sure it's not infinite, and convert it. - if (p.isRange()) { - PrimitiveType pt = (PrimitiveType) p.type; - double s = - pt.type == PrimitiveType.CHAR - ? (double) ((Constant) p.start).value.charAt(1) - : Double.parseDouble(((Constant) p.start).value); - double e = - pt.type == PrimitiveType.CHAR - ? 
(double) ((Constant) p.start).value.charAt(1) - : Double.parseDouble(((Constant) p.end).value); - double i = Double.parseDouble(((Constant) p.increment).value); - - // B54 - if (i == 0.0 || e - s != 0 && (e - s > 0) != (i > 0)) - reportError(p.line, - "Infinite loop detected in parameter set range specification."); - else p.convertRange(); - } - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param b The node to process. - **/ - public void run(Block b) { - boolean needLocalTable = b.symbolTable == null; - if (needLocalTable) - currentSymbolTable = b.symbolTable = - new SymbolTable(currentSymbolTable); // A13 - - runOnChildren(b); - - if (needLocalTable) currentSymbolTable = currentSymbolTable.getParent(); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param m The node to process. - **/ - public void run(MethodInvocation m) { - runOnChildren(m); - - if (m.name.typeCache instanceof ClassifierType - && m.parentObject == null) { - if (m.arguments.size() != 1) // B2 - reportError(m.line, "Classifiers can only take a single argument."); - else { - ClassifierReturnType returnType = - ((ClassifierType) m.name.typeCache).getOutput(); - m.isClassifierInvocation = true; // A5 B8 - - if (m.isSensedValue // B9 - && !returnType - .isContainableIn(((CodedClassifier) currentCG).returnType)) - reportError(m.line, - "Classifier " + currentCG.getName() + " with return type '" - + ((CodedClassifier) currentCG).returnType - + "' cannot sense classifier " + m.name + " with return type '" - + returnType + "'."); - else - if (!m.isSensedValue - && (returnType.type == ClassifierReturnType.DISCRETE_GENERATOR - || returnType.type == ClassifierReturnType.REAL_GENERATOR - || returnType.type == ClassifierReturnType.MIXED_GENERATOR)) - reportError(m.line, - "Feature generators may only be invoked as the value " - + "argument of a sense statement in another generator."); - else if (currentCG != null) // A9 - 
addInvokee(currentCG.getName(), m.name.toString()); - } - } - else if (m.name.typeCache instanceof InferenceType - && m.parentObject == null) // B33 - reportError(m.line, - "Inferences may only be invoked to create a new classifier in " - + "classifier expression context."); - else if (m.parentObject == null && m.name.name.length == 1 - && !m.isEvaluateArgument) // B38 - reportError(m.line, "Unrecognized classifier name: '" + m.name + "'"); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param ice The node to process. - **/ - public void run(InstanceCreationExpression ice) { - runOnChildren(ice); - - if (ice.parentObject == null) { // A4 - ice.typeCache = new ReferenceType(ice.name); - ice.typeCache.runPass(this); - } - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param n The node to process. - **/ - public void run(Name n) { - n.symbolTable = currentSymbolTable; // A13 - - n.typeCache = n.symbolTable.get(n); // A4 - - if (currentCG == null) return; - - if (n.typeCache instanceof ClassifierType) { - if (ast.symbolTable.containsKey(n)) // A8 - addDependor(n.toString(), currentCG.getName()); - } - else if (n.name.length > 1) { - String className = n.toString(); - className = className.substring(0, className.lastIndexOf('.')); - String fieldOrMethod = n.name[n.name.length - 1]; - - if (ast.symbolTable.containsKey(className) - && !fieldOrMethod.equals("isTraining")) { - String currentCGName = currentCG.getName(); - addDependor(className, currentCGName); // A8 - - // A10 - if (ast.symbolTable.get(className) instanceof ClassifierType - && !fieldOrMethod.equals("getInstance")) - addInvokee(currentCGName, className); - } - } - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param s The node to process. 
- **/ - public void run(ForStatement s) { - if (s.initializers != null) // B34 - for (ASTNodeIterator I = s.initializers.iterator(); I.hasNext(); ) { - ASTNode statementExpression = I.next(); - if (statementExpression instanceof ConstraintStatementExpression) - reportError(statementExpression.line, - "Constraint expressions are only allowed to appear as part of " - + "their own separate expression statement."); - } - - if (s.updaters != null) // B34 - for (ASTNodeIterator I = s.updaters.iterator(); I.hasNext(); ) { - ASTNode statementExpression = I.next(); - if (statementExpression instanceof ConstraintStatementExpression) - reportError(statementExpression.line, - "Constraint expressions are only allowed to appear as part of " - + "their own separate expression statement."); - } - - if (!(s.body instanceof Block)) // A7 - s.body = new Block(new StatementList(s.body)); - - currentSymbolTable = s.symbolTable = s.body.symbolTable = - new SymbolTable(currentSymbolTable); // A13 - - runOnChildren(s); - - currentSymbolTable = currentSymbolTable.getParent(); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param s The node to process. - **/ - public void run(IfStatement s) { - if (!(s.thenClause instanceof Block)) // A7 - s.thenClause = new Block(new StatementList(s.thenClause)); - if (s.elseClause != null && !(s.elseClause instanceof Block)) // A7 - s.elseClause = new Block(new StatementList(s.elseClause)); - runOnChildren(s); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param s The node to process. 
- **/ - public void run(ReturnStatement s) { - if (currentCG instanceof ConstraintDeclaration - || currentCG instanceof LearningClassifierExpression - || currentCG instanceof CodedClassifier - && ((CodedClassifier) currentCG).returnType.type - != ClassifierReturnType.DISCRETE - && ((CodedClassifier) currentCG).returnType.type - != ClassifierReturnType.REAL) // B12 - reportError(s.line, - "return statements may only appear in classifers of type discrete " - + "or real, not in an array returner, a generator, or a " - + "constraint."); - - runOnChildren(s); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param s The node to process. - **/ - public void run(SenseStatement s) { - if (!(currentCG instanceof CodedClassifier) - || ((CodedClassifier) currentCG).returnType.type - == ClassifierReturnType.DISCRETE - || ((CodedClassifier) currentCG).returnType.type - == ClassifierReturnType.REAL) { // B10 - reportError(s.line, - "sense statements may only appear in an array returning classifier " - + "or a generator."); - return; - } - - CodedClassifier currentCC = (CodedClassifier) currentCG; - if (s.name != null) { // B11 - if (currentCC.returnType.type == ClassifierReturnType.DISCRETE_ARRAY - || currentCC.returnType.type == ClassifierReturnType.REAL_ARRAY) - reportError(s.line, - "The names of features need not be sensed in an array returning " - + "classifier. (Use sense ; instead of sense " - + " : ;)"); - } - else if (currentCC.returnType.type - == ClassifierReturnType.DISCRETE_GENERATOR) { // A6 - s.name = s.value; - s.value = new Constant("true"); - } - else if (currentCC.returnType.type == ClassifierReturnType.REAL_GENERATOR) - { // A6 - s.name = s.value; - s.value = new Constant("1"); - } - - s.value.senseValueChild(); // B9 - - runOnChildren(s); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param s The node to process. 
- **/ - public void run(WhileStatement s) { - if (!(s.body instanceof Block)) // A7 - s.body = new Block(new StatementList(s.body)); - runOnChildren(s); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param s The node to process. - **/ - public void run(DoStatement s) { - if (!(s.body instanceof Block)) // A7 - s.body = new Block(new StatementList(s.body)); - runOnChildren(s); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param v The node to process. - **/ - public void run(VariableDeclaration v) { - for (NameList.NameListIterator I = v.names.listIterator(); I.hasNext(); ) - currentSymbolTable.put(I.nextItem(), v.type); // A1 - runOnChildren(v); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param a The node to process. - **/ - public void run(Argument a) { - currentSymbolTable.put(a); // A1 - runOnChildren(a); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param c The node to process. 
- **/ - public void run(Constant c) { - int cType; - - String value = c.value; - - if (value.equals("true") || value.equals("false")) - cType = PrimitiveType.BOOLEAN; - else if (value.charAt(0) == '\'') cType = PrimitiveType.CHAR; - else if (value.charAt(0) == '.') { - if (value.matches(".*[fF].*")) { - cType = PrimitiveType.FLOAT; - } - else { - cType = PrimitiveType.DOUBLE; - } - } - else if (value.substring(0, 1).matches("[0-9\\-]")) { - if (value.matches(".*[fF].*")) { - cType = PrimitiveType.FLOAT; - } - else if (value.matches(".*[\\.dD].*")) { - cType = PrimitiveType.DOUBLE; - } - else if (value.matches(".*[lL].*")) { - cType = PrimitiveType.LONG; - } - else { - cType = PrimitiveType.INT; - } - } - else { - cType = -1; // is a string - } - - if (cType == -1) - c.typeCache = new ReferenceType(new Name("java.lang.String")); - else c.typeCache = new PrimitiveType(cType); - c.typeCache.runPass(this); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param t The node to process. - **/ - public void run(ReferenceType t) { - runOnChildren(t); - if (t.typeClass() == null) // B13 - reportError(t.line, "Cannot locate class '" + t + "'."); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param t The node to process. - **/ - public void run(ClassifierReturnType t) { - // B14 - if (t.type == ClassifierReturnType.MIXED) - reportError(t.line, - "There is no such type as mixed. (There is only mixed%.)"); - else if (t.type == ClassifierReturnType.MIXED_ARRAY) - reportError(t.line, - "There is no such type as mixed[]. (There is only mixed%.)"); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param c The node to process. 
- **/ - public void run(ConstraintDeclaration c) { - addDependor(c.getName(), null); // A8 - - currentSymbolTable = c.symbolTable = c.body.symbolTable - = new SymbolTable(currentSymbolTable); // A13 - - containsConstraintStatement = false; - CodeGenerator saveCG = currentCG; - currentCG = c; - runOnChildren(c); - currentCG = saveCG; - - currentSymbolTable = currentSymbolTable.getParent(); - - if (!containsConstraintStatement) // B20 - reportWarning(c.line, - "Constraint '" + c.name - + "' does not contain any constraint statements."); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param e The node to process. - **/ - public void run(ConstraintStatementExpression e) { - if (!(currentCG instanceof ConstraintDeclaration)) { // B19 - reportError(e.line, - "Constraint statements may only appear in constraint " - + "declarations."); - return; - } - - containsConstraintStatement = true; - runOnChildren(e); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param q The node to process. - **/ - public void run(UniversalQuantifierExpression q) { - q.argument.getType().quantifierArgumentType = true; // A14 - // A13 - currentSymbolTable = q.symbolTable = new SymbolTable(currentSymbolTable); - - ++quantifierNesting; - runOnChildren(q); - --quantifierNesting; - - currentSymbolTable = currentSymbolTable.getParent(); - - // A15 - q.collectionIsQuantified = - quantifierNesting > 0 && q.collection.containsQuantifiedVariable(); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param q The node to process. 
- **/ - public void run(ExistentialQuantifierExpression q) { - q.argument.getType().quantifierArgumentType = true; // A14 - // A13 - currentSymbolTable = q.symbolTable = new SymbolTable(currentSymbolTable); - - ++quantifierNesting; - runOnChildren(q); - --quantifierNesting; - - currentSymbolTable = currentSymbolTable.getParent(); - - // A15 - q.collectionIsQuantified = - quantifierNesting > 0 && q.collection.containsQuantifiedVariable(); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param q The node to process. - **/ - public void run(AtLeastQuantifierExpression q) { - q.argument.getType().quantifierArgumentType = true; // A14 - // A13 - currentSymbolTable = q.symbolTable = new SymbolTable(currentSymbolTable); - - ++quantifierNesting; - runOnChildren(q); - --quantifierNesting; - - currentSymbolTable = currentSymbolTable.getParent(); - - // A15 - if (quantifierNesting > 0) { - q.collectionIsQuantified = q.collection.containsQuantifiedVariable(); - q.lowerBoundIsQuantified = q.lowerBound.containsQuantifiedVariable(); - } - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param q The node to process. - **/ - public void run(AtMostQuantifierExpression q) { - q.argument.getType().quantifierArgumentType = true; // A14 - // A13 - currentSymbolTable = q.symbolTable = new SymbolTable(currentSymbolTable); - - ++quantifierNesting; - runOnChildren(q); - --quantifierNesting; - - currentSymbolTable = currentSymbolTable.getParent(); - - // A15 - if (quantifierNesting > 0) { - q.collectionIsQuantified = q.collection.containsQuantifiedVariable(); - q.upperBoundIsQuantified = q.upperBound.containsQuantifiedVariable(); - } - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param i The node to process. 
- **/ - public void run(ConstraintInvocation i) { - if (i.invocation.arguments.size() != 1) // B2 - reportError(i.line, "Constraints can only take a single argument."); - - runOnChildren(i); // A9 - - if (!(i.invocation.name.typeCache instanceof ConstraintType)) // B26 - reportError(i.line, - "Only constraints can be invoked with the '@' operator."); - - // A15 - i.invocationIsQuantified = - quantifierNesting > 0 && i.invocation.containsQuantifiedVariable(); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param e The node to process. - **/ - public void run(ConstraintEqualityExpression e) { - runOnChildren(e); - - e.leftIsDiscreteLearner = e.rightIsDiscreteLearner = false; - if (e.left instanceof MethodInvocation) { - MethodInvocation m = (MethodInvocation) e.left; - // A12 - e.leftIsDiscreteLearner = m.name.typeCache instanceof ClassifierType; - if (e.leftIsDiscreteLearner) { - ClassifierType type = (ClassifierType) m.name.typeCache; - e.leftIsDiscreteLearner = - type.getOutput().type == ClassifierReturnType.DISCRETE - && type.isLearner(); - } - } - - if (e.right instanceof MethodInvocation) { - MethodInvocation m = (MethodInvocation) e.right; - // A12 - e.rightIsDiscreteLearner = m.name.typeCache instanceof ClassifierType; - if (e.rightIsDiscreteLearner) { - ClassifierType type = (ClassifierType) m.name.typeCache; - e.rightIsDiscreteLearner = - type.getOutput().type == ClassifierReturnType.DISCRETE - && type.isLearner(); - } - } - - // A15 - if (quantifierNesting > 0) { - e.leftIsQuantified = e.left.containsQuantifiedVariable(); - e.rightIsQuantified = e.right.containsQuantifiedVariable(); - } - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param d The node to process. 
- **/ - @SuppressWarnings("rawtypes") - public void run(InferenceDeclaration d) { - addDependor(d.getName(), null); // A8 - - if (d.headFinders.length == 0) // B29 - reportError(d.line, - "An inference with no head finder methods can never be applied to " - + "a learner."); - - if (d.subjecttoClauses != 1) // B30 - reportError(d.line, - "Every inference must contain exactly one 'subjectto' clause " - + "specifying a constraint. " + d.subjecttoClauses); - - if (d.withClauses > 1) // B31 - reportError(d.line, - "An inference may contain no more than one 'with' clause " - + "specifying an inference algorithm."); - - currentCG = d; - currentSymbolTable = d.symbolTable = new SymbolTable(currentSymbolTable); - runOnChildren(d); - currentSymbolTable = currentSymbolTable.getParent(); - currentCG = null; - - if (d.algorithm != null) { - Class iceClass = d.algorithm.typeCache.typeClass(); - if (!isAssignableFrom(Inference.class, iceClass)) // B32 - reportError(d.algorithm.line, - "The 'with' clause of an inference must instantiate an " - + "edu.illinois.cs.cogcomp.lbjava.infer.Inference."); - } - else - d.algorithm = InferenceDeclaration.defaultInferenceConstructor; // A16 - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param h The node to process. - **/ - public void run(InferenceDeclaration.HeadFinder h) { - currentSymbolTable = h.symbolTable = h.body.symbolTable = - new SymbolTable(currentSymbolTable); // A13 - runOnChildren(h); - currentSymbolTable = currentSymbolTable.getParent(); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param n The node to process. 
- **/ - public void run(InferenceDeclaration.NormalizerDeclaration n) { - runOnChildren(n); - - if (n.learner != null - && !(n.learner.typeCache instanceof ClassifierType - && ((ClassifierType) n.learner.typeCache).isLearner())) // B27 - reportError(n.line, - "The left hand side of the 'normalizedby' operator must be the " - + "name of a edu.illinois.cs.cogcomp.lbjava.learn.Learner."); - - if (!(n.normalizer.typeCache instanceof ReferenceType) - || !isAssignableFrom(Normalizer.class, - ((ReferenceType) n.normalizer.typeCache).typeClass())) // B28 - reportError(n.line, - "The right hand side of the 'normalizedby' operator must " - + "instantiate a edu.illinois.cs.cogcomp.lbjava.learn.Normalizer."); - } +public class SemanticAnalysis extends Pass { + // Static variables. + /** + * The keys of this map are the names of {@link CodeGenerator}s; the values are + * HashSets of names of other locally defined {@link CodeGenerator}s that depend on + * the {@link CodeGenerator} named by the associated key. The dependor graph has an entry for + * every {@link CodeGenerator} in the source. + **/ + public static HashMap> dependorGraph; + /** + * The keys of this map are the names of {@link CodeGenerator}s; the values are + * HashSets of names of other (not necessarily locally defined) + * {@link CodeGenerator}s that are invoked within the {@link CodeGenerator} named by the + * associated key. The invoked graph does not necessarily have an entry for every + * {@link CodeGenerator} in the source. + **/ + public static HashMap> invokedGraph; + /** + * The keys of this map are the names of {@link Classifier}s; the values are {@link ASTNode}s + * representing the source code implementations of the associated {@link Classifier}s. This + * table has an entry for every {@link Classifier} in the source. + **/ + public static HashMap representationTable; + + + // Utility methods. 
+ /** + * Running an instance of this pass overwrites the static member + * variables; use this method to run an instance of this pass and then restore the static member + * variables to their states before the pass was run. + * + * @param ast An abstract syntax tree to run semantic analysis on. + **/ + public static void runAndRestore(AST ast) { + HashMap> dg = dependorGraph, ig = invokedGraph; + HashMap rt = representationTable; + Pass.canAddErrorsAndWarnings = false; + new SemanticAnalysis(ast).run(); + Pass.canAddErrorsAndWarnings = true; + dependorGraph = dg; + invokedGraph = ig; + representationTable = rt; + } + + + /** + * Adds an edge from dependency to dependor in the + * {@link #dependorGraph}. If the dependor is null, no new list item is added, but + * the HashSet associated with the dependency is still created if it didn't already + * exist. + * + * @param dependency The name of the node depended on. + * @param dependor The name of the node doing the depending. + **/ + public static void addDependor(String dependency, String dependor) { + HashSet dependors = dependorGraph.get(dependency); + + if (dependors == null) { + dependors = new HashSet(); + dependorGraph.put(dependency, dependors); + } + + if (dependor != null) + dependors.add(dependor); + } + + + /** + * Use this method to determine if one + * {@link CodeGenerator} depends on another either directly or indirectly. + * + * @param c1 One {@link CodeGenerator}. + * @param c2 The other {@link CodeGenerator}. + * @return true iff c1 depends on c2. 
+ **/ + public static boolean isDependentOn(String c1, String c2) { + LinkedList queue = new LinkedList(); + queue.add(c2); + + HashSet visited = new HashSet(); + + while (queue.size() > 0) { + String c = queue.removeFirst(); + if (c.equals(c1)) + return true; + + visited.add(c); + for (Iterator I = dependorGraph.get(c).iterator(); I.hasNext();) { + c = I.next(); + if (!visited.contains(c)) + queue.add(c); + } + } + + return false; + } + + + /** + * Adds an edge from invoker to invokee in the + * {@link #invokedGraph}. + * + * @param invoker The name of the node doing the invoking. + * @param invokee The name of the invoked node. + **/ + private static void addInvokee(String invoker, String invokee) { + HashSet invokees = invokedGraph.get(invoker); + + if (invokees == null) { + invokees = new HashSet(); + invokedGraph.put(invoker, invokees); + } + + invokees.add(invokee); + } + + + /** + * Prints the contents of {@link #dependorGraph} to + * STDOUT in a readable form. + **/ + public static void printDependorGraph() { + printGraph(dependorGraph); + } + + + /** + * Prints the contents of {@link #invokedGraph} to + * STDOUT in a readable form. + **/ + public static void printInvokedGraph() { + printGraph(invokedGraph); + } + + + /** + * Prints the contents of the specified graph to + * STDOUT in a readable form. + * + * @param graph The graph to print as a map of collections. + **/ + private static void printGraph(HashMap> graph) { + String[] keys = graph.keySet().toArray(new String[0]); + Arrays.sort(keys); + for (int i = 0; i < keys.length; ++i) { + System.out.print(keys[i] + " ->"); + String[] edges = (String[]) graph.get(keys[i]).toArray(new String[0]); + for (int j = 0; j < edges.length; ++j) + System.out.print(" " + edges[j]); + System.out.println(); + } + } + + + /** + * Calls the Class#isAssignableFrom(Class) + * method after making sure that both classes involved aren't null. 
The assumption made when + * calling this method is that if either argument class is null, an error has + * already been generated with respect to it. + * + * @param c1 Class 1. + * @param c2 Class 2. + * @return true iff either class is null or c1 is assignable from c2. + **/ + @SuppressWarnings({"unchecked", "rawtypes"}) + private static boolean isAssignableFrom(Class c1, Class c2) { + return c1 == null || c2 == null || c1.isAssignableFrom(c2); + } + + + /** + * Called when analyzing the feature types for + * use by a WEKA classifier. Writes the necessary attribute information from a + * ClassifierReturnType to lce.attributeString. + * + *

+ * lce.attributeString takes the form of a colon-separated list of attribute + * specifications, each of which are formated in the following way: "type_ + * name(_value-list)". + * + *

+ * value-list takes the same format as it would in an lbj source file. i.e. + * {"value1","value2",...} + * + *

+ * type can take the values str (string attributes), nom + * (nominal attributes), or num (numerical attributes). + * + *

+ * The first attribute in this string is, by convention, considered to be the class attribute. + **/ + public void wekaIze(int line, ClassifierReturnType RT, Name name) { + String typeName = RT.getTypeName(); + if (!typeName.equals("discrete") && !typeName.equals("real")) + reportError(line, "Classifiers with return type " + typeName + + " are not usable with WEKA learning algorithms"); + + // String attribute case + if (typeName.equals("discrete")) { + if (RT.values.size() == 0) { + lceInQuestion.attributeString.append("str_"); + lceInQuestion.attributeString.append(name.toString()); + lceInQuestion.attributeString.append(':'); + } + // Nominal attribute case + else { + lceInQuestion.attributeString.append("nom_"); + lceInQuestion.attributeString.append(name); + lceInQuestion.attributeString.append('_'); + + Constant[] constantList = RT.values.toArray(); + + for (int i = 0; i < constantList.length; ++i) { + String value = constantList[i].value; + + if (value.length() > 1 && value.charAt(0) == '"' + && value.charAt(value.length() - 1) == '"') + value = value.substring(1, value.length() - 1); + + lceInQuestion.attributeString.append(value); + lceInQuestion.attributeString.append(','); + } + + lceInQuestion.attributeString + .deleteCharAt(lceInQuestion.attributeString.length() - 1); + lceInQuestion.attributeString.append(':'); + } + } + // Numerical attribute case + else { + lceInQuestion.attributeString.append("num_"); + lceInQuestion.attributeString.append(name); + lceInQuestion.attributeString.append(':'); + } + } + + + /** + * Creates a new anonymous classifier name. + * + * @param lastName The last part of the classifier's name as determined by its parent's name. + * @return The created name. 
+ **/ + public Name anonymousClassifier(String lastName) { // A3 + int index = lastName.indexOf('$'); + if (lastName.indexOf('$', index + 1) >= 0) + return new Name(lastName); + return new Name(lastName.substring(0, index) + "$" + lastName.substring(index)); + } + + + // Member variables. + /** + * Lets AST children know about the code producing node they are contained in. + **/ + private CodeGenerator currentCG; + /** + * Lets AST children know the return type of the {@link ClassifierAssignment} they are contained + * in. + **/ + private ClassifierReturnType currentRT; + /** + * Used when analyzing constraint declarations to determine if a constraint statement appears + * within them. + **/ + private boolean containsConstraintStatement; + /** Lets all nodes know what symbol table represents their scope. */ + private SymbolTable currentSymbolTable; + /** + * Lets AST nodes know how deeply nested inside {@link QuantifiedConstraintExpression}s they + * are. + **/ + private int quantifierNesting; + /** + * A flag which indicates whether or not the compiler is in the process of gathering attribute + * information for a WEKA learning algorithm. + **/ + private boolean attributeAnalysis = false; + /** + * A reference to the LearningClassifierExpression which is currently under + * analysis. + **/ + private LearningClassifierExpression lceInQuestion; + + + /** Default constructor. */ + public SemanticAnalysis() {} + + /** + * Instantiates a pass that runs on an entire {@link AST}. + * + * @param ast The program to run this pass on. + **/ + public SemanticAnalysis(AST ast) { + super(ast); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param ast The node to process. 
+ **/ + public void run(AST ast) { + currentSymbolTable = ast.symbolTable; + + if (ast.symbolTable.importedSize() == 0) { // A1 + ast.symbolTable.addImported("edu.illinois.cs.cogcomp.lbjava.classify.*"); + ast.symbolTable.addImported("edu.illinois.cs.cogcomp.lbjava.learn.*"); + ast.symbolTable.addImported("edu.illinois.cs.cogcomp.lbjava.parse.*"); + ast.symbolTable.addImported("edu.illinois.cs.cogcomp.lbjava.io.IOUtilities"); + ast.symbolTable.addImported("edu.illinois.cs.cogcomp.lbjava.infer.*"); + } + + dependorGraph = new HashMap>(); + invokedGraph = new HashMap>(); + representationTable = new HashMap(); + quantifierNesting = 0; + + runOnChildren(ast); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param decl The node to process. + **/ + public void run(PackageDeclaration decl) { + ast.symbolTable.setPackage(decl.name.toString()); // A1 + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param decl The node to process. + **/ + public void run(ImportDeclaration decl) { + ast.symbolTable.addImported(decl.name.toString()); // A1 + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param list The node to process. + **/ + public void run(DeclarationList list) { + if (list.size() == 0) + return; + + for (DeclarationList.DeclarationListIterator I = list.listIterator(); I.hasNext();) { + Declaration d = I.nextItem(); + if (ast.symbolTable.containsKey(d.name)) // B1 + reportError(d.line, "A declaration named '" + d.name + "' already exists."); + ast.symbolTable.put(d.name, d.getType()); // A1 + } + + currentCG = null; + runOnChildren(list); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param ca The node to process. 
+ **/ + public void run(ClassifierAssignment ca) { + Type inputType = ca.argument.getType(); + if (!(inputType instanceof ReferenceType // B44 + || inputType instanceof ArrayType)) + reportError(ca.line, "The input to a classifier must be a single object reference."); + + ca.expression.name = (Name) ca.name.clone(); // A3 + + ca.expression.returnType = (ClassifierReturnType) ca.returnType.clone(); + // B3 + ca.expression.argument = (Argument) ca.argument.clone(); // A4 + + ca.expression.singleExampleCache = ca.singleExampleCache; // A21 + + if (ca.cacheIn != null) { + // B36 + if (ca.returnType.type == ClassifierReturnType.DISCRETE_GENERATOR + || ca.returnType.type == ClassifierReturnType.REAL_GENERATOR + || ca.returnType.type == ClassifierReturnType.MIXED_GENERATOR) + reportError(ca.line, "Generators' outputs cannot be cached (in a member " + + "variable or otherwise)."); + if (ca.expression instanceof Conjunction) + reportError(ca.line, "Conjunctive classifiers' outputs cannot be cached (in a " + + "member variable or otherwise)."); + + ca.expression.setCacheIn(ca.cacheIn); // A18 + } + + currentRT = (ClassifierReturnType) ca.returnType.clone(); // A4 + currentSymbolTable = ca.symbolTable = new SymbolTable(currentSymbolTable); + // A13 + runOnChildren(ca); + currentSymbolTable = currentSymbolTable.getParent(); + ca.expression.returnType = (ClassifierReturnType) ca.returnType.clone(); + + ca.expression.comment = ca.comment; // A19 + representationTable.put(ca.name.toString(), ca.expression); // A2 + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param cce The node to process. 
+ **/ + public void run(ClassifierCastExpression cce) { + if (!cce.castType.isContainableIn(cce.returnType)) // B3 + reportError(cce.line, "Found classifier expression of return type '" + cce.castType + + "' when '" + cce.returnType + "' was expected."); + + cce.expression.name = (Name) cce.name.clone(); // A3 + + cce.expression.returnType = (ClassifierReturnType) cce.castType.clone(); + // B3 + cce.expression.argument = (Argument) cce.argument.clone(); // A4 + + cce.expression.singleExampleCache = cce.singleExampleCache; // A21 + + ClassifierReturnType saveRT = currentRT; + currentRT = (ClassifierReturnType) cce.castType.clone(); // A4 + boolean saveAttributeAnalysis = attributeAnalysis; + attributeAnalysis = false; + + runOnChildren(cce); + + attributeAnalysis = saveAttributeAnalysis; + currentRT = saveRT; + + representationTable.put(cce.name.toString(), cce); // A2 + cce.expression.returnType = (ClassifierReturnType) cce.castType.clone(); + + if (attributeAnalysis) + wekaIze(cce.line, cce.returnType, cce.name); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param cn The node to process. 
+ **/ + public void run(ClassifierName cn) { + if (cn.name.toString().indexOf("$$") != -1) // A3 A17 + cn.name = cn.referent; + else + addDependor(cn.name.toString(), null); // A8 + + Type t = ast.symbolTable.get(cn); + if (!(t instanceof ClassifierType)) { // B21 + reportError(cn.line, "'" + cn + "' is not known to be a classifier."); + cn.returnType = null; + return; + } + + ClassifierType type = (ClassifierType) t; + + Type input = type.getInput(); + if (!isAssignableFrom(input.typeClass(), cn.argument.getType().typeClass())) // B4 + reportError(cn.line, "Classifier '" + cn + "' has input type '" + input + "' when '" + + cn.argument.getType() + "' was expected."); + + ClassifierReturnType output = type.getOutput(); + if (!output.isContainableIn(cn.returnType)) // B3 + reportError(cn.line, "Classifier '" + cn + "' has return type '" + output + "' when '" + + cn.returnType + "' was expected."); + else + cn.returnType = output; // A4 + + if (attributeAnalysis) + wekaIze(cn.line, cn.returnType, cn.name); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param cc The node to process. + **/ + public void run(CodedClassifier cc) { + addDependor(cc.name.toString(), null); // A8 + + cc.returnType = (ClassifierReturnType) currentRT.clone(); // A4 + + if (cc.returnType.type == ClassifierReturnType.MIXED_GENERATOR) // B15 + reportError(cc.line, "A coded classifier may not have return type 'mixed%'."); + + // A13 + currentSymbolTable = + cc.symbolTable = cc.body.symbolTable = new SymbolTable(currentSymbolTable); + + CodeGenerator saveCG = currentCG; + currentCG = cc; + run(cc.argument); // A1 + runOnChildren(cc); + currentCG = saveCG; + + representationTable.put(cc.name.toString(), cc); // A2 + currentSymbolTable = currentSymbolTable.getParent(); + + if (attributeAnalysis) + wekaIze(cc.line, cc.returnType, cc.name); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param cg The node to process. 
     **/
    public void run(CompositeGenerator cg) {
        addDependor(cg.name.toString(), null); // A8

        // Name each component anonymously after the composite and analyze it.
        int i = 0;
        for (ClassifierExpressionList.ClassifierExpressionListIterator I =
                cg.components.listIterator(); I.hasNext();) {
            ClassifierExpression e = I.nextItem();

            e.name = anonymousClassifier(cg.name + "$" + i++); // A3
            e.returnType = new ClassifierReturnType(ClassifierReturnType.MIXED_GENERATOR); // B3
            e.argument = (Argument) cg.argument.clone(); // A4
            e.singleExampleCache = cg.singleExampleCache; // A21

            e.runPass(this);

            addDependor(e.name.toString(), cg.name.toString()); // A8
        }

        // A4: merge the components' return types — all-discrete stays discrete,
        // all-real stays real, any mixture becomes mixed; value lists survive
        // only when all components agree.
        String cgReturnType = null;
        ConstantList values = null;
        for (ClassifierExpressionList.ClassifierExpressionListIterator I =
                cg.components.listIterator(); I.hasNext();) { // A4
            ClassifierExpression component = I.nextItem();

            if (component.returnType == null)
                return;
            String componentReturnType = component.returnType.toString();
            if (cgReturnType == null) {
                cgReturnType = componentReturnType;
                values = component.returnType.values;
            } else {
                if (cgReturnType.startsWith("discrete")
                        && !componentReturnType.startsWith("discrete")
                        || cgReturnType.startsWith("real")
                        && !componentReturnType.startsWith("real"))
                    cgReturnType = "mixed";
                if (values.size() > 0 && !values.equals(component.returnType.values))
                    values = new ConstantList();
            }
        }

        assert cgReturnType != null : "Empty component list";

        // A4
        ClassifierReturnType output = null;
        if (cgReturnType.startsWith("discrete"))
            output = new ClassifierReturnType(ClassifierReturnType.DISCRETE_GENERATOR, values);
        else if (cgReturnType.startsWith("real"))
            output = new ClassifierReturnType(ClassifierReturnType.REAL_GENERATOR);
        else
            output = new ClassifierReturnType(ClassifierReturnType.MIXED_GENERATOR);

        if (!output.isContainableIn(cg.returnType)) // B3
            reportError(cg.line, "Found a classifier expression of return type '" + output
                    + "' when '" + cg.returnType + "' was expected.");
        else
            cg.returnType = output;

        representationTable.put(cg.name.toString(), cg); // A2
    }


    /**
     * Runs this pass on all nodes of the indicated type.
     *
     * @param c The node to process.
     **/
    @SuppressWarnings("rawtypes")
    public void run(Conjunction c) {
        addDependor(c.name.toString(), null); // A8

        // Name and configure both conjuncts anonymously, then analyze them with
        // WEKA attribute gathering suspended (only the conjunction itself
        // contributes an attribute).
        c.left.name = anonymousClassifier(c.name + "$0"); // A3
        c.left.returnType = new ClassifierReturnType(ClassifierReturnType.MIXED_GENERATOR); // B3
        c.left.argument = (Argument) c.argument.clone(); // A4
        c.left.singleExampleCache = c.singleExampleCache; // A21

        c.right.name = anonymousClassifier(c.name + "$1"); // A3
        c.right.returnType = new ClassifierReturnType(ClassifierReturnType.MIXED_GENERATOR); // B3
        c.right.argument = (Argument) c.argument.clone(); // A4
        c.right.singleExampleCache = c.singleExampleCache; // A21

        boolean saveAttributeAnalysis = attributeAnalysis;
        attributeAnalysis = false;

        runOnChildren(c);

        attributeAnalysis = saveAttributeAnalysis;

        if (c.left.returnType == null || c.right.returnType == null)
            return;

        addDependor(c.left.name.toString(), c.name.toString()); // A8
        addDependor(c.right.name.toString(), c.name.toString()); // A8

        // A4: the conjunction's input type is whichever conjunct's input type is
        // the more general (assignable-from) of the two.
        Type inputType = c.right.argument.getType();
        Class inputRight = inputType.typeClass();
        Type leftType = c.left.argument.getType();
        Class inputLeft = leftType.typeClass();
        if (!isAssignableFrom(inputLeft, inputRight))
            inputType = leftType;

        c.argument = new Argument(inputType, c.argument.getName(), c.argument.getFinal());

        // The conjunction's value list is the cross product of the conjuncts'
        // value lists, joined with '&' (only when both lists are non-empty).
        ConstantList valuesLeft = c.left.returnType.values;
        ConstantList valuesRight = c.right.returnType.values;
        ConstantList values = new ConstantList();
        if (valuesLeft.size() > 0 && valuesRight.size() > 0)
            for (ConstantList.ConstantListIterator I = valuesLeft.listIterator(); I.hasNext();) {
                Constant valueLeft = I.nextItem();
                for (ConstantList.ConstantListIterator J = valuesRight.listIterator(); J.hasNext();)
                    values.add(new Constant(valueLeft.noQuotes() + "&" + J.nextItem().noQuotes()));
            }

        // Normalize so rt1 <= rt2; the switch below encodes the pair as 10*rt1+rt2.
        int rt1 = c.left.returnType.type;
        int rt2 = c.right.returnType.type;
        if (rt2 < rt1) {
            int temp = rt1;
            rt1 = rt2;
            rt2 = temp;
        }

        ClassifierReturnType output = null;
        switch (10 * rt1 + rt2) {
            case 0:
                output = new ClassifierReturnType(ClassifierReturnType.DISCRETE, values);
                break;

            case 11:
                output = new ClassifierReturnType(ClassifierReturnType.REAL);
                break;

            case 3:
            case 33:
                output = new ClassifierReturnType(ClassifierReturnType.DISCRETE_ARRAY, values);
                break;

            case 14:
            case 44:
                output = new ClassifierReturnType(ClassifierReturnType.REAL_ARRAY);
                break;

            case 6:
            case 36:
            case 66:
                output = new ClassifierReturnType(ClassifierReturnType.DISCRETE_GENERATOR, values);
                break;

            case 1:
            case 4:
            case 7:
            case 13:
            case 16:
            case 17:
            case 34:
            case 37:
            case 46:
            case 47:
            case 67:
            case 77:
                output = new ClassifierReturnType(ClassifierReturnType.REAL_GENERATOR);
                break;

            case 8:
            case 18:
            case 38:
            case 48:
            case 68:
            case 78:
            case 88:
                output = new ClassifierReturnType(ClassifierReturnType.MIXED_GENERATOR);
                break;
        }

        assert output != null : "Unexpected conjunction types: "
                + ClassifierReturnType.typeName(rt1) + ", " + ClassifierReturnType.typeName(rt2);

        if (!output.isContainableIn(c.returnType)) // B3
            reportError(c.line, "Found a classifier expression of return type '" + output
                    + "' when '" + c.returnType + "' was expected.");
        else if ((output.type == ClassifierReturnType.DISCRETE || output.type == ClassifierReturnType.REAL)
                && c.left.equals(c.right)) // B61
            reportError(c.line, "A classifier cannot be conjuncted with itself unless it returns "
                    + "multiple features.");
        else
            c.returnType = output;

        representationTable.put(c.name.toString(), c); // A2

        if (attributeAnalysis)
            wekaIze(c.line, c.returnType, c.name);
    }


    /**
     * Runs this pass on the
indicated type. + * + * @param ii The node to process. + **/ + public void run(InferenceInvocation ii) { + // A8 + addDependor(ii.name.toString(), null); + addDependor(ii.inference.toString(), ii.name.toString()); + addDependor(ii.classifier.toString(), ii.name.toString()); + + runOnChildren(ii); + + if (!(ii.inference.typeCache instanceof InferenceType)) { // B22 + reportError(ii.inference.line, "'" + ii.inference + "' is not known to be a inference."); + return; + } + + if (!(ii.classifier.typeCache instanceof ClassifierType)) { // B23 + reportError(ii.classifier.line, "'" + ii.classifier + "' is not known to be a learner."); + return; + } + + ClassifierType argumentType = (ClassifierType) ii.classifier.typeCache; + ClassifierReturnType output = argumentType.getOutput(); + if (output.type != ClassifierReturnType.DISCRETE || !argumentType.isLearner()) // B23 + reportError(ii.classifier.line, "'" + ii.classifier + "' is not a discrete learner."); + + Type input = argumentType.getInput(); + if (!isAssignableFrom(input.typeClass(), ii.argument.getType().typeClass())) // B24 + reportError(ii.line, "Classifier '" + ii + "' has input type '" + input + "' when '" + + ii.argument.getType() + "' was expected."); + + if (!output.isContainableIn(ii.returnType)) // B3 + reportError(ii.line, "Classifier '" + ii + "' has return type '" + output + "' when '" + + ii.returnType + "' was expected."); + else + ii.returnType = output; // A4 + + InferenceType type = (InferenceType) ii.inference.typeCache; + boolean found = false; + for (int i = 0; i < type.getFindersLength() && !found; ++i) + found = type.getFinderType(i).equals(input); + + if (!found) // B25 + reportError(ii.line, "Inference '" + ii.inference + "' does not contain a head finder " + + " method for class '" + input + "'."); + + representationTable.put(ii.name.toString(), ii); // A2 + + if (attributeAnalysis) + wekaIze(ii.line, ii.returnType, ii.name); + } + + + /** + * Runs this pass on all nodes of the indicated type. 
+ * + * @param lce The node to process. + **/ + @SuppressWarnings("rawtypes") + public void run(LearningClassifierExpression lce) { + String lceName = lce.name.toString(); + addDependor(lceName, null); // A8 + + // Setting up signatures for labeler and extractor. + int i = 0; + if (lce.labeler != null) { + lce.labeler.name = anonymousClassifier(lceName + "$" + i++); // A3 + lce.labeler.returnType = new ClassifierReturnType(ClassifierReturnType.MIXED_GENERATOR); // B3 + lce.labeler.argument = (Argument) lce.argument.clone(); // A4 + lce.labeler.singleExampleCache = lce.singleExampleCache; // A21 + } + + lce.extractor.name = anonymousClassifier(lceName + "$" + i); // A3 + lce.extractor.returnType = new ClassifierReturnType(ClassifierReturnType.MIXED_GENERATOR); // B3 + lce.extractor.argument = (Argument) lce.argument.clone(); // A4 + lce.extractor.singleExampleCache = lce.singleExampleCache; // A21 + + // Making sure an appropriate quantity of each clause is present. + if (lce.usingClauses != 1) { // B18 + reportError(lce.line, + "A learning classifier expression must contain exactly one 'using' " + + "clause."); + return; + } + + if (lce.fromClauses > 1) { // B17 + reportError(lce.line, + "A learning classifier expression can have no more than one 'from' " + + "clause."); + return; + } + + if (lce.withClauses > 1) { // B16 + reportError(lce.line, + "A learning classifier expression can have no more than one 'with' " + + "clause."); + return; + } + + if (lce.encodingClauses > 1) { // B62 + reportError(lce.line, "A learning classifier expression can have no more than one " + + "'encoding' clause."); + return; + } + + if (lce.testFromClauses > 1) { // B51 + reportError(lce.line, "A learning classifier expression can have no more than one " + + "'testFrom' clause."); + return; + } + + if (lce.evaluateClauses > 1) { // B37 + reportError(lce.line, "A learning classifier expression can have no more than one " + + "'evaluate' clause."); + } + + if (lce.cvalClauses == 0) { + 
if (lce.alphaClauses > 0) { // B45 + reportError(lce.line, + "The alpha keyword is meaningful only if the cval keyword is " + + "also being used, and should not be used otherwise."); + } + + if (lce.testFromClauses == 0 && lce.testingMetric != null) { // B46 + reportError(lce.testingMetric.line, + "The 'testingMetric' keyword is meaningful only if one of 'cval' " + + "or 'testFrom' is also present, and should not be used " + + "otherwise."); + } + } + + if (lce.cvalClauses > 1) { // B47 + reportError(lce.line, + "A learning classifier expression can have no more than one 'cval'" + + " clause."); + } + + if (lce.testingMetricClauses > 1) { // B48 + reportError(lce.line, "A learning classifier expression can have no more than one " + + "'testingMetric' clause."); + } + + if (lce.alphaClauses > 1) { // B49 + reportError(lce.line, + "A learning classifier expression can have no more than one 'alpha'" + + " clause."); + } + + if (lce.preExtractClauses > 1) { // B57 + reportError(lce.line, "A learning classifier expression can have no more than one " + + "'preExtract' clause."); + } + + if (lce.evaluation != null && lce.evaluation instanceof MethodInvocation) + ((MethodInvocation) lce.evaluation).isEvaluateArgument = true; + if (lce.rounds != null && lce.rounds instanceof ParameterSet) + ((ParameterSet) lce.rounds).inRounds = true; + + // Make sure we have a learning algorithm. 
+ if (lce.learnerName == null) { + if (lce.learnerConstructor == null) { // A11 + if (lce.returnType.toString().charAt(0) == 'd') + lce.learnerConstructor = LearningClassifierExpression.defaultDiscreteLearner; + else + lce.learnerConstructor = LearningClassifierExpression.defaultRealLearner; + // lce.learnerConstructor.runPass(this); + } + + lce.learnerName = lce.learnerConstructor.name; // A20 + } + + // lce.learnerName.runPass(this); + + boolean weka = false; + weka = + lce.learnerName.equals(new Name("WekaWrapper")) + || lce.learnerName.equals(new Name( + "edu.illinois.cs.cogcomp.lbjava.learn.WekaWrapper")); + + boolean saveAttributeAnalysis = attributeAnalysis; + LearningClassifierExpression saveLCE = null; + + // Weka specific pre-processing. + if (weka) { + attributeAnalysis = true; + lce.attributeString = new StringBuffer(); + + // Identify which learning classifier expression we are gathering + // feature information for. + saveLCE = lceInQuestion; + lceInQuestion = lce; + } + + CodeGenerator saveCG = currentCG; + currentCG = lce; + runOnChildren(lce); + currentCG = saveCG; + + // Weka specific post-processing. + if (weka) { + attributeAnalysis = saveAttributeAnalysis; + lceInQuestion = saveLCE; + if (lce.attributeString.length() != 0) + lce.attributeString.deleteCharAt(lce.attributeString.length() - 1); + + if (lce.learnerParameterBlock != null) + lce.learnerParameterBlock.statementList().add( + new ExpressionStatement(new Assignment(new Operator(Operator.ASSIGN), + new Name("attributeString"), new Constant('"' + lce.attributeString + .toString() + '"')))); + } + + if (lce.labeler != null) + addDependor(lce.labeler.name.toString(), lceName); // A8 + addDependor(lce.extractor.name.toString(), lceName); // A8 + + // Check the "rounds" clause (if any) for semantic errors. 
+ if (lce.rounds != null) { + if (lce.rounds instanceof Constant) { + try { + Integer.parseInt(((Constant) lce.rounds).value); + } catch (Exception e) { // B35 + reportError(lce.rounds.line, + "The value supplied before 'rounds' must be an integer."); + } + } else if (!(lce.rounds instanceof ParameterSet)) { + reportError(lce.rounds.line, + "The value supplied before 'rounds' must be an integer."); + } + } + + // Check CV clauses for appropriate argument types. + if (lce.K != null) { + try { + Integer.parseInt(lce.K.value); + } catch (Exception e) { // B40 + reportError(lce.K.line, "The value supplied after 'cval' must be an integer."); + } + } + + if (lce.alpha != null) { + try { + Double.parseDouble(lce.alpha.value); + } catch (Exception e) { // B43 + reportError(lce.alpha.line, "The value supplied after 'alpha' must be an double."); + } + } + + // Check "preExtract" clause for appropriate argument type. + if (!(lce.preExtract.value.equals("\"none\"") || lce.preExtract.value.equals("\"disk\"") + || lce.preExtract.value.equals("\"diskZip\"") + || lce.preExtract.value.equals("\"memory\"") + || lce.preExtract.value.equals("\"memoryZip\"") + || lce.preExtract.value.equals("\"true\"") + || lce.preExtract.value.equals("\"false\"") || lce.preExtract.value.equals("true") || lce.preExtract.value + .equals("false"))) { // B41 + reportError(lce.preExtract.line, + "The value supplied after 'preExtract' must be a boolean or one of " + + "(\"none\"|\"disk\"|\"diskZip\"|\"memory\"|\"memoryZip\")."); + } + + // Check that pre-extraction has not been enabled without a from clause. + if (!(lce.preExtract.value.equals("\"none\"") || lce.preExtract.value.equals("\"false\"") || lce.preExtract.value + .equals("false")) && lce.parser == null) { // B60 + reportWarning(lce.preExtract.line, + "Feature pre-extraction will be disabled since there is no " + + "\"from\" clause."); + lce.preExtract = new Constant("false"); + } + + // Check "progressOutput" clause for appropriate argument type. 
+ if (lce.progressOutput != null) { + try { + Integer.parseInt(lce.progressOutput.value); + } catch (Exception e) { // B42 + reportError(lce.progressOutput.line, + "The value supplied after 'progressOutput' must be an integer."); + } + } + + // Check "prune" clause for appropriate argument types. + // Only certain keywords are legal. + if (lce.pruneCountType != null) { // B58 + if (!(lce.pruneCountType.value.equals("\"global\"") || lce.pruneCountType.value + .equals("\"perClass\"")) + || !(lce.pruneThresholdType.value.equals("\"count\"") || lce.pruneThresholdType.value + .equals("\"percent\""))) { + reportError(lce.pruneCountType.line, "The prune clause must take the form " + + "'prune (\"global\"|\"perClass\") (\"count\"|\"percent\") X' " + + "where X is numeric."); + } + + if (lce.preExtract.value.equals("\"none\"") || lce.preExtract.value.equals("\"false\"") + || lce.preExtract.value.equals("false")) { + reportError(lce.preExtract.line, + "Feature pruning cannot be performed unless pre-extraction is " + + "enabled."); + } + } + + // The theshold must have the right type for the given keywords. + if (lce.pruneThresholdType != null) { // B59 + if (lce.pruneThresholdType.value.equals("\"percent\"")) { + try { + double p = Double.parseDouble(lce.pruneThreshold.value); + if (p < 0 || p > 1) + throw new Exception(); + } catch (Exception e) { + reportError(lce.pruneThresholdType.line, + "The prune threshold must be a real number in [0,1] when using " + + "the 'percent' type."); + } + } else { + try { + Integer.parseInt(lce.pruneThreshold.value); + } catch (Exception e) { + reportError(lce.pruneThresholdType.line, + "The prune threshold must be an integer when using the 'count' " + + "type."); + } + } + } + + // Pruning implies pre-extraction. 
+ if (lce.pruneCountType != null + && (lce.preExtract == null || lce.preExtract.value.equals("\"none\"") + || lce.preExtract.value.equals("\"false\"") || lce.preExtract.value + .equals("false"))) { + lce.preExtract = new Constant(LearningClassifierExpression.defaultPreExtract); + reportWarning(lce.pruneCountType.line, + "Pruning cannot be performed without pre-extraction. Setting " + + "'preExtract " + lce.preExtract + "'."); + } + + // Check "from" clause for appropriate argument type. + if (lce.parser != null) { // B5 + if (!(lce.parser.typeCache instanceof ReferenceType)) + reportError(lce.parser.line, + "The 'from' clause of a learning classifier expression must " + + "instantiate a edu.illinois.cs.cogcomp.lbjava.parse.Parser."); + else { + Class iceClass = lce.parser.typeCache.typeClass(); + if (!isAssignableFrom(Parser.class, iceClass)) + reportError(lce.parser.line, + "The 'from' clause of a learning classifier expression must " + + "instantiate a edu.illinois.cs.cogcomp.lbjava.parse.Parser."); + } + } + + // Check "with" clause for appropriate argument types. + Type input = lce.argument.getType(); + Class inputClass = input.typeClass(); + ClassifierReturnType output = null; + + // Check that the specified algorithm accepts our input. 
+ if (!(lce.learnerName.typeCache instanceof ClassifierType) + || !((ClassifierType) lce.learnerName.typeCache).isLearner()) { // B6 + reportError(lce.learnerName.line, + "The 'with' clause of a learning classifier expression must " + + "instantiate a edu.illinois.cs.cogcomp.lbjava.learn.Learner."); + } else { + Class iceClass = AST.globalSymbolTable.classForName(lce.learnerName); + + if (iceClass != null) { + if (!isAssignableFrom(Learner.class, iceClass)) // B6 + reportError(lce.learnerName.line, + "The 'with' clause of a learning classifier expression must " + + "instantiate a edu.illinois.cs.cogcomp.lbjava.learn.Learner."); + else { // A4 + ClassifierType learnerType = (ClassifierType) lce.learnerName.typeCache; + Type learnerInputType = learnerType.getInput(); + if (!isAssignableFrom(learnerInputType.typeClass(), inputClass)) + reportError(lce.learnerName.line, // B7 + "A learning classifier with input type '" + input + + "' cannot use a Learner with input type '" + + learnerInputType + "'."); + + output = learnerType.getOutput(); + } + } + } + + // Check that the specified algorithm can produce our output. + if (output != null && !output.isContainableIn(lce.returnType)) { // B3 + if (output.toString().charAt(0) != 'd' || lce.returnType.toString().charAt(0) != 'd') + reportError(lce.line, "Learner " + lce.learnerName + " returns '" + output + + "' which conflicts with the declared return type '" + lce.returnType + + "'."); + else { + lce.checkDiscreteValues = true; + reportWarning(lce.line, "Learner " + lce.learnerName + " returns '" + output + + "' which may conflict with the declared return type '" + lce.returnType + + "'. 
A run-time error will be reported if a " + "conflict is detected."); + } + } else + lce.returnType = output; + + if (output != null && lce.labeler != null && lce.labeler.returnType != null + && !lce.labeler.returnType.isContainableIn(output)) { // B3 + if (output.toString().charAt(0) == 'd' + && lce.labeler.returnType.toString().charAt(0) == 'd') + reportWarning(lce.line, "The labeler for learner " + lceName + + " may return more labels " + + "than the learner is designed to deal with. A run-time error " + + "will be reported if a conflict is detected."); + else + reportWarning(lce.line, "The labeler for learner " + lceName + + " may return labels that " + + "the learner is not designed to deal with. A run-time error " + + "will be reported if a conflict is detected."); + } + + // Check "testFrom" clause for appropriate argument type. + if (lce.testParser != null) { // B50 + if (!(lce.testParser.typeCache instanceof ReferenceType)) + reportError(lce.testParser.line, + "The 'testFrom' clause of a learning classifier expression must " + + "instantiate a edu.illinois.cs.cogcomp.lbjava.parse.Parser."); + else { + Class iceClass = lce.testParser.typeCache.typeClass(); + if (!isAssignableFrom(Parser.class, iceClass)) + reportError(lce.testParser.line, + "The 'testFrom' clause of a learning classifier expression must" + + " instantiate a edu.illinois.cs.cogcomp.lbjava.parse.Parser."); + } + } + + representationTable.put(lceName, lce); // A2 + + if (attributeAnalysis) + wekaIze(lce.line, lce.returnType, lce.name); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param p The node to process. 
+ **/ + public void run(ParameterSet p) { + if (!(currentCG instanceof LearningClassifierExpression)) { // B56 + reportError(p.line, "Parameter sets cannot be defined outside of a " + + "LearningClassifierExpression."); + return; + } else { + LearningClassifierExpression lce = (LearningClassifierExpression) currentCG; + if (lce.K == null && lce.testParser == null) { // B52 + reportError(lce.line, "Parameter tuning can only be performed if either a 'cval' " + + "clause or a 'testFrom' clause is supplied."); + return; + } + + if (!p.inRounds) + lce.parameterSets.add(p); + } + + runOnChildren(p); + + // Make sure the values in the parameter set specification make sense. + ExpressionList.ExpressionListIterator PI = null; + + if (p.isRange()) { + ExpressionList rangeList = new ExpressionList(); + rangeList.add(p.start); + rangeList.add(p.end); + rangeList.add(p.increment); + + PI = rangeList.listIterator(); + } else { + PI = p.listIterator(); + } + + for (int PIindex = 0; PI.hasNext(); ++PIindex) { + Expression pe = PI.nextItem(); + + // Replace unary negation expressions with negative constants. 
+ if (pe instanceof UnaryExpression) { + if (p.inRounds) { + reportError(p.line, "The number of rounds must be a positive integer."); + return; + } + + UnaryExpression upe = (UnaryExpression) pe; + if (upe.operation.operation != Operator.MINUS) { // B53 + reportError(p.line, "A parameter set must include only literals."); + return; + } + + Expression subpe = upe.subexpression; + if (!(subpe instanceof Constant)) { // B53 + reportError(p.line, "A parameter set must include only literals."); + return; + } + + pe = new Constant(pe.line, pe.byteOffset, "-" + ((Constant) subpe).value); + run((Constant) pe); + + if (p.isRange()) { + switch (PIindex) { + case 0: + p.start = pe; + break; + case 1: + p.end = pe; + break; + case 2: + p.increment = pe; + break; + } + } else + PI.set(pe); + } else if (!(pe instanceof Constant)) { // B53 + reportError(p.line, + "A parameter set must include only simple constant expressions."); + return; + } else if (p.inRounds) { + try { + Integer.parseInt(((Constant) pe).value); + } catch (Exception ex) { // B35 + reportError(p.line, "The number of rounds must be a positive integer."); + return; + } + } + + // Determine the type of the parameter set. 
+ if (!pe.typeCache.typeClass().equals(String.class) + && !(pe.typeCache instanceof PrimitiveType)) { + reportError(p.line, "Parameter sets must include only primitive constants or " + + "strings."); + return; + } else if (p.isRange() + && (!(pe.typeCache instanceof PrimitiveType) || ((PrimitiveType) pe.typeCache).type == PrimitiveType.BOOLEAN)) { + reportError(p.line, + "Parameter set ranges must involve primitive values that aren't" + + "booleans."); + return; + } + + else if (p.type == null) + p.type = (Type) pe.typeCache.clone(); + + else if (p.type.typeClass().equals(String.class) != pe.typeCache.typeClass().equals( + String.class)) { + reportError(p.line, + "Strings cannot appear in a parameter set with any other type of " + + "value."); + return; + } else if (p.type instanceof PrimitiveType) { + PrimitiveType pt = (PrimitiveType) p.type; + PrimitiveType pet = (PrimitiveType) pe.typeCache; + if ((pt.type == PrimitiveType.BOOLEAN) != (pet.type == PrimitiveType.BOOLEAN)) { + reportError(p.line, + "booleans cannot appear in a parameter set with any other type " + + "of value."); + return; + } + + if (p.isRange() && PIindex == 2) { + if (pt.type == PrimitiveType.CHAR) { + if (!pet.isWholeNumber()) { + reportError(p.line, + "The increment of a character parameter set should be an " + + "integer."); + return; + } + } else + pt.type = Math.max(pt.type, pet.type); + } else + pt.type = Math.max(pt.type, pet.type); + } + } + + // If a range, make sure it's not infinite, and convert it. + if (p.isRange()) { + PrimitiveType pt = (PrimitiveType) p.type; + double s = + pt.type == PrimitiveType.CHAR ? (double) ((Constant) p.start).value.charAt(1) + : Double.parseDouble(((Constant) p.start).value); + double e = + pt.type == PrimitiveType.CHAR ? 
(double) ((Constant) p.start).value.charAt(1) + : Double.parseDouble(((Constant) p.end).value); + double i = Double.parseDouble(((Constant) p.increment).value); + + // B54 + if (i == 0.0 || e - s != 0 && (e - s > 0) != (i > 0)) + reportError(p.line, "Infinite loop detected in parameter set range specification."); + else + p.convertRange(); + } + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param b The node to process. + **/ + public void run(Block b) { + boolean needLocalTable = b.symbolTable == null; + if (needLocalTable) + currentSymbolTable = b.symbolTable = new SymbolTable(currentSymbolTable); // A13 + + runOnChildren(b); + + if (needLocalTable) + currentSymbolTable = currentSymbolTable.getParent(); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param m The node to process. + **/ + public void run(MethodInvocation m) { + runOnChildren(m); + + if (m.name.typeCache instanceof ClassifierType && m.parentObject == null) { + if (m.arguments.size() != 1) // B2 + reportError(m.line, "Classifiers can only take a single argument."); + else { + ClassifierReturnType returnType = ((ClassifierType) m.name.typeCache).getOutput(); + m.isClassifierInvocation = true; // A5 B8 + + if (m.isSensedValue // B9 + && !returnType.isContainableIn(((CodedClassifier) currentCG).returnType)) + reportError(m.line, "Classifier " + currentCG.getName() + " with return type '" + + ((CodedClassifier) currentCG).returnType + + "' cannot sense classifier " + m.name + " with return type '" + + returnType + "'."); + else if (!m.isSensedValue + && (returnType.type == ClassifierReturnType.DISCRETE_GENERATOR + || returnType.type == ClassifierReturnType.REAL_GENERATOR || returnType.type == ClassifierReturnType.MIXED_GENERATOR)) + reportError(m.line, "Feature generators may only be invoked as the value " + + "argument of a sense statement in another generator."); + else if (currentCG != null) // A9 + addInvokee(currentCG.getName(), 
m.name.toString()); + } + } else if (m.name.typeCache instanceof InferenceType && m.parentObject == null) // B33 + reportError(m.line, "Inferences may only be invoked to create a new classifier in " + + "classifier expression context."); + else if (m.parentObject == null && m.name.name.length == 1 && !m.isEvaluateArgument) // B38 + reportError(m.line, "Unrecognized classifier name: '" + m.name + "'"); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param ice The node to process. + **/ + public void run(InstanceCreationExpression ice) { + runOnChildren(ice); + + if (ice.parentObject == null) { // A4 + ice.typeCache = new ReferenceType(ice.name); + ice.typeCache.runPass(this); + } + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param n The node to process. + **/ + public void run(Name n) { + n.symbolTable = currentSymbolTable; // A13 + + n.typeCache = n.symbolTable.get(n); // A4 + + if (currentCG == null) + return; + + if (n.typeCache instanceof ClassifierType) { + if (ast.symbolTable.containsKey(n)) // A8 + addDependor(n.toString(), currentCG.getName()); + } else if (n.name.length > 1) { + String className = n.toString(); + className = className.substring(0, className.lastIndexOf('.')); + String fieldOrMethod = n.name[n.name.length - 1]; + + if (ast.symbolTable.containsKey(className) && !fieldOrMethod.equals("isTraining")) { + String currentCGName = currentCG.getName(); + addDependor(className, currentCGName); // A8 + + // A10 + if (ast.symbolTable.get(className) instanceof ClassifierType + && !fieldOrMethod.equals("getInstance")) + addInvokee(currentCGName, className); + } + } + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param s The node to process. 
+ **/ + public void run(ForStatement s) { + if (s.initializers != null) // B34 + for (ASTNodeIterator I = s.initializers.iterator(); I.hasNext();) { + ASTNode statementExpression = I.next(); + if (statementExpression instanceof ConstraintStatementExpression) + reportError(statementExpression.line, + "Constraint expressions are only allowed to appear as part of " + + "their own separate expression statement."); + } + + if (s.updaters != null) // B34 + for (ASTNodeIterator I = s.updaters.iterator(); I.hasNext();) { + ASTNode statementExpression = I.next(); + if (statementExpression instanceof ConstraintStatementExpression) + reportError(statementExpression.line, + "Constraint expressions are only allowed to appear as part of " + + "their own separate expression statement."); + } + + if (!(s.body instanceof Block)) // A7 + s.body = new Block(new StatementList(s.body)); + + currentSymbolTable = + s.symbolTable = s.body.symbolTable = new SymbolTable(currentSymbolTable); // A13 + + runOnChildren(s); + + currentSymbolTable = currentSymbolTable.getParent(); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param s The node to process. + **/ + public void run(IfStatement s) { + if (!(s.thenClause instanceof Block)) // A7 + s.thenClause = new Block(new StatementList(s.thenClause)); + if (s.elseClause != null && !(s.elseClause instanceof Block)) // A7 + s.elseClause = new Block(new StatementList(s.elseClause)); + runOnChildren(s); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param s The node to process. 
+ **/ + public void run(ReturnStatement s) { + if (currentCG instanceof ConstraintDeclaration + || currentCG instanceof LearningClassifierExpression + || currentCG instanceof CodedClassifier + && ((CodedClassifier) currentCG).returnType.type != ClassifierReturnType.DISCRETE + && ((CodedClassifier) currentCG).returnType.type != ClassifierReturnType.REAL) // B12 + reportError(s.line, "return statements may only appear in classifers of type discrete " + + "or real, not in an array returner, a generator, or a " + "constraint."); + + runOnChildren(s); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param s The node to process. + **/ + public void run(SenseStatement s) { + if (!(currentCG instanceof CodedClassifier) + || ((CodedClassifier) currentCG).returnType.type == ClassifierReturnType.DISCRETE + || ((CodedClassifier) currentCG).returnType.type == ClassifierReturnType.REAL) { // B10 + reportError(s.line, + "sense statements may only appear in an array returning classifier " + + "or a generator."); + return; + } + + CodedClassifier currentCC = (CodedClassifier) currentCG; + if (s.name != null) { // B11 + if (currentCC.returnType.type == ClassifierReturnType.DISCRETE_ARRAY + || currentCC.returnType.type == ClassifierReturnType.REAL_ARRAY) + reportError(s.line, + "The names of features need not be sensed in an array returning " + + "classifier. (Use sense ; instead of sense " + + " : ;)"); + } else if (currentCC.returnType.type == ClassifierReturnType.DISCRETE_GENERATOR) { // A6 + s.name = s.value; + s.value = new Constant("true"); + } else if (currentCC.returnType.type == ClassifierReturnType.REAL_GENERATOR) { // A6 + s.name = s.value; + s.value = new Constant("1"); + } + + s.value.senseValueChild(); // B9 + + runOnChildren(s); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param s The node to process. 
+ **/ + public void run(WhileStatement s) { + if (!(s.body instanceof Block)) // A7 + s.body = new Block(new StatementList(s.body)); + runOnChildren(s); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param s The node to process. + **/ + public void run(DoStatement s) { + if (!(s.body instanceof Block)) // A7 + s.body = new Block(new StatementList(s.body)); + runOnChildren(s); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param v The node to process. + **/ + public void run(VariableDeclaration v) { + for (NameList.NameListIterator I = v.names.listIterator(); I.hasNext();) + currentSymbolTable.put(I.nextItem(), v.type); // A1 + runOnChildren(v); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param a The node to process. + **/ + public void run(Argument a) { + currentSymbolTable.put(a); // A1 + runOnChildren(a); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param c The node to process. + **/ + public void run(Constant c) { + int cType; + + String value = c.value; + + if (value.equals("true") || value.equals("false")) + cType = PrimitiveType.BOOLEAN; + else if (value.charAt(0) == '\'') + cType = PrimitiveType.CHAR; + else if (value.charAt(0) == '.') { + if (value.matches(".*[fF].*")) { + cType = PrimitiveType.FLOAT; + } else { + cType = PrimitiveType.DOUBLE; + } + } else if (value.substring(0, 1).matches("[0-9\\-]")) { + if (value.matches(".*[fF].*")) { + cType = PrimitiveType.FLOAT; + } else if (value.matches(".*[\\.dD].*")) { + cType = PrimitiveType.DOUBLE; + } else if (value.matches(".*[lL].*")) { + cType = PrimitiveType.LONG; + } else { + cType = PrimitiveType.INT; + } + } else { + cType = -1; // is a string + } + + if (cType == -1) + c.typeCache = new ReferenceType(new Name("java.lang.String")); + else + c.typeCache = new PrimitiveType(cType); + c.typeCache.runPass(this); + } + + + /** + * Runs this pass on all nodes of the indicated type. 
+ * + * @param t The node to process. + **/ + public void run(ReferenceType t) { + runOnChildren(t); + if (t.typeClass() == null) // B13 + reportError(t.line, "Cannot locate class '" + t + "'."); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param t The node to process. + **/ + public void run(ClassifierReturnType t) { + // B14 + if (t.type == ClassifierReturnType.MIXED) + reportError(t.line, "There is no such type as mixed. (There is only mixed%.)"); + else if (t.type == ClassifierReturnType.MIXED_ARRAY) + reportError(t.line, "There is no such type as mixed[]. (There is only mixed%.)"); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param c The node to process. + **/ + public void run(ConstraintDeclaration c) { + addDependor(c.getName(), null); // A8 + + currentSymbolTable = + c.symbolTable = c.body.symbolTable = new SymbolTable(currentSymbolTable); // A13 + + containsConstraintStatement = false; + CodeGenerator saveCG = currentCG; + currentCG = c; + runOnChildren(c); + currentCG = saveCG; + + currentSymbolTable = currentSymbolTable.getParent(); + + if (!containsConstraintStatement) // B20 + reportWarning(c.line, "Constraint '" + c.name + + "' does not contain any constraint statements."); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param e The node to process. + **/ + public void run(ConstraintStatementExpression e) { + if (!(currentCG instanceof ConstraintDeclaration)) { // B19 + reportError(e.line, "Constraint statements may only appear in constraint " + + "declarations."); + return; + } + + containsConstraintStatement = true; + runOnChildren(e); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param q The node to process. 
+ **/ + public void run(UniversalQuantifierExpression q) { + q.argument.getType().quantifierArgumentType = true; // A14 + // A13 + currentSymbolTable = q.symbolTable = new SymbolTable(currentSymbolTable); + + ++quantifierNesting; + runOnChildren(q); + --quantifierNesting; + + currentSymbolTable = currentSymbolTable.getParent(); + + // A15 + q.collectionIsQuantified = + quantifierNesting > 0 && q.collection.containsQuantifiedVariable(); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param q The node to process. + **/ + public void run(ExistentialQuantifierExpression q) { + q.argument.getType().quantifierArgumentType = true; // A14 + // A13 + currentSymbolTable = q.symbolTable = new SymbolTable(currentSymbolTable); + + ++quantifierNesting; + runOnChildren(q); + --quantifierNesting; + + currentSymbolTable = currentSymbolTable.getParent(); + + // A15 + q.collectionIsQuantified = + quantifierNesting > 0 && q.collection.containsQuantifiedVariable(); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param q The node to process. + **/ + public void run(AtLeastQuantifierExpression q) { + q.argument.getType().quantifierArgumentType = true; // A14 + // A13 + currentSymbolTable = q.symbolTable = new SymbolTable(currentSymbolTable); + + ++quantifierNesting; + runOnChildren(q); + --quantifierNesting; + + currentSymbolTable = currentSymbolTable.getParent(); + + // A15 + if (quantifierNesting > 0) { + q.collectionIsQuantified = q.collection.containsQuantifiedVariable(); + q.lowerBoundIsQuantified = q.lowerBound.containsQuantifiedVariable(); + } + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param q The node to process. 
+ **/ + public void run(AtMostQuantifierExpression q) { + q.argument.getType().quantifierArgumentType = true; // A14 + // A13 + currentSymbolTable = q.symbolTable = new SymbolTable(currentSymbolTable); + + ++quantifierNesting; + runOnChildren(q); + --quantifierNesting; + + currentSymbolTable = currentSymbolTable.getParent(); + + // A15 + if (quantifierNesting > 0) { + q.collectionIsQuantified = q.collection.containsQuantifiedVariable(); + q.upperBoundIsQuantified = q.upperBound.containsQuantifiedVariable(); + } + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param i The node to process. + **/ + public void run(ConstraintInvocation i) { + if (i.invocation.arguments.size() != 1) // B2 + reportError(i.line, "Constraints can only take a single argument."); + + runOnChildren(i); // A9 + + if (!(i.invocation.name.typeCache instanceof ConstraintType)) // B26 + reportError(i.line, "Only constraints can be invoked with the '@' operator."); + + // A15 + i.invocationIsQuantified = + quantifierNesting > 0 && i.invocation.containsQuantifiedVariable(); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param e The node to process. 
+ **/ + public void run(ConstraintEqualityExpression e) { + runOnChildren(e); + + e.leftIsDiscreteLearner = e.rightIsDiscreteLearner = false; + if (e.left instanceof MethodInvocation) { + MethodInvocation m = (MethodInvocation) e.left; + // A12 + e.leftIsDiscreteLearner = m.name.typeCache instanceof ClassifierType; + if (e.leftIsDiscreteLearner) { + ClassifierType type = (ClassifierType) m.name.typeCache; + e.leftIsDiscreteLearner = + type.getOutput().type == ClassifierReturnType.DISCRETE && type.isLearner(); + } + } + + if (e.right instanceof MethodInvocation) { + MethodInvocation m = (MethodInvocation) e.right; + // A12 + e.rightIsDiscreteLearner = m.name.typeCache instanceof ClassifierType; + if (e.rightIsDiscreteLearner) { + ClassifierType type = (ClassifierType) m.name.typeCache; + e.rightIsDiscreteLearner = + type.getOutput().type == ClassifierReturnType.DISCRETE && type.isLearner(); + } + } + + // A15 + if (quantifierNesting > 0) { + e.leftIsQuantified = e.left.containsQuantifiedVariable(); + e.rightIsQuantified = e.right.containsQuantifiedVariable(); + } + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param d The node to process. + **/ + @SuppressWarnings("rawtypes") + public void run(InferenceDeclaration d) { + addDependor(d.getName(), null); // A8 + + if (d.headFinders.length == 0) // B29 + reportError(d.line, "An inference with no head finder methods can never be applied to " + + "a learner."); + + if (d.subjecttoClauses != 1) // B30 + reportError(d.line, "Every inference must contain exactly one 'subjectto' clause " + + "specifying a constraint. 
" + d.subjecttoClauses); + + if (d.withClauses > 1) // B31 + reportError(d.line, "An inference may contain no more than one 'with' clause " + + "specifying an inference algorithm."); + + currentCG = d; + currentSymbolTable = d.symbolTable = new SymbolTable(currentSymbolTable); + runOnChildren(d); + currentSymbolTable = currentSymbolTable.getParent(); + currentCG = null; + + if (d.algorithm != null) { + Class iceClass = d.algorithm.typeCache.typeClass(); + if (!isAssignableFrom(Inference.class, iceClass)) // B32 + reportError(d.algorithm.line, + "The 'with' clause of an inference must instantiate an " + + "edu.illinois.cs.cogcomp.lbjava.infer.Inference."); + } else + d.algorithm = InferenceDeclaration.defaultInferenceConstructor; // A16 + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param h The node to process. + **/ + public void run(InferenceDeclaration.HeadFinder h) { + currentSymbolTable = + h.symbolTable = h.body.symbolTable = new SymbolTable(currentSymbolTable); // A13 + runOnChildren(h); + currentSymbolTable = currentSymbolTable.getParent(); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param n The node to process. 
+ **/ + public void run(InferenceDeclaration.NormalizerDeclaration n) { + runOnChildren(n); + + if (n.learner != null + && !(n.learner.typeCache instanceof ClassifierType && ((ClassifierType) n.learner.typeCache) + .isLearner())) // B27 + reportError(n.line, "The left hand side of the 'normalizedby' operator must be the " + + "name of a edu.illinois.cs.cogcomp.lbjava.learn.Learner."); + + if (!(n.normalizer.typeCache instanceof ReferenceType) + || !isAssignableFrom(Normalizer.class, + ((ReferenceType) n.normalizer.typeCache).typeClass())) // B28 + reportError(n.line, "The right hand side of the 'normalizedby' operator must " + + "instantiate a edu.illinois.cs.cogcomp.lbjava.learn.Normalizer."); + } } diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/Train.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/Train.java index c39a317d..15a28665 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/Train.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/Train.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava; @@ -43,1075 +40,1045 @@ import edu.illinois.cs.cogcomp.lbjava.parse.Parser; import edu.illinois.cs.cogcomp.lbjava.util.ClassUtils; +import edu.illinois.cs.cogcomp.lbjava.util.FileUtils; + /** - * After code has been generated with {@link TranslateToJava}, this pass - * trains any classifiers for which training was indicated. 
+ * After code has been generated with {@link TranslateToJava}, this pass trains any classifiers for + * which training was indicated. * - * @see TranslateToJava + * @see TranslateToJava * @author Nick Rizzolo **/ -public class Train extends Pass -{ - /** - * Generates a String containing the name of the specified - * Throwable and its stack trace. - * - * @param t Throwable. - * @return The generated message. - **/ - private static String stackTrace(Throwable t) { - String message = " " + t + "\n"; - StackTraceElement[] elements = t.getStackTrace(); - if (elements.length == 0) message += " no stack trace available\n"; - for (int i = 0; i < elements.length; ++i) - message += " " + elements[i] + "\n"; - return message; - } - - - /** - * Remembers which files have been compiled via {@link #runJavac(String)}. - **/ - private static final TreeSet compiledFiles = new TreeSet(); - - - /** - * Run the javac compiler with the specified arguments in - * addition to those specified on the command line. - * - * @param arguments The arguments to send to javac. - * @return true iff errors were encountered. 
- **/ - public static boolean runJavac(String arguments) { - String[] files = arguments.split("\\s+"); - arguments = ""; - for (int i = 0; i < files.length; ++i) - if (compiledFiles.add(files[i])) - arguments += " " + files[i]; - if (arguments.length() == 0) return false; - - Process javac = null; - String pathArguments = "-classpath " + Main.classPath + " -sourcepath " - + Main.sourcePath; - - if (Main.generatedSourceDirectory != null) { - String gsd = Main.generatedSourceDirectory; - int packageIndex = -1; - if (AST.globalSymbolTable.getPackage().length() != 0) - packageIndex = - gsd.lastIndexOf(File.separator + AST.globalSymbolTable.getPackage() - .replace('.', File.separatorChar)); - if (packageIndex != -1) gsd = gsd.substring(0, packageIndex); - pathArguments += File.pathSeparator + gsd; - } - - if (Main.classPackageDirectory != null) - pathArguments += " -d " + Main.classPackageDirectory; - - String command = "javac " + Main.javacArguments + " " - + pathArguments + arguments; - - try { javac = Runtime.getRuntime().exec(command); } - catch (Exception e) { - System.err.println("Failed to execute 'javac': " + e); - System.exit(1); - } - - BufferedReader error = - new BufferedReader(new InputStreamReader(javac.getErrorStream())); - try { - for (String line = error.readLine(); line != null; - line = error.readLine()) - System.out.println(line); - } - catch (Exception e) { - System.err.println("Error reading STDERR from 'javac': " + e); - System.exit(1); - } - - int exit = 0; - try { exit = javac.waitFor(); } - catch (Exception e) { - System.err.println("Error waiting for 'javac' to terminate: " + e); - System.exit(1); - } - - return exit != 0; - } - - - // Member variables. - /** - * Progress output will be printed every progressOutput - * examples. - **/ - protected int progressOutput; - /** - * Set to true iff there existed a - * {@link LearningClassifierExpression} for which new code was generated. 
- **/ - protected boolean newCode; - /** - * An array of the training threads, which is never modified after it is - * constructed. - **/ - protected TrainingThread[] threads; - /** A map of all the training threads indexed by the name of the learner. */ - protected HashMap threadMap; - /** - * The keys of this map are the names of learners; the values are - * LinkedLists of the names of the learners that the learner - * named by the key depends on. - **/ - protected HashMap learnerDependencies; - - - // Constructor. - /** - * Instantiates a pass that runs on an entire {@link AST}. - * - * @param ast The program to run this pass on. - * @param output Progress output will be printed every output - * examples. - **/ - public Train(AST ast, int output) { - super(ast); - progressOutput = output; - } - - - // Methods related to learnerDependencies. - /** - * Adds an edge from dependor to dependency in the - * {@link #learnerDependencies} graph. If dependency is - * null, no new list item is added, but the - * HashSet associated with dependor is still - * created if it didn't already exist. - * - * @param dependor The name of the node doing the depending. - * @param dependency The name of the node depended on. - **/ - private void addDependency(String dependor, String dependency) { - HashSet dependencies = (HashSet) learnerDependencies.get(dependor); - - if (dependencies == null) { - dependencies = new HashSet(); - learnerDependencies.put(dependor, dependencies); - } - - if (dependency != null) dependencies.add(dependency); - } - - - /** - * This method initializes the {@link #learnerDependencies} graph such - * that the entry for each learner contains the names of all learners that - * depend on it, except that cycles are broken by preferring that learners - * appearing earlier in the source get trained first. 
- **/ - protected void fillLearnerDependorsDAG() { - threads = - (TrainingThread[]) threadMap.values().toArray(new TrainingThread[0]); - Arrays.sort(threads, - new Comparator() { - public int compare(Object o1, Object o2) { - TrainingThread t1 = (TrainingThread) o1; - TrainingThread t2 = (TrainingThread) o2; - return t2.byteOffset - t1.byteOffset; - } - }); - - for (int i = 0; i < threads.length - 1; ++i) - for (int j = i + 1; j < threads.length; ++j) { - if (SemanticAnalysis.isDependentOn(threads[i].getName(), - threads[j].getName())) - addDependency(threads[i].getName(), threads[j].getName()); - else if (SemanticAnalysis.isDependentOn(threads[j].getName(), - threads[i].getName())) - addDependency(threads[j].getName(), threads[i].getName()); - } - } - - - /** - * This method updates the {@link #learnerDependencies} graph by removing - * the specified name from every dependencies list, and then starts every - * thread that has no more dependencies. - * - * @param name The name of a learner whose training has completed. - **/ - protected void executeReadyThreads(String name) { - LinkedList ready = new LinkedList(); - - synchronized (learnerDependencies) { - for (Iterator I = learnerDependencies.entrySet().iterator(); - I.hasNext(); ) { - Map.Entry e = (Map.Entry) I.next(); - HashSet dependencies = (HashSet) e.getValue(); - dependencies.remove(name); - if (dependencies.size() == 0) ready.add(e.getKey()); - } - } - - for (Iterator I = ready.iterator(); I.hasNext(); ) { - TrainingThread thread = null; - - synchronized (threadMap) { - thread = (TrainingThread) threadMap.remove(I.next()); - } - - if (thread != null) { - thread.start(); - - if (!Main.concurrentTraining) { - try { thread.join(); } - catch (InterruptedException e) { - System.err.println("LBJava ERROR: Training of " + thread.getName() - + " has been interrupted."); - fatalError = true; - } - } - } - } - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param ast The node to process. 
- **/ - public void run(AST ast) { - if (RevisionAnalysis.noChanges) return; - threadMap = new HashMap(); - learnerDependencies = new HashMap(); - - if (Main.fileNames.size() > 0) { - String files = ""; - for (Iterator I = Main.fileNames.iterator(); I.hasNext(); ) - files += " " + I.next(); - System.out.println("Compiling generated code"); - if (runJavac(files)) return; - } - - Main.fileNames.clear(); - - runOnChildren(ast); - - fillLearnerDependorsDAG(); - executeReadyThreads(null); - - for (int i = 0; i < threads.length; ++i) { - try { threads[i].join(); } - catch (InterruptedException e) { - System.err.println("LBJava ERROR: Training of " + threads[i].getName() - + " has been interrupted."); - fatalError = true; - } - } - - if (!fatalError && newCode) { - String files = ""; - for (Iterator I = Main.fileNames.iterator(); I.hasNext(); ) - files += " " + I.next(); - System.out.println("Compiling generated code"); - compiledFiles.clear(); - runJavac(files); - } - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param lce The node to process. - **/ - public void run(LearningClassifierExpression lce) { - runOnChildren(lce); - - String lceName = lce.name.toString(); - - if (lce.parser == null - ? !RevisionAnalysis.revisionStatus.get(lceName) - .equals(RevisionAnalysis.REVISED) - : lce.learningStatus.equals(RevisionAnalysis.UNAFFECTED) - && !lce.onlyCodeGeneration) - return; - - newCode |= true; - - TrainingThread thread = new TrainingThread(lceName, lce.byteOffset, lce); - threadMap.put(lceName, thread); - addDependency(lceName, null); - } - - - // The following three methods are here to stop AST traversal. - /** - * Runs this pass on all nodes of the indicated type. There's no reason to - * traverse children of {@link CodedClassifier}s, so this method exists - * simply to stop that from happening. - * - * @param cc The node to process. 
- **/ - public void run(CodedClassifier cc) { } - - - /** - * Runs this pass on all nodes of the indicated type. There's no reason to - * traverse children of {@link ConstraintDeclaration}s, so this method - * exists simply to stop that from happening. - * - * @param cd The node to process. - **/ - public void run(ConstraintDeclaration cd) { } - - - /** - * Runs this pass on all nodes of the indicated type. There's no reason to - * traverse children of {@link InferenceDeclaration}s, so this method - * exists simply to stop that from happening. - * - * @param id The node to process. - **/ - public void run(InferenceDeclaration id) { } - - - /** - * Helps the {@link TrainingThread#getParameterCombinations()} method - * iterate through all combinations and permutations of integers such that - * each integer is at least 0 and less than the corresponding element of - * maxes. - * - * @param I The current array of integers. - * @param maxes The maximums for each element of I. - **/ - private static boolean increment(int[] I, int[] maxes) { - int i = 0; - while (i < I.length && ++I[i] == maxes[i]) I[i++] = 0; - return i < I.length; - } - - - /** - * This class contains the code that trains a learning classifier. It is a - * subclass of Thread so that it may be executed concurrently. - * - * @author Nick Rizzolo - **/ - protected class TrainingThread extends Thread - { - // Member variables. - /** The byte offset at which the learner appeared. */ - public int byteOffset; - /** The expression that specified the learner. */ - protected LearningClassifierExpression lce; - /** The learning classifier being trained. */ - protected Learner learner; - /** The class of {@link #learner}. */ - protected Class learnerClass; - /** {@link #learner}'s Parameters class. */ - protected Class parametersClass; - /** The file into which training examples are extracted. */ - protected String exFilePath; - /** The file into which testing examples are extracted. 
*/ - protected String testExFilePath; - /** The directory into which class files, model files, etc are written. */ - protected String classDir; - /** Whether or not example vectors should be pre-extracted. */ - protected boolean preExtract; - /** Whether or not pre-extracted example files should be compressed. */ - protected boolean preExtractZip; - /** Actually does the training. */ - protected BatchTrainer trainer; - /** The parser from which testing objects are obtained. */ - protected Parser testParser; - /** - * The metric with which to measure the learner's performance on a test - * set. - **/ - protected TestingMetric testingMetric; - - - // Constructor. - /** - * Initializing constructor. - * - * @param n The name of the learner. - * @param b The byte offset at which the learner appeared. - * @param lce The expression that specified the learner. - **/ - public TrainingThread(String n, int b, LearningClassifierExpression lce) { - super(n); - byteOffset = b; - this.lce = lce; - if (lce.onlyCodeGeneration) return; - - classDir = Main.classDirectory == null - ? 
"" : Main.classDirectory + File.separator; - learner = getLearner(classDir); - - preExtract = lce.preExtract != null - && !lce.preExtract.value.equals("false") - && !lce.preExtract.value.equals("\"false\"") - && !lce.preExtract.value.equals("\"none\""); - boolean preExtractToDisk = - preExtract && !lce.preExtract.value.startsWith("\"mem"); - preExtractZip = preExtract && lce.preExtract.value.endsWith("Zip\""); - - if (preExtractToDisk) { - exFilePath = getName() + ".ex"; - testExFilePath = getName() + ".test.ex"; - if (Main.generatedSourceDirectory != null) { - exFilePath = - Main.generatedSourceDirectory + File.separator + exFilePath; - testExFilePath = - Main.generatedSourceDirectory + File.separator + testExFilePath; - } - } - - Parser parser = null; - if (lce.parser != null) { - if (lce.featuresStatus == RevisionAnalysis.UNAFFECTED) { - // Implies preExtractToDisk is true because of RevisionAnalysis; - // therefore, exFilePath != null - parser = - new ArrayFileParser(exFilePath, - lce.preExtract.value.endsWith("Zip\"")); - if (lce.pruneStatus != RevisionAnalysis.UNAFFECTED) - learner.readLexiconOnDemand(classDir + getName() + ".lex"); - } - else parser = getParser("getParser"); - } - - if (lce.testParser != null) { - if (lce.pruneStatus == RevisionAnalysis.UNAFFECTED - && new File(testExFilePath).exists()) - // If pruneStatus is affected, pruning will rearrange our lexicon, - // so we must re-extract the test set from the original parser. In - // addition, pruneStatus == UNAFFECTED implies featuresStatus == - // UNAFFECTED. 
So, like above, as soon as we know pruneStatus == - // UNAFFECTED, we know testExFilePath != null - testParser = - new ArrayFileParser(testExFilePath, - lce.preExtract.value.endsWith("Zip\"")); - else testParser = getParser("getTestParser"); - } - - testingMetric = getTestingMetric(); - - if (lce.progressOutput != null) - progressOutput = Integer.parseInt(lce.progressOutput.value); - - trainer = new BatchTrainer(learner, parser, progressOutput); - } - - - /** - * Obtain an instance of the learner appropriate for the revision status - * of the source file. This method also fills in the - * {@link #learnerClass} and {@link #parametersClass} fields. - * - *

If the only change between the last run of the compiler and this - * run is that more training rounds were added, the entire model file can - * be loaded from disk. Failing that, if features are unaffected - * according to {@link RevisionAnalysis}, it means only the label lexicon - * should be read. Otherwise, we just start with a fresh instance of the - * learner via its static getInstance() method. In any - * case, the learner is initialized so that it will write its model - * and/or lexicon files to the specified directory as necessary. - * - * @param dir The directory in which the model and lexicon are written. - * @return An instance of the learner. - **/ - private Learner getLearner(String dir) { - String fullyQualified = AST.globalSymbolTable.getPackage(); - if (fullyQualified.length() > 0) fullyQualified += "."; - fullyQualified += getName(); - learnerClass = ClassUtils.getClass(fullyQualified, true); - - Class[] declaredClasses = learnerClass.getDeclaredClasses(); - int c = 0; - while (c < declaredClasses.length - && !declaredClasses[c].getName() - .endsWith(getName() + "$Parameters")) - ++c; - - if (c == declaredClasses.length) { - System.err.println( - "LBJava ERROR: Expected to find a single member class inside " - + getName() + " named 'Parameters'."); - for (int i = 0; i < declaredClasses.length; ++i) - System.err.println(i + ": " + declaredClasses[i].getName()); - System.exit(1); - } - parametersClass = declaredClasses[c]; - - Learner l = null; - - if (lce.startingRound > 1) { - // In the condition above, note that before setting - // lce.startingRound > 1, RevisionAnalysis ensures that the lce is - // unaffected other than the number of rounds and that there will be - // no parameter tuning or cross validation. 
- l = Learner.readLearner(dir + getName() + ".lc"); - l.setLexiconLocation(dir + getName() + ".lex"); - } - else if (lce.featuresStatus == RevisionAnalysis.UNAFFECTED) { - Constructor noArg = null; - try { noArg = parametersClass.getConstructor(new Class[0]); } - catch (Exception e) { - System.err.println( - "LBJava ERROR: Can't find a no-argument constructor for " - + getName() + ".Parameters."); - System.exit(1); - } - - Learner.Parameters p = null; - try { p = (Learner.Parameters) noArg.newInstance(new Object[0]); } - catch (Exception e) { - System.err.println( - "LBJava ERROR: Can't instantiate " + getName() + ".Parameters:"); - e.printStackTrace(); - System.exit(1); - } - - l = Learner.readLearner(dir + getName() + ".lc", false); - l.setParameters(p); - l.setLexiconLocation(dir + getName() + ".lex"); - } - else { - Method getInstance = null; - try { - getInstance = - learnerClass.getDeclaredMethod("getInstance", new Class[0]); - } - catch (Exception e) { - System.err.println("LBJava ERROR: Could not access method '" - + fullyQualified + ".getInstance()':"); - System.exit(1); - } - - try { l = (Learner) getInstance.invoke(null, null); } - catch (Exception e) { - System.err.println("LBJava ERROR: Could not get unique instance of '" - + fullyQualified + "': " + e); - e.getCause().printStackTrace(); - System.exit(1); - } - - if (l == null) { - System.err.println("LBJava ERROR: Could not get unique instance of '" - + fullyQualified + "'."); - System.exit(1); - } - - l.setModelLocation(dir + getName() + ".lc"); - l.setLexiconLocation(dir + getName() + ".lex"); - } - - return l; - } - - - /** - * Call the specified method of {@link #learnerClass}, and return the - * Parser returned by that method. - * - * @param name The name of the method. - * @return The parser returned by the named method. 
- **/ - private Parser getParser(String name) { - Method m = null; - try { m = learnerClass.getDeclaredMethod(name, new Class[0]); } - catch (Exception e) { - reportError(lce.line, - "Could not access method '" + lce.name + "." + name - + "()': " + e); - return null; - } - - Parser result = null; - - try { result = (Parser) m.invoke(null, null); } - catch (Exception e) { - System.err.println( - "Could not instantiate parser '" + lce.parser.name + "': " + e - + ", caused by"); - Throwable cause = e.getCause(); - System.err.print(stackTrace(cause)); - - if (cause instanceof ExceptionInInitializerError) { - System.err.println("... caused by"); - System.err.print( - stackTrace(((ExceptionInInitializerError) cause).getCause())); - } - - return null; - } - - return result; - } - - - /** - * Call the getTestingMetric() method of - * {@link #learnerClass} and return the testing metric it returns. - **/ - private TestingMetric getTestingMetric() { - TestingMetric testingMetric = null; - if (lce.testingMetric != null) { - Method getTestingMetric = null; - try { - getTestingMetric = - learnerClass.getDeclaredMethod("getTestingMetric", new Class[0]); - } - catch (Exception e) { - reportError(lce.line, - "Could not access method'" + getName() - + ".getTestingMetric()': " + e); - return null; - } - - try { - testingMetric = (TestingMetric) getTestingMetric.invoke(null, null); - } - catch (Exception e) { - System.err.println( - "Could not instantiate testing metric '" + lce.parser.name - + "': " + e + ", caused by"); - System.err.print(stackTrace(e.getCause())); - return null; - } - } - else testingMetric = new Accuracy(); - - return testingMetric; - } - - - /** - * Handles feature pre-extraction and dataset pruning under the - * assumption that pre-extraction has been called for by the source code. - * The two go hand-in-hand, as we only need to compute and store feature - * counts during pre-extraction if we are pruning. 
- **/ - private void preExtractAndPrune() { - Lexicon.PruningPolicy pruningPolicy = new Lexicon.PruningPolicy(); - Lexicon.CountPolicy countPolicy = Lexicon.CountPolicy.none; - if (lce.pruneCountType != null) { - pruningPolicy = - lce.pruneThresholdType.value.equals("\"count\"") - ? new Lexicon.PruningPolicy( - Integer.parseInt(lce.pruneThreshold.value)) - : new Lexicon.PruningPolicy( - Double.parseDouble(lce.pruneThreshold.value)); - countPolicy = - lce.pruneCountType.value.equals("\"global\"") - ? Lexicon.CountPolicy.global : Lexicon.CountPolicy.perClass; - } - - Learner preExtractLearner = learner; // Needed in case we're pruning. - Lexicon lexicon = null; // Needed for pre-extracting the test set. - // As seen below, we can always read the lexicon off disk just before - // pre-extracting the test set, but if one of the operations between - // now and then obtains the lexicon incidentally, we'll keep it here - // to avoid reading it from disk again. - - if (pruningPolicy.isNone()) { - if (lce.featuresStatus != RevisionAnalysis.UNAFFECTED) - lexicon = trainer.preExtract(exFilePath, preExtractZip); - else if (lce.pruneStatus != RevisionAnalysis.UNAFFECTED) - lexicon = learner.getLexiconDiscardCounts(); - else trainer.fillInSizes(); - } - else if (lce.featuresStatus != RevisionAnalysis.UNAFFECTED - || lce.pruneStatus != RevisionAnalysis.UNAFFECTED - && lce.previousPruneCountType == null) - preExtractLearner = - trainer.preExtract(exFilePath, preExtractZip, countPolicy); - else if (lce.previousPruneCountType != null - && !lce.previousPruneCountType.equals(lce.pruneCountType)) { - if (lce.previousPruneCountType.value.equals("\"global\"")) - // implies lce.pruneCountType.equals("\"perClass\"") - preExtractLearner = - trainer.preExtract(exFilePath, preExtractZip, countPolicy); - else // lce.previousPruneCountType.value.equals("\"perClass\"") - learner.getLexicon().perClassToGlobalCounts(); - } - // else pruneThresholdType or pruneThreshold may have changed, but - // 
that does not require recounting of features. - - if (lce.featuresStatus == RevisionAnalysis.UNAFFECTED - ? lce.pruneStatus != RevisionAnalysis.UNAFFECTED - : !pruningPolicy.isNone()) { - trainer.pruneDataset(exFilePath, preExtractZip, pruningPolicy, - preExtractLearner); - lexicon = preExtractLearner.getLexicon(); - if (preExtractLearner == learner) learner.setLexicon(null); - } - - if (testParser != null - && (lce.pruneStatus != RevisionAnalysis.UNAFFECTED - || !(new File(testExFilePath).exists()))) { - if (lexicon == null) - learner.readLexiconOnDemand(classDir + getName() + ".lex"); - else { - learner.setLexicon(lexicon); - lexicon = null; // See comment below - } - - BatchTrainer preExtractor = - new BatchTrainer(learner, testParser, trainer.getProgressOutput(), - "test set: "); - preExtractor.preExtract(testExFilePath, preExtractZip, - Lexicon.CountPolicy.none); - testParser = preExtractor.getParser(); - } - - // At this point, it should be the case that (lexicon == null) implies - // that the lexicon is not in memory. Above, we intentionally discard - // the lexicon when pre-extracting the test set, since that process will - // add unwanted features (since pre-extraction always happens under the - // assumption that we are training). - - // Given the above comment, we now ensure that when this learning classifier (ie, - // the one whose feature vectors we have just pre-extracted) is - // called as a feature for some other learning classifier defined in the - // same sourcefile, it will be prepared take a raw example object as - // input. 
- String name = getName(); - HashSet dependors = (HashSet) SemanticAnalysis.dependorGraph.get(name); - if (lexicon != null && dependors.size() > 0) - learner.setLexicon(lexicon); - else learner.readLexiconOnDemand(classDir + name + ".lex"); - } - - - /** - * Uses the various {@link ParameterSet ParameterSet}s in the AST - * to generate an array of parameter combinations representing the cross - * product of all {@link ParameterSet ParameterSet}s except the - * one in the {@link LearningClassifierExpression#rounds} field, if any. - **/ - private Learner.Parameters[] getParameterCombinations() { - Class[] paramTypes = new Class[lce.parameterSets.size()]; - Object[][] arguments = new Object[paramTypes.length][]; - int[] lengths = new int[paramTypes.length]; - int totalCombinations = 1; - - Iterator iterator = lce.parameterSets.iterator(); - for (int i = 0; i < paramTypes.length; i++) { - ParameterSet ps = (ParameterSet) iterator.next(); - paramTypes[i] = ps.type.typeClass(); - arguments[i] = ps.toStringArray(); - lengths[i] = arguments[i].length; - totalCombinations *= lengths[i]; - } - - for (int i = 0; i < arguments.length; i++) { - Class t = paramTypes[i]; - if (t.isPrimitive()) { - if (t.getName().equals("int")) - for (int j = 0; j < lengths[i]; ++j) - arguments[i][j] = new Integer((String) arguments[i][j]); - else if (t.getName().equals("long")) - for (int j = 0; j < lengths[i]; ++j) - arguments[i][j] = new Long((String) arguments[i][j]); - else if (t.getName().equals("short")) - for (int j = 0; j < lengths[i]; ++j) - arguments[i][j] = new Short((String) arguments[i][j]); - else if (t.getName().equals("double")) - for (int j = 0; j < lengths[i]; ++j) - arguments[i][j] = new Double((String) arguments[i][j]); - else if (t.getName().equals("float")) - for (int j = 0; j < lengths[i]; ++j) - arguments[i][j] = new Float((String) arguments[i][j]); - else if (t.getName().equals("boolean")) - for (int j = 0; j < lengths[i]; ++j) - arguments[i][j] = new Boolean((String) 
arguments[i][j]); - } - } - - Constructor c = null; - try { c = parametersClass.getConstructor(paramTypes); } - catch (Exception e) { - System.err.println( - "LBJava ERROR: Can't find a parameter tuning constructor for " - + getName() + ".Parameters."); - e.printStackTrace(); - System.exit(1); - } - - Learner.Parameters[] result = new Learner.Parameters[totalCombinations]; - int[] I = new int[paramTypes.length]; - Object[] a = new Object[paramTypes.length]; - int i = 0; - - do { - for (int j = 0; j < a.length; ++j) a[j] = arguments[j][I[j]]; - try { result[i++] = (Learner.Parameters) c.newInstance(a); } - catch (Exception e) { - System.err.println( - "LBJava ERROR: Can't instantiate " + getName() + ".Parameters:"); - e.printStackTrace(); - System.exit(1); - } - } while (increment(I, lengths)); - - return result; - } - - - /** - * Determines the best parameters to use when training the learner, - * under the assumption that {@link ParameterSet}s were present. - * Here, "best" means the parameters that did the best out of some small - * set of particular parameter settings. - **/ - private Learner.Parameters tune() { - Learner.Parameters[] parameterCombinations = - getParameterCombinations(); - int[] rounds = null; - if (lce.rounds != null) { - if (lce.rounds instanceof ParameterSet) - rounds = ((ParameterSet) lce.rounds).toSortedIntArray(); - else - rounds = - new int[]{ Integer.parseInt(((Constant) lce.rounds).value) }; - } - else rounds = new int[]{ 1 }; - - if (lce.K != null) { - int k = Integer.parseInt(lce.K.value); - double alpha = Double.parseDouble(lce.alpha.value); - return - trainer.tune(parameterCombinations, rounds, k, lce.splitPolicy, - alpha, testingMetric); - } - - return - trainer.tune(parameterCombinations, rounds, testParser, - testingMetric); - } - - - /** Performs the training and then generates the new code. 
*/ - public void run() { - boolean tuningParameters = - lce.parameterSets.size() > 0 - || lce.rounds != null && lce.rounds instanceof ParameterSet; - - if (!lce.onlyCodeGeneration) { - // If there's a "from" clause, train. - try { - if (lce.parser != null) { - System.out.println("Training " + getName()); - if (preExtract) { - preExtractAndPrune(); - System.gc(); - } - else learner.saveLexicon(); - int trainingRounds = 1; - - if (tuningParameters) { - String parametersPath = getName(); - if (Main.classDirectory != null) - parametersPath = - Main.classDirectory + File.separator + parametersPath; - parametersPath += ".p"; - - Learner.Parameters bestParameters = tune(); - trainingRounds = bestParameters.rounds; - Learner.writeParameters(bestParameters, parametersPath); - System.out.println(" " + getName() - + ": Training on entire training set"); - } - else { - if (lce.rounds != null) - trainingRounds = - Integer.parseInt(((Constant) lce.rounds).value); - - if (lce.K != null) { - int[] rounds = { trainingRounds }; - int k = Integer.parseInt(lce.K.value); - double alpha = Double.parseDouble(lce.alpha.value); - trainer.crossValidation(rounds, k, lce.splitPolicy, alpha, - testingMetric, true); - System.out.println(" " + getName() - + ": Training on entire training set"); - } - } - - trainer.train(lce.startingRound, trainingRounds); - - if (testParser != null) { - System.out.println("Testing " + getName()); - new Accuracy(true).test(learner, learner.getLabeler(), - testParser); - } - - System.out.println("Writing " + getName()); - } - else learner.saveLexicon(); // Writes .lex even if lexicon is empty. - - learner.save(); // Doesn't write .lex if lexicon is empty. - } - catch (Exception e) { - System.err.println( - "LBJava ERROR: Exception while training " + getName() + ":"); - e.printStackTrace(); - fatalError = true; - return; - } - - // Set learner's static instance field to the newly learned instance. 
- Field field = null; - try { field = learnerClass.getField("instance"); } - catch (Exception e) { - System.err.println("Can't access " + learnerClass - + "'s 'instance' field: " + e); - System.exit(1); - } - - try { field.set(null, learner); } - catch (Exception e) { - System.err.println("Can't set " + learnerClass - + "'s 'instance' field: " + e); - System.exit(1); - } - } - else System.out.println("Generating code for " + lce.name); - - // Write the new code. - PrintStream out = TranslateToJava.open(lce); - if (out == null) return; - - out.println(TranslateToJava.disclaimer); - out.print("// "); - TranslateToJava.compressAndPrint(lce.shallow(), out); - out.println("\n"); - - ast.symbolTable.generateHeader(out); - - if (lce.cacheIn != null) { - String f = lce.cacheIn.toString(); - boolean cachedInMap = f.equals(ClassifierAssignment.mapCache); - if (cachedInMap) out.println("import java.util.WeakHashMap;"); - } - - out.println("\n"); - if (lce.comment != null) out.println(lce.comment); - - out.println("\n\npublic class " + getName() + " extends " - + lce.learnerName); - out.println("{"); - out.println(" private static java.net.URL _lcFilePath;"); - out.println(" private static java.net.URL _lexFilePath;"); - if (tuningParameters) - out.println(" private static java.net.URL parametersPath;"); - out.println(); - - out.println(" static"); - out.println(" {"); - out.println(" _lcFilePath = " + getName() + ".class.getResource(\"" - + getName() + ".lc\");\n"); - - out.println(" if (_lcFilePath == null)"); - out.println(" {"); - out.println(" System.err.println(\"ERROR: Can't locate " - + getName() + ".lc in the class path.\");"); - out.println(" System.exit(1);"); - out.println(" }\n"); - - out.println(" _lexFilePath = " + getName() + ".class.getResource(\"" - + getName() + ".lex\");\n"); - - out.println(" if (_lexFilePath == null)"); - out.println(" {"); - out.println(" System.err.println(\"ERROR: Can't locate " - + getName() + ".lex in the class path.\");"); - 
out.println(" System.exit(1);"); - out.println(" }"); - - if (tuningParameters) { - out.println( - "\n parametersPath = " + getName() + ".class.getResource(\"" - + getName() + ".p\");\n"); - - out.println(" if (parametersPath == null)"); - out.println(" {"); - out.println(" System.err.println(\"ERROR: Can't locate " - + getName() + ".p in the class path.\");"); - out.println(" System.exit(1);"); - out.println(" }"); - } - out.println(" }\n"); - - out.println(" private static void loadInstance()"); - out.println(" {"); - out.println(" if (instance == null)"); - out.println(" {"); - out.println(" instance = (" + getName() - + ") Learner.readLearner(_lcFilePath);"); - out.println(" instance.readLexiconOnDemand(_lexFilePath);"); - out.println(" }"); - out.println(" }\n"); - - if (tuningParameters) { - out.println(" private static " + lce.learnerName - + ".Parameters bestParameters;\n"); - - out.println(" public static " + lce.learnerName - + ".Parameters getBestParameters()"); - out.println(" {"); - out.println(" if (bestParameters == null)"); - out.println(" bestParameters = (" + lce.learnerName - + ".Parameters) Learner.readParameters(parametersPath);"); - out.println(" return bestParameters;"); - out.println(" }\n"); - } - - if (exFilePath != null - && lce.featuresStatus != RevisionAnalysis.UNAFFECTED - && new File(exFilePath).exists()) - out.println( - " public static Parser getParser() { return new " - + "edu.illinois.cs.cogcomp.lbjava.parse.ArrayFileParser(\"" - + new File(exFilePath).getAbsolutePath() - + "\"); }"); - else - out.println(" public static Parser getParser() { return " - + lce.parser + "; }"); - - if (testExFilePath != null - && lce.featuresStatus != RevisionAnalysis.UNAFFECTED - && new File(testExFilePath).exists()) - out.println( - " public static Parser getTestParser() { return new " - + "edu.illinois.cs.cogcomp.lbjava.parse.ArrayFileParser(\"" - + new File(testExFilePath).getAbsolutePath() + "\"); }"); - else - out.println(" public static Parser 
getTestParser() { return " - + lce.testParser + "; }\n"); - - TranslateToJava.generateLearnerBody(out, lce); - - if (lce.parameterSets.size() > 0) { - out.println(); - out.println(" public static class Parameters extends " - + lce.learnerName + ".Parameters"); - out.println(" {"); - out.println( - " public Parameters() { super(getBestParameters()); }"); - out.println(" }"); - } - - out.println("}\n"); - out.close(); - - executeReadyThreads(getName()); - } - } -} \ No newline at end of file +public class Train extends Pass { + /** + * Generates a String containing the name of the + * specified Throwable and its stack trace. + * + * @param t Throwable. + * @return The generated message. + **/ + private static String stackTrace(Throwable t) { + String message = " " + t + "\n"; + StackTraceElement[] elements = t.getStackTrace(); + if (elements.length == 0) + message += " no stack trace available\n"; + for (int i = 0; i < elements.length; ++i) + message += " " + elements[i] + "\n"; + return message; + } + + + /** + * Remembers which files have been compiled via {@link #runJavac(String)}. + **/ + private static final TreeSet compiledFiles = new TreeSet(); + + + /** + * Run the javac compiler with the specified arguments in + * addition to those specified on the command line. + * + * @param arguments The arguments to send to javac. + * @return true iff errors were encountered. 
+ **/ + public static boolean runJavac(String arguments) { + String[] files = arguments.split("\\s+"); + arguments = ""; + for (int i = 0; i < files.length; ++i) + if (compiledFiles.add(files[i])) + arguments += " " + files[i]; + if (arguments.length() == 0) + return false; + + Process javac = null; + String pathArguments = "-classpath " + Main.classPath + " -sourcepath " + Main.sourcePath; + + if (Main.generatedSourceDirectory != null) { + String gsd = Main.generatedSourceDirectory; + int packageIndex = -1; + if (AST.globalSymbolTable.getPackage().length() != 0) + packageIndex = + gsd.lastIndexOf(File.separator + + AST.globalSymbolTable.getPackage().replace('.', + File.separatorChar)); + if (packageIndex != -1) + gsd = gsd.substring(0, packageIndex); + pathArguments += File.pathSeparator + gsd; + } + + if (Main.classPackageDirectory != null) + pathArguments += " -d " + Main.classPackageDirectory; + + String command = "javac " + Main.javacArguments + " " + pathArguments + arguments; + + try { + javac = Runtime.getRuntime().exec(command); + } catch (Exception e) { + System.err.println("Failed to execute 'javac': " + e); + System.exit(1); + } + + BufferedReader error = new BufferedReader(new InputStreamReader(javac.getErrorStream())); + try { + for (String line = error.readLine(); line != null; line = error.readLine()) + System.out.println(line); + } catch (Exception e) { + System.err.println("Error reading STDERR from 'javac': " + e); + System.exit(1); + } + + int exit = 0; + try { + exit = javac.waitFor(); + } catch (Exception e) { + System.err.println("Error waiting for 'javac' to terminate: " + e); + System.exit(1); + } + + return exit != 0; + } + + + // Member variables. + /** + * Progress output will be printed every progressOutput examples. + **/ + protected int progressOutput; + /** + * Set to true iff there existed a {@link LearningClassifierExpression} for which + * new code was generated. 
+ **/ + protected boolean newCode; + /** + * An array of the training threads, which is never modified after it is constructed. + **/ + protected TrainingThread[] threads; + /** A map of all the training threads indexed by the name of the learner. */ + protected HashMap threadMap; + /** + * The keys of this map are the names of learners; the values are LinkedLists of + * the names of the learners that the learner named by the key depends on. + **/ + protected HashMap learnerDependencies; + + + // Constructor. + /** + * Instantiates a pass that runs on an entire {@link AST}. + * + * @param ast The program to run this pass on. + * @param output Progress output will be printed every output examples. + **/ + public Train(AST ast, int output) { + super(ast); + progressOutput = output; + } + + + // Methods related to learnerDependencies. + /** + * Adds an edge from dependor to dependency in the + * {@link #learnerDependencies} graph. If dependency is null, no new + * list item is added, but the HashSet associated with dependor is + * still created if it didn't already exist. + * + * @param dependor The name of the node doing the depending. + * @param dependency The name of the node depended on. + **/ + private void addDependency(String dependor, String dependency) { + HashSet dependencies = (HashSet) learnerDependencies.get(dependor); + + if (dependencies == null) { + dependencies = new HashSet(); + learnerDependencies.put(dependor, dependencies); + } + + if (dependency != null) + dependencies.add(dependency); + } + + + /** + * This method initializes the {@link #learnerDependencies} + * graph such that the entry for each learner contains the names of all learners that depend on + * it, except that cycles are broken by preferring that learners appearing earlier in the source + * get trained first. 
+ **/ + protected void fillLearnerDependorsDAG() { + threads = (TrainingThread[]) threadMap.values().toArray(new TrainingThread[0]); + Arrays.sort(threads, new Comparator() { + public int compare(Object o1, Object o2) { + TrainingThread t1 = (TrainingThread) o1; + TrainingThread t2 = (TrainingThread) o2; + return t2.byteOffset - t1.byteOffset; + } + }); + + for (int i = 0; i < threads.length - 1; ++i) + for (int j = i + 1; j < threads.length; ++j) { + if (SemanticAnalysis.isDependentOn(threads[i].getName(), threads[j].getName())) + addDependency(threads[i].getName(), threads[j].getName()); + else if (SemanticAnalysis.isDependentOn(threads[j].getName(), threads[i].getName())) + addDependency(threads[j].getName(), threads[i].getName()); + } + } + + + /** + * This method updates the {@link #learnerDependencies} + * graph by removing the specified name from every dependencies list, and then starts every + * thread that has no more dependencies. + * + * @param name The name of a learner whose training has completed. + **/ + protected void executeReadyThreads(String name) { + LinkedList ready = new LinkedList(); + + synchronized (learnerDependencies) { + for (Iterator I = learnerDependencies.entrySet().iterator(); I.hasNext();) { + Map.Entry e = (Map.Entry) I.next(); + HashSet dependencies = (HashSet) e.getValue(); + dependencies.remove(name); + if (dependencies.size() == 0) + ready.add(e.getKey()); + } + } + + for (Iterator I = ready.iterator(); I.hasNext();) { + TrainingThread thread = null; + + synchronized (threadMap) { + thread = (TrainingThread) threadMap.remove(I.next()); + } + + if (thread != null) { + thread.start(); + + if (!Main.concurrentTraining) { + try { + thread.join(); + } catch (InterruptedException e) { + System.err.println("LBJava ERROR: Training of " + thread.getName() + + " has been interrupted."); + fatalError = true; + } + } + } + } + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param ast The node to process. 
+ **/ + public void run(AST ast) { + if (RevisionAnalysis.noChanges) + return; + threadMap = new HashMap(); + learnerDependencies = new HashMap(); + + if (Main.fileNames.size() > 0) { + String files = ""; + for (Iterator I = Main.fileNames.iterator(); I.hasNext();) + files += " " + I.next(); + System.out.println("Compiling generated code"); + if (runJavac(files)) + return; + } + + Main.fileNames.clear(); + + runOnChildren(ast); + + fillLearnerDependorsDAG(); + executeReadyThreads(null); + + for (int i = 0; i < threads.length; ++i) { + try { + threads[i].join(); + } catch (InterruptedException e) { + System.err.println("LBJava ERROR: Training of " + threads[i].getName() + + " has been interrupted."); + fatalError = true; + } + } + + if (!fatalError && newCode) { + String files = ""; + for (Iterator I = Main.fileNames.iterator(); I.hasNext();) + files += " " + I.next(); + System.out.println("Compiling generated code"); + compiledFiles.clear(); + runJavac(files); + } + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param lce The node to process. + **/ + public void run(LearningClassifierExpression lce) { + runOnChildren(lce); + + String lceName = lce.name.toString(); + + if (lce.parser == null ? !RevisionAnalysis.revisionStatus.get(lceName).equals( + RevisionAnalysis.REVISED) : lce.learningStatus.equals(RevisionAnalysis.UNAFFECTED) + && !lce.onlyCodeGeneration) + return; + + newCode |= true; + + TrainingThread thread = new TrainingThread(lceName, lce.byteOffset, lce); + threadMap.put(lceName, thread); + addDependency(lceName, null); + } + + + // The following three methods are here to stop AST traversal. + /** + * Runs this pass on all nodes of the indicated type. There's no + * reason to traverse children of {@link CodedClassifier}s, so this method exists simply to stop + * that from happening. + * + * @param cc The node to process. 
+ **/ + public void run(CodedClassifier cc) {} + + + /** + * Runs this pass on all nodes of the indicated type. + * There's no reason to traverse children of {@link ConstraintDeclaration}s, so this method + * exists simply to stop that from happening. + * + * @param cd The node to process. + **/ + public void run(ConstraintDeclaration cd) {} + + + /** + * Runs this pass on all nodes of the indicated type. There's + * no reason to traverse children of {@link InferenceDeclaration}s, so this method exists simply + * to stop that from happening. + * + * @param id The node to process. + **/ + public void run(InferenceDeclaration id) {} + + + /** + * Helps the {@link TrainingThread#getParameterCombinations()} + * method iterate through all combinations and permutations of integers such that each integer + * is at least 0 and less than the corresponding element of maxes. + * + * @param I The current array of integers. + * @param maxes The maximums for each element of I. + **/ + private static boolean increment(int[] I, int[] maxes) { + int i = 0; + while (i < I.length && ++I[i] == maxes[i]) + I[i++] = 0; + return i < I.length; + } + + + /** + * This class contains the code that trains a learning classifier. It is a subclass of + * Thread so that it may be executed concurrently. + * + * @author Nick Rizzolo + **/ + protected class TrainingThread extends Thread { + // Member variables. + /** The byte offset at which the learner appeared. */ + public int byteOffset; + /** The expression that specified the learner. */ + protected LearningClassifierExpression lce; + /** The learning classifier being trained. */ + protected Learner learner; + /** The class of {@link #learner}. */ + protected Class learnerClass; + /** {@link #learner}'s Parameters class. */ + protected Class parametersClass; + /** The file into which training examples are extracted. */ + protected String exFilePath; + /** The file into which testing examples are extracted. 
*/ + protected String testExFilePath; + /** The directory into which class files, model files, etc are written. */ + protected String classDir; + /** Whether or not example vectors should be pre-extracted. */ + protected boolean preExtract; + /** Whether or not pre-extracted example files should be compressed. */ + protected boolean preExtractZip; + /** Actually does the training. */ + protected BatchTrainer trainer; + /** The parser from which testing objects are obtained. */ + protected Parser testParser; + /** + * The metric with which to measure the learner's performance on a test set. + **/ + protected TestingMetric testingMetric; + + + // Constructor. + /** + * Initializing constructor. + * + * @param n The name of the learner. + * @param b The byte offset at which the learner appeared. + * @param lce The expression that specified the learner. + **/ + public TrainingThread(String n, int b, LearningClassifierExpression lce) { + super(n); + byteOffset = b; + this.lce = lce; + if (lce.onlyCodeGeneration) + return; + + classDir = Main.classDirectory == null ? 
"" : Main.classDirectory + File.separator; + learner = getLearner(classDir); + + preExtract = + lce.preExtract != null && !lce.preExtract.value.equals("false") + && !lce.preExtract.value.equals("\"false\"") + && !lce.preExtract.value.equals("\"none\""); + boolean preExtractToDisk = preExtract && !lce.preExtract.value.startsWith("\"mem"); + preExtractZip = preExtract && lce.preExtract.value.endsWith("Zip\""); + + if (preExtractToDisk) { + exFilePath = getName() + ".ex"; + testExFilePath = getName() + ".test.ex"; + if (Main.generatedSourceDirectory != null) { + exFilePath = Main.generatedSourceDirectory + File.separator + exFilePath; + testExFilePath = + Main.generatedSourceDirectory + File.separator + testExFilePath; + } + } + + Parser parser = null; + if (lce.parser != null) { + if (lce.featuresStatus == RevisionAnalysis.UNAFFECTED) { + // Implies preExtractToDisk is true because of RevisionAnalysis; + // therefore, exFilePath != null + parser = + new ArrayFileParser(exFilePath, lce.preExtract.value.endsWith("Zip\"")); + if (lce.pruneStatus != RevisionAnalysis.UNAFFECTED) + learner.readLexiconOnDemand(classDir + getName() + ".lex"); + } else + parser = getParser("getParser"); + } + + if (lce.testParser != null) { + if (lce.pruneStatus == RevisionAnalysis.UNAFFECTED + && new File(testExFilePath).exists()) + // If pruneStatus is affected, pruning will rearrange our lexicon, + // so we must re-extract the test set from the original parser. In + // addition, pruneStatus == UNAFFECTED implies featuresStatus == + // UNAFFECTED. 
So, like above, as soon as we know pruneStatus == + // UNAFFECTED, we know testExFilePath != null + testParser = + new ArrayFileParser(testExFilePath, + lce.preExtract.value.endsWith("Zip\"")); + else + testParser = getParser("getTestParser"); + } + + testingMetric = getTestingMetric(); + + if (lce.progressOutput != null) + progressOutput = Integer.parseInt(lce.progressOutput.value); + + trainer = new BatchTrainer(learner, parser, progressOutput); + } + + + /** + * Obtain an instance of the learner appropriate for the + * revision status of the source file. This method also fills in the {@link #learnerClass} + * and {@link #parametersClass} fields. + * + *

+ * If the only change between the last run of the compiler and this run is that more + * training rounds were added, the entire model file can be loaded from disk. Failing that, + * if features are unaffected according to {@link RevisionAnalysis}, it means only the label + * lexicon should be read. Otherwise, we just start with a fresh instance of the learner via + * its static getInstance() method. In any case, the learner is initialized so + * that it will write its model and/or lexicon files to the specified directory as + * necessary. + * + * @param dir The directory in which the model and lexicon are written. + * @return An instance of the learner. + **/ + private Learner getLearner(String dir) { + String fullyQualified = AST.globalSymbolTable.getPackage(); + if (fullyQualified.length() > 0) + fullyQualified += "."; + fullyQualified += getName(); + learnerClass = ClassUtils.getClass(fullyQualified, true); + + Class[] declaredClasses = learnerClass.getDeclaredClasses(); + int c = 0; + while (c < declaredClasses.length + && !declaredClasses[c].getName().endsWith(getName() + "$Parameters")) + ++c; + + if (c == declaredClasses.length) { + System.err.println("LBJava ERROR: Expected to find a single member class inside " + + getName() + " named 'Parameters'."); + for (int i = 0; i < declaredClasses.length; ++i) + System.err.println(i + ": " + declaredClasses[i].getName()); + System.exit(1); + } + parametersClass = declaredClasses[c]; + + Learner l = null; + + if (lce.startingRound > 1) { + // In the condition above, note that before setting + // lce.startingRound > 1, RevisionAnalysis ensures that the lce is + // unaffected other than the number of rounds and that there will be + // no parameter tuning or cross validation. 
+ l = Learner.readLearner(dir + getName() + ".lc"); + l.setLexiconLocation(dir + getName() + ".lex"); + } else if (lce.featuresStatus == RevisionAnalysis.UNAFFECTED) { + Constructor noArg = null; + try { + noArg = parametersClass.getConstructor(new Class[0]); + } catch (Exception e) { + System.err.println("LBJava ERROR: Can't find a no-argument constructor for " + + getName() + ".Parameters."); + System.exit(1); + } + + Learner.Parameters p = null; + try { + p = (Learner.Parameters) noArg.newInstance(new Object[0]); + } catch (Exception e) { + System.err.println("LBJava ERROR: Can't instantiate " + getName() + + ".Parameters:"); + e.printStackTrace(); + System.exit(1); + } + + l = Learner.readLearner(dir + getName() + ".lc", false); + l.setParameters(p); + l.setLexiconLocation(dir + getName() + ".lex"); + } else { + Method getInstance = null; + try { + getInstance = learnerClass.getDeclaredMethod("getInstance", new Class[0]); + } catch (Exception e) { + System.err.println("LBJava ERROR: Could not access method '" + fullyQualified + + ".getInstance()':"); + System.exit(1); + } + + try { + l = (Learner) getInstance.invoke(null, null); + } catch (Exception e) { + System.err.println("LBJava ERROR: Could not get unique instance of '" + + fullyQualified + "': " + e); + e.getCause().printStackTrace(); + System.exit(1); + } + + if (l == null) { + System.err.println("LBJava ERROR: Could not get unique instance of '" + + fullyQualified + "'."); + System.exit(1); + } + + l.setModelLocation(dir + getName() + ".lc"); + l.setLexiconLocation(dir + getName() + ".lex"); + } + + return l; + } + + + /** + * Call the specified method of {@link #learnerClass}, and return + * the Parser returned by that method. + * + * @param name The name of the method. + * @return The parser returned by the named method. 
+ **/ + private Parser getParser(String name) { + Method m = null; + try { + m = learnerClass.getDeclaredMethod(name, new Class[0]); + } catch (Exception e) { + reportError(lce.line, "Could not access method '" + lce.name + "." + name + "()': " + + e); + return null; + } + + Parser result = null; + + try { + result = (Parser) m.invoke(null, null); + } catch (Exception e) { + System.err.println("Could not instantiate parser '" + lce.parser.name + "': " + e + + ", caused by"); + Throwable cause = e.getCause(); + System.err.print(stackTrace(cause)); + + if (cause instanceof ExceptionInInitializerError) { + System.err.println("... caused by"); + System.err.print(stackTrace(((ExceptionInInitializerError) cause).getCause())); + } + + return null; + } + + return result; + } + + + /** + * Call the getTestingMetric() method of + * {@link #learnerClass} and return the testing metric it returns. + **/ + private TestingMetric getTestingMetric() { + TestingMetric testingMetric = null; + if (lce.testingMetric != null) { + Method getTestingMetric = null; + try { + getTestingMetric = + learnerClass.getDeclaredMethod("getTestingMetric", new Class[0]); + } catch (Exception e) { + reportError(lce.line, "Could not access method'" + getName() + + ".getTestingMetric()': " + e); + return null; + } + + try { + testingMetric = (TestingMetric) getTestingMetric.invoke(null, null); + } catch (Exception e) { + System.err.println("Could not instantiate testing metric '" + lce.parser.name + + "': " + e + ", caused by"); + System.err.print(stackTrace(e.getCause())); + return null; + } + } else + testingMetric = new Accuracy(); + + return testingMetric; + } + + + /** + * Handles feature pre-extraction and dataset pruning under + * the assumption that pre-extraction has been called for by the source code. The two go + * hand-in-hand, as we only need to compute and store feature counts during pre-extraction + * if we are pruning. 
+ **/ + private void preExtractAndPrune() { + Lexicon.PruningPolicy pruningPolicy = new Lexicon.PruningPolicy(); + Lexicon.CountPolicy countPolicy = Lexicon.CountPolicy.none; + if (lce.pruneCountType != null) { + pruningPolicy = + lce.pruneThresholdType.value.equals("\"count\"") ? new Lexicon.PruningPolicy( + Integer.parseInt(lce.pruneThreshold.value)) + : new Lexicon.PruningPolicy( + Double.parseDouble(lce.pruneThreshold.value)); + countPolicy = + lce.pruneCountType.value.equals("\"global\"") ? Lexicon.CountPolicy.global + : Lexicon.CountPolicy.perClass; + } + + Learner preExtractLearner = learner; // Needed in case we're pruning. + Lexicon lexicon = null; // Needed for pre-extracting the test set. + // As seen below, we can always read the lexicon off disk just before + // pre-extracting the test set, but if one of the operations between + // now and then obtains the lexicon incidentally, we'll keep it here + // to avoid reading it from disk again. + + if (pruningPolicy.isNone()) { + if (lce.featuresStatus != RevisionAnalysis.UNAFFECTED) + lexicon = trainer.preExtract(exFilePath, preExtractZip); + else if (lce.pruneStatus != RevisionAnalysis.UNAFFECTED) + lexicon = learner.getLexiconDiscardCounts(); + else + trainer.fillInSizes(); + } else if (lce.featuresStatus != RevisionAnalysis.UNAFFECTED + || lce.pruneStatus != RevisionAnalysis.UNAFFECTED + && lce.previousPruneCountType == null) + preExtractLearner = trainer.preExtract(exFilePath, preExtractZip, countPolicy); + else if (lce.previousPruneCountType != null + && !lce.previousPruneCountType.equals(lce.pruneCountType)) { + if (lce.previousPruneCountType.value.equals("\"global\"")) + // implies lce.pruneCountType.equals("\"perClass\"") + preExtractLearner = trainer.preExtract(exFilePath, preExtractZip, countPolicy); + else + // lce.previousPruneCountType.value.equals("\"perClass\"") + learner.getLexicon().perClassToGlobalCounts(); + } + // else pruneThresholdType or pruneThreshold may have changed, but + // that 
does not require recounting of features. + + if (lce.featuresStatus == RevisionAnalysis.UNAFFECTED ? lce.pruneStatus != RevisionAnalysis.UNAFFECTED + : !pruningPolicy.isNone()) { + trainer.pruneDataset(exFilePath, preExtractZip, pruningPolicy, preExtractLearner); + lexicon = preExtractLearner.getLexicon(); + if (preExtractLearner == learner) + learner.setLexicon(null); + } + + if (testParser != null + && (lce.pruneStatus != RevisionAnalysis.UNAFFECTED || !(new File(testExFilePath) + .exists()))) { + if (lexicon == null) + learner.readLexiconOnDemand(classDir + getName() + ".lex"); + else { + learner.setLexicon(lexicon); + lexicon = null; // See comment below + } + + BatchTrainer preExtractor = + new BatchTrainer(learner, testParser, trainer.getProgressOutput(), + "test set: "); + preExtractor.preExtract(testExFilePath, preExtractZip, Lexicon.CountPolicy.none); + testParser = preExtractor.getParser(); + } + + // At this point, it should be the case that (lexicon == null) implies + // that the lexicon is not in memory. Above, we intentionally discard + // the lexicon when pre-extracting the test set, since that process will + // add unwanted features (since pre-extraction always happens under the + // assumption that we are training). + + // Given the above comment, we now ensure that when this learning classifier (ie, + // the one whose feature vectors we have just pre-extracted) is + // called as a feature for some other learning classifier defined in the + // same sourcefile, it will be prepared take a raw example object as + // input. 
+ String name = getName(); + HashSet dependors = (HashSet) SemanticAnalysis.dependorGraph.get(name); + if (lexicon != null && dependors.size() > 0) + learner.setLexicon(lexicon); + else + learner.readLexiconOnDemand(classDir + name + ".lex"); + } + + + /** + * Uses the various {@link ParameterSet ParameterSet}s + * in the AST to generate an array of parameter combinations representing the cross product + * of all {@link ParameterSet ParameterSet}s except the one in the + * {@link LearningClassifierExpression#rounds} field, if any. + **/ + private Learner.Parameters[] getParameterCombinations() { + Class[] paramTypes = new Class[lce.parameterSets.size()]; + Object[][] arguments = new Object[paramTypes.length][]; + int[] lengths = new int[paramTypes.length]; + int totalCombinations = 1; + + Iterator iterator = lce.parameterSets.iterator(); + for (int i = 0; i < paramTypes.length; i++) { + ParameterSet ps = (ParameterSet) iterator.next(); + paramTypes[i] = ps.type.typeClass(); + arguments[i] = ps.toStringArray(); + lengths[i] = arguments[i].length; + totalCombinations *= lengths[i]; + } + + for (int i = 0; i < arguments.length; i++) { + Class t = paramTypes[i]; + if (t.isPrimitive()) { + if (t.getName().equals("int")) + for (int j = 0; j < lengths[i]; ++j) + arguments[i][j] = new Integer((String) arguments[i][j]); + else if (t.getName().equals("long")) + for (int j = 0; j < lengths[i]; ++j) + arguments[i][j] = new Long((String) arguments[i][j]); + else if (t.getName().equals("short")) + for (int j = 0; j < lengths[i]; ++j) + arguments[i][j] = new Short((String) arguments[i][j]); + else if (t.getName().equals("double")) + for (int j = 0; j < lengths[i]; ++j) + arguments[i][j] = new Double((String) arguments[i][j]); + else if (t.getName().equals("float")) + for (int j = 0; j < lengths[i]; ++j) + arguments[i][j] = new Float((String) arguments[i][j]); + else if (t.getName().equals("boolean")) + for (int j = 0; j < lengths[i]; ++j) + arguments[i][j] = new Boolean((String) 
arguments[i][j]); + } + } + + Constructor c = null; + try { + c = parametersClass.getConstructor(paramTypes); + } catch (Exception e) { + System.err.println("LBJava ERROR: Can't find a parameter tuning constructor for " + + getName() + ".Parameters."); + e.printStackTrace(); + System.exit(1); + } + + Learner.Parameters[] result = new Learner.Parameters[totalCombinations]; + int[] I = new int[paramTypes.length]; + Object[] a = new Object[paramTypes.length]; + int i = 0; + + do { + for (int j = 0; j < a.length; ++j) + a[j] = arguments[j][I[j]]; + try { + result[i++] = (Learner.Parameters) c.newInstance(a); + } catch (Exception e) { + System.err.println("LBJava ERROR: Can't instantiate " + getName() + + ".Parameters:"); + e.printStackTrace(); + System.exit(1); + } + } while (increment(I, lengths)); + + return result; + } + + + /** + * Determines the best parameters to use when training the learner, under + * the assumption that {@link ParameterSet}s were present. Here, "best" means the parameters + * that did the best out of some small set of particular parameter settings. + **/ + private Learner.Parameters tune() { + Learner.Parameters[] parameterCombinations = getParameterCombinations(); + int[] rounds = null; + if (lce.rounds != null) { + if (lce.rounds instanceof ParameterSet) + rounds = ((ParameterSet) lce.rounds).toSortedIntArray(); + else + rounds = new int[] {Integer.parseInt(((Constant) lce.rounds).value)}; + } else + rounds = new int[] {1}; + + if (lce.K != null) { + int k = Integer.parseInt(lce.K.value); + double alpha = Double.parseDouble(lce.alpha.value); + return trainer.tune(parameterCombinations, rounds, k, lce.splitPolicy, alpha, + testingMetric); + } + + return trainer.tune(parameterCombinations, rounds, testParser, testingMetric); + } + + + /** Performs the training and then generates the new code. 
*/ + public void run() { + boolean tuningParameters = + lce.parameterSets.size() > 0 || lce.rounds != null + && lce.rounds instanceof ParameterSet; + + if (!lce.onlyCodeGeneration) { + // If there's a "from" clause, train. + try { + learner.beginTraining(); + try { + if (lce.parser != null) { + System.out.println("Training " + getName()); + if (preExtract) { + preExtractAndPrune(); + System.gc(); + } else + learner.saveLexicon(); + int trainingRounds = 1; + + if (tuningParameters) { + String parametersPath = getName(); + if (Main.classDirectory != null) + parametersPath = + Main.classDirectory + File.separator + parametersPath; + parametersPath += ".p"; + + Learner.Parameters bestParameters = tune(); + trainingRounds = bestParameters.rounds; + Learner.writeParameters(bestParameters, parametersPath); + System.out.println(" " + getName() + + ": Training on entire training set"); + } else { + if (lce.rounds != null) + trainingRounds = Integer.parseInt(((Constant) lce.rounds).value); + + if (lce.K != null) { + int[] rounds = {trainingRounds}; + int k = Integer.parseInt(lce.K.value); + double alpha = Double.parseDouble(lce.alpha.value); + trainer.crossValidation(rounds, k, lce.splitPolicy, alpha, + testingMetric, true); + System.out.println(" " + getName() + + ": Training on entire training set"); + } + } + trainer.train(lce.startingRound, trainingRounds); + } else + learner.saveLexicon(); // Writes .lex even if lexicon is empty. + } finally { + learner.doneTraining(); + } + + if (lce.parser != null && testParser != null) { + System.out.println("Testing " + getName()); + new Accuracy(true).test(learner, learner.getLabeler(), testParser); + } + + // save the final model. + System.out.println("Writing " + getName()); + learner.save(); // Doesn't write .lex if lexicon is empty. 
+ } catch (Exception e) { + System.err.println("LBJava ERROR: Exception while training " + getName() + ":"); + e.printStackTrace(); + fatalError = true; + return; + } + + // Set learner's static instance field to the newly learned instance. + Field field = null; + try { + field = learnerClass.getField("instance"); + } catch (Exception e) { + System.err + .println("Can't access " + learnerClass + "'s 'instance' field: " + e); + System.exit(1); + } + + try { + field.set(null, learner); + } catch (Exception e) { + System.err.println("Can't set " + learnerClass + "'s 'instance' field: " + e); + System.exit(1); + } + } else + System.out.println("Generating code for " + lce.name); + + // Write the new code. + PrintStream out = TranslateToJava.open(lce); + if (out == null) + return; + + out.println(TranslateToJava.disclaimer); + out.print("// "); + TranslateToJava.compressAndPrint(lce.shallow(), out); + out.println("\n"); + + ast.symbolTable.generateHeader(out); + + if (lce.cacheIn != null) { + String f = lce.cacheIn.toString(); + boolean cachedInMap = f.equals(ClassifierAssignment.mapCache); + if (cachedInMap) + out.println("import java.util.WeakHashMap;"); + } + + out.println("\n"); + if (lce.comment != null) + out.println(lce.comment); + + out.println("\n\npublic class " + getName() + " extends " + lce.learnerName); + out.println("{"); + out.println(" private static java.net.URL _lcFilePath;"); + out.println(" private static java.net.URL _lexFilePath;"); + if (tuningParameters) + out.println(" private static java.net.URL parametersPath;"); + out.println(); + + out.println(" static"); + out.println(" {"); + out.println(" _lcFilePath = " + getName() + ".class.getResource(\"" + getName() + + ".lc\");"); + out.println(" _lexFilePath = " + getName() + ".class.getResource(\"" + getName() + + ".lex\");"); + + if (tuningParameters) { + out.println("\n parametersPath = " + getName() + ".class.getResource(\"" + + getName() + ".p\");\n"); + + out.println(" if (parametersPath == 
null)"); + out.println(" {"); + out.println(" throw new RuntimeException(\"ERROR: Can't locate " + getName() + + ".p in the class path.\");"); + out.println(" }"); + } + out.println(" }\n"); + + out.println(" private static void loadInstance()"); + out.println(" {"); + + out.println(" if (instance == null)"); + out.println(" {"); + + + out.println(" if (_lcFilePath == null)"); + out.println(" {"); + out.println(" throw new RuntimeException(\"Can't locate " + getName() + + ".lc in the class path.\");"); + out.println(" }"); + + out.println(" if (_lexFilePath == null)"); + out.println(" {"); + out.println(" throw new RuntimeException(\"Can't locate " + getName() + + ".lc in the class path.\");"); + out.println(" }"); + + out.println(" instance = (" + getName() + ") Learner.readLearner(_lcFilePath);"); + out.println(" instance.readLexiconOnDemand(_lexFilePath);"); + out.println(" }"); + out.println(" }\n"); + + if (tuningParameters) { + out.println(" private static " + lce.learnerName + ".Parameters bestParameters;\n"); + + out.println(" public static " + lce.learnerName + + ".Parameters getBestParameters()"); + out.println(" {"); + out.println(" if (bestParameters == null)"); + out.println(" bestParameters = (" + lce.learnerName + + ".Parameters) Learner.readParameters(parametersPath);"); + out.println(" return bestParameters;"); + out.println(" }\n"); + } + + if (exFilePath != null && lce.featuresStatus != RevisionAnalysis.UNAFFECTED + && new File(exFilePath).exists()) + out.println(" public static Parser getParser() { return new " + + "edu.illinois.cs.cogcomp.lbjava.parse.ArrayFileParser(\"" + + FileUtils.escapeFilePath(new File(exFilePath).getAbsolutePath()) + + "\"); }"); + else + out.println(" public static Parser getParser() { return " + lce.parser + "; }"); + + if (testExFilePath != null && lce.featuresStatus != RevisionAnalysis.UNAFFECTED + && new File(testExFilePath).exists()) + out.println(" public static Parser getTestParser() { return new " + + 
"edu.illinois.cs.cogcomp.lbjava.parse.ArrayFileParser(\"" + + FileUtils.escapeFilePath(new File(testExFilePath).getAbsolutePath()) + + "\"); }"); + else + out.println(" public static Parser getTestParser() { return " + lce.testParser + + "; }\n"); + + TranslateToJava.generateLearnerBody(out, lce); + + if (lce.parameterSets.size() > 0) { + out.println(); + out.println(" public static class Parameters extends " + lce.learnerName + + ".Parameters"); + out.println(" {"); + out.println(" public Parameters() { super(getBestParameters()); }"); + out.println(" }"); + } + + out.println("}\n"); + out.close(); + + executeReadyThreads(getName()); + } + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/TranslateToJava.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/TranslateToJava.java index ba83f441..fe910e4f 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/TranslateToJava.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/TranslateToJava.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava; @@ -107,4438 +104,4400 @@ /** - * This pass generates Java code from an AST, but does not perform any - * training. + * This pass generates Java code from an AST, but does not perform any training. * * @author Nick Rizzolo **/ -public class TranslateToJava extends Pass -{ - /** The commented message appearing at the top of all generated files. 
*/ - public static final String disclaimer = - "// Modifying this comment will cause the next execution of LBJava to " - + "overwrite this file."; - /** - * This array contains string descriptions of methods that don't need to be - * overridden when generating code for a learner. - **/ - private static final String[] noOverride = - { - "native public int hashCode()", - "public boolean equals(java.lang.Object a0)", - "public double realValue(java.lang.Object a0)", - "public double[] realValueArray(java.lang.Object a0)", - "public edu.illinois.cs.cogcomp.lbjava.classify.Feature featureValue(java.lang.Object a0)", - "public edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector classify(java.lang.Object a0)", - "public edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector[] classify(java.lang.Object[] a0)", - "public java.lang.Object clone()", - "public java.lang.String discreteValue(java.lang.Object a0)", - "public java.lang.String getInputType()", - "public java.lang.String getOutputType()", - "public java.lang.String toString()", - "public java.lang.String[] allowableValues()", - "public java.lang.String[] discreteValueArray(java.lang.Object a0)", - "public java.util.LinkedList getCompositeChildren()", - "public short valueIndexOf(java.lang.String a0)", - "public void learn(java.lang.Object a0)", - "public void learn(java.lang.Object[] a0)", - "public void unclone()" - }; - /** - * The prefix of the name of the temporary variable in which a constraint's - * computed value should be stored. This variable is only used when - * {@link #constraintMode} is unset. - **/ - private static final String constraintResult = "LBJava$constraint$result$"; - - - /** Used for collecting the string representation of a method body. */ - private StringBuffer methodBody; - /** The indent level when collecting the method body. */ - private int indent; - /** Lets AST children know about the node they are contained in. 
*/ - private CodeGenerator currentCG; - /** - * Lets {@link VariableDeclaration}s know if they are contained in the - * initialization portion of the header of a for loop. - **/ - private boolean forInit; - /** - * Filenames that have been generated during the processing of one - * statement. - **/ - private HashSet files; - /** - * When this flag is set, code generated for constraint expressions will - * create {@link edu.illinois.cs.cogcomp.lbjava.infer.Constraint} objects rather than computing the - * value of the constraint expression. - **/ - private boolean constraintMode; - /** - * This variable is appended to the {@link #constraintResult} variable to - * form the name of a new temporary variable. - **/ - private int constraintResultNumber; - /** The current constraint result variable name. */ - private String constraintResultName; - /** - * Lets AST children know the index that a given quantification variable - * occupies in an {@link EqualityArgumentReplacer}'s vector; the keys of - * the map are names of quantification variables, and the values are - * Integers. - **/ - private HashMap quantificationVariables; - /** - * Lets AST children know the index that a given context variable occupies - * in an {@link EqualityArgumentReplacer}'s vector; the keys of the map are - * names of context variables, and the values are Integers. - **/ - private HashMap contextVariables; - /** - * Lets AST nodes know how deeply nested inside - * {@link QuantifiedConstraintExpression}s they are. - **/ - private int quantifierNesting; - - /** - * Associates an AST with this pass. - * - * @param ast The AST to associate with this pass. - **/ - public TranslateToJava(AST ast) { - super(ast); - methodBody = new StringBuffer(); - files = new HashSet(); - } - - - /** - * Uses the current value of {@link #indent} to append the appropriate - * number of spaces to {@link #methodBody}. 
- **/ - private void appendIndent() { - for (int i = 0; i < indent; ++i) methodBody.append(" "); - } - - - /** - * Uses the current value of {@link #indent} to append the appropriate - * number of spaces to {@link #methodBody}, followed by the argument - * string. - * - * @param text The text to append after the indent. - **/ - private void appendIndent(String text) { - appendIndent(); - methodBody.append(text); - } - - - /** - * Appends the current indent via {@link #appendIndent()}, then appends the - * argument string and a newline. - * - * @param text The text to append as a new line. - **/ - private void appendLine(String text) { - appendIndent(text); - methodBody.append("\n"); - } - - - /** - * Sets the current code generator for this translator. - * - * @param cg The new current code generator. - **/ - public void setCurrentCG(CodeGenerator cg) { currentCG = cg; } - - - /** - * Sets the indentation level. - * - * @param i The new indentation level. - **/ - public void setIndent(int i) { indent = i; } - - - /** - * Gives access to the {@link #methodBody} member variable so that this - * pass can be invoked selectively on some subset of a method body. - * - * @return The contents of {@link #methodBody} in a String. - **/ - public String getMethodBody() { return methodBody.toString(); } - - - /** - * Create a PrintStream that writes to a Java file - * corresponding to the specified {@link CodeGenerator}. - * - * @param node The code producing node. - * @return The stream, or null if it couldn't be created. - **/ - public static PrintStream open(CodeGenerator node) { - return open(node.getName() + ".java"); - } - - - /** - * Create a PrintStream that writes to the specified file. - * - * @param name The name of the file to open. - * @return The stream, or null if it couldn't be created. 
- **/ - public static PrintStream open(String name) { - if (Main.generatedSourceDirectory != null) { - name = Main.generatedSourceDirectory + File.separator + name; - - String[] directories = name.split("\\" + File.separator + "+"); - File directory = new File(directories[0]); - - for (int i = 1; i < directories.length - 1; ++i) { - directory = new File(directory + File.separator + directories[i]); - - if (!directory.exists() && !directory.mkdir()) { - System.err.println("Can't create directory '" + directory + "'."); - return null; - } - } - } - else if (Main.sourceDirectory != null) - name = Main.sourceDirectory + File.separator + name; - - Main.fileNames.add(name); - - PrintStream out = null; - - try { out = new PrintStream(new FileOutputStream(name)); } - catch (Exception e) { - System.err.println("Can't open '" + name + "' for output: " + e); - } - - return out; - } - - - /** - * Generate the code that overrides certain methods of - * {@link edu.illinois.cs.cogcomp.lbjava.learn.Learner} to check types and call themselves on the - * unique instance; also declares other methods and fields of the - * classifier's implementation. The explicitly overridden methods are: - *

    - *
  • getInputType()
  • - *
  • getOutputType()
  • - *
  • allowableValues()
  • - *
  • learn(Object)
  • - *
  • learn(Object[])
  • - *
  • classify(Object)
  • - *
  • classify(Object[])
  • - *
- * - * In addition, any methods defined by any subclass of - * {@link edu.illinois.cs.cogcomp.lbjava.learn.Learner} down to the super class of this learner are - * overridden to call the super class's implementation on the unique - * instance. - * - * @param out The stream to write to. - * @param lce The {@link LearningClassifierExpression} representing the - * learner. - **/ - @SuppressWarnings("rawtypes") - public static void generateLearnerBody(PrintStream out, - LearningClassifierExpression lce) { - String lceName = lce.name.toString(); - String field = null; - boolean cachedInMap = false; - - out.println(" public static boolean isTraining;"); - out.println(" public static " + lceName + " instance;\n"); - - out.println(" public static " + lceName + " getInstance()"); - out.println(" {"); - out.println(" loadInstance();"); - out.println(" return instance;"); - out.println(" }\n"); - - if (lce.cacheIn != null) { - field = lce.cacheIn.toString(); - cachedInMap = field.equals(ClassifierAssignment.mapCache); - if (cachedInMap) - out.println(" private static final WeakHashMap __valueCache " - + "= new WeakHashMap();\n"); - } - - HashSet invoked = (HashSet) SemanticAnalysis.invokedGraph.get(lceName); - - if (invoked != null && invoked.size() > 0) { - for (Iterator I = invoked.iterator(); I.hasNext(); ) { - String name = (String) I.next(); - String nameNoDots = name.replace('.', '$'); - out.println(" private static final " + name + " __" + nameNoDots - + " = new " + name + "();"); - } - - out.println(); - } - - if (lce.parameterSets.size() == 0) { - out.println(" private " + lceName + "(boolean b)"); - out.println(" {"); - out.print(" super("); - - if (lce.learnerParameterBlock != null) out.print("new Parameters()"); - else { - if (lce.learnerConstructor.arguments.size() > 0) { - out.print(lce.learnerConstructor.arguments); - if (lce.attributeString.length() != 0) out.print(", "); - } - - if (lce.attributeString.length() != 0) out.print("attributeString"); - } - - 
out.println(");"); - out.println(" containingPackage = \"" - + AST.globalSymbolTable.getPackage() + "\";"); - out.println(" name = \"" + lceName + "\";"); - out.println(" setEncoding(" + lce.featureEncoding + ");"); - if (lce.labeler != null) - out.println(" setLabeler(new " + lce.labeler.name + "());"); +public class TranslateToJava extends Pass { + /** The commented message appearing at the top of all generated files. */ + public static final String disclaimer = + "// Modifying this comment will cause the next execution of LBJava to " + + "overwrite this file."; + /** + * This array contains string descriptions of methods that don't need to be overridden when + * generating code for a learner. + **/ + private static final String[] noOverride = + { + "native public int hashCode()", + "public boolean equals(java.lang.Object a0)", + "public double realValue(java.lang.Object a0)", + "public double[] realValueArray(java.lang.Object a0)", + "public edu.illinois.cs.cogcomp.lbjava.classify.Feature featureValue(java.lang.Object a0)", + "public edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector classify(java.lang.Object a0)", + "public edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector[] classify(java.lang.Object[] a0)", + "public java.lang.Object clone()", + "public java.lang.String discreteValue(java.lang.Object a0)", + "public java.lang.String getInputType()", + "public java.lang.String getOutputType()", + "public java.lang.String toString()", + "public java.lang.String[] allowableValues()", + "public java.lang.String[] discreteValueArray(java.lang.Object a0)", + "public java.util.LinkedList getCompositeChildren()", + "public short valueIndexOf(java.lang.String a0)", + "public void learn(java.lang.Object a0)", + "public void learn(java.lang.Object[] a0)", "public void unclone()"}; + /** + * The prefix of the name of the temporary variable in which a constraint's computed value + * should be stored. This variable is only used when {@link #constraintMode} is unset. 
+ **/ + private static final String constraintResult = "LBJava$constraint$result$"; + + + /** Used for collecting the string representation of a method body. */ + private StringBuffer methodBody; + /** The indent level when collecting the method body. */ + private int indent; + /** Lets AST children know about the node they are contained in. */ + private CodeGenerator currentCG; + /** + * Lets {@link VariableDeclaration}s know if they are contained in the initialization portion of + * the header of a for loop. + **/ + private boolean forInit; + /** + * Filenames that have been generated during the processing of one statement. + **/ + private HashSet files; + /** + * When this flag is set, code generated for constraint expressions will create + * {@link edu.illinois.cs.cogcomp.lbjava.infer.Constraint} objects rather than computing the + * value of the constraint expression. + **/ + private boolean constraintMode; + /** + * This variable is appended to the {@link #constraintResult} variable to form the name of a new + * temporary variable. + **/ + private int constraintResultNumber; + /** The current constraint result variable name. */ + private String constraintResultName; + /** + * Lets AST children know the index that a given quantification variable occupies in an + * {@link EqualityArgumentReplacer}'s vector; the keys of the map are names of quantification + * variables, and the values are Integers. + **/ + private HashMap quantificationVariables; + /** + * Lets AST children know the index that a given context variable occupies in an + * {@link EqualityArgumentReplacer}'s vector; the keys of the map are names of context + * variables, and the values are Integers. + **/ + private HashMap contextVariables; + /** + * Lets AST nodes know how deeply nested inside {@link QuantifiedConstraintExpression}s they + * are. + **/ + private int quantifierNesting; + + /** + * Associates an AST with this pass. + * + * @param ast The AST to associate with this pass. 
+ **/ + public TranslateToJava(AST ast) { + super(ast); + methodBody = new StringBuffer(); + files = new HashSet(); + } + + + /** + * Uses the current value of {@link #indent} to append the appropriate number of spaces to + * {@link #methodBody}. + **/ + private void appendIndent() { + for (int i = 0; i < indent; ++i) + methodBody.append(" "); + } + + + /** + * Uses the current value of {@link #indent} to append the appropriate number of spaces to + * {@link #methodBody}, followed by the argument string. + * + * @param text The text to append after the indent. + **/ + private void appendIndent(String text) { + appendIndent(); + methodBody.append(text); + } + + + /** + * Appends the current indent via {@link #appendIndent()}, then appends the argument string and + * a newline. + * + * @param text The text to append as a new line. + **/ + private void appendLine(String text) { + appendIndent(text); + methodBody.append("\n"); + } + + + /** + * Sets the current code generator for this translator. + * + * @param cg The new current code generator. + **/ + public void setCurrentCG(CodeGenerator cg) { + currentCG = cg; + } + + + /** + * Sets the indentation level. + * + * @param i The new indentation level. + **/ + public void setIndent(int i) { + indent = i; + } + + + /** + * Gives access to the {@link #methodBody} member variable so that this pass can be invoked + * selectively on some subset of a method body. + * + * @return The contents of {@link #methodBody} in a String. + **/ + public String getMethodBody() { + return methodBody.toString(); + } + + + /** + * Create a PrintStream that writes to a Java file corresponding to the specified + * {@link CodeGenerator}. + * + * @param node The code producing node. + * @return The stream, or null if it couldn't be created. + **/ + public static PrintStream open(CodeGenerator node) { + return open(node.getName() + ".java"); + } + + + /** + * Create a PrintStream that writes to the specified file. 
+ * + * @param name The name of the file to open. + * @return The stream, or null if it couldn't be created. + **/ + public static PrintStream open(String name) { + if (Main.generatedSourceDirectory != null) { + name = Main.generatedSourceDirectory + File.separator + name; + + String[] directories = name.split("\\" + File.separator + "+"); + File directory = new File(directories[0]); + + for (int i = 1; i < directories.length - 1; ++i) { + directory = new File(directory + File.separator + directories[i]); + + if (!directory.exists() && !directory.mkdir()) { + System.err.println("Can't create directory '" + directory + "'."); + return null; + } + } + } else if (Main.sourceDirectory != null) + name = Main.sourceDirectory + File.separator + name; + + Main.fileNames.add(name); + + PrintStream out = null; + + try { + out = new PrintStream(new FileOutputStream(name)); + } catch (Exception e) { + System.err.println("Can't open '" + name + "' for output: " + e); + } + + return out; + } + + + /** + * Generate the code that overrides certain methods of + * {@link edu.illinois.cs.cogcomp.lbjava.learn.Learner} to check types and call themselves on + * the unique instance; also declares other methods and fields of the classifier's + * implementation. The explicitly overridden methods are: + *
    + *
  • getInputType()
  • + *
  • getOutputType()
  • + *
  • allowableValues()
  • + *
  • learn(Object)
  • + *
  • learn(Object[])
  • + *
  • classify(Object)
  • + *
  • classify(Object[])
  • + *
+ * + * In addition, any methods defined by any subclass of + * {@link edu.illinois.cs.cogcomp.lbjava.learn.Learner} down to the super class of this learner + * are overridden to call the super class's implementation on the unique instance. + * + * @param out The stream to write to. + * @param lce The {@link LearningClassifierExpression} representing the learner. + **/ + @SuppressWarnings("rawtypes") + public static void generateLearnerBody(PrintStream out, LearningClassifierExpression lce) { + String lceName = lce.name.toString(); + String field = null; + boolean cachedInMap = false; + + out.println(" public static boolean isTraining;"); + out.println(" public static " + lceName + " instance;\n"); + + out.println(" public static " + lceName + " getInstance()"); + out.println(" {"); + out.println(" loadInstance();"); + out.println(" return instance;"); + out.println(" }\n"); + + if (lce.cacheIn != null) { + field = lce.cacheIn.toString(); + cachedInMap = field.equals(ClassifierAssignment.mapCache); + if (cachedInMap) + out.println(" private static final WeakHashMap __valueCache " + + "= new WeakHashMap();\n"); + } + + HashSet invoked = (HashSet) SemanticAnalysis.invokedGraph.get(lceName); + + if (invoked != null && invoked.size() > 0) { + for (Iterator I = invoked.iterator(); I.hasNext();) { + String name = (String) I.next(); + String nameNoDots = name.replace('.', '$'); + out.println(" private static final " + name + " __" + nameNoDots + " = new " + + name + "();"); + } + + out.println(); + } + + if (lce.parameterSets.size() == 0) { + out.println(" private " + lceName + "(boolean b)"); + out.println(" {"); + out.print(" super("); + + if (lce.learnerParameterBlock != null) + out.print("new Parameters()"); + else { + if (lce.learnerConstructor.arguments.size() > 0) { + out.print(lce.learnerConstructor.arguments); + if (lce.attributeString.length() != 0) + out.print(", "); + } + + if (lce.attributeString.length() != 0) + out.print("attributeString"); + } + + 
out.println(");"); + out.println(" containingPackage = \"" + AST.globalSymbolTable.getPackage() + "\";"); + out.println(" name = \"" + lceName + "\";"); + out.println(" setEncoding(" + lce.featureEncoding + ");"); + if (lce.labeler != null) + out.println(" setLabeler(new " + lce.labeler.name + "());"); if (isField(lce.extractor)) { - String fieldClass = AST.globalSymbolTable.classForName(lce.extractor.name).getSimpleName(); + String fieldClass = + AST.globalSymbolTable.classForName(lce.extractor.name).getSimpleName(); out.println(" setExtractor(" + fieldClass + "." + lce.extractor.name + ");"); - } - else - out.println(" setExtractor(new " + lce.extractor.name + "());"); - out.println(" isClone = false;"); - out.println(" }\n"); - } - - out.println(" public static TestingMetric getTestingMetric() { return " - + lce.testingMetric + "; }\n"); - - if (lce.singleExampleCache) { - out.println( - " private static ThreadLocal __cache = new ThreadLocal(){ };"); - out.println(" private static ThreadLocal __exampleCache = " - + "new ThreadLocal(){ };"); - out.println(" public static void clearCache() { __exampleCache = new " - + "ThreadLocal() { }; }"); - } - - if (lce.attributeString.length() != 0 - && lce.learnerParameterBlock == null) - out.println(" private static final String attributeString = \"" - + lce.attributeString + "\";"); - - out.println("\n private boolean isClone;\n"); - - out.println(" public void unclone() { isClone = false; }\n"); - - out.println(" public " + lceName + "()"); - out.println(" {"); - String fqName = AST.globalSymbolTable.getPackage(); - if (fqName.length() > 0) fqName += "."; - fqName += lceName; - out.println(" super(\"" + fqName + "\");"); - out.println(" isClone = true;"); - out.println(" }\n"); - - //XXX Changed to add ability to read model/lexicon from classpath - String tabs = "\t"; - out.println(tabs + "public " + lceName - + "(String modelPath, String lexiconPath) { " - + "this(new Parameters(), modelPath, lexiconPath); }\n"); - 
out.println(tabs + "public " + lceName - + "(Parameters p, String modelPath, String lexiconPath) {"); - tabs = "\t\t"; - out.println(tabs + "super(p);"); - out.println(tabs + "try {"); - tabs = "\t\t\t"; - out.println(tabs + "lcFilePath = new java.net.URL(\"file:\" + " - + "modelPath);"); - out.println(tabs + "lexFilePath = new java.net.URL(\"file:\" + " - + "lexiconPath);"); - tabs = "\t\t"; - out.println(tabs + "}"); - out.println(tabs + "catch (Exception e) {"); - tabs = "\t\t\t"; - out.println(tabs + "System.err.println(\"ERROR: Can't create model or " - + "lexicon URL: \" + e);"); - out.println(tabs + "e.printStackTrace();"); - out.println(tabs + "System.exit(1);"); - tabs = "\t\t"; - out.println(tabs + "}\n"); - out.println(tabs + "if (new java.io.File(modelPath).exists()) {"); - tabs = "\t\t\t"; - out.println(tabs + "readModel(lcFilePath);"); - out.println(tabs + "readLexiconOnDemand(lexFilePath);"); - tabs = "\t\t"; - out.println(tabs + "}"); - out.println(tabs + "else if (IOUtilities.existsInClasspath(" + lceName + ".class, modelPath)) {"); - tabs = "\t\t\t"; - out.println(tabs + "readModel(IOUtilities.loadFromClasspath(" + lceName + ".class, modelPath));"); - out.println(tabs + "readLexiconOnDemand(IOUtilities.loadFromClasspath(" + lceName + ".class, lexiconPath));"); - tabs = "\t\t"; - out.println(tabs + "}"); - out.println(tabs + "else {"); - tabs = "\t\t\t"; - out.println(tabs + "containingPackage = \"" - + AST.globalSymbolTable.getPackage() + "\";"); - out.println(tabs + "name = \"" + lceName + "\";"); - out.println(tabs + "setLabeler(new " + lce.labeler.name + "());"); + } else + out.println(" setExtractor(new " + lce.extractor.name + "());"); + out.println(" isClone = false;"); + out.println(" }\n"); + } + + out.println(" public static TestingMetric getTestingMetric() { return " + + lce.testingMetric + "; }\n"); + + if (lce.singleExampleCache) { + out.println(" private static ThreadLocal __cache = new ThreadLocal(){ };"); + out.println(" private 
static ThreadLocal __exampleCache = " + "new ThreadLocal(){ };"); + out.println(" public static void clearCache() { __exampleCache = new " + + "ThreadLocal() { }; }"); + } + + if (lce.attributeString.length() != 0 && lce.learnerParameterBlock == null) + out.println(" private static final String attributeString = \"" + lce.attributeString + + "\";"); + + out.println("\n private boolean isClone;\n"); + + out.println(" public void unclone() { isClone = false; }\n"); + + out.println(" public " + lceName + "()"); + out.println(" {"); + String fqName = AST.globalSymbolTable.getPackage(); + if (fqName.length() > 0) + fqName += "."; + fqName += lceName; + out.println(" super(\"" + fqName + "\");"); + out.println(" isClone = true;"); + out.println(" }\n"); + + // XXX Changed to add ability to read model/lexicon from classpath + String tabs = " "; + out.println(tabs + "public " + lceName + "(String modelPath, String lexiconPath) { " + + "this(new Parameters(), modelPath, lexiconPath); }\n"); + out.println(tabs + "public " + lceName + + "(Parameters p, String modelPath, String lexiconPath) {"); + tabs = " "; + out.println(tabs + "super(p);"); + out.println(tabs + "try {"); + tabs = " "; + out.println(tabs + "lcFilePath = new java.net.URL(\"file:\" + " + "modelPath);"); + out.println(tabs + "lexFilePath = new java.net.URL(\"file:\" + " + "lexiconPath);"); + tabs = " "; + out.println(tabs + "}"); + out.println(tabs + "catch (Exception e) {"); + tabs = " "; + out.println(tabs + "System.err.println(\"ERROR: Can't create model or " + + "lexicon URL: \" + e);"); + out.println(tabs + "e.printStackTrace();"); + out.println(tabs + "System.exit(1);"); + tabs = " "; + out.println(tabs + "}\n"); + out.println(tabs + "java.io.File modelfile = new java.io.File(modelPath);\n"); + out.println(tabs + "if (modelfile.exists()) {"); + tabs = " "; + out.println(tabs + "System.out.println(\"Model file read from \"+modelfile.getAbsolutePath());"); + out.println(tabs + "readModel(lcFilePath);"); + 
out.println(tabs + "readLexiconOnDemand(lexFilePath);"); + tabs = " "; + out.println(tabs + "}"); + out.println(tabs + "else if (IOUtilities.existsInClasspath(" + lceName + + ".class, modelPath)) {"); + tabs = " "; + out.println(tabs + "System.out.println(\"Model file \"+modelfile.getAbsolutePath()+\" located in a jar file\");"); + out.println(tabs + "readModel(IOUtilities.loadFromClasspath(" + lceName + + ".class, modelPath));"); + out.println(tabs + "readLexiconOnDemand(IOUtilities.loadFromClasspath(" + lceName + + ".class, lexiconPath));"); + tabs = " "; + out.println(tabs + "}"); + out.println(tabs + "else {"); + tabs = " "; + out.println(tabs + "containingPackage = \"" + AST.globalSymbolTable.getPackage() + "\";"); + out.println(tabs + "name = \"" + lceName + "\";"); + out.println(tabs + "setLabeler(new " + lce.labeler.name + "());"); if (isField(lce.extractor)) { - String fieldClass = AST.globalSymbolTable.classForName(lce.extractor.name).getSimpleName(); + String fieldClass = + AST.globalSymbolTable.classForName(lce.extractor.name).getSimpleName(); out.println(tabs + "setExtractor(" + fieldClass + "." 
+ lce.extractor.name + ");"); - } - else + } else out.println(tabs + "setExtractor(new " + lce.extractor.name + "());"); - tabs = "\t\t"; - out.println(tabs + "}\n"); - out.println(tabs + "isClone = false;"); - tabs = "\t"; - out.println(tabs + "}\n"); - - Type input = lce.argument.getType(); - String inputString = input.toString(); - int line = lce.line + 1; - - typeReturningMethods(out, input, lce.returnType); - - out.println("\n public void learn(Object example)"); - out.println(" {"); - out.println(" if (isClone)"); - out.println(" {"); - out.print( - generateTypeChecking(" ", lceName, "Classifier", false, - inputString, lce.line, "example", true)); - out.println(" loadInstance();"); - out.println(" instance.learn(example);"); - out.println(" return;"); - out.println(" }\n"); - - out.println(" if (example instanceof Object[])"); - out.println(" {"); - out.println(" Object[] a = (Object[]) example;"); - out.println(" if (a[0] instanceof int[])"); - out.println(" {"); - out.println(" super.learn((int[]) a[0], (double[]) a[1], (int[]) " - + "a[2], (double[]) a[3]);"); - out.println(" return;"); - out.println(" }"); - out.println(" }\n"); - - out.println(" super.learn(example);"); - out.println(" }\n"); - - out.println(" public void learn(Object[] examples)"); - out.println(" {"); - out.println(" if (isClone)"); - out.println(" {"); - out.print( - generateTypeChecking(" ", lceName, "Classifier", true, - inputString, lce.line, "examples", true)); - out.println(" loadInstance();"); - out.println(" instance.learn(examples);"); - out.println(" return;"); - out.println(" }\n"); - - out.println(" super.learn(examples);"); - out.println(" }\n"); - - StringBuffer preamble = new StringBuffer(); - StringBuffer body = new StringBuffer(); - StringBuffer post = null; - - preamble.append( " if (isClone)\n" - + " {\n"); - preamble.append( - generateTypeChecking(" ", lceName, "Classifier", false, - inputString, lce.line, "__example", true)); - preamble.append( " loadInstance();\n" 
- + " return instance.$METHOD$(__example);\n" - + " }\n\n"); - - preamble.append( - " if (__example instanceof Object[])\n" - + " {\n" - + " Object[] a = (Object[]) __example;\n" - + " if (a[0] instanceof int[])\n" - + " return super.$METHOD$((int[]) a[0], (double[]) a[1]);\n" - + " }\n\n"); - - boolean primitive = - lce.returnType.type == ClassifierReturnType.DISCRETE - || lce.returnType.type == ClassifierReturnType.REAL; - - if (lce.evaluation == null) { - body.append(" __result = super."); - body.append(primitive ? "featureValue" : "classify"); - body.append("(__example);\n"); - } - else { - TranslateToJava translator = new TranslateToJava(null); - translator.setRoot(lce.evaluation); - translator.setCurrentCG(lce); - translator.run(); - body.append(" __result = "); - body.append(translator.getMethodBody()); - body.append(";\n"); - } - - if (lce.checkDiscreteValues) { - post = new StringBuffer(); - String variable = "__result"; - String indent = ""; - - if (!primitive) { - post.append( - " for (int __i = 0; __i < __result.featuresSize(); ++__i)\n" - + " {\n" - + " Feature __f = __result.getFeature(__i);\n"); - variable = "__f"; - indent = " "; - } - - post.append(indent); - post.append(" if ("); - post.append(variable); - post.append(".getValueIndex() == -1)\n"); - - post.append(indent); - post.append(" {\n"); - - post.append(indent); - post.append(" System.err.println(\"Classifier "); - post.append(lceName); - post.append(" defined on line "); - post.append(line); - post.append(" of "); - post.append(Main.sourceFilename); - post.append(" tried to produce a feature with value '\" + "); - post.append(variable); - post.append(".getStringValue() + \"' which is not allowable.\");\n"); - - post.append(indent); - post.append(" System.exit(1);\n"); - - post.append(indent); - post.append(" }\n"); - - if (!primitive) post.append(" }\n"); - } - - generateClassificationMethods( - out, lce, preamble.toString(), body.toString(), false, - lce.evaluation != null, post == null ? 
null : post.toString()); - - out.println("\n public FeatureVector[] classify(Object[] examples)"); - out.println(" {"); - out.println(" if (isClone)"); - out.println(" {"); - out.print( - generateTypeChecking(" ", lceName, "Classifier", true, - inputString, lce.line, "examples", true)); - out.println(" loadInstance();"); - out.println(" return instance.classify(examples);"); - out.println(" }\n"); - - out.println(" FeatureVector[] result = super.classify(examples);"); - - if (lce.checkDiscreteValues) { - out.println(" for (int i = 0; i < result.length; ++i)"); - out.println(" for (int j = 0; j < result[i].featuresSize(); ++j)"); - out.println(" {"); - out.println(" Feature f = result[i].getFeature(j);"); - out.println(" if (f.getValueIndex() == -1)"); - out.println(" {"); - out.println(" System.err.println(\"Classifier " + lceName - + " defined on line " + line + " of " + Main.sourceFilename - + " tried to produce a feature with value '\" + " - + "f.getStringValue() + \"' which is not allowable.\");"); - out.println(" System.exit(1);"); - out.println(" }"); - out.println(" }\n"); - } - - out.println(" return result;"); - out.println(" }\n"); - - String pack = AST.globalSymbolTable.getPackage(); - String fullName = pack + (pack.length() == 0 ? 
"" : ".") + lceName; - out.println(" public static void main(String[] args)"); - out.println(" {"); - out.println(" String testParserName = null;"); - out.println(" String testFile = null;"); - out.println(" Parser testParser = getTestParser();\n"); - - out.println(" try"); - out.println(" {"); - out.println(" if (!args[0].equals(\"null\"))"); - out.println(" testParserName = args[0];"); - out.println(" if (args.length > 1) testFile = args[1];\n"); - - out.println(" if (testParserName == null && testParser == null)"); - out.println(" {"); - out.println(" System.err.println(\"The \\\"testFrom\\\" clause " - + "was not used in the learning classifier expression " - + "that\");"); - out.println(" System.err.println(\"generated this classifier, so " - + "a parser and input file must be specified.\\n\");"); - out.println(" throw new Exception();"); - out.println(" }"); - out.println(" }"); - out.println(" catch (Exception e)"); - out.println(" {"); - out.println(" System.err.println(\"usage: " + fullName - + " \\\\\");"); - out.println(" System.err.println(\" [ [ ...]]\\n\");"); - out.println(" System.err.println(\" * must be the " - + "fully qualified class name of a Parser, or " - + "\\\"null\\\"\");"); - out.println(" System.err.println(\" to use the default as " - + "specified by the \\\"testFrom\\\" clause.\");"); - out.println(" System.err.println(\" * is the " - + "relative or absolute path of a file, or \\\"null\\\" " - + "to\");"); - out.println(" System.err.println(\" use the parser arguments " - + "specified by the \\\"testFrom\\\" clause. 
can also be " - + "non-\\\"null\\\" when is \\\"null\\\" (when the " - + "parser\");"); - out.println(" System.err.println(\" specified by the " - + "\\\"testFrom\\\" clause has a single string argument\");"); - out.println(" System.err.println(\" constructor) to use an " - + "alternate file.\");"); - out.println(" System.err.println(\" * A is a label " - + "(or prediction) that should not count towards\");"); - out.println(" System.err.println(\" overall precision and " - + "recall assessments.\");"); - out.println(" System.exit(1);"); - out.println(" }\n"); - - out.println(" if (testParserName == null && testFile != null && " - + "!testFile.equals(\"null\"))"); - out.println(" testParserName = testParser.getClass().getName();"); - out.println(" if (testParserName != null)"); - out.println(" testParser = " - + "edu.illinois.cs.cogcomp.lbjava.util.ClassUtils.getParser(testParserName, new " - + "Class[]{ String.class }, new String[]{ testFile });"); - out.println(" " + lceName + " classifier = new " + lceName + "();"); - out.println(" TestDiscrete tester = new TestDiscrete();"); - out.println(" for (int i = 2; i < args.length; ++i)"); - out.println(" tester.addNull(args[i]);"); - out.println(" TestDiscrete.testDiscrete(tester, classifier, " - + "classifier.getLabeler(), testParser, true, 0);"); - - out.println(" }\n"); - - generateHashingMethods(out, lceName); - - Class lceClass = - AST.globalSymbolTable.classForName(lce.learnerName.toString()); - if (lceClass == null) { - reportError(lce.line, "Could not locate class for learner '" - + lce.learnerName + "'."); - return; - } - - Method[] methods = lceClass.getMethods(); - for (int i = 0; i < methods.length; ++i) { - int modifiers = methods[i].getModifiers(); - if (Modifier.isFinal(modifiers) || Modifier.isPrivate(modifiers) - || Modifier.isProtected(modifiers) || Modifier.isStatic(modifiers)) - continue; - - Class returned = methods[i].getReturnType(); - String name = methods[i].getName(); - Class[] parameters = 
methods[i].getParameterTypes(); - - String sig = signature(methods[i], modifiers, returned, name, parameters); - if (Arrays.binarySearch(noOverride, sig) >= 0) continue; - - out.println("\n " + sig); - out.println(" {"); - out.println(" if (isClone)"); - out.println(" {"); - out.println(" loadInstance();"); - - out.print(" "); - if (!returned.equals(void.class)) out.print("return "); - out.print("instance." + name + "("); - - if (parameters.length > 0) { - out.print("a0"); - for (int j = 1; j < parameters.length; ++j) out.print(", a" + j); - } - - out.println(");"); - - if (returned.equals(void.class)) out.println(" return;"); - out.println(" }\n"); - - out.print(" "); - if (!returned.equals(void.class)) out.print("return "); - out.print("super." + name + "("); - - if (parameters.length > 0) { - out.print("a0"); - for (int j = 1; j < parameters.length; ++j) out.print(", a" + j); - } - - out.println(");"); - out.println(" }"); - } - - if (lce.parameterSets.size() == 0) { - out.println(); - out.println(" public static class Parameters extends " - + lce.learnerName + ".Parameters"); - out.println(" {"); - - if (lce.learnerParameterBlock != null) { - TranslateToJava translator = new TranslateToJava(null); - translator.setRoot(lce.learnerParameterBlock); - translator.setCurrentCG(lce); - translator.setIndent(3); - translator.run(); - out.println(" public Parameters()"); - out.println(translator.getMethodBody()); - } - else - out.println(" public Parameters() { super((" + lce.learnerName - + ".Parameters) new " + lceName - + "(false).getParameters()); }"); - out.println(" }"); - } - } + tabs = " "; + out.println(tabs + "}\n"); + out.println(tabs + "isClone = false;"); + tabs = " "; + out.println(tabs + "}\n"); + + Type input = lce.argument.getType(); + String inputString = input.toString(); + int line = lce.line + 1; + + typeReturningMethods(out, input, lce.returnType); + + out.println("\n public void learn(Object example)"); + out.println(" {"); + out.println(" if 
(isClone)"); + out.println(" {"); + out.print(generateTypeChecking(" ", lceName, "Classifier", false, inputString, + lce.line, "example", true)); + out.println(" loadInstance();"); + out.println(" instance.learn(example);"); + out.println(" return;"); + out.println(" }\n"); + + out.println(" if (example instanceof Object[])"); + out.println(" {"); + out.println(" Object[] a = (Object[]) example;"); + out.println(" if (a[0] instanceof int[])"); + out.println(" {"); + out.println(" super.learn((int[]) a[0], (double[]) a[1], (int[]) " + + "a[2], (double[]) a[3]);"); + out.println(" return;"); + out.println(" }"); + out.println(" }\n"); + + out.println(" super.learn(example);"); + out.println(" }\n"); + + out.println(" public void learn(Object[] examples)"); + out.println(" {"); + out.println(" if (isClone)"); + out.println(" {"); + out.print(generateTypeChecking(" ", lceName, "Classifier", true, inputString, + lce.line, "examples", true)); + out.println(" loadInstance();"); + out.println(" instance.learn(examples);"); + out.println(" return;"); + out.println(" }\n"); + + out.println(" super.learn(examples);"); + out.println(" }\n"); + + StringBuffer preamble = new StringBuffer(); + StringBuffer body = new StringBuffer(); + StringBuffer post = null; + + preamble.append(" if (isClone)\n" + " {\n"); + preamble.append(generateTypeChecking(" ", lceName, "Classifier", false, inputString, + lce.line, "__example", true)); + preamble.append(" loadInstance();\n" + " return instance.$METHOD$(__example);\n" + + " }\n\n"); + + preamble.append(" if (__example instanceof Object[])\n" + " {\n" + + " Object[] a = (Object[]) __example;\n" + + " if (a[0] instanceof int[])\n" + + " return super.$METHOD$((int[]) a[0], (double[]) a[1]);\n" + " }\n\n"); + + boolean primitive = + lce.returnType.type == ClassifierReturnType.DISCRETE + || lce.returnType.type == ClassifierReturnType.REAL; + + if (lce.evaluation == null) { + body.append(" __result = super."); + body.append(primitive ? 
"featureValue" : "classify"); + body.append("(__example);\n"); + } else { + TranslateToJava translator = new TranslateToJava(null); + translator.setRoot(lce.evaluation); + translator.setCurrentCG(lce); + translator.run(); + body.append(" __result = "); + body.append(translator.getMethodBody()); + body.append(";\n"); + } + + if (lce.checkDiscreteValues) { + post = new StringBuffer(); + String variable = "__result"; + String indent = ""; + + if (!primitive) { + post.append(" for (int __i = 0; __i < __result.featuresSize(); ++__i)\n" + + " {\n" + " Feature __f = __result.getFeature(__i);\n"); + variable = "__f"; + indent = " "; + } + + post.append(indent); + post.append(" if ("); + post.append(variable); + post.append(".getValueIndex() == -1)\n"); + + post.append(indent); + post.append(" {\n"); + + post.append(indent); + post.append(" System.err.println(\"Classifier "); + post.append(lceName); + post.append(" defined on line "); + post.append(line); + post.append(" of "); + post.append(Main.sourceFilename); + post.append(" tried to produce a feature with value '\" + "); + post.append(variable); + post.append(".getStringValue() + \"' which is not allowable.\");\n"); + + post.append(indent); + post.append(" System.exit(1);\n"); + + post.append(indent); + post.append(" }\n"); + + if (!primitive) + post.append(" }\n"); + } + + generateClassificationMethods(out, lce, preamble.toString(), body.toString(), false, + lce.evaluation != null, post == null ? 
null : post.toString()); + + out.println("\n public FeatureVector[] classify(Object[] examples)"); + out.println(" {"); + out.println(" if (isClone)"); + out.println(" {"); + out.print(generateTypeChecking(" ", lceName, "Classifier", true, inputString, + lce.line, "examples", true)); + out.println(" loadInstance();"); + out.println(" return instance.classify(examples);"); + out.println(" }\n"); + + out.println(" FeatureVector[] result = super.classify(examples);"); + + if (lce.checkDiscreteValues) { + out.println(" for (int i = 0; i < result.length; ++i)"); + out.println(" for (int j = 0; j < result[i].featuresSize(); ++j)"); + out.println(" {"); + out.println(" Feature f = result[i].getFeature(j);"); + out.println(" if (f.getValueIndex() == -1)"); + out.println(" {"); + out.println(" System.err.println(\"Classifier " + lceName + + " defined on line " + line + " of " + Main.sourceFilename + + " tried to produce a feature with value '\" + " + + "f.getStringValue() + \"' which is not allowable.\");"); + out.println(" System.exit(1);"); + out.println(" }"); + out.println(" }\n"); + } + + out.println(" return result;"); + out.println(" }\n"); + + String pack = AST.globalSymbolTable.getPackage(); + String fullName = pack + (pack.length() == 0 ? 
"" : ".") + lceName; + out.println(" public static void main(String[] args)"); + out.println(" {"); + out.println(" String testParserName = null;"); + out.println(" String testFile = null;"); + out.println(" Parser testParser = getTestParser();\n"); + + out.println(" try"); + out.println(" {"); + out.println(" if (!args[0].equals(\"null\"))"); + out.println(" testParserName = args[0];"); + out.println(" if (args.length > 1) testFile = args[1];\n"); + + out.println(" if (testParserName == null && testParser == null)"); + out.println(" {"); + out.println(" System.err.println(\"The \\\"testFrom\\\" clause " + + "was not used in the learning classifier expression " + "that\");"); + out.println(" System.err.println(\"generated this classifier, so " + + "a parser and input file must be specified.\\n\");"); + out.println(" throw new Exception();"); + out.println(" }"); + out.println(" }"); + out.println(" catch (Exception e)"); + out.println(" {"); + out.println(" System.err.println(\"usage: " + fullName + " \\\\\");"); + out.println(" System.err.println(\" [ [ ...]]\\n\");"); + out.println(" System.err.println(\" * must be the " + + "fully qualified class name of a Parser, or " + "\\\"null\\\"\");"); + out.println(" System.err.println(\" to use the default as " + + "specified by the \\\"testFrom\\\" clause.\");"); + out.println(" System.err.println(\" * is the " + + "relative or absolute path of a file, or \\\"null\\\" " + "to\");"); + out.println(" System.err.println(\" use the parser arguments " + + "specified by the \\\"testFrom\\\" clause. 
can also be " + + "non-\\\"null\\\" when is \\\"null\\\" (when the " + "parser\");"); + out.println(" System.err.println(\" specified by the " + + "\\\"testFrom\\\" clause has a single string argument\");"); + out.println(" System.err.println(\" constructor) to use an " + + "alternate file.\");"); + out.println(" System.err.println(\" * A is a label " + + "(or prediction) that should not count towards\");"); + out.println(" System.err.println(\" overall precision and " + + "recall assessments.\");"); + out.println(" System.exit(1);"); + out.println(" }\n"); + + out.println(" if (testParserName == null && testFile != null && " + + "!testFile.equals(\"null\"))"); + out.println(" testParserName = testParser.getClass().getName();"); + out.println(" if (testParserName != null)"); + out.println(" testParser = " + + "edu.illinois.cs.cogcomp.lbjava.util.ClassUtils.getParser(testParserName, new " + + "Class[]{ String.class }, new String[]{ testFile });"); + out.println(" " + lceName + " classifier = new " + lceName + "();"); + out.println(" TestDiscrete tester = new TestDiscrete();"); + out.println(" for (int i = 2; i < args.length; ++i)"); + out.println(" tester.addNull(args[i]);"); + out.println(" TestDiscrete.testDiscrete(tester, classifier, " + + "classifier.getLabeler(), testParser, true, 0);"); + + out.println(" }\n"); + + generateHashingMethods(out, lceName); + + Class lceClass = AST.globalSymbolTable.classForName(lce.learnerName.toString()); + if (lceClass == null) { + reportError(lce.line, "Could not locate class for learner '" + lce.learnerName + "'."); + return; + } + + Method[] methods = lceClass.getMethods(); + for (int i = 0; i < methods.length; ++i) { + int modifiers = methods[i].getModifiers(); + if (Modifier.isFinal(modifiers) || Modifier.isPrivate(modifiers) + || Modifier.isProtected(modifiers) || Modifier.isStatic(modifiers)) + continue; + + Class returned = methods[i].getReturnType(); + String name = methods[i].getName(); + Class[] parameters = 
methods[i].getParameterTypes(); + + String sig = signature(methods[i], modifiers, returned, name, parameters); + if (Arrays.binarySearch(noOverride, sig) >= 0) + continue; + + out.println("\n " + sig); + out.println(" {"); + out.println(" if (isClone)"); + out.println(" {"); + out.println(" loadInstance();"); + + out.print(" "); + if (!returned.equals(void.class)) + out.print("return "); + out.print("instance." + name + "("); + + if (parameters.length > 0) { + out.print("a0"); + for (int j = 1; j < parameters.length; ++j) + out.print(", a" + j); + } + + out.println(");"); + + if (returned.equals(void.class)) + out.println(" return;"); + out.println(" }\n"); + + out.print(" "); + if (!returned.equals(void.class)) + out.print("return "); + out.print("super." + name + "("); + + if (parameters.length > 0) { + out.print("a0"); + for (int j = 1; j < parameters.length; ++j) + out.print(", a" + j); + } + + out.println(");"); + out.println(" }"); + } + + if (lce.parameterSets.size() == 0) { + out.println(); + out.println(" public static class Parameters extends " + lce.learnerName + + ".Parameters"); + out.println(" {"); + + if (lce.learnerParameterBlock != null) { + TranslateToJava translator = new TranslateToJava(null); + translator.setRoot(lce.learnerParameterBlock); + translator.setCurrentCG(lce); + translator.setIndent(3); + translator.run(); + out.println(" public Parameters()"); + out.println(translator.getMethodBody()); + } else + out.println(" public Parameters() { super((" + lce.learnerName + + ".Parameters) new " + lceName + "(false).getParameters()); }"); + out.println(" }"); + } + } private static boolean isField(ClassifierExpression ce) { return ce instanceof ClassifierName && ((ClassifierName) ce).isField; } - /** - * This method generates a string signature of the given method. The - * arguments other than m are supplied as arguments for - * efficiency reasons, since this method is only called by one other - * method. 
- * - * @see #generateLearnerBody(PrintStream,LearningClassifierExpression) - * @param m The method object. - * @param modifiers The integer representation of the method's modifiers. - * @param returned The return type of the method. - * @param name The name of the method. - * @param parameters The parameter types of the method. - * @return A string description of the method suitable for comparison with - * the elements of the {@link #noOverride} array. - **/ - @SuppressWarnings("rawtypes") - public static String signature(Method m, int modifiers, Class returned, - String name, Class[] parameters) { - Class[] thrown = m.getExceptionTypes(); - - String result = ""; - if (Modifier.isAbstract(modifiers)) result += "abstract "; - if (Modifier.isFinal(modifiers)) result += "final "; - if (Modifier.isNative(modifiers)) result += "native "; - if (Modifier.isPrivate(modifiers)) result += "private "; - if (Modifier.isProtected(modifiers)) result += "protected "; - if (Modifier.isPublic(modifiers)) result += "public "; - if (Modifier.isStatic(modifiers)) result += "static "; - if (Modifier.isStrict(modifiers)) result += "strictfp "; - if (Modifier.isSynchronized(modifiers)) result += "synchronized "; - - result += makeTypeReadable(returned.getName()) + " " + name + "("; - if (parameters.length > 0) { - result += makeTypeReadable(parameters[0].getName()) + " a0"; - for (int j = 1; j < parameters.length; ++j) - result += ", " + makeTypeReadable(parameters[j].getName()) + " a" + j; - } - - result += ")"; - if (thrown.length > 0) { - result += " throws " + thrown[0].getName(); - for (int j = 1; j < thrown.length; ++j) - result += ", " + thrown[j].getName(); - } - - return result; - } - - - /** - * The value returned by the Class.getName() method is not - * recognizable as a type by javac if the given class is an - * array; this method produces a representation that is recognizable by - * javac. This method also replaces '$' characters with '.' 
- * characters, under the assumption that '$' only appears in the name of an - * inner class. - * - * @param name The name of a class as produced by - * Class.getName(). - * @return A string representation of the class recognizable by - * javac. - **/ - public static String makeTypeReadable(String name) { - for (int i = name.indexOf('$'); i != -1; i = name.indexOf('$', i + 1)) - name = name.substring(0, i) + '.' + name.substring(i + 1); - - if (name.charAt(0) != '[') return name; - - while (name.charAt(0) == '[') name = name.substring(1) + "[]"; - - switch (name.charAt(0)) { - case 'B': return "boolean" + name.substring(1); - case 'C': return "char" + name.substring(1); - case 'D': return "double" + name.substring(1); - case 'F': return "float" + name.substring(1); - case 'I': return "int" + name.substring(1); - case 'J': return "long" + name.substring(1); - case 'L': - int colon = name.indexOf(';'); - return name.substring(1, colon) + name.substring(colon + 1); - case 'S': return "short" + name.substring(1); - case 'Z': return "boolean" + name.substring(1); - } - - assert false : "Unrecognized type string: " + name; - return null; - } - - - /** - * Generate code that overrides the methods of {@link Classifier} that - * return type information. The methods overridden are: - *
    - *
  • getInputType()
  • - *
  • getOutputType()
  • - *
  • allowableValues()
  • - *
- * - * @param out The stream to write to. - * @param input The input type of the classifier whose code this is. - * @param output The return type of the classifier whose code this is. - **/ - public static void typeReturningMethods(PrintStream out, - Type input, - ClassifierReturnType output) { - out.println(" public String getInputType() { return \"" - + input.typeClass().getName() + "\"; }"); - out.println(" public String getOutputType() { return \"" - + output.getTypeName() + "\"; }"); - - if (output.values.size() > 0) { - ConstantList values = output.values; - out.print("\n private static String[] __allowableValues = "); - boolean isBoolean = false; - - if (output.values.size() == 2) { - ASTNodeIterator I = values.iterator(); - String v1 = I.next().toString(); - String v2 = I.next().toString(); - if ((v1.equals("false") || v1.equals("\"false\"")) - && (v2.equals("true") || v2.equals("\"true\""))) { - isBoolean = true; - out.println("DiscreteFeature.BooleanValues;"); - } - } - - if (!isBoolean) { - ASTNodeIterator I = values.iterator(); - String v = I.next().toString(); - if (v.charAt(0) != '"') v = "\"" + v + "\""; - out.print("new String[]{ " + v); - while (I.hasNext()) { - v = I.next().toString(); - if (v.charAt(0) != '"') v = "\"" + v + "\""; - out.print(", " + v); - } - out.println(" };"); - } - - out.println(" public static String[] getAllowableValues() { return " - + "__allowableValues; }"); - out.println(" public String[] allowableValues() { return " - + "__allowableValues; }"); - } - } - - - /** - * Generates code that overrides the {@link Classifier#classify(Object[])} - * method so that it checks the types of its arguments. - * - * @param out The stream to write to. - * @param name The name of the classifier whose code this is. - * @param input The input type of the classifier whose code this is. - * @param line The line number on which this classifier is defined. 
- **/ - public static void typeCheckClassifyArray(PrintStream out, String name, - Type input, int line) { - out.println(" public FeatureVector[] classify(Object[] examples)"); - out.println(" {"); - out.print( - generateTypeChecking(" ", name, "Classifier", true, - input.toString(), line, "examples", false)); - out.println(" return super.classify(examples);"); - out.println(" }"); - } - - - /** - * Generates the equals(Object) method, which evaluates to - * true whenever the two objects are of the same type. This - * method should not be called when generating code for a - * {@link InferenceDeclaration}. - * - * @param out The stream to write to. - * @param name The name of the node whose equals(Object) - * method is being generated. - **/ - public static void generateHashingMethods(PrintStream out, String name) { - out.println(" public int hashCode() { return \"" + name - + "\".hashCode(); }"); - out.println(" public boolean equals(Object o) { return o instanceof " - + name + "; }"); - } - - - /** - * Generates the code appearing at the beginning of, for example, many - * classifiers' {@link Classifier#classify(Object)} methods that checks to - * see if that input Object has the appropriate type. - * - * @param indent The whitespace indentation in the generated code. - * @param name The name of the {@link CodeGenerator} whose input is - * being checked. - * @param type The type of {@link CodeGenerator} whose input is - * being checked (capitalized). - * @param array Whether or not the method being type checked takes - * an array of the input type. - * @param input The correct input type of the {@link CodeGenerator}. - * @param line The line number on which the {@link CodeGenerator} - * appears. - * @param exampleName The name of the example variable. - * @param preExtracted Whether or not the generated code should allow - * object arrays containing indexed features, as a - * learner can take as input. - * @return The generated code. 
- **/ - public static StringBuffer generateTypeChecking( - String indent, String name, String type, boolean array, String input, - int line, String exampleName, boolean preExtracted) { - StringBuffer result = new StringBuffer(); - - result.append(indent); - result.append("if (!("); - result.append(exampleName); - result.append(" instanceof "); - result.append(input); - if (array) result.append("[]"); - - if (preExtracted) { - result.append(" || "); - result.append(exampleName); - result.append(" instanceof Object[]"); - if (array) result.append("[]"); - } - - result.append("))\n"); - - result.append(indent); - result.append("{\n"); - - result.append(indent); - result.append(" String type = "); - result.append(exampleName); - result.append(" == null ? \"null\" : "); - result.append(exampleName); - result.append(".getClass().getName();\n"); - - result.append(indent); - result.append(" System.err.println(\""); - result.append(type); - result.append(" '"); - result.append(name); - result.append("("); - result.append(input); - result.append(")' defined on line "); - result.append(line + 1); - result.append(" of "); - result.append(Main.sourceFilename); - result.append(" received '\" + type + \"' as input.\");\n"); - - result.append(indent); - result.append(" new Exception().printStackTrace();\n"); - - result.append(indent); - result.append(" System.exit(1);\n"); - - result.append(indent); - result.append("}\n\n"); - return result; - } - - - /** - * Generates code that instantiates a primitive feature. - * - * @param discrete Whether or not the feature is discrete. - * @param array Whether or not the feature comes from an array. - * @param ref Code referring to an instance of the classifier from - * which package and name information will be taken for - * this feature. - * @param id Code that evaluates to the value of the feature's - * identifier. 
- * @param value Code that evaluates to the feature's value if - * array is false, or the array - * containing the feature's value if array - * is true. - * @param index Code that evaluates to the array index of this feature. - * This parameter is ignored if it is null - * or array is false. - * @param values Code that evaluates to the number of possible values - * that the feature can take. If set to - * null, - * "allowableValues().length" is - * substituted. This parameter is ignored if - * array is false. - * @param arrayInfo Code that evaluates to both the index and length - * arguments in a feature constructor, separated by a - * comma. If this parameter is null, it - * defaults to index followed by - * value with ".length" - * appended. - **/ - private static String primitiveFeatureConstructorInvocation( - boolean discrete, boolean array, String ref, String id, String value, - String index, String values, String arrayInfo) { - StringBuffer buffer = new StringBuffer("new "); - if (ref.length() > 0) ref += "."; - - buffer.append(discrete ? "Discrete" : "Real"); - buffer.append(array ? 
"Array" : "Primitive"); - buffer.append("StringFeature("); - buffer.append(ref); - buffer.append("containingPackage, "); - buffer.append(ref); - buffer.append("name, "); - buffer.append(id); - buffer.append(", "); - buffer.append(value); - - if (array && index != null) { - buffer.append('['); - buffer.append(index); - buffer.append(']'); - } - - if (discrete) { - buffer.append(", valueIndexOf("); - buffer.append(value); - if (array && index != null) { - buffer.append('['); - buffer.append(index); - buffer.append(']'); - } - - if (values == null) values = "allowableValues().length"; - buffer.append("), (short) "); - buffer.append(values); - } - - if (array) { - buffer.append(", "); - if (arrayInfo != null) buffer.append(arrayInfo); - else { - buffer.append(index); - buffer.append(", "); - buffer.append(value); - buffer.append(".length"); - } - } - - buffer.append(")"); - - return buffer.toString(); - } - - - /** - * This method generates the methods that return the features and values - * representing a classification. Implementations generated here take care - * of all caching we may want to take place. The explicitly overridden - * methods are a subset (depending on the classifier's type) of: - * - *
    - *
  • classify(Object)
  • - *
  • featureValue(Object)
  • - *
  • discreteValue(Object)
  • - *
  • discreteValueArray(Object)
  • - *
  • realValue(Object)
  • - *
  • realValueArray(Object)
  • - *
- * - *

If bodyPrimitive is true, - * body should implement a method that the caller assumes - * takes the same argument as the generated classifier and returns the - * appropriate primitive type (String or double) - * via a return statement. Otherwise, body - * should implement a method whose argument is Object - * __example and which stores the result of its computation in a - * variable named __result which has already been declared to - * have either type {@link edu.illinois.cs.cogcomp.lbjava.classify.Feature} or - * {@link edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector} as appropriate. - * - *

If post is non-null, the code therein will - * have access to the values computed by body. If - * bodyPrimitive is true, those values can be - * accessed via a primitive variable named __cachedValue. - * Otherwise, they must be accessed via the aforementioned - * __result variable. - * - * @param out The stream to write to. - * @param classifierExp The classifier for which code is being generated. - * @param preamble This code will be executed before any call to any - * of the generated methods. Any occurrences of the - * string "$METHOD$" (without the - * quotes) inside preamble will be - * replaced with the name of the method inside which - * code is currently being placed. If left - * null, it defaults to the code - * generated by - * {@link #generateTypeChecking(String,String,String,boolean,String,int,String,boolean)}. - * @param body Generated code that computes the classification - * result (in a FeatureVector, - * Feature, String, or - * double as appropriate). - * @param bodyPrimitive Set to true if the code in - * body computes a single - * String or double. - * @param bodyArgCast Set to true if the code in - * body assumes the classifier's - * argument in the original source code will be in - * scope. - * @param post Generated code that performs any post-processing - * that may be necessary on the computed - * classification result before it is finally - * returned. This parameter can be null - * if post-processing is not necessary. - **/ - private static void generateClassificationMethods( - PrintStream out, ClassifierExpression classifierExp, String preamble, - String body, boolean bodyPrimitive, boolean bodyArgCast, String post) { - String name = classifierExp.name.toString(); - Argument arg = classifierExp.argument; - String input = arg.getType().toString(); - String field = - classifierExp.cacheIn == null ? 
null : classifierExp.cacheIn.toString(); - boolean cachedInMap = ClassifierAssignment.mapCache.equals(field); - boolean anyCache = classifierExp.singleExampleCache || field != null; - boolean discrete, array, generator; - - { - ClassifierReturnType crt = classifierExp.returnType; - discrete = - crt.type == ClassifierReturnType.DISCRETE - || crt.type == ClassifierReturnType.DISCRETE_ARRAY - || crt.type == ClassifierReturnType.DISCRETE_GENERATOR; - array = - crt.type == ClassifierReturnType.DISCRETE_ARRAY - || crt.type == ClassifierReturnType.REAL_ARRAY; - generator = - crt.type == ClassifierReturnType.DISCRETE_GENERATOR - || crt.type == ClassifierReturnType.REAL_GENERATOR - || crt.type == ClassifierReturnType.MIXED_GENERATOR; - } - - String primitiveFeatureType = discrete ? "discrete" : "real"; - String primitiveType = discrete ? "String" : "double"; - String cachedValueType = primitiveType; - if (array) cachedValueType += "[]"; - String valueMethodName = primitiveFeatureType + "Value"; - if (array) valueMethodName += "Array"; - String cachedMethodReturnType = "Feature"; - if (array || generator) cachedMethodReturnType += "Vector"; - - if (preamble == null) - preamble = - generateTypeChecking( - " ", name, "Classifier", false, input, classifierExp.line, - "__example", false) - .toString(); - - if (anyCache) - out.println(" private " + cachedMethodReturnType - + " cachedFeatureValue(Object __example)"); - else if (array || generator) - out.println(" public FeatureVector classify(Object __example)"); - else if (!bodyPrimitive) - out.println(" public Feature featureValue(Object __example)"); - - if (anyCache || !bodyPrimitive) { - out.println(" {"); - if (classifierExp.singleExampleCache) { - out.println(" if (__example == __exampleCache.get()) return (" - + cachedMethodReturnType + ") __cache.get();"); - out.println(" __exampleCache.set(__example);"); - } - - if (field != null) { - if (cachedInMap) { - if (!array && !discrete) - out.println(" Double __dValue = " - + 
"(Double) __valueCache.get(__example);"); - } - else out.println(" " + arg + " = (" + input + ") __example;"); - - out.print(" " + cachedValueType + " __cachedValue = "); - - if (cachedInMap) { - if (!array && !discrete) - out.println("__dValue == null ? Double.NaN : " - + "__dValue.doubleValue();"); - else - out.println("(" + cachedValueType - + ") __valueCache.get(__example);"); - } - else out.println(field + ";"); - - out.print("\n if ("); - if (!array && !discrete) out.print("Double.doubleToLongBits("); - out.print("__cachedValue"); - if (!array && !discrete) out.print(")"); - out.print(" != "); - if (!array && !discrete) - out.print("Double.doubleToLongBits(Double.NaN)"); - else out.print("null"); - out.println(")"); - out.println(" {"); - out.print(" " + cachedMethodReturnType + " result = "); - - if (array) { - out.println("new FeatureVector();"); - out.println(" for (int i = 0; i < __cachedValue.length; ++i)"); - out.print(" result.addFeature("); - } - - out.print( - primitiveFeatureConstructorInvocation( - discrete, array, "", "\"\"", "__cachedValue", "i", null, - null)); - - if (array) out.print(")"); - out.println(";"); - if (classifierExp.singleExampleCache) - out.println(" __cache.set(result);"); - out.println(" return result;"); - out.println(" }\n"); - } - - if (bodyPrimitive) { - if (field != null) { - out.print(" __cachedValue = "); - if (!cachedInMap) out.print(field + " = "); - out.println("_" + valueMethodName + "(__example);"); - - if (cachedInMap) { - out.print(" __valueCache.put(__example, "); - if (!discrete) out.print("new Double("); - out.print("__cachedValue"); - if (!discrete) out.print(")"); - out.println(");"); - } - } - else - out.println(" " + primitiveType + " __cachedValue = _" - + valueMethodName + "(__example);"); - - if (post != null) { - out.println(); - out.println(post); - } - - out.println(" Feature __result = " - + primitiveFeatureConstructorInvocation( - discrete, false, "", "\"\"", "__cachedValue", null, - null, null) - + 
";"); - } - else { - if (!anyCache) - out.print( - preamble.replaceAll("\\$METHOD\\$", - (array || generator ? "classify" - : "featureValue"))); - if (bodyArgCast && (field == null || cachedInMap)) - out.println(" " + arg + " = (" + input + ") __example;\n"); - out.println(" " + cachedMethodReturnType + " __result;"); - out.print(body); - - if (field != null) { - out.print(" __cachedValue = "); - if (!cachedInMap) out.print(field + " = "); - out.println("__result." - + (array ? valueMethodName - : (discrete ? "getStringValue" - : "getStrength")) - + "();"); - - if (cachedInMap) { - out.print(" __valueCache.put(__example, "); - if (!discrete && !array) out.print("new Double("); - out.print("__cachedValue"); - if (!discrete && !array) out.print(")"); - out.println(");"); - } - } - - if (post != null) { - out.println(); - out.println(post); - } - } - - if (classifierExp.singleExampleCache) - out.println(" __cache.set(__result);"); - out.println(" return __result;"); - out.println(" }"); - } - - if (anyCache || !(array || generator)) { - out.println("\n public FeatureVector classify(Object __example)"); - out.println(" {"); - if (anyCache) - out.print(preamble.replaceAll("\\$METHOD\\$", "classify")); - out.print(" return "); - if (!(array || generator)) out.print("new FeatureVector("); - out.print((anyCache ? 
"cachedFeatureValue" : "featureValue") - + "(__example)"); - if (!(array || generator)) out.print(")"); - out.println(";"); - out.println(" }"); - } - - if (!generator) { - if (!array && (anyCache || bodyPrimitive)) { - out.println("\n public Feature featureValue(Object __example)"); - out.println(" {"); - if (anyCache) - out.print(preamble.replaceAll("\\$METHOD\\$", "featureValue")); - - if (anyCache) - out.println(" return cachedFeatureValue(__example);"); - else { - out.println(" " + cachedValueType + " result = " - + valueMethodName + "(__example);"); - out.println( - " return " - + primitiveFeatureConstructorInvocation( - discrete, false, "", "\"\"", "result", null, null, null) - + ";"); - } - - out.println(" }"); - } - - if (anyCache || array || !bodyPrimitive || post != null) { - out.println("\n public " + cachedValueType + " " + valueMethodName - + "(Object __example)"); - out.println(" {"); - if (anyCache) - out.print(preamble.replaceAll("\\$METHOD\\$", valueMethodName)); - - if (array && field != null) { - out.println(" cachedFeatureValue(__example);"); - if (!cachedInMap) - out.println(" " + arg + " = (" + input + ") __example;"); - out.println(" return " - + (cachedInMap ? "(" + cachedValueType - + ") __valueCache.get(__example)" - : field) - + ";"); - } - else if (!anyCache && bodyPrimitive && post != null) { - out.print(preamble.replaceAll("\\$METHOD\\$", valueMethodName)); - out.println(" " + cachedValueType + " __cachedValue = _" - + valueMethodName + "(__example);\n"); - out.println(post); - out.println(" return __cachedValue;"); - } - else - out.println(" return " - + (anyCache ? "cachedFeatureValue" - : (array ? "classify" : "featureValue")) - + "(__example)." - + (array ? valueMethodName - : (discrete ? "getStringValue" - : "getStrength")) - + "();"); - out.println(" }"); - } - - if (bodyPrimitive) { - boolean helper = anyCache || post != null; - out.println("\n " + (helper ? "private" : "public") + " " - + cachedValueType + " " + (helper ? 
"_" : "") - + valueMethodName + "(Object __example)"); - out.println(" {"); - if (!helper) - out.print(preamble.replaceAll("\\$METHOD\\$", valueMethodName)); - if (bodyArgCast) - out.println(" " + arg + " = (" + input + ") __example;\n"); - out.print(body); - out.println(" }"); - } - } - } - - - /** - * Compress the textual representation of an {@link ASTNode}, convert to - * ASCII hexadecimal, and write the result to the specified stream. - * - * @param buffer The text representation to be written. - * @param out The stream to write to. - **/ - public static void compressAndPrint(StringBuffer buffer, PrintStream out) { - PrintStream converter = null; - ByteArrayOutputStream converted = new ByteArrayOutputStream(); - try { - converter = - new PrintStream(new GZIPOutputStream(new HexOutputStream(converted))); - } - catch (Exception e) { - System.err.println("Could not create converter stream."); - System.exit(1); - } - - converter.print(buffer.toString()); - converter.close(); - - try { converted.writeTo(out); } - catch (Exception e) { - System.err.println("Could not write the converted stream."); - System.exit(1); - } - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param ast The node to process. - **/ - public void run(AST ast) { - if (!RevisionAnalysis.noChanges) { - quantificationVariables = new HashMap(); - contextVariables = new HashMap(); - runOnChildren(ast); - } - } - - - /** - * Code is only generated for a {@link ClassifierName} when it is the only - * {@link ClassifierExpression} on the right hand side of the arrow (and - * there really shouldn't be a reason that a programmer would want to write - * such a declaration, but if he does, it will work). - * - * @param cn The node to process. 
- **/ - public void run(ClassifierName cn) { - String cnName = cn.name.toString(); - if (cn.name == cn.referent - || !RevisionAnalysis.revisionStatus.get(cnName) - .equals(RevisionAnalysis.REVISED)) - return; - - System.out.println("Generating code for " + cnName); - - PrintStream out = open(cn); - if (out == null) return; - - out.println(disclaimer); - out.println("// " + cn.shallow() + "\n"); - - ast.symbolTable.generateHeader(out); - - String field = null; - boolean cachedInMap = false; - if (cn.cacheIn != null) { - field = cn.cacheIn.toString(); - cachedInMap = field.equals(ClassifierAssignment.mapCache); - if (cachedInMap) out.println("import java.util.WeakHashMap;"); - } - - out.println("\n"); - if (cn.comment != null) out.println(cn.comment); - - out.println("public class " + cnName + " extends Classifier"); - out.println("{"); - - if (cachedInMap) - out.println(" private static final WeakHashMap __valueCache " - + "= new WeakHashMap();"); - - String referentNoDots = cn.referent.toString().replace('.', '$'); - out.println(" private static final " + cn.referent + " __" - + referentNoDots + " = new " + cn.referent + "();\n"); - - if (cn.singleExampleCache) { - out.println( - " private static ThreadLocal __cache = new ThreadLocal(){ };"); - out.println(" private static ThreadLocal __exampleCache = " - + "new ThreadLocal(){ };"); - out.println(" public static void clearCache() { __exampleCache = new " - + "ThreadLocal(){ }; }\n"); - } - - out.println(" public " + cnName + "()"); - out.println(" {"); - out.println(" containingPackage = \"" - + AST.globalSymbolTable.getPackage() + "\";"); - out.println(" name = \"" + cnName + "\";"); - out.println(" }\n"); - - typeReturningMethods(out, cn.argument.getType(), cn.returnType); - out.println(); - - boolean array = - cn.returnType.type == ClassifierReturnType.DISCRETE_ARRAY - || cn.returnType.type == ClassifierReturnType.REAL_ARRAY; - boolean generator = - cn.returnType.type == ClassifierReturnType.DISCRETE_GENERATOR - 
|| cn.returnType.type == ClassifierReturnType.REAL_GENERATOR - || cn.returnType.type == ClassifierReturnType.MIXED_GENERATOR; - - StringBuffer body = new StringBuffer(); - body.append(" __result = __"); - body.append(referentNoDots); - body.append("."); - body.append(array || generator ? "classify" : "featureValue"); - body.append("(__example);\n"); - - generateClassificationMethods(out, cn, null, body.toString(), false, - false, null); - out.println(); - typeCheckClassifyArray(out, cnName, cn.argument.getType(), cn.line); - out.println(); - generateHashingMethods(out, cnName); - - out.println("}\n"); - out.close(); - } - - - /** - * Generates code for all nodes of the indicated type. - * - * @param cc The node to process. - **/ - public void run(CodedClassifier cc) { - String ccName = cc.name.toString(); - String fileName = ccName + ".java"; - if (fileName.indexOf("$$") != -1) files.add(fileName); - - if (!RevisionAnalysis.revisionStatus.get(ccName) - .equals(RevisionAnalysis.REVISED)) - return; - - System.out.println("Generating code for " + ccName); - - PrintStream out = open(fileName); - if (out == null) return; - - out.println(disclaimer); - out.print("// "); - compressAndPrint(cc.shallow(), out); - out.println("\n"); - - ast.symbolTable.generateHeader(out); - - String field = null; - boolean cachedInMap = false; - if (cc.cacheIn != null) { - field = cc.cacheIn.toString(); - cachedInMap = field.equals(ClassifierAssignment.mapCache); - if (cachedInMap) out.println("import java.util.WeakHashMap;"); - } - - out.println("\n"); - if (cc.comment != null) out.println(cc.comment); - - out.println("public class " + ccName + " extends Classifier"); - out.println("{"); - - if (cachedInMap) - out.println(" private static final WeakHashMap __valueCache " - + "= new WeakHashMap();"); - - HashSet invoked = SemanticAnalysis.invokedGraph.get(ccName); - if (invoked != null && invoked.size() > 0) { - for (Iterator I = invoked.iterator(); I.hasNext(); ) { - String name = 
I.next(); - String nameNoDots = name.replace('.', '$'); - out.println(" private static final " + name + " __" + nameNoDots - + " = new " + name + "();"); - } - - out.println(); - } - - if (cc.singleExampleCache) { - out.println( - " private static ThreadLocal __cache = new ThreadLocal(){ };"); - out.println(" private static ThreadLocal __exampleCache = " - + "new ThreadLocal(){ };"); - out.println(" public static void clearCache() { __exampleCache = new " - + "ThreadLocal(){ }; }\n"); - } - - out.println(" public " + ccName + "()"); - out.println(" {"); - out.println(" containingPackage = \"" - + AST.globalSymbolTable.getPackage() + "\";"); - out.println(" name = \"" + ccName + "\";"); - out.println(" }\n"); - - Type input = cc.argument.getType(); - typeReturningMethods(out, input, cc.returnType); - out.println(); - - indent = 2; - forInit = false; - constraintMode = false; - methodBody.delete(0, methodBody.length()); - currentCG = cc; - for (ASTNodeIterator I = cc.body.iterator(); I.hasNext(); ) { - I.next().runPass(this); - methodBody.append("\n"); - } - - StringBuffer body = new StringBuffer(); - StringBuffer post = null; - boolean primitive = - cc.returnType.type == ClassifierReturnType.DISCRETE - || cc.returnType.type == ClassifierReturnType.REAL; - - if (primitive) { - boolean discrete = cc.returnType.type == ClassifierReturnType.DISCRETE; - - body = methodBody; - if (discrete && cc.returnType.values.size() > 0) { - post = new StringBuffer(); - post.append(" if (valueIndexOf(__cachedValue) == -1)\n" - + " {\n" - + " System.err.println(\"Classifier '"); - post.append(ccName); - post.append("' defined on line "); - post.append(cc.line + 1); - post.append(" of "); - post.append(Main.sourceFilename); - post.append(" produced '\" + __cachedValue + \"' as a feature " - + "value, which is not allowable.\");\n" - + " System.exit(1);\n" - + " }\n"); - } - } - else { - body.append(" __result = new FeatureVector();\n"); - boolean array = - cc.returnType.type == 
ClassifierReturnType.DISCRETE_ARRAY - || cc.returnType.type == ClassifierReturnType.REAL_ARRAY; - if (array) - body.append(" int __featureIndex = 0;\n"); - if (cc.returnType.type == ClassifierReturnType.DISCRETE_GENERATOR - || cc.returnType.type == ClassifierReturnType.REAL_GENERATOR) - body.append(" String __id;\n"); - if (cc.returnType.type == ClassifierReturnType.DISCRETE_ARRAY - || cc.returnType.type == ClassifierReturnType.DISCRETE_GENERATOR - || cc.returnType.type == ClassifierReturnType.REAL_ARRAY - || cc.returnType.type == ClassifierReturnType.REAL_GENERATOR) { - body.append(" "); - body.append( - cc.returnType.type == ClassifierReturnType.DISCRETE_ARRAY - || cc.returnType.type == ClassifierReturnType.DISCRETE_GENERATOR - ? "String" : "double"); - body.append(" __value;\n"); - } - - body.append("\n"); - body.append(methodBody); - - if (array) { - post = new StringBuffer(); - post.append( - " for (int __i = 0; __i < __result.featuresSize(); ++__i)\n" - + " __result.getFeature(__i)" - + ".setArrayLength(__featureIndex);\n"); - } - } - - generateClassificationMethods( - out, cc, null, body.toString(), primitive, true, - post == null ? null : post.toString()); - out.println(); - typeCheckClassifyArray(out, ccName, input, cc.line); - out.println(); - generateHashingMethods(out, ccName); - - out.println("}\n"); - out.close(); - } - - - /** - * Generates code for all nodes of the indicated type. - * - * @param cg The node to process. 
- **/ - public void run(CompositeGenerator cg) { - String cgName = cg.name.toString(); - String fileName = cgName + ".java"; - if (fileName.indexOf("$$") != -1) { - files.add(fileName); - runOnChildren(cg); - } - else { - files.clear(); - - runOnChildren(cg); - - final String prefix = cgName + "$$"; - File[] leftOvers = - new File(System.getProperty("user.dir")).listFiles( - new FilenameFilter() { - public boolean accept(File directory, String name) { - int i = name.lastIndexOf('.'); - if (i == -1) return false; - String javaFile = name.substring(0, i) + ".java"; - return name.startsWith(prefix) && !files.contains(javaFile); - } - }); - - for (int i = 0; i < leftOvers.length; ++i) - if (leftOvers[i].exists() && !leftOvers[i].delete()) - reportError(0, "Could not delete '" + leftOvers[i].getName() - + "'."); - } - - if (!RevisionAnalysis.revisionStatus.get(cgName) - .equals(RevisionAnalysis.REVISED)) - return; - - System.out.println("Generating code for " + cgName); - - PrintStream out = open(fileName); - if (out == null) return; - - out.println(disclaimer); - out.print("// "); - compressAndPrint(cg.shallow(), out); - out.println("\n"); - - ast.symbolTable.generateHeader(out); - - out.println("\n"); - if (cg.comment != null) out.println(cg.comment); - - out.println("public class " + cgName + " extends Classifier"); - out.println("{"); - - { - HashSet declared = new HashSet(); - for (ClassifierExpressionList.ClassifierExpressionListIterator I = - cg.components.listIterator(); - I.hasNext(); ) { + /** + * This method generates a string signature of the given method. The arguments other than + * m are supplied as arguments for efficiency reasons, since this method is only + * called by one other method. + * + * @see #generateLearnerBody(PrintStream,LearningClassifierExpression) + * @param m The method object. + * @param modifiers The integer representation of the method's modifiers. + * @param returned The return type of the method. 
+ * @param name The name of the method. + * @param parameters The parameter types of the method. + * @return A string description of the method suitable for comparison with the elements of the + * {@link #noOverride} array. + **/ + @SuppressWarnings("rawtypes") + public static String signature(Method m, int modifiers, Class returned, String name, + Class[] parameters) { + Class[] thrown = m.getExceptionTypes(); + + String result = ""; + if (Modifier.isAbstract(modifiers)) + result += "abstract "; + if (Modifier.isFinal(modifiers)) + result += "final "; + if (Modifier.isNative(modifiers)) + result += "native "; + if (Modifier.isPrivate(modifiers)) + result += "private "; + if (Modifier.isProtected(modifiers)) + result += "protected "; + if (Modifier.isPublic(modifiers)) + result += "public "; + if (Modifier.isStatic(modifiers)) + result += "static "; + if (Modifier.isStrict(modifiers)) + result += "strictfp "; + if (Modifier.isSynchronized(modifiers)) + result += "synchronized "; + + result += makeTypeReadable(returned.getName()) + " " + name + "("; + if (parameters.length > 0) { + result += makeTypeReadable(parameters[0].getName()) + " a0"; + for (int j = 1; j < parameters.length; ++j) + result += ", " + makeTypeReadable(parameters[j].getName()) + " a" + j; + } + + result += ")"; + if (thrown.length > 0) { + result += " throws " + thrown[0].getName(); + for (int j = 1; j < thrown.length; ++j) + result += ", " + thrown[j].getName(); + } + + return result; + } + + + /** + * The value returned by the Class.getName() method is not recognizable as a type + * by javac if the given class is an array; this method produces a representation + * that is recognizable by javac. This method also replaces '$' characters with '.' + * characters, under the assumption that '$' only appears in the name of an inner class. + * + * @param name The name of a class as produced by Class.getName(). + * @return A string representation of the class recognizable by javac. 
+ **/ + public static String makeTypeReadable(String name) { + for (int i = name.indexOf('$'); i != -1; i = name.indexOf('$', i + 1)) + name = name.substring(0, i) + '.' + name.substring(i + 1); + + if (name.charAt(0) != '[') + return name; + + while (name.charAt(0) == '[') + name = name.substring(1) + "[]"; + + switch (name.charAt(0)) { + case 'B': + return "boolean" + name.substring(1); + case 'C': + return "char" + name.substring(1); + case 'D': + return "double" + name.substring(1); + case 'F': + return "float" + name.substring(1); + case 'I': + return "int" + name.substring(1); + case 'J': + return "long" + name.substring(1); + case 'L': + int colon = name.indexOf(';'); + return name.substring(1, colon) + name.substring(colon + 1); + case 'S': + return "short" + name.substring(1); + case 'Z': + return "boolean" + name.substring(1); + } + + assert false : "Unrecognized type string: " + name; + return null; + } + + + /** + * Generate code that overrides the methods of {@link Classifier} that return type information. + * The methods overridden are: + *

+     * <ul>
+     *   <li> <code>getInputType()</code> </li>
+     *   <li> <code>getOutputType()</code> </li>
+     *   <li> <code>allowableValues()</code> </li>
+     * </ul>
+     *
+ * + * @param out The stream to write to. + * @param input The input type of the classifier whose code this is. + * @param output The return type of the classifier whose code this is. + **/ + public static void typeReturningMethods(PrintStream out, Type input, ClassifierReturnType output) { + out.println(" public String getInputType() { return \"" + input.typeClass().getName() + + "\"; }"); + out.println(" public String getOutputType() { return \"" + output.getTypeName() + "\"; }"); + + if (output.values.size() > 0) { + ConstantList values = output.values; + out.print("\n private static String[] __allowableValues = "); + boolean isBoolean = false; + + if (output.values.size() == 2) { + ASTNodeIterator I = values.iterator(); + String v1 = I.next().toString(); + String v2 = I.next().toString(); + if ((v1.equals("false") || v1.equals("\"false\"")) + && (v2.equals("true") || v2.equals("\"true\""))) { + isBoolean = true; + out.println("DiscreteFeature.BooleanValues;"); + } + } + + if (!isBoolean) { + ASTNodeIterator I = values.iterator(); + String v = I.next().toString(); + if (v.charAt(0) != '"') + v = "\"" + v + "\""; + out.print("new String[]{ " + v); + while (I.hasNext()) { + v = I.next().toString(); + if (v.charAt(0) != '"') + v = "\"" + v + "\""; + out.print(", " + v); + } + out.println(" };"); + } + + out.println(" public static String[] getAllowableValues() { return " + + "__allowableValues; }"); + out.println(" public String[] allowableValues() { return " + "__allowableValues; }"); + } + } + + + /** + * Generates code that overrides the {@link Classifier#classify(Object[])} method so that it + * checks the types of its arguments. + * + * @param out The stream to write to. + * @param name The name of the classifier whose code this is. + * @param input The input type of the classifier whose code this is. + * @param line The line number on which this classifier is defined. 
+ **/ + public static void typeCheckClassifyArray(PrintStream out, String name, Type input, int line) { + out.println(" public FeatureVector[] classify(Object[] examples)"); + out.println(" {"); + out.print(generateTypeChecking(" ", name, "Classifier", true, input.toString(), line, + "examples", false)); + out.println(" return super.classify(examples);"); + out.println(" }"); + } + + + /** + * Generates the equals(Object) method, which evaluates to true + * whenever the two objects are of the same type. This method should not be called when + * generating code for a {@link InferenceDeclaration}. + * + * @param out The stream to write to. + * @param name The name of the node whose equals(Object) method is being generated. + **/ + public static void generateHashingMethods(PrintStream out, String name) { + out.println(" public int hashCode() { return \"" + name + "\".hashCode(); }"); + out.println(" public boolean equals(Object o) { return o instanceof " + name + "; }"); + } + + + /** + * Generates the code appearing at the beginning of, for example, many classifiers' + * {@link Classifier#classify(Object)} methods that checks to see if that input + * Object has the appropriate type. + * + * @param indent The whitespace indentation in the generated code. + * @param name The name of the {@link CodeGenerator} whose input is being checked. + * @param type The type of {@link CodeGenerator} whose input is being checked (capitalized). + * @param array Whether or not the method being type checked takes an array of the input type. + * @param input The correct input type of the {@link CodeGenerator}. + * @param line The line number on which the {@link CodeGenerator} appears. + * @param exampleName The name of the example variable. + * @param preExtracted Whether or not the generated code should allow object arrays containing + * indexed features, as a learner can take as input. + * @return The generated code. 
+ **/ + public static StringBuffer generateTypeChecking(String indent, String name, String type, + boolean array, String input, int line, String exampleName, boolean preExtracted) { + StringBuffer result = new StringBuffer(); + + result.append(indent); + result.append("if (!("); + result.append(exampleName); + result.append(" instanceof "); + result.append(input); + if (array) + result.append("[]"); + + if (preExtracted) { + result.append(" || "); + result.append(exampleName); + result.append(" instanceof Object[]"); + if (array) + result.append("[]"); + } + + result.append("))\n"); + + result.append(indent); + result.append("{\n"); + + result.append(indent); + result.append(" String type = "); + result.append(exampleName); + result.append(" == null ? \"null\" : "); + result.append(exampleName); + result.append(".getClass().getName();\n"); + + result.append(indent); + result.append(" System.err.println(\""); + result.append(type); + result.append(" '"); + result.append(name); + result.append("("); + result.append(input); + result.append(")' defined on line "); + result.append(line + 1); + result.append(" of "); + result.append(Main.sourceFilename); + result.append(" received '\" + type + \"' as input.\");\n"); + + result.append(indent); + result.append(" new Exception().printStackTrace();\n"); + + result.append(indent); + result.append(" System.exit(1);\n"); + + result.append(indent); + result.append("}\n\n"); + return result; + } + + + /** + * Generates code that instantiates a primitive feature. + * + * @param discrete Whether or not the feature is discrete. + * @param array Whether or not the feature comes from an array. + * @param ref Code referring to an instance of the classifier from which package and name + * information will be taken for this feature. + * @param id Code that evaluates to the value of the feature's identifier. 
+ * @param value Code that evaluates to the feature's value if array is + * false, or the array containing the feature's value if array + * is true. + * @param index Code that evaluates to the array index of this feature. This parameter is + * ignored if it is null or array is false. + * @param values Code that evaluates to the number of possible values that the feature can take. + * If set to null, "allowableValues().length" is substituted. + * This parameter is ignored if array is false. + * @param arrayInfo Code that evaluates to both the index and length arguments in a feature + * constructor, separated by a comma. If this parameter is null, it defaults + * to index followed by value with ".length" + * appended. + **/ + private static String primitiveFeatureConstructorInvocation(boolean discrete, boolean array, + String ref, String id, String value, String index, String values, String arrayInfo) { + StringBuffer buffer = new StringBuffer("new "); + if (ref.length() > 0) + ref += "."; + + buffer.append(discrete ? "Discrete" : "Real"); + buffer.append(array ? 
"Array" : "Primitive"); + buffer.append("StringFeature("); + buffer.append(ref); + buffer.append("containingPackage, "); + buffer.append(ref); + buffer.append("name, "); + buffer.append(id); + buffer.append(", "); + buffer.append(value); + + if (array && index != null) { + buffer.append('['); + buffer.append(index); + buffer.append(']'); + } + + if (discrete) { + buffer.append(", valueIndexOf("); + buffer.append(value); + if (array && index != null) { + buffer.append('['); + buffer.append(index); + buffer.append(']'); + } + + if (values == null) + values = "allowableValues().length"; + buffer.append("), (short) "); + buffer.append(values); + } + + if (array) { + buffer.append(", "); + if (arrayInfo != null) + buffer.append(arrayInfo); + else { + buffer.append(index); + buffer.append(", "); + buffer.append(value); + buffer.append(".length"); + } + } + + buffer.append(")"); + + return buffer.toString(); + } + + + /** + * This method generates the methods that return the features and values representing a + * classification. Implementations generated here take care of all caching we may want to take + * place. The explicitly overridden methods are a subset (depending on the classifier's type) + * of: + * + *
+     * <ul>
+     *   <li> <code>classify(Object)</code> </li>
+     *   <li> <code>featureValue(Object)</code> </li>
+     *   <li> <code>discreteValue(Object)</code> </li>
+     *   <li> <code>discreteValueArray(Object)</code> </li>
+     *   <li> <code>realValue(Object)</code> </li>
+     *   <li> <code>realValueArray(Object)</code> </li>
+     * </ul>
+     *

+ * If bodyPrimitive is true, body should implement a + * method that the caller assumes takes the same argument as the generated classifier and + * returns the appropriate primitive type (String or double) via a + * return statement. Otherwise, body should implement a method whose + * argument is Object + * __example and which stores the result of its computation in a variable named + * __result which has already been declared to have either type + * {@link edu.illinois.cs.cogcomp.lbjava.classify.Feature} or + * {@link edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector} as appropriate. + * + *

+ * If post is non-null, the code therein will have access to the + * values computed by body. If bodyPrimitive is true, + * those values can be accessed via a primitive variable named __cachedValue. + * Otherwise, they must be accessed via the aforementioned __result variable. + * + * @param out The stream to write to. + * @param classifierExp The classifier for which code is being generated. + * @param preamble This code will be executed before any call to any of the generated methods. + * Any occurrences of the string "$METHOD$" (without the quotes) inside + * preamble will be replaced with the name of the method inside which code + * is currently being placed. If left null, it defaults to the code + * generated by + * {@link #generateTypeChecking(String,String,String,boolean,String,int,String,boolean)}. + * @param body Generated code that computes the classification result (in a + * FeatureVector, Feature, String, or + * double as appropriate). + * @param bodyPrimitive Set to true if the code in body computes a + * single String or double. + * @param bodyArgCast Set to true if the code in body assumes the + * classifier's argument in the original source code will be in scope. + * @param post Generated code that performs any post-processing that may be necessary on the + * computed classification result before it is finally returned. This parameter can be + * null if post-processing is not necessary. + **/ + private static void generateClassificationMethods(PrintStream out, + ClassifierExpression classifierExp, String preamble, String body, + boolean bodyPrimitive, boolean bodyArgCast, String post) { + String name = classifierExp.name.toString(); + Argument arg = classifierExp.argument; + String input = arg.getType().toString(); + String field = classifierExp.cacheIn == null ? 
null : classifierExp.cacheIn.toString(); + boolean cachedInMap = ClassifierAssignment.mapCache.equals(field); + boolean anyCache = classifierExp.singleExampleCache || field != null; + boolean discrete, array, generator; + + { + ClassifierReturnType crt = classifierExp.returnType; + discrete = + crt.type == ClassifierReturnType.DISCRETE + || crt.type == ClassifierReturnType.DISCRETE_ARRAY + || crt.type == ClassifierReturnType.DISCRETE_GENERATOR; + array = + crt.type == ClassifierReturnType.DISCRETE_ARRAY + || crt.type == ClassifierReturnType.REAL_ARRAY; + generator = + crt.type == ClassifierReturnType.DISCRETE_GENERATOR + || crt.type == ClassifierReturnType.REAL_GENERATOR + || crt.type == ClassifierReturnType.MIXED_GENERATOR; + } + + String primitiveFeatureType = discrete ? "discrete" : "real"; + String primitiveType = discrete ? "String" : "double"; + String cachedValueType = primitiveType; + if (array) + cachedValueType += "[]"; + String valueMethodName = primitiveFeatureType + "Value"; + if (array) + valueMethodName += "Array"; + String cachedMethodReturnType = "Feature"; + if (array || generator) + cachedMethodReturnType += "Vector"; + + if (preamble == null) + preamble = + generateTypeChecking(" ", name, "Classifier", false, input, + classifierExp.line, "__example", false).toString(); + + if (anyCache) + out.println(" private " + cachedMethodReturnType + + " cachedFeatureValue(Object __example)"); + else if (array || generator) + out.println(" public FeatureVector classify(Object __example)"); + else if (!bodyPrimitive) + out.println(" public Feature featureValue(Object __example)"); + + if (anyCache || !bodyPrimitive) { + out.println(" {"); + if (classifierExp.singleExampleCache) { + out.println(" if (__example == __exampleCache.get()) return (" + + cachedMethodReturnType + ") __cache.get();"); + out.println(" __exampleCache.set(__example);"); + } + + if (field != null) { + if (cachedInMap) { + if (!array && !discrete) + out.println(" Double __dValue = " + + 
"(Double) __valueCache.get(__example);"); + } else + out.println(" " + arg + " = (" + input + ") __example;"); + + out.print(" " + cachedValueType + " __cachedValue = "); + + if (cachedInMap) { + if (!array && !discrete) + out.println("__dValue == null ? Double.NaN : " + "__dValue.doubleValue();"); + else + out.println("(" + cachedValueType + ") __valueCache.get(__example);"); + } else + out.println(field + ";"); + + out.print("\n if ("); + if (!array && !discrete) + out.print("Double.doubleToLongBits("); + out.print("__cachedValue"); + if (!array && !discrete) + out.print(")"); + out.print(" != "); + if (!array && !discrete) + out.print("Double.doubleToLongBits(Double.NaN)"); + else + out.print("null"); + out.println(")"); + out.println(" {"); + out.print(" " + cachedMethodReturnType + " result = "); + + if (array) { + out.println("new FeatureVector();"); + out.println(" for (int i = 0; i < __cachedValue.length; ++i)"); + out.print(" result.addFeature("); + } + + out.print(primitiveFeatureConstructorInvocation(discrete, array, "", "\"\"", + "__cachedValue", "i", null, null)); + + if (array) + out.print(")"); + out.println(";"); + if (classifierExp.singleExampleCache) + out.println(" __cache.set(result);"); + out.println(" return result;"); + out.println(" }\n"); + } + + if (bodyPrimitive) { + if (field != null) { + out.print(" __cachedValue = "); + if (!cachedInMap) + out.print(field + " = "); + out.println("_" + valueMethodName + "(__example);"); + + if (cachedInMap) { + out.print(" __valueCache.put(__example, "); + if (!discrete) + out.print("new Double("); + out.print("__cachedValue"); + if (!discrete) + out.print(")"); + out.println(");"); + } + } else + out.println(" " + primitiveType + " __cachedValue = _" + valueMethodName + + "(__example);"); + + if (post != null) { + out.println(); + out.println(post); + } + + out.println(" Feature __result = " + + primitiveFeatureConstructorInvocation(discrete, false, "", "\"\"", + "__cachedValue", null, null, null) + 
";"); + } else { + if (!anyCache) + out.print(preamble.replaceAll("\\$METHOD\\$", (array || generator ? "classify" + : "featureValue"))); + if (bodyArgCast && (field == null || cachedInMap)) + out.println(" " + arg + " = (" + input + ") __example;\n"); + out.println(" " + cachedMethodReturnType + " __result;"); + out.print(body); + + if (field != null) { + out.print(" __cachedValue = "); + if (!cachedInMap) + out.print(field + " = "); + out.println("__result." + + (array ? valueMethodName : (discrete ? "getStringValue" + : "getStrength")) + "();"); + + if (cachedInMap) { + out.print(" __valueCache.put(__example, "); + if (!discrete && !array) + out.print("new Double("); + out.print("__cachedValue"); + if (!discrete && !array) + out.print(")"); + out.println(");"); + } + } + + if (post != null) { + out.println(); + out.println(post); + } + } + + if (classifierExp.singleExampleCache) + out.println(" __cache.set(__result);"); + out.println(" return __result;"); + out.println(" }"); + } + + if (anyCache || !(array || generator)) { + out.println("\n public FeatureVector classify(Object __example)"); + out.println(" {"); + if (anyCache) + out.print(preamble.replaceAll("\\$METHOD\\$", "classify")); + out.print(" return "); + if (!(array || generator)) + out.print("new FeatureVector("); + out.print((anyCache ? 
"cachedFeatureValue" : "featureValue") + "(__example)"); + if (!(array || generator)) + out.print(")"); + out.println(";"); + out.println(" }"); + } + + if (!generator) { + if (!array && (anyCache || bodyPrimitive)) { + out.println("\n public Feature featureValue(Object __example)"); + out.println(" {"); + if (anyCache) + out.print(preamble.replaceAll("\\$METHOD\\$", "featureValue")); + + if (anyCache) + out.println(" return cachedFeatureValue(__example);"); + else { + out.println(" " + cachedValueType + " result = " + valueMethodName + + "(__example);"); + out.println(" return " + + primitiveFeatureConstructorInvocation(discrete, false, "", "\"\"", + "result", null, null, null) + ";"); + } + + out.println(" }"); + } + + if (anyCache || array || !bodyPrimitive || post != null) { + out.println("\n public " + cachedValueType + " " + valueMethodName + + "(Object __example)"); + out.println(" {"); + if (anyCache) + out.print(preamble.replaceAll("\\$METHOD\\$", valueMethodName)); + + if (array && field != null) { + out.println(" cachedFeatureValue(__example);"); + if (!cachedInMap) + out.println(" " + arg + " = (" + input + ") __example;"); + out.println(" return " + + (cachedInMap ? "(" + cachedValueType + + ") __valueCache.get(__example)" : field) + ";"); + } else if (!anyCache && bodyPrimitive && post != null) { + out.print(preamble.replaceAll("\\$METHOD\\$", valueMethodName)); + out.println(" " + cachedValueType + " __cachedValue = _" + valueMethodName + + "(__example);\n"); + out.println(post); + out.println(" return __cachedValue;"); + } else + out.println(" return " + + (anyCache ? "cachedFeatureValue" : (array ? "classify" + : "featureValue")) + + "(__example)." + + (array ? valueMethodName : (discrete ? "getStringValue" + : "getStrength")) + "();"); + out.println(" }"); + } + + if (bodyPrimitive) { + boolean helper = anyCache || post != null; + out.println("\n " + (helper ? "private" : "public") + " " + cachedValueType + " " + + (helper ? 
"_" : "") + valueMethodName + "(Object __example)"); + out.println(" {"); + if (!helper) + out.print(preamble.replaceAll("\\$METHOD\\$", valueMethodName)); + if (bodyArgCast) + out.println(" " + arg + " = (" + input + ") __example;\n"); + out.print(body); + out.println(" }"); + } + } + } + + + /** + * Compress the textual representation of an {@link ASTNode}, convert to ASCII hexadecimal, and + * write the result to the specified stream. + * + * @param buffer The text representation to be written. + * @param out The stream to write to. + **/ + public static void compressAndPrint(StringBuffer buffer, PrintStream out) { + PrintStream converter = null; + ByteArrayOutputStream converted = new ByteArrayOutputStream(); + try { + converter = new PrintStream(new GZIPOutputStream(new HexOutputStream(converted))); + } catch (Exception e) { + System.err.println("Could not create converter stream."); + System.exit(1); + } + + converter.print(buffer.toString()); + converter.close(); + + try { + converted.writeTo(out); + } catch (Exception e) { + System.err.println("Could not write the converted stream."); + System.exit(1); + } + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param ast The node to process. + **/ + public void run(AST ast) { + if (!RevisionAnalysis.noChanges) { + quantificationVariables = new HashMap(); + contextVariables = new HashMap(); + runOnChildren(ast); + } + } + + + /** + * Code is only generated for a {@link ClassifierName} when it is the only + * {@link ClassifierExpression} on the right hand side of the arrow (and there really shouldn't + * be a reason that a programmer would want to write such a declaration, but if he does, it will + * work). + * + * @param cn The node to process. 
+ **/ + public void run(ClassifierName cn) { + String cnName = cn.name.toString(); + if (cn.name == cn.referent + || !RevisionAnalysis.revisionStatus.get(cnName).equals(RevisionAnalysis.REVISED)) + return; + + System.out.println("Generating code for " + cnName); + + PrintStream out = open(cn); + if (out == null) + return; + + out.println(disclaimer); + out.println("// " + cn.shallow() + "\n"); + + ast.symbolTable.generateHeader(out); + + String field = null; + boolean cachedInMap = false; + if (cn.cacheIn != null) { + field = cn.cacheIn.toString(); + cachedInMap = field.equals(ClassifierAssignment.mapCache); + if (cachedInMap) + out.println("import java.util.WeakHashMap;"); + } + + out.println("\n"); + if (cn.comment != null) + out.println(cn.comment); + + out.println("public class " + cnName + " extends Classifier"); + out.println("{"); + + if (cachedInMap) + out.println(" private static final WeakHashMap __valueCache " + "= new WeakHashMap();"); + + String referentNoDots = cn.referent.toString().replace('.', '$'); + out.println(" private static final " + cn.referent + " __" + referentNoDots + " = new " + + cn.referent + "();\n"); + + if (cn.singleExampleCache) { + out.println(" private static ThreadLocal __cache = new ThreadLocal(){ };"); + out.println(" private static ThreadLocal __exampleCache = " + "new ThreadLocal(){ };"); + out.println(" public static void clearCache() { __exampleCache = new " + + "ThreadLocal(){ }; }\n"); + } + + out.println(" public " + cnName + "()"); + out.println(" {"); + out.println(" containingPackage = \"" + AST.globalSymbolTable.getPackage() + "\";"); + out.println(" name = \"" + cnName + "\";"); + out.println(" }\n"); + + typeReturningMethods(out, cn.argument.getType(), cn.returnType); + out.println(); + + boolean array = + cn.returnType.type == ClassifierReturnType.DISCRETE_ARRAY + || cn.returnType.type == ClassifierReturnType.REAL_ARRAY; + boolean generator = + cn.returnType.type == ClassifierReturnType.DISCRETE_GENERATOR + || 
cn.returnType.type == ClassifierReturnType.REAL_GENERATOR + || cn.returnType.type == ClassifierReturnType.MIXED_GENERATOR; + + StringBuffer body = new StringBuffer(); + body.append(" __result = __"); + body.append(referentNoDots); + body.append("."); + body.append(array || generator ? "classify" : "featureValue"); + body.append("(__example);\n"); + + generateClassificationMethods(out, cn, null, body.toString(), false, false, null); + out.println(); + typeCheckClassifyArray(out, cnName, cn.argument.getType(), cn.line); + out.println(); + generateHashingMethods(out, cnName); + + out.println("}\n"); + out.close(); + } + + + /** + * Generates code for all nodes of the indicated type. + * + * @param cc The node to process. + **/ + public void run(CodedClassifier cc) { + String ccName = cc.name.toString(); + String fileName = ccName + ".java"; + if (fileName.indexOf("$$") != -1) + files.add(fileName); + + if (!RevisionAnalysis.revisionStatus.get(ccName).equals(RevisionAnalysis.REVISED)) + return; + + System.out.println("Generating code for " + ccName); + + PrintStream out = open(fileName); + if (out == null) + return; + + out.println(disclaimer); + out.print("// "); + compressAndPrint(cc.shallow(), out); + out.println("\n"); + + ast.symbolTable.generateHeader(out); + + String field = null; + boolean cachedInMap = false; + if (cc.cacheIn != null) { + field = cc.cacheIn.toString(); + cachedInMap = field.equals(ClassifierAssignment.mapCache); + if (cachedInMap) + out.println("import java.util.WeakHashMap;"); + } + + out.println("\n"); + if (cc.comment != null) + out.println(cc.comment); + + out.println("public class " + ccName + " extends Classifier"); + out.println("{"); + + if (cachedInMap) + out.println(" private static final WeakHashMap __valueCache " + "= new WeakHashMap();"); + + HashSet invoked = SemanticAnalysis.invokedGraph.get(ccName); + if (invoked != null && invoked.size() > 0) { + for (Iterator I = invoked.iterator(); I.hasNext();) { + String name = I.next(); + 
String nameNoDots = name.replace('.', '$'); + out.println(" private static final " + name + " __" + nameNoDots + " = new " + + name + "();"); + } + + out.println(); + } + + if (cc.singleExampleCache) { + out.println(" private static ThreadLocal __cache = new ThreadLocal(){ };"); + out.println(" private static ThreadLocal __exampleCache = " + "new ThreadLocal(){ };"); + out.println(" public static void clearCache() { __exampleCache = new " + + "ThreadLocal(){ }; }\n"); + } + + out.println(" public " + ccName + "()"); + out.println(" {"); + out.println(" containingPackage = \"" + AST.globalSymbolTable.getPackage() + "\";"); + out.println(" name = \"" + ccName + "\";"); + out.println(" }\n"); + + Type input = cc.argument.getType(); + typeReturningMethods(out, input, cc.returnType); + out.println(); + + indent = 2; + forInit = false; + constraintMode = false; + methodBody.delete(0, methodBody.length()); + currentCG = cc; + for (ASTNodeIterator I = cc.body.iterator(); I.hasNext();) { + I.next().runPass(this); + methodBody.append("\n"); + } + + StringBuffer body = new StringBuffer(); + StringBuffer post = null; + boolean primitive = + cc.returnType.type == ClassifierReturnType.DISCRETE + || cc.returnType.type == ClassifierReturnType.REAL; + + if (primitive) { + boolean discrete = cc.returnType.type == ClassifierReturnType.DISCRETE; + + body = methodBody; + if (discrete && cc.returnType.values.size() > 0) { + post = new StringBuffer(); + post.append(" if (valueIndexOf(__cachedValue) == -1)\n" + " {\n" + + " System.err.println(\"Classifier '"); + post.append(ccName); + post.append("' defined on line "); + post.append(cc.line + 1); + post.append(" of "); + post.append(Main.sourceFilename); + post.append(" produced '\" + __cachedValue + \"' as a feature " + + "value, which is not allowable.\");\n" + " System.exit(1);\n" + + " }\n"); + } + } else { + body.append(" __result = new FeatureVector();\n"); + boolean array = + cc.returnType.type == 
ClassifierReturnType.DISCRETE_ARRAY + || cc.returnType.type == ClassifierReturnType.REAL_ARRAY; + if (array) + body.append(" int __featureIndex = 0;\n"); + if (cc.returnType.type == ClassifierReturnType.DISCRETE_GENERATOR + || cc.returnType.type == ClassifierReturnType.REAL_GENERATOR) + body.append(" String __id;\n"); + if (cc.returnType.type == ClassifierReturnType.DISCRETE_ARRAY + || cc.returnType.type == ClassifierReturnType.DISCRETE_GENERATOR + || cc.returnType.type == ClassifierReturnType.REAL_ARRAY + || cc.returnType.type == ClassifierReturnType.REAL_GENERATOR) { + body.append(" "); + body.append(cc.returnType.type == ClassifierReturnType.DISCRETE_ARRAY + || cc.returnType.type == ClassifierReturnType.DISCRETE_GENERATOR ? "String" + : "double"); + body.append(" __value;\n"); + } + + body.append("\n"); + body.append(methodBody); + + if (array) { + post = new StringBuffer(); + post.append(" for (int __i = 0; __i < __result.featuresSize(); ++__i)\n" + + " __result.getFeature(__i)" + ".setArrayLength(__featureIndex);\n"); + } + } + + generateClassificationMethods(out, cc, null, body.toString(), primitive, true, + post == null ? null : post.toString()); + out.println(); + typeCheckClassifyArray(out, ccName, input, cc.line); + out.println(); + generateHashingMethods(out, ccName); + + out.println("}\n"); + out.close(); + } + + + /** + * Generates code for all nodes of the indicated type. + * + * @param cg The node to process. 
+ **/ + public void run(CompositeGenerator cg) { + String cgName = cg.name.toString(); + String fileName = cgName + ".java"; + if (fileName.indexOf("$$") != -1) { + files.add(fileName); + runOnChildren(cg); + } else { + files.clear(); + + runOnChildren(cg); + + final String prefix = cgName + "$$"; + File[] leftOvers = + new File(System.getProperty("user.dir")).listFiles(new FilenameFilter() { + public boolean accept(File directory, String name) { + int i = name.lastIndexOf('.'); + if (i == -1) + return false; + String javaFile = name.substring(0, i) + ".java"; + return name.startsWith(prefix) && !files.contains(javaFile); + } + }); + + for (int i = 0; i < leftOvers.length; ++i) + if (leftOvers[i].exists() && !leftOvers[i].delete()) + reportError(0, "Could not delete '" + leftOvers[i].getName() + "'."); + } + + if (!RevisionAnalysis.revisionStatus.get(cgName).equals(RevisionAnalysis.REVISED)) + return; + + System.out.println("Generating code for " + cgName); + + PrintStream out = open(fileName); + if (out == null) + return; + + out.println(disclaimer); + out.print("// "); + compressAndPrint(cg.shallow(), out); + out.println("\n"); + + ast.symbolTable.generateHeader(out); + + out.println("\n"); + if (cg.comment != null) + out.println(cg.comment); + + out.println("public class " + cgName + " extends Classifier"); + out.println("{"); + + { + HashSet declared = new HashSet(); + for (ClassifierExpressionList.ClassifierExpressionListIterator I = + cg.components.listIterator(); I.hasNext();) { ClassifierExpression ce = I.nextItem(); String name = ce.name.toString(); - if (declared.add(name)) { + if (declared.add(name)) { String nameNoDots = name.replace('.', '$'); if (isField(ce)) { - String fieldClass = AST.globalSymbolTable.classForName(ce.name).getSimpleName(); - out.println(" private static final " + fieldClass + " __" + nameNoDots + " = " + fieldClass + "." 
+ name + ";"); + String fieldClass = + AST.globalSymbolTable.classForName(ce.name).getSimpleName(); + out.println(" private static final " + fieldClass + " __" + nameNoDots + + " = " + fieldClass + "." + name + ";"); + } else { + out.println(" private static final " + name + " __" + nameNoDots + + " = new " + name + "();"); } - else { - out.println(" private static final " + name + " __" + nameNoDots + " = new " + name + "();"); + } + } + } + + if (cg.singleExampleCache) { + out.println("\n private static ThreadLocal __cache = new ThreadLocal(){ };"); + out.println(" private static ThreadLocal __exampleCache = " + "new ThreadLocal(){ };"); + out.println(" public static void clearCache() { __exampleCache = new " + + "ThreadLocal() { }; }"); + } + + out.println("\n public " + cgName + "()"); + out.println(" {"); + out.println(" containingPackage = \"" + AST.globalSymbolTable.getPackage() + "\";"); + out.println(" name = \"" + cgName + "\";"); + out.println(" }\n"); + + Type input = cg.argument.getType(); + typeReturningMethods(out, input, cg.returnType); + out.println(); + + StringBuffer body = new StringBuffer(); + body.append(" __result = new FeatureVector();\n"); + + for (ClassifierExpressionList.ClassifierExpressionListIterator I = + cg.components.listIterator(); I.hasNext();) { + ClassifierExpression component = I.nextItem(); + String nameNoDots = ("__" + component.name).replace('.', '$'); + if (component.returnType.type == ClassifierReturnType.DISCRETE + || component.returnType.type == ClassifierReturnType.REAL) { + body.append(" __result.addFeature("); + body.append(nameNoDots); + body.append(".featureValue(__example));\n"); + } else { + body.append(" __result.addFeatures("); + body.append(nameNoDots); + body.append(".classify(__example));\n"); + } + } + + generateClassificationMethods(out, cg, null, body.toString(), false, false, null); + out.println(); + typeCheckClassifyArray(out, cgName, input, cg.line); + out.println(); + generateHashingMethods(out, 
cgName); + + out.println("\n public java.util.LinkedList getCompositeChildren()"); + out.println(" {"); + out.println(" java.util.LinkedList result = new " + "java.util.LinkedList();"); + + for (ClassifierExpressionList.ClassifierExpressionListIterator I = + cg.components.listIterator(); I.hasNext();) { + String nameNoDots = ("__" + I.nextItem().name).replace('.', '$'); + out.println(" result.add(" + nameNoDots + ");"); + } + + out.println(" return result;"); + out.println(" }"); + + out.println("}\n"); + out.close(); + } + + + /** + * Generates code for all nodes of the indicated type. + * + * @param ii The node to process. + **/ + public void run(InferenceInvocation ii) { + String iiName = ii.name.toString(); + if (!RevisionAnalysis.revisionStatus.get(iiName).equals(RevisionAnalysis.REVISED)) + return; + + System.out.println("Generating code for " + iiName); + + PrintStream out = open(ii); + if (out == null) + return; + + out.println(disclaimer); + out.println("// " + ii.shallow() + "\n"); + + ast.symbolTable.generateHeader(out); + + String field = null; + boolean cachedInMap = false; + if (ii.cacheIn != null) { + field = ii.cacheIn.toString(); + cachedInMap = field.equals(ClassifierAssignment.mapCache); + if (cachedInMap) + out.println("import java.util.WeakHashMap;"); + } + + out.println("\n"); + if (ii.comment != null) + out.println(ii.comment); + + out.println("public class " + iiName + " extends Classifier"); + out.println("{"); + + if (cachedInMap) + out.println(" private static final WeakHashMap __valueCache " + "= new WeakHashMap();"); + + String iiClassifierName = ii.classifier.toString(); + out.println(" private static final " + iiClassifierName + " __" + iiClassifierName + + " = new " + iiClassifierName + "();\n"); + + if (ii.singleExampleCache) { + out.println(" private static ThreadLocal __cache = new ThreadLocal(){ };"); + out.println(" private static ThreadLocal __exampleCache = " + "new ThreadLocal(){ };"); + out.println(" public static void 
clearCache() { __exampleCache = new " + + "ThreadLocal(){ }; }\n"); + } + + out.println(" public " + iiName + "()"); + out.println(" {"); + out.println(" containingPackage = \"" + AST.globalSymbolTable.getPackage() + "\";"); + out.println(" name = \"" + iiName + "\";"); + out.println(" }\n"); + + ClassifierType iiType = (ClassifierType) ii.classifier.typeCache; + Type input = iiType.getInput(); + typeReturningMethods(out, input, iiType.getOutput()); + out.println(); + + InferenceType inferenceType = (InferenceType) ii.inference.typeCache; + + String iiInference = ii.inference.toString(); + String fqInferenceName = iiInference; + if (ast.symbolTable.containsKey(ii.inference) && ast.symbolTable.getPackage().length() != 0) + fqInferenceName = ast.symbolTable.getPackage() + "." + fqInferenceName; + + StringBuffer body = new StringBuffer(" "); + body.append(inferenceType.getHeadType().toString()); + body.append(" head = "); + body.append(iiInference); + body.append(".findHead(("); + body.append(input.toString()); + body.append(") __example);\n"); + + body.append(" "); + body.append(iiInference); + body.append(" inference = ("); + body.append(iiInference); + body.append(") InferenceManager.get(\""); + body.append(fqInferenceName); + body.append("\", head);\n\n"); + + body.append(" if (inference == null)\n" + " {\n" + " inference = new " + + ii.inference + "(head);\n" + " InferenceManager.put(inference);\n" + + " }\n\n" + + + " String result = null;\n\n" + + + " try { result = inference.valueOf(__"); + body.append(iiClassifierName); + body.append(", __example); }\n" + " catch (Exception e)\n" + " {\n" + + " System.err.println(\"LBJava ERROR: Fatal error while " + + "evaluating classifier "); + body.append(iiName); + body.append(": \" + e);\n" + " e.printStackTrace();\n" + " System.exit(1);\n" + + " }\n\n" + + + " return result;\n"); + + generateClassificationMethods(out, ii, null, body.toString(), true, false, null); + out.println(); + typeCheckClassifyArray(out, iiName, 
input, ii.line); + out.println(); + generateHashingMethods(out, iiName); + + out.println("}\n"); + out.close(); + } + + + /** + * Generates code for all nodes of the indicated type. + * + * @param lce The node to process. + **/ + public void run(LearningClassifierExpression lce) { + String lceName = lce.name.toString(); + String fileName = lceName + ".java"; + + if (fileName.indexOf("$$") != -1) { + files.add(fileName); + runOnChildren(lce); + } else { + files.clear(); + + runOnChildren(lce); + + final String prefix = lceName + "$$"; + File[] leftOvers = + new File(System.getProperty("user.dir")).listFiles(new FilenameFilter() { + public boolean accept(File directory, String name) { + int i = name.lastIndexOf('.'); + if (i == -1) + return false; + String javaFile = name.substring(0, i) + ".java"; + return name.startsWith(prefix) && !files.contains(javaFile); + } + }); + + for (int i = 0; i < leftOvers.length; ++i) + if (leftOvers[i].exists() && !leftOvers[i].delete()) + reportError(0, "Could not delete '" + leftOvers[i].getName() + "'."); + } + + if ((lce.parser == null ? !RevisionAnalysis.revisionStatus.get(lceName).equals( + RevisionAnalysis.REVISED) : RevisionAnalysis.revisionStatus.get(lceName).equals( + RevisionAnalysis.UNAFFECTED) + || lce.startingRound > 1) + || lce.onlyCodeGeneration) + // In the last condition above involving lce.startingRound, note that + // before setting lce.startingRound > 1, RevisionAnalysis also ensures + // that the lce is unaffected other than the number of rounds and that + // there will be no parameter tuning or cross validation. 
+ return; + + System.out.println("Generating code for " + lceName); + + PrintStream out = open(fileName); + if (out == null) + return; + + out.println(disclaimer); + out.println("// \n"); + + ast.symbolTable.generateHeader(out); + + if (lce.cacheIn != null) { + String field = lce.cacheIn.toString(); + boolean cachedInMap = field.equals(ClassifierAssignment.mapCache); + if (cachedInMap) + out.println("import java.util.WeakHashMap;"); + } + + out.println("\n"); + if (lce.comment != null) + out.println(lce.comment); + + out.println("public class " + lceName + " extends " + lce.learnerName); + out.println("{"); + out.println(" private static void loadInstance()"); + out.println(" {"); + out.println(" if (instance == null) instance = new " + lceName + "(true);"); + out.println(" }\n"); + + String formalParameterString = ""; + String firstArgumentsString = ""; + String argumentString = ""; + + if (lce.parameterSets.size() > 0) { + for (ListIterator I = lce.parameterSets.listIterator(); I.hasNext();) { + ParameterSet e = I.next(); + + StringBuffer typeStringB = new StringBuffer(); + e.type.write(typeStringB); + + formalParameterString += ", " + e.type + " " + e.getParameterName(); + firstArgumentsString += ", " + e.getFirst(); + argumentString += ", " + e.getParameterName(); + } + + formalParameterString = formalParameterString.substring(2); + firstArgumentsString = firstArgumentsString.substring(2); + argumentString = argumentString.substring(2); + + out.print(" private " + lceName + "(boolean b) { this("); + if (lce.learnerParameterBlock == null) + out.print(firstArgumentsString); + else + out.print("new Parameters()"); + out.println("); }"); + + out.print(" private " + lceName + "("); + if (lce.learnerParameterBlock == null) + out.print(formalParameterString); + else + out.print("Parameters parameters"); + out.println(")"); + out.println(" {"); + out.print(" super("); + + if (lce.learnerParameterBlock == null) { + if (lce.learnerConstructor.arguments.size() > 0) { + 
out.print(argumentString); + if (lce.attributeString.length() != 0) { + out.print(", "); } - } - } - } - - if (cg.singleExampleCache) { - out.println( - "\n private static ThreadLocal __cache = new ThreadLocal(){ };"); - out.println(" private static ThreadLocal __exampleCache = " - + "new ThreadLocal(){ };"); - out.println(" public static void clearCache() { __exampleCache = new " - + "ThreadLocal() { }; }"); - } - - out.println("\n public " + cgName + "()"); - out.println(" {"); - out.println(" containingPackage = \"" - + AST.globalSymbolTable.getPackage() + "\";"); - out.println(" name = \"" + cgName + "\";"); - out.println(" }\n"); - - Type input = cg.argument.getType(); - typeReturningMethods(out, input, cg.returnType); - out.println(); - - StringBuffer body = new StringBuffer(); - body.append(" __result = new FeatureVector();\n"); - - for (ClassifierExpressionList.ClassifierExpressionListIterator I = - cg.components.listIterator(); - I.hasNext(); ) { - ClassifierExpression component = I.nextItem(); - String nameNoDots = ("__" + component.name).replace('.', '$'); - if (component.returnType.type == ClassifierReturnType.DISCRETE - || component.returnType.type == ClassifierReturnType.REAL) { - body.append(" __result.addFeature("); - body.append(nameNoDots); - body.append(".featureValue(__example));\n"); - } - else { - body.append(" __result.addFeatures("); - body.append(nameNoDots); - body.append(".classify(__example));\n"); - } - } - - generateClassificationMethods(out, cg, null, body.toString(), false, - false, null); - out.println(); - typeCheckClassifyArray(out, cgName, input, cg.line); - out.println(); - generateHashingMethods(out, cgName); - - out.println("\n public java.util.LinkedList getCompositeChildren()"); - out.println(" {"); - out.println(" java.util.LinkedList result = new " - + "java.util.LinkedList();"); - - for (ClassifierExpressionList.ClassifierExpressionListIterator I = - cg.components.listIterator(); - I.hasNext(); ) { - String nameNoDots = 
("__" + I.nextItem().name).replace('.', '$'); - out.println(" result.add(" + nameNoDots + ");"); - } - - out.println(" return result;"); - out.println(" }"); - - out.println("}\n"); - out.close(); - } - - - /** - * Generates code for all nodes of the indicated type. - * - * @param ii The node to process. - **/ - public void run(InferenceInvocation ii) { - String iiName = ii.name.toString(); - if (!RevisionAnalysis.revisionStatus.get(iiName) - .equals(RevisionAnalysis.REVISED)) - return; - - System.out.println("Generating code for " + iiName); - - PrintStream out = open(ii); - if (out == null) return; - - out.println(disclaimer); - out.println("// " + ii.shallow() + "\n"); - - ast.symbolTable.generateHeader(out); - - String field = null; - boolean cachedInMap = false; - if (ii.cacheIn != null) { - field = ii.cacheIn.toString(); - cachedInMap = field.equals(ClassifierAssignment.mapCache); - if (cachedInMap) out.println("import java.util.WeakHashMap;"); - } - - out.println("\n"); - if (ii.comment != null) out.println(ii.comment); - - out.println("public class " + iiName + " extends Classifier"); - out.println("{"); - - if (cachedInMap) - out.println(" private static final WeakHashMap __valueCache " - + "= new WeakHashMap();"); - - String iiClassifierName = ii.classifier.toString(); - out.println(" private static final " + iiClassifierName + " __" - + iiClassifierName + " = new " + iiClassifierName + "();\n"); - - if (ii.singleExampleCache) { - out.println( - " private static ThreadLocal __cache = new ThreadLocal(){ };"); - out.println(" private static ThreadLocal __exampleCache = " - + "new ThreadLocal(){ };"); - out.println(" public static void clearCache() { __exampleCache = new " - + "ThreadLocal(){ }; }\n"); - } - - out.println(" public " + iiName + "()"); - out.println(" {"); - out.println(" containingPackage = \"" - + AST.globalSymbolTable.getPackage() + "\";"); - out.println(" name = \"" + iiName + "\";"); - out.println(" }\n"); - - ClassifierType iiType = 
(ClassifierType) ii.classifier.typeCache; - Type input = iiType.getInput(); - typeReturningMethods(out, input, iiType.getOutput()); - out.println(); - - InferenceType inferenceType = (InferenceType) ii.inference.typeCache; - - String iiInference = ii.inference.toString(); - String fqInferenceName = iiInference; - if (ast.symbolTable.containsKey(ii.inference) - && ast.symbolTable.getPackage().length() != 0) - fqInferenceName = ast.symbolTable.getPackage() + "." + fqInferenceName; - - StringBuffer body = new StringBuffer(" "); - body.append(inferenceType.getHeadType().toString()); - body.append(" head = "); - body.append(iiInference); - body.append(".findHead(("); - body.append(input.toString()); - body.append(") __example);\n"); - - body.append(" "); - body.append(iiInference); - body.append(" inference = ("); - body.append(iiInference); - body.append(") InferenceManager.get(\""); - body.append(fqInferenceName); - body.append("\", head);\n\n"); - - body.append( - " if (inference == null)\n" - + " {\n" - + " inference = new " + ii.inference + "(head);\n" - + " InferenceManager.put(inference);\n" - + " }\n\n" - - + " String result = null;\n\n" - - + " try { result = inference.valueOf(__"); - body.append(iiClassifierName); - body.append(", __example); }\n" - + " catch (Exception e)\n" - + " {\n" - + " System.err.println(\"LBJava ERROR: Fatal error while " - + "evaluating classifier "); - body.append(iiName); - body.append(": \" + e);\n" - + " e.printStackTrace();\n" - + " System.exit(1);\n" - + " }\n\n" - - + " return result;\n"); - - generateClassificationMethods(out, ii, null, body.toString(), true, false, - null); - out.println(); - typeCheckClassifyArray(out, iiName, input, ii.line); - out.println(); - generateHashingMethods(out, iiName); - - out.println("}\n"); - out.close(); - } - - - /** - * Generates code for all nodes of the indicated type. - * - * @param lce The node to process. 
- **/ - public void run(LearningClassifierExpression lce) { - String lceName = lce.name.toString(); - String fileName = lceName + ".java"; - - if (fileName.indexOf("$$") != -1) { - files.add(fileName); - runOnChildren(lce); - } - else { - files.clear(); - - runOnChildren(lce); - - final String prefix = lceName + "$$"; - File[] leftOvers = - new File(System.getProperty("user.dir")).listFiles( - new FilenameFilter() { - public boolean accept(File directory, String name) { - int i = name.lastIndexOf('.'); - if (i == -1) return false; - String javaFile = name.substring(0, i) + ".java"; - return name.startsWith(prefix) && !files.contains(javaFile); - } - }); - - for (int i = 0; i < leftOvers.length; ++i) - if (leftOvers[i].exists() && !leftOvers[i].delete()) - reportError(0, "Could not delete '" + leftOvers[i].getName() - + "'."); - } - - if ((lce.parser == null - ? !RevisionAnalysis.revisionStatus.get(lceName) - .equals(RevisionAnalysis.REVISED) - : RevisionAnalysis.revisionStatus.get(lceName) - .equals(RevisionAnalysis.UNAFFECTED) - || lce.startingRound > 1) - || lce.onlyCodeGeneration) - // In the last condition above involving lce.startingRound, note that - // before setting lce.startingRound > 1, RevisionAnalysis also ensures - // that the lce is unaffected other than the number of rounds and that - // there will be no parameter tuning or cross validation. 
- return; - - System.out.println("Generating code for " + lceName); - - PrintStream out = open(fileName); - if (out == null) return; - - out.println(disclaimer); - out.println("// \n"); - - ast.symbolTable.generateHeader(out); - - if (lce.cacheIn != null) { - String field = lce.cacheIn.toString(); - boolean cachedInMap = field.equals(ClassifierAssignment.mapCache); - if (cachedInMap) out.println("import java.util.WeakHashMap;"); - } - - out.println("\n"); - if (lce.comment != null) out.println(lce.comment); - - out.println("public class " + lceName + " extends " + lce.learnerName); - out.println("{"); - out.println(" private static void loadInstance()"); - out.println(" {"); - out.println(" if (instance == null) instance = new " + lceName - + "(true);"); - out.println(" }\n"); - - String formalParameterString = ""; - String firstArgumentsString = ""; - String argumentString = ""; - - if (lce.parameterSets.size() > 0) { - for (ListIterator I = lce.parameterSets.listIterator(); I.hasNext(); ) { - ParameterSet e = I.next(); - - StringBuffer typeStringB = new StringBuffer(); - e.type.write(typeStringB); - - formalParameterString += ", " + e.type + " " + e.getParameterName(); - firstArgumentsString += ", " + e.getFirst(); - argumentString += ", " + e.getParameterName(); - } - - formalParameterString = formalParameterString.substring(2); - firstArgumentsString = firstArgumentsString.substring(2); - argumentString = argumentString.substring(2); - - out.print(" private " + lceName + "(boolean b) { this("); - if (lce.learnerParameterBlock == null) out.print(firstArgumentsString); - else out.print("new Parameters()"); - out.println("); }"); - - out.print(" private " + lceName + "("); - if (lce.learnerParameterBlock == null) out.print(formalParameterString); - else out.print("Parameters parameters"); - out.println(")"); - out.println(" {"); - out.print(" super("); - - if (lce.learnerParameterBlock == null) { - if (lce.learnerConstructor.arguments.size() > 0) { - 
out.print(argumentString); - if (lce.attributeString.length() != 0) { - out.print(", "); - } - } - - if (lce.attributeString.length() != 0) out.print("attributeString"); - } - else out.print("parameters"); - - out.println(");"); - out.println(" containingPackage = \"" - + AST.globalSymbolTable.getPackage() + "\";"); - out.println(" name = \"" + lceName + "\";"); - out.println(" setEncoding(" + lce.featureEncoding + ");"); - if (lce.labeler != null) - out.println(" setLabeler(new " + lce.labeler.name + "());"); - out.println(" setExtractor(new " + lce.extractor.name + "());"); - out.println(" isClone = false;"); - out.println(" }\n"); - } - - out.println(" public static Parser getParser() { return " + lce.parser - + "; }"); - out.println(" public static Parser getTestParser() { return " - + lce.testParser + "; }\n"); - - generateLearnerBody(out, lce); - - if (lce.parameterSets.size() > 0) { - out.println(); - out.println(" public static class Parameters extends " - + lce.learnerName + ".Parameters"); - out.println(" {"); - out.println(" public Parameters() { this(" + firstArgumentsString - + "); }"); - out.println(" public Parameters(" + formalParameterString + ")"); - - if (lce.learnerParameterBlock != null) { - TranslateToJava translator = new TranslateToJava(null); - translator.setRoot(lce.learnerParameterBlock); - translator.setCurrentCG(currentCG); - translator.setIndent(3); - translator.run(); - out.println(translator.getMethodBody()); - } - else { - out.println(" {"); - out.println(" super((" + lce.learnerName + ".Parameters) new " - + lceName + "(" + argumentString + ").getParameters());"); - out.println(" }"); - } - out.println(" }"); - } - - out.println("}\n"); - out.close(); - } - - - /** - * Generates code for all nodes of the indicated type. - * - * @param c The node to process. 
- **/ - public void run(Conjunction c) { - String cName = c.name.toString(); - String fileName = cName + ".java"; - if (fileName.indexOf("$$") != -1) { - files.add(fileName); - runOnChildren(c); - } - else { - files.clear(); - - runOnChildren(c); - - final String prefix = cName + "$$"; - File[] leftOvers = - new File(System.getProperty("user.dir")).listFiles( - new FilenameFilter() { - public boolean accept(File directory, String name) { - int i = name.lastIndexOf('.'); - if (i == -1) return false; - String javaFile = name.substring(0, i) + ".java"; - return name.startsWith(prefix) && !files.contains(javaFile); - } - }); - - for (int i = 0; i < leftOvers.length; ++i) - if (leftOvers[i].exists() && !leftOvers[i].delete()) - reportError(0, "Could not delete '" + leftOvers[i].getName() - + "'."); - } - - if (!RevisionAnalysis.revisionStatus.get(cName) - .equals(RevisionAnalysis.REVISED)) - return; - - System.out.println("Generating code for " + cName); - - PrintStream out = open(fileName); - if (out == null) return; - - out.println(disclaimer); - out.println("// " + c.shallow() + "\n"); - - ast.symbolTable.generateHeader(out); - - out.println("\n"); - if (c.comment != null) out.println(c.comment); - - out.println("public class " + cName + " extends Classifier"); - out.println("{"); - - String leftName = c.left.name.toString(); - String rightName = c.right.name.toString(); - out.println(" private static final " + leftName + " left = new " - + leftName + "();"); - if (!leftName.equals(rightName)) - out.println(" private static final " + rightName + " right = new " - + c.right.name + "();\n"); - - if (c.singleExampleCache) { - out.println( - " private static ThreadLocal __cache = new ThreadLocal(){ };"); - out.println(" private static ThreadLocal __exampleCache = " - + "new ThreadLocal(){ };"); - out.println(" public static void clearCache() { __exampleCache = new " - + "ThreadLocal() { }; }\n"); - } - - out.println(" public " + cName + "()"); - out.println(" {"); - 
out.println(" containingPackage = \"" - + AST.globalSymbolTable.getPackage() + "\";"); - out.println(" name = \"" + cName + "\";"); - out.println(" }\n"); - - Type input = c.argument.getType(); - typeReturningMethods(out, input, c.returnType); - out.println(); - - boolean primitive = - c.returnType.type == ClassifierReturnType.DISCRETE - || c.returnType.type == ClassifierReturnType.REAL; - boolean mixed = c.returnType.type == ClassifierReturnType.MIXED_GENERATOR; - int leftType = c.left.returnType.type; - int rightType = c.right.returnType.type; - boolean sameType = leftType == rightType; - boolean leftIsGenerator = - leftType == ClassifierReturnType.DISCRETE_GENERATOR - || rightType == ClassifierReturnType.REAL_GENERATOR; - boolean leftIsPrimitive = - leftType == ClassifierReturnType.DISCRETE - || leftType == ClassifierReturnType.REAL; - boolean rightIsPrimitive = - rightType == ClassifierReturnType.DISCRETE - || rightType == ClassifierReturnType.REAL; - boolean bothMulti = !leftIsPrimitive && !rightIsPrimitive; - - StringBuffer body = new StringBuffer(); - - if (primitive) - body.append(" __result = left.featureValue(__example)" - + ".conjunction(right.featureValue(__example), this);\n"); - else { - body.append(" __result = new FeatureVector();\n"); - if (leftIsPrimitive) - body.append(" Feature lf = left.featureValue(__example);\n"); - else - body.append( - " FeatureVector leftVector = left.classify(__example);\n" - + " int N = leftVector.featuresSize();\n"); - - if (c.left.equals(c.right)) { - // SemanticAnalysis ensures that neither classifier is primitive here. 
- body.append( " for (int j = 1; j < N; ++j)\n" - + " {\n"); - body.append( " Feature rf = leftVector.getFeature(j);\n" - + " for (int i = 0; i < j; ++i)\n" - + " __result.addFeature(leftVector.getFeature(i)" - + ".conjunction(rf, this));\n" - + " }\n"); - } - else { - if (rightIsPrimitive) - body.append(" Feature rf = right.featureValue(__example);\n"); - else - body.append( - " FeatureVector rightVector = right.classify(__example);\n" - + " int M = rightVector.featuresSize();\n"); - - String in = ""; - if (!leftIsPrimitive) { - body.append( " for (int i = 0; i < N; ++i)\n" - + " {\n" - + " Feature lf = leftVector.getFeature(i);\n"); - in += " "; - } - - if (!rightIsPrimitive) { - body.append(in); - body.append(" for (int j = 0; j < M; ++j)\n"); - body.append(in); - body.append(" {\n"); - body.append(in); - body.append(" Feature rf = rightVector.getFeature(j);\n"); - in += " "; - } - - if (mixed || leftIsGenerator && sameType) { - body.append(in); - body.append(" if (lf.equals(rf)) continue;\n"); - } - - body.append(in); - body.append(" __result.addFeature(lf.conjunction(rf, this));\n"); - - if (!rightIsPrimitive) { - in = in.substring(2); - body.append(in); - body.append(" }\n"); - } - - if (!leftIsPrimitive) body.append(" }\n"); - body.append("\n"); - } - - if (bothMulti) body.append(" __result.sort();\n"); - } - - generateClassificationMethods(out, c, null, body.toString(), false, false, - null); - out.println(); - typeCheckClassifyArray(out, cName, input, c.line); - out.println(); - generateHashingMethods(out, cName); - - out.println("}\n"); - out.close(); - } - - - /** - * Generates code for all nodes of the indicated type. - * - * @param cd The node to process. 
- **/ - public void run(ConstraintDeclaration cd) { - String cdName = cd.name.toString(); - String fileName = cdName + ".java"; - - if (!RevisionAnalysis.revisionStatus.get(cdName) - .equals(RevisionAnalysis.REVISED)) - return; - - System.out.println("Generating code for " + cdName); - - PrintStream out = open(fileName); - if (out == null) return; - - out.println(disclaimer); - out.print("// "); - compressAndPrint(cd.shallow(), out); - out.println("\n"); - - ast.symbolTable.generateHeader(out); - - out.println("\n"); - if (cd.comment != null) out.println(cd.comment); - - out.println("public class " + cdName - + " extends ParameterizedConstraint"); - out.println("{"); - - HashSet invoked = SemanticAnalysis.invokedGraph.get(cdName); - if (invoked != null && invoked.size() > 0) { - for (Iterator I = invoked.iterator(); I.hasNext(); ) { - String name = I.next(); - String nameNoDots = name.replace('.', '$'); - out.println(" private static final " + name + " __" + nameNoDots - + " = new " + name + "();"); - } - - out.println(); - } - - String fqName = ast.symbolTable.getPackage(); - if (fqName.length() > 0) fqName += "."; - fqName += cdName; - out.println(" public " + cdName + "() { super(\"" + fqName + "\"); }\n"); - - Type input = cd.argument.getType(); - out.println(" public String getInputType() { return \"" - + input.typeClass().getName() + "\"; }\n"); - - indent = 2; - forInit = false; - constraintMode = false; - methodBody.delete(0, methodBody.length()); - currentCG = cd; - for (ASTNodeIterator I = cd.body.iterator(); I.hasNext(); ) { - I.next().runPass(this); - methodBody.append("\n"); - } - - out.println(" public String discreteValue(Object __example)"); - out.println(" {"); - out.print( - generateTypeChecking(" ", cdName, "Constraint", false, - input.toString(), cd.line, "__example", false)); - - out.println(" " + cd.argument + " = (" + input + ") __example;\n"); - - out.println(methodBody); - - out.println(" return \"true\";"); - out.println(" }"); - - 
out.println(); - typeCheckClassifyArray(out, cdName, input, cd.line); - out.println(); - generateHashingMethods(out, cdName); - - indent = 2; - forInit = false; - constraintMode = true; - methodBody.delete(0, methodBody.length()); - for (ASTNodeIterator I = cd.body.iterator(); I.hasNext(); ) { - I.next().runPass(this); - methodBody.append("\n"); - } - - out.println("\n public FirstOrderConstraint makeConstraint(Object " - + "__example)"); - out.println(" {"); - out.print( - generateTypeChecking(" ", cdName, "Constraint", false, - input.toString(), cd.line, "__example", false)); - - out.println(" " + cd.argument + " = (" + input + ") __example;"); - out.println(" FirstOrderConstraint __result = new " - + "FirstOrderConstant(true);\n"); - - out.println(methodBody); - - out.println(" return __result;"); - out.println(" }"); - - out.println("}\n"); - out.close(); - } - - - /** - * Generates code for all nodes of the indicated type. - * - * @param in The node to process. - **/ - public void run(InferenceDeclaration in) { - in.constraint.runPass(this); - - String inName = in.name.toString(); - String fileName = inName + ".java"; - - if (!RevisionAnalysis.revisionStatus.get(inName) - .equals(RevisionAnalysis.REVISED)) - return; - - System.out.println("Generating code for " + inName); - - PrintStream out = open(fileName); - if (out == null) return; - - out.println(disclaimer); - out.print("// "); - compressAndPrint(in.shallow(), out); - out.println("\n"); - - ast.symbolTable.generateHeader(out); - out.println("import java.util.*;\n\n"); - - currentCG = in; - String defaultNormalizer = "new IdentityNormalizer()"; - - if (in.comment != null) out.println(in.comment); - - out.println("public class " + inName + " extends " + in.algorithm.name); - out.println("{"); - - if (in.containsTypeSpecificNormalizer()) { - out.println(" private static final HashMap normalizers = new " - + "HashMap();"); - out.println(" static"); - out.println(" {"); - for (int i = 0; i < 
in.normalizerDeclarations.length; ++i) { - if (in.normalizerDeclarations[i].learner != null) - out.println(" normalizers.put(new " - + in.normalizerDeclarations[i].learner + "(), " - + in.normalizerDeclarations[i].normalizer + ");"); - else - defaultNormalizer = - in.normalizerDeclarations[i].normalizer.toString(); - } - - out.println(" }\n"); - } - else - for (int i = 0; i < in.normalizerDeclarations.length; ++i) - defaultNormalizer = - in.normalizerDeclarations[i].normalizer.toString(); - - indent = 1; - forInit = false; - constraintMode = false; - methodBody.delete(0, methodBody.length()); - for (int i = 0; i < in.headFinders.length; ++i) - in.headFinders[i].runPass(this); - out.println(methodBody); - - out.println(" public " + inName + "() { }"); - out.println(" public " + inName + "(" + in.head.getType() + " head)"); - out.println(" {"); - out.print(" super(head"); - if (in.algorithm.arguments.size() > 0) - out.print(", " + in.algorithm.arguments); - out.println(");"); - out.println(" constraint = new " + in.constraint.name - + "().makeConstraint(head);"); - out.println(" }\n"); - - out.println(" public String getHeadType() { return \"" - + in.head.getType().typeClass().getName() + "\"; }"); - out.println(" public String[] getHeadFinderTypes()"); - out.println(" {"); - out.print(" return new String[]{ \"" - + in.headFinders[0].argument.getType().typeClass().getName() - + "\""); - for (int i = 1; i < in.headFinders.length; ++i) - out.print(", \"" - + in.headFinders[i].argument.getType().typeClass().getName() - + "\""); - out.println(" };"); - out.println(" }\n"); - - out.println(" public Normalizer getNormalizer(Learner c)"); - out.println(" {"); - - if (in.containsTypeSpecificNormalizer()) { - out.println(" Normalizer result = (Normalizer) normalizers.get(c);"); - out.println(" if (result == null)"); - out.println(" result = " + defaultNormalizer + ";"); - out.println(" return result;"); - } - else out.println(" return " + defaultNormalizer + ";"); - - 
out.println(" }"); - - out.println("}\n"); - out.close(); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param h The node to process. - **/ - public void run(InferenceDeclaration.HeadFinder h) { - appendIndent("public static "); - methodBody.append(((InferenceDeclaration) currentCG).head.getType()); - methodBody.append(" findHead(" + h.argument + ")\n"); - ++indent; - h.body.runPass(this); - --indent; - methodBody.append("\n\n"); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param b The node to process. - **/ - public void run(Block b) { - --indent; - appendLine("{"); - - ++indent; - runOnChildren(b); - methodBody.append("\n"); - --indent; - - appendIndent("}"); - ++indent; - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param l The node to process. - **/ - public void run(StatementList l) { - ASTNodeIterator I = l.iterator(); - if (!I.hasNext()) return; - - if (I.hasNext()) I.next().runPass(this); - while (I.hasNext()) { - methodBody.append("\n"); - I.next().runPass(this); - } - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param s The node to process. - **/ - public void run(AssertStatement s) { - appendIndent("assert "); - s.condition.runPass(this); - - if (s.message != null) { - methodBody.append(" : "); - s.message.runPass(this); - } - - methodBody.append(";"); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param s The node to process. - **/ - public void run(BreakStatement s) { - appendIndent("break"); - - if (s.label != null) { - methodBody.append(" "); - methodBody.append(s.label); - } - - methodBody.append(";"); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param s The node to process. 
- **/ - public void run(ContinueStatement s) { - appendIndent("continue"); - - if (s.label != null) { - methodBody.append(" "); - methodBody.append(s.label); - } - - methodBody.append(";"); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param s The node to process. - **/ - public void run(ExpressionStatement s) { - if (s.expression instanceof ConstraintStatementExpression) - s.expression.runPass(this); - else { - appendIndent(); - s.expression.runPass(this); - methodBody.append(";"); - } - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param s The node to process. - **/ - public void run(ForStatement s) { - appendIndent("for ("); - - if (s.initializers != null) { - s.initializers.runPass(this); - methodBody.append("; "); - } - else if (s.initializer != null) { - forInit = true; - s.initializer.runPass(this); - methodBody.append(" "); - forInit = false; - } - else methodBody.append("; "); - - if (s.condition != null) s.condition.runPass(this); - methodBody.append("; "); - if (s.updaters != null) s.updaters.runPass(this); - methodBody.append(")\n"); - ++indent; - s.body.runPass(this); - --indent; - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param s The node to process. - **/ - public void run(IfStatement s) { - appendIndent("if ("); - s.condition.runPass(this); - methodBody.append(")\n"); - ++indent; - s.thenClause.runPass(this); - --indent; - - if (s.elseClause != null) { - methodBody.append("\n"); - appendLine("else"); - ++indent; - s.elseClause.runPass(this); - --indent; - } - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param s The node to process. - **/ - public void run(LabeledStatement s) { - appendIndent(s.label + ": "); - s.statement.runPass(this); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param s The node to process. 
- **/ - public void run(ReturnStatement s) { - appendIndent(); - - if (currentCG instanceof CodedClassifier - && ((CodedClassifier) currentCG).returnType.type - == ClassifierReturnType.DISCRETE) { - String literal = toStringLiteral(s.expression); - methodBody.append("return "); - if (literal != null) methodBody.append(literal); - else { - methodBody.append("\"\" + ("); - s.expression.runPass(this); - methodBody.append(')'); - } - } - else { - methodBody.append("return "); - s.expression.runPass(this); - } - - methodBody.append(";"); - } - - - /** - * If the given expression can be converted to a string at compile time, - * this method returns that string. - * - * @param e The given expression. - * @return The compile time conversion of the expression to a string, or - * null if it wasn't possible to convert. - **/ - private static String toStringLiteral(Expression e) { - if (e instanceof Constant) { - Constant c = (Constant) e; - if (c.typeCache instanceof PrimitiveType) return "\"" + c.value + "\""; - return c.value; - } - - return null; - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param ss The node to process. 
- **/ - public void run(SenseStatement ss) { - CodedClassifier currentCC = (CodedClassifier) currentCG; - - if (ss.value instanceof MethodInvocation) { - MethodInvocation m = (MethodInvocation) ss.value; - if (m.isClassifierInvocation) { - ClassifierReturnType invokedType = - ((ClassifierType) m.name.typeCache).getOutput(); - int t = invokedType.type; - - if ((currentCC.returnType.type - == ClassifierReturnType.DISCRETE_GENERATOR - || currentCC.returnType.type - == ClassifierReturnType.REAL_GENERATOR) - && (t == ClassifierReturnType.DISCRETE_GENERATOR - || t == ClassifierReturnType.REAL_GENERATOR - || t == ClassifierReturnType.DISCRETE_ARRAY - || t == ClassifierReturnType.REAL_ARRAY)) { - appendIndent("__id = "); - String s = toStringLiteral(ss.name); - if (s != null) methodBody.append(s); - else { - methodBody.append("\"\" + ("); - ss.name.runPass(this); - methodBody.append(")"); - } - methodBody.append(";\n"); - - appendLine("{"); - ++indent; - - appendIndent("FeatureVector __temp = "); - ss.value.runPass(this); - methodBody.append(";\n"); - - appendLine("for (int __i = 0; __i < __temp.featuresSize(); ++__i)"); - ++indent; - - boolean isDiscrete = t == ClassifierReturnType.DISCRETE_GENERATOR - || t == ClassifierReturnType.DISCRETE_ARRAY; - - appendIndent("__result.addFeature(new "); - methodBody.append(isDiscrete ? "Discrete" : "Real"); - methodBody.append("ReferringStringFeature"); - methodBody.append("(this, __id, ("); - methodBody.append(isDiscrete ? 
"Discrete" : "Real"); - methodBody.append("Feature) __temp.getFeature(__i)"); - if (currentCC.returnType.values.size() > 0 - && !currentCC.returnType.values.equals(invokedType.values)) { - methodBody.append(", "); - methodBody.append(m.name.toString()); - methodBody.append(".getAllowableValues()"); - } - methodBody.append("));\n"); - - indent -= 2; - appendIndent("}"); - return; - } - else if ((currentCC.returnType.type - == ClassifierReturnType.DISCRETE_ARRAY - || currentCC.returnType.type - == ClassifierReturnType.REAL_ARRAY) - && (t == ClassifierReturnType.DISCRETE_ARRAY - || t == ClassifierReturnType.REAL_ARRAY)) { - appendLine("{"); - ++indent; - boolean isDiscrete = t == ClassifierReturnType.DISCRETE_ARRAY; - - appendIndent("FeatureVector __temp = "); - ss.value.runPass(this); - methodBody.append(";\n"); - - appendLine("for (int __i = 0; __i < __temp.featuresSize(); ++__i)"); - appendLine("{"); - ++indent; - appendLine("Feature __f = __temp.getFeature(__i);"); - appendIndent("__value = __f."); - methodBody.append(isDiscrete ? 
"getStringValue" : "getStrength"); - methodBody.append("();\n"); - - appendIndent("__result.addFeature("); - methodBody.append( - primitiveFeatureConstructorInvocation( - isDiscrete, true, "this", "\"\"", "__value", null, - "" + currentCC.returnType.values.size(), - "__featureIndex++, 0")); - methodBody.append(");\n"); - - --indent; - appendLine("}"); - - --indent; - appendIndent("}"); - return; - } - } - } - - boolean discrete = - currentCC.returnType.type == ClassifierReturnType.DISCRETE_GENERATOR - || currentCC.returnType.type == ClassifierReturnType.DISCRETE_ARRAY; - - if (ss.senseall) { - if (ss.name != null) { // if we're inside a generator - appendIndent("Object __values = "); - ss.name.runPass(this); - methodBody.append(";\n\n"); - - appendLine("if (__values instanceof java.util.Collection)"); - appendLine("{"); - ++indent; - - appendLine( - "for (java.util.Iterator __I = ((java.util.Collection) " - + "__values).iterator(); __I.hasNext(); )"); - appendLine("{"); - ++indent; - appendLine("__id = __I.next().toString();"); - senseFeature(ss, currentCC, discrete, discrete ? "\"true\"" : "1"); - methodBody.append("\n"); - - --indent; - appendLine("}"); - --indent; - appendLine("}"); - - appendLine("else"); - appendLine("{"); - ++indent; - appendLine( - "for (java.util.Iterator __I = ((java.util.Map) " - + "__values).entrySet().iterator(); __I.hasNext(); )"); - appendLine("{"); - ++indent; - appendLine( - "java.util.Map.Entry __e = (java.util.Map.Entry) __I.next();"); - appendLine("__id = __e.getKey().toString();"); - appendIndent("__value = "); - methodBody.append( - discrete ? 
"__e.getValue().toString()" - : "((Double) __e.getValue()).doubleValue()"); - methodBody.append(";\n"); - - senseFeature(ss, currentCC, discrete, null); - methodBody.append("\n"); - --indent; - appendLine("}"); - --indent; - appendLine("}"); - } - else { - appendIndent("java.util.Collection __values = "); - ss.value.runPass(this); - methodBody.append(";\n\n"); - - appendLine( - "for (java.util.Iterator __I = ((java.util.Collection) " - + "__values).iterator(); __I.hasNext(); )"); - appendLine("{"); - ++indent; - - appendIndent("__value = "); - methodBody.append( - discrete ? "__I.next().toString()" - : "((Double) __I.next()).doubleValue()"); - methodBody.append(";\n"); - - senseFeature(ss, currentCC, discrete, null); - methodBody.append("\n"); - --indent; - appendIndent("}"); - } - } - else { - if (ss.name != null) { // if we're inside a generator - appendIndent("__id = "); - String s = toStringLiteral(ss.name); - if (s != null) methodBody.append(s); - else { - methodBody.append("\"\" + ("); - ss.name.runPass(this); - methodBody.append(")"); - } - methodBody.append(";\n"); - } - - appendIndent("__value = "); - if (discrete) { - String s = toStringLiteral(ss.value); - if (s != null) methodBody.append(s); - else { - methodBody.append("\"\" + ("); - ss.value.runPass(this); - methodBody.append(")"); - } - } - else ss.value.runPass(this); - methodBody.append(";\n"); - - senseFeature(ss, currentCC, discrete, null); - } - } - - - /** - * Generates the statement that adds a new feature of the appropriate type - * to the returned FeatureVector when a sense - * statement is executed. The code generated by this method assumes the - * following: - *

    - *
  • - * if the containing classifier is a generator, code has already been - * generated to set the value of a string named __id - * representing the identifier of the new feature and - *
  • - * if value is null, code has already been - * generated to set the value of a string or double as appropriate - * named __value representing the value of the new - * feature. - *
- * - * @param s The sense statement. - * @param cc The current coded classifier. - * @param discrete Whether or not cc is discrete. - * @param value Generated code evaluating to the new feature's value. - * If this parameter is null, it will default - * to "__value". - **/ - private void senseFeature(SenseStatement s, CodedClassifier cc, - boolean discrete, String value) { - appendIndent("__result.addFeature("); - boolean array = s.name == null; - String id = array ? "\"\"" : "__id"; - if (value == null) value = "__value"; - methodBody.append( - primitiveFeatureConstructorInvocation( - discrete, array, "this", id, value, null, - "" + cc.returnType.values.size(), "__featureIndex++, 0")); - methodBody.append(");"); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param s The node to process. - **/ - public void run(SwitchStatement s) { - appendIndent("switch ("); - s.expression.runPass(this); - methodBody.append(")\n"); - s.block.runPass(this); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param s The node to process. - **/ - public void run(SynchronizedStatement s) { - appendIndent("synchronized ("); - s.data.runPass(this); - methodBody.append(")\n"); - ++indent; - s.block.runPass(this); - --indent; - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param s The node to process. - **/ - public void run(ThrowStatement s) { - appendIndent("throw "); - s.exception.runPass(this); - methodBody.append(";\n"); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param s The node to process. - **/ - public void run(TryStatement s) { - appendLine("try"); - ++indent; - s.block.runPass(this); - --indent; - s.catchList.runPass(this); - if (s.finallyBlock != null) { - appendLine("finally"); - s.finallyBlock.runPass(this); - } - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param s The node to process. 
- **/ - public void run(VariableDeclaration s) { - if (!forInit) appendIndent(); - if (s.isFinal) methodBody.append("final "); - s.type.runPass(this); - - ASTNodeIterator N = s.names.iterator(); - methodBody.append(" " + N.next()); - ExpressionList.ExpressionListIterator I = s.initializers.listIterator(); - Expression i = I.nextItem(); - if (i != null) { - methodBody.append(" = "); - i.runPass(this); - } - - while (N.hasNext()) { - methodBody.append(", " + N.next()); - i = I.nextItem(); - if (i != null) { - methodBody.append(" = "); - i.runPass(this); - } - } - - methodBody.append(";"); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param s The node to process. - **/ - public void run(WhileStatement s) { - appendIndent("while ("); - s.condition.runPass(this); - methodBody.append(")\n"); - ++indent; - s.body.runPass(this); - --indent; - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param s The node to process. - **/ - public void run(DoStatement s) { - appendLine("do"); - ++indent; - s.body.runPass(this); - --indent; - - appendIndent("while ("); - s.condition.runPass(this); - methodBody.append(");\n"); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param l The node to process. - **/ - public void run(SwitchGroupList l) { - ASTNodeIterator I = l.iterator(); - if (!I.hasNext()) return; - - I.next().runPass(this); - while (I.hasNext()) { - methodBody.append("\n"); - I.next().runPass(this); - } - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param g The node to process. - **/ - public void run(SwitchGroup g) { - appendIndent(); - g.labels.runPass(this); - methodBody.append("\n"); - ++indent; - g.statements.runPass(this); - --indent; - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param l The node to process. 
- **/ - public void run(SwitchLabelList l) { - ASTNodeIterator I = l.iterator(); - if (!I.hasNext()) return; - - I.next().runPass(this); - while (I.hasNext()) { - methodBody.append(" "); - I.next().runPass(this); - } - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param l The node to process. - **/ - public void run(SwitchLabel l) { - methodBody.append("case "); - l.value.runPass(this); - methodBody.append(":"); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param l The node to process. - **/ - public void run(CatchList l) { - ASTNodeIterator I = l.iterator(); - if (!I.hasNext()) return; - - I.next().runPass(this); - while (I.hasNext()) { - methodBody.append("\n"); - I.next().runPass(this); - } - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param c The node to process. - **/ - public void run(CatchClause c) { - appendIndent("catch ("); - c.argument.runPass(this); - methodBody.append(")\n"); - ++indent; - c.block.runPass(this); - --indent; - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param a The node to process. - **/ - public void run(Argument a) { - if (a.getFinal()) methodBody.append("final "); - a.getType().runPass(this); - methodBody.append(" " + a.getName()); - } - - - /** - * This method generates the code for a new temporary variable used when - * translating constraints. - * - * @param name The name of the temporary variable. - **/ - private void constraintTemporary(String name) { - appendIndent(); - if (constraintMode) methodBody.append("FirstOrderConstraint "); - else methodBody.append("boolean "); - methodBody.append(name); - if (constraintMode) methodBody.append(" = null;\n"); - else methodBody.append(";\n"); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param e The node to process. 
- **/ - public void run(ConstraintStatementExpression e) { - constraintResultNumber = 0; - appendLine("{"); - ++indent; - - if (constraintMode && e.constraint.containsQuantifiedVariable()) { - StringBuffer buffer = new StringBuffer(); - int i = 0; - HashSet referenced = e.constraint.getVariableTypes(); - for (Iterator I = referenced.iterator(); I.hasNext(); ) { - Argument a = I.next(); - Type t = a.getType(); - if (t.quantifierArgumentType) continue; - - for (int j = 0; j < indent; ++j) buffer.append(" "); - buffer.append("LBJ$constraint$context["); - buffer.append(i); - buffer.append("] = "); - if (t instanceof PrimitiveType) { - String primitiveTypeName = null; - if (((PrimitiveType) t).type == PrimitiveType.INT) - primitiveTypeName = "Integer"; - else { - primitiveTypeName = t.toString(); - primitiveTypeName = - Character.toUpperCase(primitiveTypeName.charAt(0)) - + primitiveTypeName.substring(1); - } - - buffer.append("new "); - buffer.append(primitiveTypeName); - buffer.append("("); - } - - buffer.append(a.getName()); - if (t instanceof PrimitiveType) buffer.append(")"); - buffer.append(";\n"); - - contextVariables.put(a.getName(), new Integer(i++)); - } - - appendIndent("Object[] LBJ$constraint$context = new Object["); - methodBody.append(i); - methodBody.append("];\n"); - methodBody.append(buffer); - } - - String childResultName = constraintResult + constraintResultNumber; - constraintResultName = childResultName; - constraintTemporary(childResultName); - quantifierNesting = 0; - - e.constraint.runPass(this); - - appendIndent(); - if (constraintMode) { - methodBody.append("__result = new FirstOrderConjunction(__result, "); - methodBody.append(childResultName); - methodBody.append(");\n"); - } - else { - methodBody.append("if (!"); - methodBody.append(childResultName); - methodBody.append(") return \"false\";\n"); - } - - --indent; - appendIndent("}"); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param e The node to process. 
- **/ - public void run(BinaryConstraintExpression e) { - String myResultName = constraintResultName; - String leftResultName = constraintResult + ++constraintResultNumber; - - appendLine("{"); - ++indent; - - constraintTemporary(leftResultName); - constraintResultName = leftResultName; - e.left.runPass(this); - - if (constraintMode - || e.operation.operation == Operator.DOUBLE_IMPLICATION) { - String rightResultName = constraintResult + ++constraintResultNumber; - constraintTemporary(rightResultName); - constraintResultName = rightResultName; - e.right.runPass(this); - - appendIndent(myResultName); - - if (constraintMode) { - methodBody.append(" = new FirstOrder"); - if (e.operation.operation == Operator.LOGICAL_CONJUNCTION) - methodBody.append("Conjunction"); - else if (e.operation.operation == Operator.LOGICAL_DISJUNCTION) - methodBody.append("Disjunction"); - else if (e.operation.operation == Operator.IMPLICATION) - methodBody.append("Implication"); - else methodBody.append("DoubleImplication"); - - methodBody.append("("); - methodBody.append(leftResultName); - methodBody.append(", "); - methodBody.append(rightResultName); - methodBody.append(");\n"); - } - else { - methodBody.append(" = "); - methodBody.append(leftResultName); - methodBody.append(" == "); - methodBody.append(rightResultName); - methodBody.append(";\n"); - } - } - else { - appendIndent("if ("); - if (e.operation.operation == Operator.LOGICAL_DISJUNCTION) - methodBody.append("!"); - methodBody.append(leftResultName); - methodBody.append(")\n"); - ++indent; - - constraintResultName = myResultName; - e.right.runPass(this); - - --indent; - appendIndent("else "); - methodBody.append(myResultName); - methodBody.append(" = "); - methodBody.append(e.operation.operation == Operator.LOGICAL_DISJUNCTION - || e.operation.operation == Operator.IMPLICATION); - methodBody.append(";\n"); - } - - --indent; - appendLine("}"); - } - - - /** - * Runs this pass on all nodes of the indicated type. 
- * - * @param e The node to process. - **/ - public void run(NegatedConstraintExpression e) { - String myResultName = constraintResultName; - String childResultName = constraintResult + ++constraintResultNumber; - - appendLine("{"); - ++indent; - - constraintTemporary(childResultName); - constraintResultName = childResultName; - e.constraint.runPass(this); - - appendIndent(myResultName); - methodBody.append(" = "); - if (constraintMode) methodBody.append("new FirstOrderNegation("); - else methodBody.append("!"); - methodBody.append(childResultName); - if (constraintMode) methodBody.append(")"); - methodBody.append(";\n"); - - --indent; - appendLine("}"); - } - - - /** - * Generates the code necessary at the top of a replacer method - * implementation to declare the variables that will be used in the method. - * - * @param expression The expression to be evaluted in the replacer method. - **/ - private void generateReplacerMethodEnvironment(Expression expression) { - for (Iterator I = expression.getVariableTypes().iterator(); I.hasNext(); ) - { - Argument a = I.next(); - Type type = a.getType(); - String primitiveTypeName = null; - if (type instanceof PrimitiveType) { - if (((PrimitiveType) type).type == PrimitiveType.INT) - primitiveTypeName = "Integer"; - else { - primitiveTypeName = type.toString(); - primitiveTypeName = - Character.toUpperCase(primitiveTypeName.charAt(0)) - + primitiveTypeName.substring(1); - } - } - - appendIndent(); - a.runPass(this); - methodBody.append(" = ("); - if (primitiveTypeName == null) type.runPass(this); - else methodBody.append("(" + primitiveTypeName); - methodBody.append(") "); - - if (type.quantifierArgumentType) { - methodBody.append("quantificationVariables.get("); - methodBody.append( - ((Integer) quantificationVariables.get(a.getName())) - .intValue()); - methodBody.append(")"); - } - else { - methodBody.append("context["); - methodBody.append( - ((Integer) contextVariables.get(a.getName())).intValue()); - 
methodBody.append("]"); - } - - if (primitiveTypeName != null) - methodBody.append(")." + type + "Value()"); - methodBody.append(";\n"); - } - } - - - /** - * Translates an expression from a quantified - * {@link ConstraintEqualityExpression} into the appropriate method of an - * {@link EqualityArgumentReplacer}. - * - * @param right Indicates if expression comes - * from the right hand side of the equality. - * @param expression The expression. - * @param isDiscreteLearner This flag is set if expression - * represents a variable. - **/ - private void generateEARMethod(boolean right, Expression expression, - boolean isDiscreteLearner) { - appendIndent("public "); - methodBody.append(isDiscreteLearner ? "Object" : "String"); - methodBody.append(" get"); - methodBody.append(right ? "Right" : "Left"); - methodBody.append(isDiscreteLearner ? "Object" : "Value"); - methodBody.append("()\n"); - - appendLine("{"); - ++indent; - - generateReplacerMethodEnvironment(expression); - - appendIndent("return "); - if (isDiscreteLearner) - ((MethodInvocation) expression).arguments.runPass(this); - else { - methodBody.append("\"\" + ("); - expression.runPass(this); - methodBody.append(")"); - } - - methodBody.append(";\n"); - - --indent; - appendLine("}"); - } - - - /** - * Translates an unquantified expression not representing a first order - * variable from a {@link ConstraintEqualityExpression} into an argument of - * a {@link FirstOrderEquality}. - * - * @param left This flag is set if expression came from - * the left hand side of the equality. - * @param expression The expression. 
- **/ - private void generateNotVariable(boolean left, Expression expression) { - if (left) methodBody.append("("); - methodBody.append("\"\" + ("); - expression.runPass(this); - methodBody.append(")"); - if (left) methodBody.append(")"); - } - - - /** - * Translates an expression representing a first order variable from a - * {@link ConstraintEqualityExpression} into an argument of a - * {@link FirstOrderEquality}. - * - * @param expression The expression. - * @param isQuantified This flag is set if expression contains - * a quantified variable. - **/ - private void generateVariable(Expression expression, boolean isQuantified) { - MethodInvocation method = (MethodInvocation) expression; - methodBody.append("new FirstOrderVariable("); - methodBody.append(("__" + method.name).replace('.', '$')); - - if (isQuantified) methodBody.append(", null)"); - else { - methodBody.append(", "); - method.arguments.runPass(this); - methodBody.append(")"); - } - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param e The node to process. 
- **/ - public void run(ConstraintEqualityExpression e) { - String myResultName = constraintResultName; - - boolean leftIsDiscreteLearner = e.leftIsDiscreteLearner; - boolean rightIsDiscreteLearner = e.rightIsDiscreteLearner; - boolean leftIsQuantified = e.leftIsQuantified; - boolean rightIsQuantified = e.rightIsQuantified; - Expression left = e.left; - Expression right = e.right; - - if (!leftIsDiscreteLearner && rightIsDiscreteLearner) { - leftIsDiscreteLearner = true; - rightIsDiscreteLearner = false; - - leftIsQuantified ^= rightIsQuantified; - rightIsQuantified ^= leftIsQuantified; - leftIsQuantified ^= rightIsQuantified; - - Expression temp = left; - left = right; - right = temp; - } - - if (!(constraintMode && (leftIsQuantified || rightIsQuantified))) { - appendIndent(myResultName); - methodBody.append(" = "); - - if (constraintMode) { - methodBody.append("new FirstOrder"); - - if (leftIsDiscreteLearner) { - if (rightIsDiscreteLearner) - methodBody.append("EqualityWithVariable"); - else methodBody.append("EqualityWithValue"); - methodBody.append("("); - - methodBody.append(e.operation.operation - == Operator.CONSTRAINT_EQUAL); - methodBody.append(", "); - generateVariable(left, false); - methodBody.append(", "); - if (rightIsDiscreteLearner) generateVariable(right, false); - else generateNotVariable(false, right); - - methodBody.append(");\n"); - return; - } - - methodBody.append("Constant("); - } - - if (e.operation.operation == Operator.CONSTRAINT_NOT_EQUAL) - methodBody.append("!"); - generateNotVariable(true, left); - methodBody.append(".equals("); - generateNotVariable(false, right); - methodBody.append(")"); - - if (constraintMode) methodBody.append(")"); - methodBody.append(";\n"); - return; - } - - appendLine("{"); - ++indent; - - appendLine("EqualityArgumentReplacer LBJ$EAR ="); - ++indent; - - appendIndent("new EqualityArgumentReplacer(LBJ$constraint$context"); - if (!(leftIsQuantified && rightIsQuantified)) { - methodBody.append(", "); - 
methodBody.append(leftIsQuantified); - } - - methodBody.append(")\n"); - appendLine("{"); - ++indent; - - if (leftIsQuantified) - generateEARMethod(false, left, leftIsDiscreteLearner); - - if (rightIsQuantified) { - if (leftIsQuantified) methodBody.append("\n"); - generateEARMethod(true, right, rightIsDiscreteLearner); - } - - --indent; - appendLine("};"); - --indent; - - appendIndent(myResultName); - methodBody.append(" = new FirstOrderEquality"); - if (leftIsDiscreteLearner) { - if (rightIsDiscreteLearner) methodBody.append("WithVariable"); - else methodBody.append("WithValue"); - } - else methodBody.append("TwoValues"); - - methodBody.append("("); - methodBody.append(e.operation.operation == Operator.CONSTRAINT_EQUAL); - methodBody.append(", "); - if (leftIsDiscreteLearner) generateVariable(left, leftIsQuantified); - else if (leftIsQuantified) methodBody.append("null"); - else generateNotVariable(true, left); - methodBody.append(", "); - if (rightIsDiscreteLearner) generateVariable(right, rightIsQuantified); - else if (rightIsQuantified) methodBody.append("null"); - else generateNotVariable(false, right); - methodBody.append(", LBJ$EAR);\n"); - - --indent; - appendLine("}"); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param e The node to process. 
- **/ - public void run(ConstraintInvocation e) { - String myResultName = constraintResultName; - - if (!(constraintMode && e.invocationIsQuantified)) { - appendIndent(myResultName); - methodBody.append((" = __" + e.invocation.name).replace('.', '$')); - if (constraintMode) methodBody.append(".makeConstraint("); - else methodBody.append(".discreteValue("); - e.invocation.arguments.runPass(this); - methodBody.append(")"); - if (!constraintMode) methodBody.append(".equals(\"true\")"); - methodBody.append(";\n"); - return; - } - - appendLine("{"); - ++indent; - - appendLine("InvocationArgumentReplacer LBJ$IAR ="); - ++indent; - - appendLine("new InvocationArgumentReplacer(LBJ$constraint$context)"); - appendLine("{"); - ++indent; - - appendLine("public Object compute()"); - appendLine("{"); - ++indent; - - Expression argument = e.invocation.arguments.listIterator().nextItem(); - generateReplacerMethodEnvironment(argument); - - appendIndent("return "); - argument.runPass(this); - methodBody.append(";\n"); - - --indent; - appendLine("}"); - - --indent; - appendLine("};"); - --indent; - - appendIndent(myResultName); - methodBody.append(" = new QuantifiedConstraintInvocation("); - methodBody.append(("__" + e.invocation.name).replace('.', '$')); - methodBody.append(", LBJ$IAR);\n"); - - --indent; - appendLine("}"); - } - - - /** - * {@link UniversalQuantifierExpression}s and - * {@link ExistentialQuantifierExpression}s generate their code through - * this method. - * - * @param e The node to process. 
- **/ - private void generateSimpleQuantifier(QuantifiedConstraintExpression e) { - boolean universal = e instanceof UniversalQuantifierExpression; - String myResultName = constraintResultName; - - if (!constraintMode) { - String inductionVariable = "__I" + quantifierNesting; - - appendLine("{"); - ++indent; - - appendIndent(myResultName); - methodBody.append(" = "); - methodBody.append(universal); - methodBody.append(";\n"); - - appendIndent("for (java.util.Iterator "); - methodBody.append(inductionVariable); - methodBody.append(" = ("); - e.collection.runPass(this); - methodBody.append(").iterator(); "); - methodBody.append(inductionVariable); - methodBody.append(".hasNext() && "); - if (!universal) methodBody.append("!"); - methodBody.append(myResultName); - methodBody.append("; )\n"); - - appendLine("{"); - ++indent; - - appendIndent(); - e.argument.runPass(this); - methodBody.append(" = ("); - e.argument.getType().runPass(this); - methodBody.append(") "); - methodBody.append(inductionVariable); - methodBody.append(".next();\n"); - - ++quantifierNesting; - e.constraint.runPass(this); - --quantifierNesting; - - --indent; - appendLine("}"); - - --indent; - appendLine("}"); - return; - } - - appendLine("{"); - ++indent; - - String childResultName = constraintResult + ++constraintResultNumber; - constraintTemporary(childResultName); - constraintResultName = childResultName; - - quantificationVariables.put(e.argument.getName(), - new Integer(quantifierNesting++)); - e.constraint.runPass(this); - --quantifierNesting; - - if (!e.collectionIsQuantified) { - appendIndent(myResultName); - methodBody.append(" = new "); - if (universal) methodBody.append("Universal"); - else methodBody.append("Existential"); - methodBody.append("Quantifier(\""); - methodBody.append(e.argument.getName()); - methodBody.append("\", "); - e.collection.runPass(this); - methodBody.append(", "); - methodBody.append(childResultName); - methodBody.append(");\n"); - } - else { - 
appendLine("QuantifierArgumentReplacer LBJ$QAR ="); - ++indent; - - appendLine("new QuantifierArgumentReplacer(LBJ$constraint$context)"); - appendLine("{"); - ++indent; - - appendLine("public java.util.Collection getCollection()"); - appendLine("{"); - ++indent; - - generateReplacerMethodEnvironment(e.collection); - - appendIndent("return "); - e.collection.runPass(this); - methodBody.append(";\n"); - - --indent; - appendLine("}"); - - --indent; - appendLine("};"); - --indent; - - appendIndent(myResultName); - methodBody.append(" = new "); - if (universal) methodBody.append("Universal"); - else methodBody.append("Existential"); - methodBody.append("Quantifier(\""); - methodBody.append(e.argument.getName()); - methodBody.append("\", null, "); - methodBody.append(childResultName); - methodBody.append(", LBJ$QAR);\n"); - } - - --indent; - appendLine("}"); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param e The node to process. - **/ - public void run(UniversalQuantifierExpression e) { - generateSimpleQuantifier(e); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param e The node to process. - **/ - public void run(ExistentialQuantifierExpression e) { - generateSimpleQuantifier(e); - } - - - /** - * {@link AtLeastQuantifierExpression}s and - * {@link AtMostQuantifierExpression}s generate their code through this - * method. - * - * @param e The node to process. 
- **/ - public void generateBoundedQuantifier(QuantifiedConstraintExpression e) { - boolean atleast = e instanceof AtLeastQuantifierExpression; - AtLeastQuantifierExpression ale = null; - AtMostQuantifierExpression ame = null; - if (atleast) ale = (AtLeastQuantifierExpression) e; - else ame = (AtMostQuantifierExpression) e; - - String myResultName = constraintResultName; - String childResultName = constraintResult + ++constraintResultNumber; - - if (!constraintMode) { - appendLine("{"); - ++indent; - - String m = "LBJ$m$" + quantifierNesting; - String bound = "LBJ$bound$" + quantifierNesting; - - appendIndent("int "); - methodBody.append(m); - methodBody.append(" = 0;\n"); - appendIndent("int "); - methodBody.append(bound); - methodBody.append(" = "); - if (atleast) ale.lowerBound.runPass(this); - else ame.upperBound.runPass(this); - methodBody.append(";\n"); - - String inductionVariable = "__I" + quantifierNesting; - - appendIndent("for (java.util.Iterator "); - methodBody.append(inductionVariable); - methodBody.append(" = ("); - e.collection.runPass(this); - methodBody.append(").iterator(); "); - methodBody.append(inductionVariable); - methodBody.append(".hasNext() && "); - methodBody.append(m); - if (atleast) methodBody.append(" < "); - else methodBody.append(" <= "); - methodBody.append(bound); - methodBody.append("; )\n"); - - appendLine("{"); - ++indent; - - appendIndent(); - e.argument.runPass(this); - methodBody.append(" = ("); - e.argument.getType().runPass(this); - methodBody.append(") "); - methodBody.append(inductionVariable); - methodBody.append(".next();\n"); - - constraintTemporary(childResultName); - constraintResultName = childResultName; - ++quantifierNesting; - e.constraint.runPass(this); - --quantifierNesting; - - appendIndent("if ("); - methodBody.append(childResultName); - methodBody.append(") ++"); - methodBody.append(m); - methodBody.append(";\n"); - - --indent; - appendLine("}"); - - appendIndent(myResultName); - methodBody.append(" = "); - 
methodBody.append(m); - if (atleast) methodBody.append(" >= "); - else methodBody.append(" <= "); - methodBody.append(bound); - methodBody.append(";\n"); - - --indent; - appendLine("}"); - return; - } - - appendLine("{"); - ++indent; - - constraintTemporary(childResultName); - constraintResultName = childResultName; - quantificationVariables.put(e.argument.getName(), - new Integer(quantifierNesting++)); - e.constraint.runPass(this); - --quantifierNesting; - - if (!(e.collectionIsQuantified - || atleast && ale.lowerBoundIsQuantified - || !atleast && ame.upperBoundIsQuantified)) { - appendIndent(myResultName); - methodBody.append(" = new "); - if (atleast) methodBody.append("AtLeast"); - else methodBody.append("AtMost"); - methodBody.append("Quantifier(\""); - methodBody.append(e.argument.getName()); - methodBody.append("\", "); - e.collection.runPass(this); - methodBody.append(", "); - methodBody.append(childResultName); - methodBody.append(", "); - if (atleast) methodBody.append(ale.lowerBound); - else methodBody.append(ame.upperBound); - methodBody.append(");\n"); - } - else { - appendLine("QuantifierArgumentReplacer LBJ$QAR ="); - ++indent; - - appendIndent("new QuantifierArgumentReplacer(LBJ$constraint$context"); - if (!(e.collectionIsQuantified - && (atleast && ale.lowerBoundIsQuantified - || !atleast && ame.upperBoundIsQuantified))) { - methodBody.append(", "); - methodBody.append(e.collectionIsQuantified); - } - - methodBody.append(")\n"); - - appendLine("{"); - ++indent; - - if (e.collectionIsQuantified) { - appendLine("public java.util.Collection getCollection()"); - appendLine("{"); - ++indent; - - generateReplacerMethodEnvironment(e.collection); - - appendIndent("return "); - e.collection.runPass(this); - methodBody.append(";\n"); - - --indent; - appendLine("}"); - } - - if (atleast && ale.lowerBoundIsQuantified - || !atleast && ame.upperBoundIsQuantified) { - if (e.collectionIsQuantified) methodBody.append("\n"); - appendLine("public int getBound()"); - 
appendLine("{"); - ++indent; - - if (atleast) generateReplacerMethodEnvironment(ale.lowerBound); - else generateReplacerMethodEnvironment(ame.upperBound); - - appendIndent("return "); - if (atleast) ale.lowerBound.runPass(this); - else ame.upperBound.runPass(this); - methodBody.append(";\n"); - - --indent; - appendLine("}"); - } - - --indent; - appendLine("};"); - --indent; - - appendIndent(myResultName); - methodBody.append(" = new "); - if (atleast) methodBody.append("AtLeast"); - else methodBody.append("AtMost"); - methodBody.append("Quantifier(\""); - methodBody.append(e.argument.getName()); - methodBody.append("\", "); - if (e.collectionIsQuantified) methodBody.append("null"); - else e.collection.runPass(this); - methodBody.append(", "); - methodBody.append(childResultName); - methodBody.append(", "); - if (atleast && ale.lowerBoundIsQuantified - || !atleast && ame.upperBoundIsQuantified) - methodBody.append("0"); - else if (atleast) ale.lowerBound.runPass(this); - else ame.upperBound.runPass(this); - methodBody.append(", LBJ$QAR);\n"); - } - - --indent; - appendLine("}"); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param e The node to process. - **/ - public void run(AtLeastQuantifierExpression e) { - generateBoundedQuantifier(e); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param e The node to process. - **/ - public void run(AtMostQuantifierExpression e) { - generateBoundedQuantifier(e); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param l The node to process. - **/ - public void run(ExpressionList l) { - ASTNodeIterator I = l.iterator(); - if (!I.hasNext()) return; - - I.next().runPass(this); - while (I.hasNext()) { - methodBody.append(", "); - I.next().runPass(this); - } - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param e The node to process. 
- **/ - public void run(ArrayCreationExpression e) { - if (e.parenthesized) methodBody.append("("); - - methodBody.append("new "); - e.elementType.runPass(this); - - int d = 0; - for (ASTNodeIterator I = e.sizes.iterator(); I.hasNext(); ++d) { - methodBody.append("["); - I.next().runPass(this); - methodBody.append("]"); - } - - for (; d < e.dimensions; ++d) methodBody.append("[]"); - - if (e.parenthesized) methodBody.append(")"); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param e The node to process. - **/ - public void run(ArrayInitializer e) { - if (e.parenthesized) methodBody.append("("); - methodBody.append("{ "); - e.values.runPass(this); - methodBody.append(" }"); - if (e.parenthesized) methodBody.append(")"); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param e The node to process. - **/ - public void run(CastExpression e) { - if (e.parenthesized) methodBody.append("("); - methodBody.append("("); - e.type.runPass(this); - methodBody.append(") "); - e.expression.runPass(this); - if (e.parenthesized) methodBody.append(")"); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param e The node to process. - **/ - public void run(Conditional e) { - if (e.parenthesized) methodBody.append("("); - e.condition.runPass(this); - methodBody.append(" ? "); - e.thenClause.runPass(this); - methodBody.append(" : "); - e.elseClause.runPass(this); - if (e.parenthesized) methodBody.append(")"); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param e The node to process. - **/ - public void run(Constant e) { - if (e.parenthesized) methodBody.append("("); - methodBody.append(e.value); - if (e.parenthesized) methodBody.append(")"); - } - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param e The node to process. 
- **/ - public void run(ParameterSet e) { - methodBody.append(e.getParameterName()); - } - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param e The node to process. - **/ - public void run(InstanceofExpression e) { - if (e.parenthesized) methodBody.append("("); - e.left.runPass(this); - methodBody.append(" instanceof "); - e.right.runPass(this); - if (e.parenthesized) methodBody.append(")"); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param e The node to process. - **/ - public void run(Assignment e) { - if (e.parenthesized) methodBody.append("("); - e.left.runPass(this); - methodBody.append(" " + e.operation + " "); - e.right.runPass(this); - if (e.parenthesized) methodBody.append(")"); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param e The node to process. - **/ - public void run(IncrementExpression e) { - if (e.parenthesized) methodBody.append("("); - runOnChildren(e); - if (e.parenthesized) methodBody.append(")"); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param e The node to process. - **/ - public void run(InstanceCreationExpression e) { - if (e.parenthesized) methodBody.append("("); - - if (e.parentObject != null) { - e.parentObject.runPass(this); - methodBody.append("."); - } - - methodBody.append("new "); - e.name.runPass(this); - methodBody.append("("); - e.arguments.runPass(this); - methodBody.append(")"); - - if (e.parenthesized) methodBody.append(")"); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param e The node to process. 
- **/ - public void run(MethodInvocation e) { - if (e.parenthesized) methodBody.append("("); - - if (e.isClassifierInvocation) { - methodBody.append(("__" + e.name).replace('.', '$') + "."); - - ClassifierType invokedType = (ClassifierType) e.name.typeCache; - int t = invokedType.getOutput().type; - if (t == ClassifierReturnType.DISCRETE) - methodBody.append("discreteValue("); - else if (t == ClassifierReturnType.REAL) - methodBody.append("realValue("); - else if (!e.isSensedValue) { - if (t == ClassifierReturnType.DISCRETE_ARRAY) - methodBody.append("discreteValueArray("); - else if (t == ClassifierReturnType.REAL_ARRAY) - methodBody.append("realValueArray("); - } - else methodBody.append("classify("); - - if (invokedType.getInput() instanceof ArrayType) - methodBody.append("(Object) "); - e.arguments.runPass(this); - methodBody.append(")"); - } - else { - if (e.parentObject != null) { - e.parentObject.runPass(this); - methodBody.append("."); - } - - e.name.runPass(this); - methodBody.append("("); - e.arguments.runPass(this); - methodBody.append(")"); - } - - if (e.parenthesized) methodBody.append(")"); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param be The node to process. - **/ - public void run(BinaryExpression be) { - if (be.parenthesized) methodBody.append("("); - be.left.runPass(this); - methodBody.append(" " + be.operation + " "); - be.right.runPass(this); - if (be.parenthesized) methodBody.append(")"); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param e The node to process. - **/ - public void run(UnaryExpression e) { - if (e.parenthesized) methodBody.append("("); - runOnChildren(e); - if (e.parenthesized) methodBody.append(")"); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param e The node to process. - **/ - public void run(FieldAccess e) { - if (e.parenthesized) methodBody.append("("); - e.object.runPass(this); - methodBody.append("." 
+ e.name); - if (e.parenthesized) methodBody.append(")"); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param e The node to process. - **/ - public void run(SubscriptVariable e) { - if (e.parenthesized) methodBody.append("("); - e.array.runPass(this); - methodBody.append("["); - e.subscript.runPass(this); - methodBody.append("]"); - if (e.parenthesized) methodBody.append(")"); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param n The node to process. - **/ - public void run(Name n) { - if (n.parenthesized) methodBody.append("("); - boolean translated = false; - - if (currentCG != null && n.name.length > 1) { - HashSet invoked = SemanticAnalysis.invokedGraph.get(currentCG.getName()); - if (invoked != null) { - String className = n.toString(); - className = className.substring(0, className.lastIndexOf('.')); - String fieldOrMethod = n.name[n.name.length - 1]; - - if (invoked.contains(className)) { - String nameNoDots = className.replace('.', '$'); - methodBody.append("__"); - methodBody.append(nameNoDots); - methodBody.append("."); - methodBody.append(fieldOrMethod); - translated = true; - } - } - } - - if (!translated) methodBody.append(n); - if (n.parenthesized) methodBody.append(")"); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param t The node to process. - **/ - public void run(ArrayType t) { - t.type.runPass(this); - methodBody.append("[]"); - } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param t The node to process. - **/ - public void run(PrimitiveType t) { methodBody.append(t); } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param t The node to process. - **/ - public void run(ReferenceType t) { methodBody.append(t); } - - - /** - * Runs this pass on all nodes of the indicated type. - * - * @param o The node to process. 
- **/ - public void run(Operator o) { methodBody.append(o); } -} + } + + if (lce.attributeString.length() != 0) + out.print("attributeString"); + } else + out.print("parameters"); + + out.println(");"); + out.println(" containingPackage = \"" + AST.globalSymbolTable.getPackage() + "\";"); + out.println(" name = \"" + lceName + "\";"); + out.println(" setEncoding(" + lce.featureEncoding + ");"); + if (lce.labeler != null) + out.println(" setLabeler(new " + lce.labeler.name + "());"); + out.println(" setExtractor(new " + lce.extractor.name + "());"); + out.println(" isClone = false;"); + out.println(" }\n"); + } + + out.println(" public static Parser getParser() { return " + lce.parser + "; }"); + out.println(" public static Parser getTestParser() { return " + lce.testParser + "; }\n"); + + generateLearnerBody(out, lce); + + if (lce.parameterSets.size() > 0) { + out.println(); + out.println(" public static class Parameters extends " + lce.learnerName + + ".Parameters"); + out.println(" {"); + out.println(" public Parameters() { this(" + firstArgumentsString + "); }"); + out.println(" public Parameters(" + formalParameterString + ")"); + + if (lce.learnerParameterBlock != null) { + TranslateToJava translator = new TranslateToJava(null); + translator.setRoot(lce.learnerParameterBlock); + translator.setCurrentCG(currentCG); + translator.setIndent(3); + translator.run(); + out.println(translator.getMethodBody()); + } else { + out.println(" {"); + out.println(" super((" + lce.learnerName + ".Parameters) new " + lceName + "(" + + argumentString + ").getParameters());"); + out.println(" }"); + } + out.println(" }"); + } + out.println("}\n"); + out.close(); + } + + + /** + * Generates code for all nodes of the indicated type. + * + * @param c The node to process. 
+ **/ + public void run(Conjunction c) { + String cName = c.name.toString(); + String fileName = cName + ".java"; + if (fileName.indexOf("$$") != -1) { + files.add(fileName); + runOnChildren(c); + } else { + files.clear(); + + runOnChildren(c); + + final String prefix = cName + "$$"; + File[] leftOvers = + new File(System.getProperty("user.dir")).listFiles(new FilenameFilter() { + public boolean accept(File directory, String name) { + int i = name.lastIndexOf('.'); + if (i == -1) + return false; + String javaFile = name.substring(0, i) + ".java"; + return name.startsWith(prefix) && !files.contains(javaFile); + } + }); + + for (int i = 0; i < leftOvers.length; ++i) + if (leftOvers[i].exists() && !leftOvers[i].delete()) + reportError(0, "Could not delete '" + leftOvers[i].getName() + "'."); + } + + if (!RevisionAnalysis.revisionStatus.get(cName).equals(RevisionAnalysis.REVISED)) + return; + + System.out.println("Generating code for " + cName); + + PrintStream out = open(fileName); + if (out == null) + return; + + out.println(disclaimer); + out.println("// " + c.shallow() + "\n"); + + ast.symbolTable.generateHeader(out); + + out.println("\n"); + if (c.comment != null) + out.println(c.comment); + + out.println("public class " + cName + " extends Classifier"); + out.println("{"); + + String leftName = c.left.name.toString(); + String rightName = c.right.name.toString(); + out.println(" private static final " + leftName + " left = new " + leftName + "();"); + if (!leftName.equals(rightName)) + out.println(" private static final " + rightName + " right = new " + c.right.name + + "();\n"); + + if (c.singleExampleCache) { + out.println(" private static ThreadLocal __cache = new ThreadLocal(){ };"); + out.println(" private static ThreadLocal __exampleCache = " + "new ThreadLocal(){ };"); + out.println(" public static void clearCache() { __exampleCache = new " + + "ThreadLocal() { }; }\n"); + } + + out.println(" public " + cName + "()"); + out.println(" {"); + out.println(" 
containingPackage = \"" + AST.globalSymbolTable.getPackage() + "\";"); + out.println(" name = \"" + cName + "\";"); + out.println(" }\n"); + + Type input = c.argument.getType(); + typeReturningMethods(out, input, c.returnType); + out.println(); + + boolean primitive = + c.returnType.type == ClassifierReturnType.DISCRETE + || c.returnType.type == ClassifierReturnType.REAL; + boolean mixed = c.returnType.type == ClassifierReturnType.MIXED_GENERATOR; + int leftType = c.left.returnType.type; + int rightType = c.right.returnType.type; + boolean sameType = leftType == rightType; + boolean leftIsGenerator = + leftType == ClassifierReturnType.DISCRETE_GENERATOR + || rightType == ClassifierReturnType.REAL_GENERATOR; + boolean leftIsPrimitive = + leftType == ClassifierReturnType.DISCRETE || leftType == ClassifierReturnType.REAL; + boolean rightIsPrimitive = + rightType == ClassifierReturnType.DISCRETE + || rightType == ClassifierReturnType.REAL; + boolean bothMulti = !leftIsPrimitive && !rightIsPrimitive; + + StringBuffer body = new StringBuffer(); + + if (primitive) + body.append(" __result = left.featureValue(__example)" + + ".conjunction(right.featureValue(__example), this);\n"); + else { + body.append(" __result = new FeatureVector();\n"); + if (leftIsPrimitive) + body.append(" Feature lf = left.featureValue(__example);\n"); + else + body.append(" FeatureVector leftVector = left.classify(__example);\n" + + " int N = leftVector.featuresSize();\n"); + + if (c.left.equals(c.right)) { + // SemanticAnalysis ensures that neither classifier is primitive here. 
+ body.append(" for (int j = 1; j < N; ++j)\n" + " {\n"); + body.append(" Feature rf = leftVector.getFeature(j);\n" + + " for (int i = 0; i < j; ++i)\n" + + " __result.addFeature(leftVector.getFeature(i)" + + ".conjunction(rf, this));\n" + " }\n"); + } else { + if (rightIsPrimitive) + body.append(" Feature rf = right.featureValue(__example);\n"); + else + body.append(" FeatureVector rightVector = right.classify(__example);\n" + + " int M = rightVector.featuresSize();\n"); + + String in = ""; + if (!leftIsPrimitive) { + body.append(" for (int i = 0; i < N; ++i)\n" + " {\n" + + " Feature lf = leftVector.getFeature(i);\n"); + in += " "; + } + + if (!rightIsPrimitive) { + body.append(in); + body.append(" for (int j = 0; j < M; ++j)\n"); + body.append(in); + body.append(" {\n"); + body.append(in); + body.append(" Feature rf = rightVector.getFeature(j);\n"); + in += " "; + } + + if (mixed || leftIsGenerator && sameType) { + body.append(in); + body.append(" if (lf.equals(rf)) continue;\n"); + } + + body.append(in); + body.append(" __result.addFeature(lf.conjunction(rf, this));\n"); + + if (!rightIsPrimitive) { + in = in.substring(2); + body.append(in); + body.append(" }\n"); + } + + if (!leftIsPrimitive) + body.append(" }\n"); + body.append("\n"); + } + + if (bothMulti) + body.append(" __result.sort();\n"); + } + + generateClassificationMethods(out, c, null, body.toString(), false, false, null); + out.println(); + typeCheckClassifyArray(out, cName, input, c.line); + out.println(); + generateHashingMethods(out, cName); + + out.println("}\n"); + out.close(); + } + + + /** + * Generates code for all nodes of the indicated type. + * + * @param cd The node to process. 
+ **/ + public void run(ConstraintDeclaration cd) { + String cdName = cd.name.toString(); + String fileName = cdName + ".java"; + + if (!RevisionAnalysis.revisionStatus.get(cdName).equals(RevisionAnalysis.REVISED)) + return; + + System.out.println("Generating code for " + cdName); + + PrintStream out = open(fileName); + if (out == null) + return; + + out.println(disclaimer); + out.print("// "); + compressAndPrint(cd.shallow(), out); + out.println("\n"); + + ast.symbolTable.generateHeader(out); + + out.println("\n"); + if (cd.comment != null) + out.println(cd.comment); + + out.println("public class " + cdName + " extends ParameterizedConstraint"); + out.println("{"); + + HashSet invoked = SemanticAnalysis.invokedGraph.get(cdName); + if (invoked != null && invoked.size() > 0) { + for (Iterator I = invoked.iterator(); I.hasNext();) { + String name = I.next(); + String nameNoDots = name.replace('.', '$'); + out.println(" private static final " + name + " __" + nameNoDots + " = new " + + name + "();"); + } + + out.println(); + } + + String fqName = ast.symbolTable.getPackage(); + if (fqName.length() > 0) + fqName += "."; + fqName += cdName; + out.println(" public " + cdName + "() { super(\"" + fqName + "\"); }\n"); + + Type input = cd.argument.getType(); + out.println(" public String getInputType() { return \"" + input.typeClass().getName() + + "\"; }\n"); + + indent = 2; + forInit = false; + constraintMode = false; + methodBody.delete(0, methodBody.length()); + currentCG = cd; + for (ASTNodeIterator I = cd.body.iterator(); I.hasNext();) { + I.next().runPass(this); + methodBody.append("\n"); + } + + out.println(" public String discreteValue(Object __example)"); + out.println(" {"); + out.print(generateTypeChecking(" ", cdName, "Constraint", false, input.toString(), + cd.line, "__example", false)); + + out.println(" " + cd.argument + " = (" + input + ") __example;\n"); + + out.println(methodBody); + + out.println(" return \"true\";"); + out.println(" }"); + + 
out.println(); + typeCheckClassifyArray(out, cdName, input, cd.line); + out.println(); + generateHashingMethods(out, cdName); + + indent = 2; + forInit = false; + constraintMode = true; + methodBody.delete(0, methodBody.length()); + for (ASTNodeIterator I = cd.body.iterator(); I.hasNext();) { + I.next().runPass(this); + methodBody.append("\n"); + } + + out.println("\n public FirstOrderConstraint makeConstraint(Object " + "__example)"); + out.println(" {"); + out.print(generateTypeChecking(" ", cdName, "Constraint", false, input.toString(), + cd.line, "__example", false)); + + out.println(" " + cd.argument + " = (" + input + ") __example;"); + out.println(" FirstOrderConstraint __result = new " + "FirstOrderConstant(true);\n"); + + out.println(methodBody); + + out.println(" return __result;"); + out.println(" }"); + + out.println("}\n"); + out.close(); + } + + + /** + * Generates code for all nodes of the indicated type. + * + * @param in The node to process. + **/ + public void run(InferenceDeclaration in) { + in.constraint.runPass(this); + + String inName = in.name.toString(); + String fileName = inName + ".java"; + + if (!RevisionAnalysis.revisionStatus.get(inName).equals(RevisionAnalysis.REVISED)) + return; + + System.out.println("Generating code for " + inName); + + PrintStream out = open(fileName); + if (out == null) + return; + + out.println(disclaimer); + out.print("// "); + compressAndPrint(in.shallow(), out); + out.println("\n"); + + ast.symbolTable.generateHeader(out); + out.println("import java.util.*;\n\n"); + + currentCG = in; + String defaultNormalizer = "new IdentityNormalizer()"; + + if (in.comment != null) + out.println(in.comment); + + out.println("public class " + inName + " extends " + in.algorithm.name); + out.println("{"); + + if (in.containsTypeSpecificNormalizer()) { + out.println(" private static final HashMap normalizers = new " + "HashMap();"); + out.println(" static"); + out.println(" {"); + for (int i = 0; i < 
in.normalizerDeclarations.length; ++i) { + if (in.normalizerDeclarations[i].learner != null) + out.println(" normalizers.put(new " + in.normalizerDeclarations[i].learner + + "(), " + in.normalizerDeclarations[i].normalizer + ");"); + else + defaultNormalizer = in.normalizerDeclarations[i].normalizer.toString(); + } + + out.println(" }\n"); + } else + for (int i = 0; i < in.normalizerDeclarations.length; ++i) + defaultNormalizer = in.normalizerDeclarations[i].normalizer.toString(); + + indent = 1; + forInit = false; + constraintMode = false; + methodBody.delete(0, methodBody.length()); + for (int i = 0; i < in.headFinders.length; ++i) + in.headFinders[i].runPass(this); + out.println(methodBody); + + out.println(" public " + inName + "() { }"); + out.println(" public " + inName + "(" + in.head.getType() + " head)"); + out.println(" {"); + out.print(" super(head"); + if (in.algorithm.arguments.size() > 0) + out.print(", " + in.algorithm.arguments); + out.println(");"); + out.println(" constraint = new " + in.constraint.name + "().makeConstraint(head);"); + out.println(" }\n"); + + out.println(" public String getHeadType() { return \"" + + in.head.getType().typeClass().getName() + "\"; }"); + out.println(" public String[] getHeadFinderTypes()"); + out.println(" {"); + out.print(" return new String[]{ \"" + + in.headFinders[0].argument.getType().typeClass().getName() + "\""); + for (int i = 1; i < in.headFinders.length; ++i) + out.print(", \"" + in.headFinders[i].argument.getType().typeClass().getName() + "\""); + out.println(" };"); + out.println(" }\n"); + + out.println(" public Normalizer getNormalizer(Learner c)"); + out.println(" {"); + + if (in.containsTypeSpecificNormalizer()) { + out.println(" Normalizer result = (Normalizer) normalizers.get(c);"); + out.println(" if (result == null)"); + out.println(" result = " + defaultNormalizer + ";"); + out.println(" return result;"); + } else + out.println(" return " + defaultNormalizer + ";"); + + out.println(" }"); + + 
out.println("}\n"); + out.close(); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param h The node to process. + **/ + public void run(InferenceDeclaration.HeadFinder h) { + appendIndent("public static "); + methodBody.append(((InferenceDeclaration) currentCG).head.getType()); + methodBody.append(" findHead(" + h.argument + ")\n"); + ++indent; + h.body.runPass(this); + --indent; + methodBody.append("\n\n"); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param b The node to process. + **/ + public void run(Block b) { + --indent; + appendLine("{"); + + ++indent; + runOnChildren(b); + methodBody.append("\n"); + --indent; + + appendIndent("}"); + ++indent; + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param l The node to process. + **/ + public void run(StatementList l) { + ASTNodeIterator I = l.iterator(); + if (!I.hasNext()) + return; + + if (I.hasNext()) + I.next().runPass(this); + while (I.hasNext()) { + methodBody.append("\n"); + I.next().runPass(this); + } + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param s The node to process. + **/ + public void run(AssertStatement s) { + appendIndent("assert "); + s.condition.runPass(this); + + if (s.message != null) { + methodBody.append(" : "); + s.message.runPass(this); + } + + methodBody.append(";"); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param s The node to process. + **/ + public void run(BreakStatement s) { + appendIndent("break"); + + if (s.label != null) { + methodBody.append(" "); + methodBody.append(s.label); + } + + methodBody.append(";"); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param s The node to process. 
+ **/ + public void run(ContinueStatement s) { + appendIndent("continue"); + + if (s.label != null) { + methodBody.append(" "); + methodBody.append(s.label); + } + + methodBody.append(";"); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param s The node to process. + **/ + public void run(ExpressionStatement s) { + if (s.expression instanceof ConstraintStatementExpression) + s.expression.runPass(this); + else { + appendIndent(); + s.expression.runPass(this); + methodBody.append(";"); + } + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param s The node to process. + **/ + public void run(ForStatement s) { + appendIndent("for ("); + + if (s.initializers != null) { + s.initializers.runPass(this); + methodBody.append("; "); + } else if (s.initializer != null) { + forInit = true; + s.initializer.runPass(this); + methodBody.append(" "); + forInit = false; + } else + methodBody.append("; "); + + if (s.condition != null) + s.condition.runPass(this); + methodBody.append("; "); + if (s.updaters != null) + s.updaters.runPass(this); + methodBody.append(")\n"); + ++indent; + s.body.runPass(this); + --indent; + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param s The node to process. + **/ + public void run(IfStatement s) { + appendIndent("if ("); + s.condition.runPass(this); + methodBody.append(")\n"); + ++indent; + s.thenClause.runPass(this); + --indent; + + if (s.elseClause != null) { + methodBody.append("\n"); + appendLine("else"); + ++indent; + s.elseClause.runPass(this); + --indent; + } + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param s The node to process. + **/ + public void run(LabeledStatement s) { + appendIndent(s.label + ": "); + s.statement.runPass(this); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param s The node to process. 
+ **/ + public void run(ReturnStatement s) { + appendIndent(); + + if (currentCG instanceof CodedClassifier + && ((CodedClassifier) currentCG).returnType.type == ClassifierReturnType.DISCRETE) { + String literal = toStringLiteral(s.expression); + methodBody.append("return "); + if (literal != null) + methodBody.append(literal); + else { + methodBody.append("\"\" + ("); + s.expression.runPass(this); + methodBody.append(')'); + } + } else { + methodBody.append("return "); + s.expression.runPass(this); + } + + methodBody.append(";"); + } + + + /** + * If the given expression can be converted to a string at compile time, this method returns + * that string. + * + * @param e The given expression. + * @return The compile time conversion of the expression to a string, or null if it + * wasn't possible to convert. + **/ + private static String toStringLiteral(Expression e) { + if (e instanceof Constant) { + Constant c = (Constant) e; + if (c.typeCache instanceof PrimitiveType) + return "\"" + c.value + "\""; + return c.value; + } + + return null; + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param ss The node to process. 
+ **/ + public void run(SenseStatement ss) { + CodedClassifier currentCC = (CodedClassifier) currentCG; + + if (ss.value instanceof MethodInvocation) { + MethodInvocation m = (MethodInvocation) ss.value; + if (m.isClassifierInvocation) { + ClassifierReturnType invokedType = ((ClassifierType) m.name.typeCache).getOutput(); + int t = invokedType.type; + + if ((currentCC.returnType.type == ClassifierReturnType.DISCRETE_GENERATOR || currentCC.returnType.type == ClassifierReturnType.REAL_GENERATOR) + && (t == ClassifierReturnType.DISCRETE_GENERATOR + || t == ClassifierReturnType.REAL_GENERATOR + || t == ClassifierReturnType.DISCRETE_ARRAY || t == ClassifierReturnType.REAL_ARRAY)) { + appendIndent("__id = "); + String s = toStringLiteral(ss.name); + if (s != null) + methodBody.append(s); + else { + methodBody.append("\"\" + ("); + ss.name.runPass(this); + methodBody.append(")"); + } + methodBody.append(";\n"); + + appendLine("{"); + ++indent; + + appendIndent("FeatureVector __temp = "); + ss.value.runPass(this); + methodBody.append(";\n"); + + appendLine("for (int __i = 0; __i < __temp.featuresSize(); ++__i)"); + ++indent; + + boolean isDiscrete = + t == ClassifierReturnType.DISCRETE_GENERATOR + || t == ClassifierReturnType.DISCRETE_ARRAY; + + appendIndent("__result.addFeature(new "); + methodBody.append(isDiscrete ? "Discrete" : "Real"); + methodBody.append("ReferringStringFeature"); + methodBody.append("(this, __id, ("); + methodBody.append(isDiscrete ? 
"Discrete" : "Real"); + methodBody.append("Feature) __temp.getFeature(__i)"); + if (currentCC.returnType.values.size() > 0 + && !currentCC.returnType.values.equals(invokedType.values)) { + methodBody.append(", "); + methodBody.append(m.name.toString()); + methodBody.append(".getAllowableValues()"); + } + methodBody.append("));\n"); + + indent -= 2; + appendIndent("}"); + return; + } else if ((currentCC.returnType.type == ClassifierReturnType.DISCRETE_ARRAY || currentCC.returnType.type == ClassifierReturnType.REAL_ARRAY) + && (t == ClassifierReturnType.DISCRETE_ARRAY || t == ClassifierReturnType.REAL_ARRAY)) { + appendLine("{"); + ++indent; + boolean isDiscrete = t == ClassifierReturnType.DISCRETE_ARRAY; + + appendIndent("FeatureVector __temp = "); + ss.value.runPass(this); + methodBody.append(";\n"); + + appendLine("for (int __i = 0; __i < __temp.featuresSize(); ++__i)"); + appendLine("{"); + ++indent; + appendLine("Feature __f = __temp.getFeature(__i);"); + appendIndent("__value = __f."); + methodBody.append(isDiscrete ? 
"getStringValue" : "getStrength"); + methodBody.append("();\n"); + + appendIndent("__result.addFeature("); + methodBody.append(primitiveFeatureConstructorInvocation(isDiscrete, true, + "this", "\"\"", "__value", null, + "" + currentCC.returnType.values.size(), "__featureIndex++, 0")); + methodBody.append(");\n"); + + --indent; + appendLine("}"); + + --indent; + appendIndent("}"); + return; + } + } + } + + boolean discrete = + currentCC.returnType.type == ClassifierReturnType.DISCRETE_GENERATOR + || currentCC.returnType.type == ClassifierReturnType.DISCRETE_ARRAY; + + if (ss.senseall) { + if (ss.name != null) { // if we're inside a generator + appendIndent("Object __values = "); + ss.name.runPass(this); + methodBody.append(";\n\n"); + + appendLine("if (__values instanceof java.util.Collection)"); + appendLine("{"); + ++indent; + + appendLine("for (java.util.Iterator __I = ((java.util.Collection) " + + "__values).iterator(); __I.hasNext(); )"); + appendLine("{"); + ++indent; + appendLine("__id = __I.next().toString();"); + senseFeature(ss, currentCC, discrete, discrete ? "\"true\"" : "1"); + methodBody.append("\n"); + + --indent; + appendLine("}"); + --indent; + appendLine("}"); + + appendLine("else"); + appendLine("{"); + ++indent; + appendLine("for (java.util.Iterator __I = ((java.util.Map) " + + "__values).entrySet().iterator(); __I.hasNext(); )"); + appendLine("{"); + ++indent; + appendLine("java.util.Map.Entry __e = (java.util.Map.Entry) __I.next();"); + appendLine("__id = __e.getKey().toString();"); + appendIndent("__value = "); + methodBody.append(discrete ? 
"__e.getValue().toString()" + : "((Double) __e.getValue()).doubleValue()"); + methodBody.append(";\n"); + + senseFeature(ss, currentCC, discrete, null); + methodBody.append("\n"); + --indent; + appendLine("}"); + --indent; + appendLine("}"); + } else { + appendIndent("java.util.Collection __values = "); + ss.value.runPass(this); + methodBody.append(";\n\n"); + + appendLine("for (java.util.Iterator __I = ((java.util.Collection) " + + "__values).iterator(); __I.hasNext(); )"); + appendLine("{"); + ++indent; + + appendIndent("__value = "); + methodBody.append(discrete ? "__I.next().toString()" + : "((Double) __I.next()).doubleValue()"); + methodBody.append(";\n"); + + senseFeature(ss, currentCC, discrete, null); + methodBody.append("\n"); + --indent; + appendIndent("}"); + } + } else { + if (ss.name != null) { // if we're inside a generator + appendIndent("__id = "); + String s = toStringLiteral(ss.name); + if (s != null) + methodBody.append(s); + else { + methodBody.append("\"\" + ("); + ss.name.runPass(this); + methodBody.append(")"); + } + methodBody.append(";\n"); + } + + appendIndent("__value = "); + if (discrete) { + String s = toStringLiteral(ss.value); + if (s != null) + methodBody.append(s); + else { + methodBody.append("\"\" + ("); + ss.value.runPass(this); + methodBody.append(")"); + } + } else + ss.value.runPass(this); + methodBody.append(";\n"); + + senseFeature(ss, currentCC, discrete, null); + } + } + + + /** + * Generates the statement that adds a new feature of the appropriate type to the returned + * FeatureVector when a sense statement is executed. The code + * generated by this method assumes the following: + *
    + *
  • + * if the containing classifier is a generator, code has already been generated to set the value + * of a string named __id representing the identifier of the new feature and + *
  • + * if value is null, code has already been generated to set the value + * of a string or double as appropriate named __value representing the value of the + * new feature. + *
+ * + * @param s The sense statement. + * @param cc The current coded classifier. + * @param discrete Whether or not cc is discrete. + * @param value Generated code evaluating to the new feature's value. If this parameter is + * null, it will default to "__value". + **/ + private void senseFeature(SenseStatement s, CodedClassifier cc, boolean discrete, String value) { + appendIndent("__result.addFeature("); + boolean array = s.name == null; + String id = array ? "\"\"" : "__id"; + if (value == null) + value = "__value"; + methodBody.append(primitiveFeatureConstructorInvocation(discrete, array, "this", id, value, + null, "" + cc.returnType.values.size(), "__featureIndex++, 0")); + methodBody.append(");"); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param s The node to process. + **/ + public void run(SwitchStatement s) { + appendIndent("switch ("); + s.expression.runPass(this); + methodBody.append(")\n"); + s.block.runPass(this); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param s The node to process. + **/ + public void run(SynchronizedStatement s) { + appendIndent("synchronized ("); + s.data.runPass(this); + methodBody.append(")\n"); + ++indent; + s.block.runPass(this); + --indent; + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param s The node to process. + **/ + public void run(ThrowStatement s) { + appendIndent("throw "); + s.exception.runPass(this); + methodBody.append(";\n"); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param s The node to process. + **/ + public void run(TryStatement s) { + appendLine("try"); + ++indent; + s.block.runPass(this); + --indent; + s.catchList.runPass(this); + if (s.finallyBlock != null) { + appendLine("finally"); + s.finallyBlock.runPass(this); + } + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param s The node to process. 
+ **/ + public void run(VariableDeclaration s) { + if (!forInit) + appendIndent(); + if (s.isFinal) + methodBody.append("final "); + s.type.runPass(this); + + ASTNodeIterator N = s.names.iterator(); + methodBody.append(" " + N.next()); + ExpressionList.ExpressionListIterator I = s.initializers.listIterator(); + Expression i = I.nextItem(); + if (i != null) { + methodBody.append(" = "); + i.runPass(this); + } + + while (N.hasNext()) { + methodBody.append(", " + N.next()); + i = I.nextItem(); + if (i != null) { + methodBody.append(" = "); + i.runPass(this); + } + } + + methodBody.append(";"); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param s The node to process. + **/ + public void run(WhileStatement s) { + appendIndent("while ("); + s.condition.runPass(this); + methodBody.append(")\n"); + ++indent; + s.body.runPass(this); + --indent; + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param s The node to process. + **/ + public void run(DoStatement s) { + appendLine("do"); + ++indent; + s.body.runPass(this); + --indent; + + appendIndent("while ("); + s.condition.runPass(this); + methodBody.append(");\n"); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param l The node to process. + **/ + public void run(SwitchGroupList l) { + ASTNodeIterator I = l.iterator(); + if (!I.hasNext()) + return; + + I.next().runPass(this); + while (I.hasNext()) { + methodBody.append("\n"); + I.next().runPass(this); + } + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param g The node to process. + **/ + public void run(SwitchGroup g) { + appendIndent(); + g.labels.runPass(this); + methodBody.append("\n"); + ++indent; + g.statements.runPass(this); + --indent; + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param l The node to process. 
+ **/ + public void run(SwitchLabelList l) { + ASTNodeIterator I = l.iterator(); + if (!I.hasNext()) + return; + + I.next().runPass(this); + while (I.hasNext()) { + methodBody.append(" "); + I.next().runPass(this); + } + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param l The node to process. + **/ + public void run(SwitchLabel l) { + methodBody.append("case "); + l.value.runPass(this); + methodBody.append(":"); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param l The node to process. + **/ + public void run(CatchList l) { + ASTNodeIterator I = l.iterator(); + if (!I.hasNext()) + return; + + I.next().runPass(this); + while (I.hasNext()) { + methodBody.append("\n"); + I.next().runPass(this); + } + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param c The node to process. + **/ + public void run(CatchClause c) { + appendIndent("catch ("); + c.argument.runPass(this); + methodBody.append(")\n"); + ++indent; + c.block.runPass(this); + --indent; + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param a The node to process. + **/ + public void run(Argument a) { + if (a.getFinal()) + methodBody.append("final "); + a.getType().runPass(this); + methodBody.append(" " + a.getName()); + } + + + /** + * This method generates the code for a new temporary variable used when translating + * constraints. + * + * @param name The name of the temporary variable. + **/ + private void constraintTemporary(String name) { + appendIndent(); + if (constraintMode) + methodBody.append("FirstOrderConstraint "); + else + methodBody.append("boolean "); + methodBody.append(name); + if (constraintMode) + methodBody.append(" = null;\n"); + else + methodBody.append(";\n"); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param e The node to process. 
+ **/ + public void run(ConstraintStatementExpression e) { + constraintResultNumber = 0; + appendLine("{"); + ++indent; + + if (constraintMode && e.constraint.containsQuantifiedVariable()) { + StringBuffer buffer = new StringBuffer(); + int i = 0; + HashSet referenced = e.constraint.getVariableTypes(); + for (Iterator I = referenced.iterator(); I.hasNext();) { + Argument a = I.next(); + Type t = a.getType(); + if (t.quantifierArgumentType) + continue; + + for (int j = 0; j < indent; ++j) + buffer.append(" "); + buffer.append("LBJ$constraint$context["); + buffer.append(i); + buffer.append("] = "); + if (t instanceof PrimitiveType) { + String primitiveTypeName = null; + if (((PrimitiveType) t).type == PrimitiveType.INT) + primitiveTypeName = "Integer"; + else { + primitiveTypeName = t.toString(); + primitiveTypeName = + Character.toUpperCase(primitiveTypeName.charAt(0)) + + primitiveTypeName.substring(1); + } + + buffer.append("new "); + buffer.append(primitiveTypeName); + buffer.append("("); + } + + buffer.append(a.getName()); + if (t instanceof PrimitiveType) + buffer.append(")"); + buffer.append(";\n"); + + contextVariables.put(a.getName(), new Integer(i++)); + } + + appendIndent("Object[] LBJ$constraint$context = new Object["); + methodBody.append(i); + methodBody.append("];\n"); + methodBody.append(buffer); + } + + String childResultName = constraintResult + constraintResultNumber; + constraintResultName = childResultName; + constraintTemporary(childResultName); + quantifierNesting = 0; + + e.constraint.runPass(this); + + appendIndent(); + if (constraintMode) { + methodBody.append("__result = new FirstOrderConjunction(__result, "); + methodBody.append(childResultName); + methodBody.append(");\n"); + } else { + methodBody.append("if (!"); + methodBody.append(childResultName); + methodBody.append(") return \"false\";\n"); + } + + --indent; + appendIndent("}"); + } + + + /** + * Runs this pass on all nodes of the indicated type. 
+ * + * @param e The node to process. + **/ + public void run(BinaryConstraintExpression e) { + String myResultName = constraintResultName; + String leftResultName = constraintResult + ++constraintResultNumber; + + appendLine("{"); + ++indent; + + constraintTemporary(leftResultName); + constraintResultName = leftResultName; + e.left.runPass(this); + + if (constraintMode || e.operation.operation == Operator.DOUBLE_IMPLICATION) { + String rightResultName = constraintResult + ++constraintResultNumber; + constraintTemporary(rightResultName); + constraintResultName = rightResultName; + e.right.runPass(this); + + appendIndent(myResultName); + + if (constraintMode) { + methodBody.append(" = new FirstOrder"); + if (e.operation.operation == Operator.LOGICAL_CONJUNCTION) + methodBody.append("Conjunction"); + else if (e.operation.operation == Operator.LOGICAL_DISJUNCTION) + methodBody.append("Disjunction"); + else if (e.operation.operation == Operator.IMPLICATION) + methodBody.append("Implication"); + else + methodBody.append("DoubleImplication"); + + methodBody.append("("); + methodBody.append(leftResultName); + methodBody.append(", "); + methodBody.append(rightResultName); + methodBody.append(");\n"); + } else { + methodBody.append(" = "); + methodBody.append(leftResultName); + methodBody.append(" == "); + methodBody.append(rightResultName); + methodBody.append(";\n"); + } + } else { + appendIndent("if ("); + if (e.operation.operation == Operator.LOGICAL_DISJUNCTION) + methodBody.append("!"); + methodBody.append(leftResultName); + methodBody.append(")\n"); + ++indent; + + constraintResultName = myResultName; + e.right.runPass(this); + + --indent; + appendIndent("else "); + methodBody.append(myResultName); + methodBody.append(" = "); + methodBody.append(e.operation.operation == Operator.LOGICAL_DISJUNCTION + || e.operation.operation == Operator.IMPLICATION); + methodBody.append(";\n"); + } + + --indent; + appendLine("}"); + } + + + /** + * Runs this pass on all nodes of the 
indicated type. + * + * @param e The node to process. + **/ + public void run(NegatedConstraintExpression e) { + String myResultName = constraintResultName; + String childResultName = constraintResult + ++constraintResultNumber; + + appendLine("{"); + ++indent; + + constraintTemporary(childResultName); + constraintResultName = childResultName; + e.constraint.runPass(this); + + appendIndent(myResultName); + methodBody.append(" = "); + if (constraintMode) + methodBody.append("new FirstOrderNegation("); + else + methodBody.append("!"); + methodBody.append(childResultName); + if (constraintMode) + methodBody.append(")"); + methodBody.append(";\n"); + + --indent; + appendLine("}"); + } + + + /** + * Generates the code necessary at the top of a replacer method implementation to declare the + * variables that will be used in the method. + * + * @param expression The expression to be evaluted in the replacer method. + **/ + private void generateReplacerMethodEnvironment(Expression expression) { + for (Iterator I = expression.getVariableTypes().iterator(); I.hasNext();) { + Argument a = I.next(); + Type type = a.getType(); + String primitiveTypeName = null; + if (type instanceof PrimitiveType) { + if (((PrimitiveType) type).type == PrimitiveType.INT) + primitiveTypeName = "Integer"; + else { + primitiveTypeName = type.toString(); + primitiveTypeName = + Character.toUpperCase(primitiveTypeName.charAt(0)) + + primitiveTypeName.substring(1); + } + } + + appendIndent(); + a.runPass(this); + methodBody.append(" = ("); + if (primitiveTypeName == null) + type.runPass(this); + else + methodBody.append("(" + primitiveTypeName); + methodBody.append(") "); + + if (type.quantifierArgumentType) { + methodBody.append("quantificationVariables.get("); + methodBody.append(((Integer) quantificationVariables.get(a.getName())).intValue()); + methodBody.append(")"); + } else { + methodBody.append("context["); + methodBody.append(((Integer) contextVariables.get(a.getName())).intValue()); + 
methodBody.append("]"); + } + + if (primitiveTypeName != null) + methodBody.append(")." + type + "Value()"); + methodBody.append(";\n"); + } + } + + + /** + * Translates an expression from a quantified {@link ConstraintEqualityExpression} into the + * appropriate method of an {@link EqualityArgumentReplacer}. + * + * @param right Indicates if expression comes from the right hand side of the + * equality. + * @param expression The expression. + * @param isDiscreteLearner This flag is set if expression represents a variable. + **/ + private void generateEARMethod(boolean right, Expression expression, boolean isDiscreteLearner) { + appendIndent("public "); + methodBody.append(isDiscreteLearner ? "Object" : "String"); + methodBody.append(" get"); + methodBody.append(right ? "Right" : "Left"); + methodBody.append(isDiscreteLearner ? "Object" : "Value"); + methodBody.append("()\n"); + + appendLine("{"); + ++indent; + + generateReplacerMethodEnvironment(expression); + + appendIndent("return "); + if (isDiscreteLearner) + ((MethodInvocation) expression).arguments.runPass(this); + else { + methodBody.append("\"\" + ("); + expression.runPass(this); + methodBody.append(")"); + } + + methodBody.append(";\n"); + + --indent; + appendLine("}"); + } + + + /** + * Translates an unquantified expression not representing a first order variable from a + * {@link ConstraintEqualityExpression} into an argument of a {@link FirstOrderEquality}. + * + * @param left This flag is set if expression came from the left hand side of the + * equality. + * @param expression The expression. 
+ **/ + private void generateNotVariable(boolean left, Expression expression) { + if (left) + methodBody.append("("); + methodBody.append("\"\" + ("); + expression.runPass(this); + methodBody.append(")"); + if (left) + methodBody.append(")"); + } + + + /** + * Translates an expression representing a first order variable from a + * {@link ConstraintEqualityExpression} into an argument of a {@link FirstOrderEquality}. + * + * @param expression The expression. + * @param isQuantified This flag is set if expression contains a quantified + * variable. + **/ + private void generateVariable(Expression expression, boolean isQuantified) { + MethodInvocation method = (MethodInvocation) expression; + methodBody.append("new FirstOrderVariable("); + methodBody.append(("__" + method.name).replace('.', '$')); + + if (isQuantified) + methodBody.append(", null)"); + else { + methodBody.append(", "); + method.arguments.runPass(this); + methodBody.append(")"); + } + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param e The node to process. 
+ **/ + public void run(ConstraintEqualityExpression e) { + String myResultName = constraintResultName; + + boolean leftIsDiscreteLearner = e.leftIsDiscreteLearner; + boolean rightIsDiscreteLearner = e.rightIsDiscreteLearner; + boolean leftIsQuantified = e.leftIsQuantified; + boolean rightIsQuantified = e.rightIsQuantified; + Expression left = e.left; + Expression right = e.right; + + if (!leftIsDiscreteLearner && rightIsDiscreteLearner) { + leftIsDiscreteLearner = true; + rightIsDiscreteLearner = false; + + leftIsQuantified ^= rightIsQuantified; + rightIsQuantified ^= leftIsQuantified; + leftIsQuantified ^= rightIsQuantified; + + Expression temp = left; + left = right; + right = temp; + } + + if (!(constraintMode && (leftIsQuantified || rightIsQuantified))) { + appendIndent(myResultName); + methodBody.append(" = "); + + if (constraintMode) { + methodBody.append("new FirstOrder"); + + if (leftIsDiscreteLearner) { + if (rightIsDiscreteLearner) + methodBody.append("EqualityWithVariable"); + else + methodBody.append("EqualityWithValue"); + methodBody.append("("); + + methodBody.append(e.operation.operation == Operator.CONSTRAINT_EQUAL); + methodBody.append(", "); + generateVariable(left, false); + methodBody.append(", "); + if (rightIsDiscreteLearner) + generateVariable(right, false); + else + generateNotVariable(false, right); + + methodBody.append(");\n"); + return; + } + + methodBody.append("Constant("); + } + + if (e.operation.operation == Operator.CONSTRAINT_NOT_EQUAL) + methodBody.append("!"); + generateNotVariable(true, left); + methodBody.append(".equals("); + generateNotVariable(false, right); + methodBody.append(")"); + + if (constraintMode) + methodBody.append(")"); + methodBody.append(";\n"); + return; + } + + appendLine("{"); + ++indent; + + appendLine("EqualityArgumentReplacer LBJ$EAR ="); + ++indent; + + appendIndent("new EqualityArgumentReplacer(LBJ$constraint$context"); + if (!(leftIsQuantified && rightIsQuantified)) { + methodBody.append(", "); + 
methodBody.append(leftIsQuantified); + } + + methodBody.append(")\n"); + appendLine("{"); + ++indent; + + if (leftIsQuantified) + generateEARMethod(false, left, leftIsDiscreteLearner); + + if (rightIsQuantified) { + if (leftIsQuantified) + methodBody.append("\n"); + generateEARMethod(true, right, rightIsDiscreteLearner); + } + + --indent; + appendLine("};"); + --indent; + + appendIndent(myResultName); + methodBody.append(" = new FirstOrderEquality"); + if (leftIsDiscreteLearner) { + if (rightIsDiscreteLearner) + methodBody.append("WithVariable"); + else + methodBody.append("WithValue"); + } else + methodBody.append("TwoValues"); + + methodBody.append("("); + methodBody.append(e.operation.operation == Operator.CONSTRAINT_EQUAL); + methodBody.append(", "); + if (leftIsDiscreteLearner) + generateVariable(left, leftIsQuantified); + else if (leftIsQuantified) + methodBody.append("null"); + else + generateNotVariable(true, left); + methodBody.append(", "); + if (rightIsDiscreteLearner) + generateVariable(right, rightIsQuantified); + else if (rightIsQuantified) + methodBody.append("null"); + else + generateNotVariable(false, right); + methodBody.append(", LBJ$EAR);\n"); + + --indent; + appendLine("}"); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param e The node to process. 
+ **/ + public void run(ConstraintInvocation e) { + String myResultName = constraintResultName; + + if (!(constraintMode && e.invocationIsQuantified)) { + appendIndent(myResultName); + methodBody.append((" = __" + e.invocation.name).replace('.', '$')); + if (constraintMode) + methodBody.append(".makeConstraint("); + else + methodBody.append(".discreteValue("); + e.invocation.arguments.runPass(this); + methodBody.append(")"); + if (!constraintMode) + methodBody.append(".equals(\"true\")"); + methodBody.append(";\n"); + return; + } + + appendLine("{"); + ++indent; + + appendLine("InvocationArgumentReplacer LBJ$IAR ="); + ++indent; + + appendLine("new InvocationArgumentReplacer(LBJ$constraint$context)"); + appendLine("{"); + ++indent; + + appendLine("public Object compute()"); + appendLine("{"); + ++indent; + + Expression argument = e.invocation.arguments.listIterator().nextItem(); + generateReplacerMethodEnvironment(argument); + + appendIndent("return "); + argument.runPass(this); + methodBody.append(";\n"); + + --indent; + appendLine("}"); + + --indent; + appendLine("};"); + --indent; + + appendIndent(myResultName); + methodBody.append(" = new QuantifiedConstraintInvocation("); + methodBody.append(("__" + e.invocation.name).replace('.', '$')); + methodBody.append(", LBJ$IAR);\n"); + + --indent; + appendLine("}"); + } + + + /** + * {@link UniversalQuantifierExpression}s and {@link ExistentialQuantifierExpression}s generate + * their code through this method. + * + * @param e The node to process. 
+ **/ + private void generateSimpleQuantifier(QuantifiedConstraintExpression e) { + boolean universal = e instanceof UniversalQuantifierExpression; + String myResultName = constraintResultName; + + if (!constraintMode) { + String inductionVariable = "__I" + quantifierNesting; + + appendLine("{"); + ++indent; + + appendIndent(myResultName); + methodBody.append(" = "); + methodBody.append(universal); + methodBody.append(";\n"); + + appendIndent("for (java.util.Iterator "); + methodBody.append(inductionVariable); + methodBody.append(" = ("); + e.collection.runPass(this); + methodBody.append(").iterator(); "); + methodBody.append(inductionVariable); + methodBody.append(".hasNext() && "); + if (!universal) + methodBody.append("!"); + methodBody.append(myResultName); + methodBody.append("; )\n"); + + appendLine("{"); + ++indent; + + appendIndent(); + e.argument.runPass(this); + methodBody.append(" = ("); + e.argument.getType().runPass(this); + methodBody.append(") "); + methodBody.append(inductionVariable); + methodBody.append(".next();\n"); + + ++quantifierNesting; + e.constraint.runPass(this); + --quantifierNesting; + + --indent; + appendLine("}"); + + --indent; + appendLine("}"); + return; + } + + appendLine("{"); + ++indent; + + String childResultName = constraintResult + ++constraintResultNumber; + constraintTemporary(childResultName); + constraintResultName = childResultName; + + quantificationVariables.put(e.argument.getName(), new Integer(quantifierNesting++)); + e.constraint.runPass(this); + --quantifierNesting; + + if (!e.collectionIsQuantified) { + appendIndent(myResultName); + methodBody.append(" = new "); + if (universal) + methodBody.append("Universal"); + else + methodBody.append("Existential"); + methodBody.append("Quantifier(\""); + methodBody.append(e.argument.getName()); + methodBody.append("\", "); + e.collection.runPass(this); + methodBody.append(", "); + methodBody.append(childResultName); + methodBody.append(");\n"); + } else { + 
appendLine("QuantifierArgumentReplacer LBJ$QAR ="); + ++indent; + + appendLine("new QuantifierArgumentReplacer(LBJ$constraint$context)"); + appendLine("{"); + ++indent; + + appendLine("public java.util.Collection getCollection()"); + appendLine("{"); + ++indent; + + generateReplacerMethodEnvironment(e.collection); + + appendIndent("return "); + e.collection.runPass(this); + methodBody.append(";\n"); + + --indent; + appendLine("}"); + + --indent; + appendLine("};"); + --indent; + + appendIndent(myResultName); + methodBody.append(" = new "); + if (universal) + methodBody.append("Universal"); + else + methodBody.append("Existential"); + methodBody.append("Quantifier(\""); + methodBody.append(e.argument.getName()); + methodBody.append("\", null, "); + methodBody.append(childResultName); + methodBody.append(", LBJ$QAR);\n"); + } + + --indent; + appendLine("}"); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param e The node to process. + **/ + public void run(UniversalQuantifierExpression e) { + generateSimpleQuantifier(e); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param e The node to process. + **/ + public void run(ExistentialQuantifierExpression e) { + generateSimpleQuantifier(e); + } + + + /** + * {@link AtLeastQuantifierExpression}s and {@link AtMostQuantifierExpression}s generate their + * code through this method. + * + * @param e The node to process. 
+ **/ + public void generateBoundedQuantifier(QuantifiedConstraintExpression e) { + boolean atleast = e instanceof AtLeastQuantifierExpression; + AtLeastQuantifierExpression ale = null; + AtMostQuantifierExpression ame = null; + if (atleast) + ale = (AtLeastQuantifierExpression) e; + else + ame = (AtMostQuantifierExpression) e; + + String myResultName = constraintResultName; + String childResultName = constraintResult + ++constraintResultNumber; + + if (!constraintMode) { + appendLine("{"); + ++indent; + + String m = "LBJ$m$" + quantifierNesting; + String bound = "LBJ$bound$" + quantifierNesting; + + appendIndent("int "); + methodBody.append(m); + methodBody.append(" = 0;\n"); + appendIndent("int "); + methodBody.append(bound); + methodBody.append(" = "); + if (atleast) + ale.lowerBound.runPass(this); + else + ame.upperBound.runPass(this); + methodBody.append(";\n"); + + String inductionVariable = "__I" + quantifierNesting; + + appendIndent("for (java.util.Iterator "); + methodBody.append(inductionVariable); + methodBody.append(" = ("); + e.collection.runPass(this); + methodBody.append(").iterator(); "); + methodBody.append(inductionVariable); + methodBody.append(".hasNext() && "); + methodBody.append(m); + if (atleast) + methodBody.append(" < "); + else + methodBody.append(" <= "); + methodBody.append(bound); + methodBody.append("; )\n"); + + appendLine("{"); + ++indent; + + appendIndent(); + e.argument.runPass(this); + methodBody.append(" = ("); + e.argument.getType().runPass(this); + methodBody.append(") "); + methodBody.append(inductionVariable); + methodBody.append(".next();\n"); + + constraintTemporary(childResultName); + constraintResultName = childResultName; + ++quantifierNesting; + e.constraint.runPass(this); + --quantifierNesting; + + appendIndent("if ("); + methodBody.append(childResultName); + methodBody.append(") ++"); + methodBody.append(m); + methodBody.append(";\n"); + + --indent; + appendLine("}"); + + appendIndent(myResultName); + 
methodBody.append(" = "); + methodBody.append(m); + if (atleast) + methodBody.append(" >= "); + else + methodBody.append(" <= "); + methodBody.append(bound); + methodBody.append(";\n"); + + --indent; + appendLine("}"); + return; + } + + appendLine("{"); + ++indent; + + constraintTemporary(childResultName); + constraintResultName = childResultName; + quantificationVariables.put(e.argument.getName(), new Integer(quantifierNesting++)); + e.constraint.runPass(this); + --quantifierNesting; + + if (!(e.collectionIsQuantified || atleast && ale.lowerBoundIsQuantified || !atleast + && ame.upperBoundIsQuantified)) { + appendIndent(myResultName); + methodBody.append(" = new "); + if (atleast) + methodBody.append("AtLeast"); + else + methodBody.append("AtMost"); + methodBody.append("Quantifier(\""); + methodBody.append(e.argument.getName()); + methodBody.append("\", "); + e.collection.runPass(this); + methodBody.append(", "); + methodBody.append(childResultName); + methodBody.append(", "); + if (atleast) + methodBody.append(ale.lowerBound); + else + methodBody.append(ame.upperBound); + methodBody.append(");\n"); + } else { + appendLine("QuantifierArgumentReplacer LBJ$QAR ="); + ++indent; + + appendIndent("new QuantifierArgumentReplacer(LBJ$constraint$context"); + if (!(e.collectionIsQuantified && (atleast && ale.lowerBoundIsQuantified || !atleast + && ame.upperBoundIsQuantified))) { + methodBody.append(", "); + methodBody.append(e.collectionIsQuantified); + } + + methodBody.append(")\n"); + + appendLine("{"); + ++indent; + + if (e.collectionIsQuantified) { + appendLine("public java.util.Collection getCollection()"); + appendLine("{"); + ++indent; + + generateReplacerMethodEnvironment(e.collection); + + appendIndent("return "); + e.collection.runPass(this); + methodBody.append(";\n"); + + --indent; + appendLine("}"); + } + + if (atleast && ale.lowerBoundIsQuantified || !atleast && ame.upperBoundIsQuantified) { + if (e.collectionIsQuantified) + methodBody.append("\n"); + 
appendLine("public int getBound()"); + appendLine("{"); + ++indent; + + if (atleast) + generateReplacerMethodEnvironment(ale.lowerBound); + else + generateReplacerMethodEnvironment(ame.upperBound); + + appendIndent("return "); + if (atleast) + ale.lowerBound.runPass(this); + else + ame.upperBound.runPass(this); + methodBody.append(";\n"); + + --indent; + appendLine("}"); + } + + --indent; + appendLine("};"); + --indent; + + appendIndent(myResultName); + methodBody.append(" = new "); + if (atleast) + methodBody.append("AtLeast"); + else + methodBody.append("AtMost"); + methodBody.append("Quantifier(\""); + methodBody.append(e.argument.getName()); + methodBody.append("\", "); + if (e.collectionIsQuantified) + methodBody.append("null"); + else + e.collection.runPass(this); + methodBody.append(", "); + methodBody.append(childResultName); + methodBody.append(", "); + if (atleast && ale.lowerBoundIsQuantified || !atleast && ame.upperBoundIsQuantified) + methodBody.append("0"); + else if (atleast) + ale.lowerBound.runPass(this); + else + ame.upperBound.runPass(this); + methodBody.append(", LBJ$QAR);\n"); + } + + --indent; + appendLine("}"); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param e The node to process. + **/ + public void run(AtLeastQuantifierExpression e) { + generateBoundedQuantifier(e); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param e The node to process. + **/ + public void run(AtMostQuantifierExpression e) { + generateBoundedQuantifier(e); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param l The node to process. + **/ + public void run(ExpressionList l) { + ASTNodeIterator I = l.iterator(); + if (!I.hasNext()) + return; + + I.next().runPass(this); + while (I.hasNext()) { + methodBody.append(", "); + I.next().runPass(this); + } + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param e The node to process. 
+ **/ + public void run(ArrayCreationExpression e) { + if (e.parenthesized) + methodBody.append("("); + + methodBody.append("new "); + e.elementType.runPass(this); + + int d = 0; + for (ASTNodeIterator I = e.sizes.iterator(); I.hasNext(); ++d) { + methodBody.append("["); + I.next().runPass(this); + methodBody.append("]"); + } + + for (; d < e.dimensions; ++d) + methodBody.append("[]"); + + if (e.parenthesized) + methodBody.append(")"); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param e The node to process. + **/ + public void run(ArrayInitializer e) { + if (e.parenthesized) + methodBody.append("("); + methodBody.append("{ "); + e.values.runPass(this); + methodBody.append(" }"); + if (e.parenthesized) + methodBody.append(")"); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param e The node to process. + **/ + public void run(CastExpression e) { + if (e.parenthesized) + methodBody.append("("); + methodBody.append("("); + e.type.runPass(this); + methodBody.append(") "); + e.expression.runPass(this); + if (e.parenthesized) + methodBody.append(")"); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param e The node to process. + **/ + public void run(Conditional e) { + if (e.parenthesized) + methodBody.append("("); + e.condition.runPass(this); + methodBody.append(" ? "); + e.thenClause.runPass(this); + methodBody.append(" : "); + e.elseClause.runPass(this); + if (e.parenthesized) + methodBody.append(")"); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param e The node to process. + **/ + public void run(Constant e) { + if (e.parenthesized) + methodBody.append("("); + methodBody.append(e.value); + if (e.parenthesized) + methodBody.append(")"); + } + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param e The node to process. 
+ **/ + public void run(ParameterSet e) { + methodBody.append(e.getParameterName()); + } + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param e The node to process. + **/ + public void run(InstanceofExpression e) { + if (e.parenthesized) + methodBody.append("("); + e.left.runPass(this); + methodBody.append(" instanceof "); + e.right.runPass(this); + if (e.parenthesized) + methodBody.append(")"); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param e The node to process. + **/ + public void run(Assignment e) { + if (e.parenthesized) + methodBody.append("("); + e.left.runPass(this); + methodBody.append(" " + e.operation + " "); + e.right.runPass(this); + if (e.parenthesized) + methodBody.append(")"); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param e The node to process. + **/ + public void run(IncrementExpression e) { + if (e.parenthesized) + methodBody.append("("); + runOnChildren(e); + if (e.parenthesized) + methodBody.append(")"); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param e The node to process. + **/ + public void run(InstanceCreationExpression e) { + if (e.parenthesized) + methodBody.append("("); + + if (e.parentObject != null) { + e.parentObject.runPass(this); + methodBody.append("."); + } + + methodBody.append("new "); + e.name.runPass(this); + methodBody.append("("); + e.arguments.runPass(this); + methodBody.append(")"); + + if (e.parenthesized) + methodBody.append(")"); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param e The node to process. 
+ **/ + public void run(MethodInvocation e) { + if (e.parenthesized) + methodBody.append("("); + + if (e.isClassifierInvocation) { + methodBody.append(("__" + e.name).replace('.', '$') + "."); + + ClassifierType invokedType = (ClassifierType) e.name.typeCache; + int t = invokedType.getOutput().type; + if (t == ClassifierReturnType.DISCRETE) + methodBody.append("discreteValue("); + else if (t == ClassifierReturnType.REAL) + methodBody.append("realValue("); + else if (!e.isSensedValue) { + if (t == ClassifierReturnType.DISCRETE_ARRAY) + methodBody.append("discreteValueArray("); + else if (t == ClassifierReturnType.REAL_ARRAY) + methodBody.append("realValueArray("); + } else + methodBody.append("classify("); + + if (invokedType.getInput() instanceof ArrayType) + methodBody.append("(Object) "); + e.arguments.runPass(this); + methodBody.append(")"); + } else { + if (e.parentObject != null) { + e.parentObject.runPass(this); + methodBody.append("."); + } + + e.name.runPass(this); + methodBody.append("("); + e.arguments.runPass(this); + methodBody.append(")"); + } + + if (e.parenthesized) + methodBody.append(")"); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param be The node to process. + **/ + public void run(BinaryExpression be) { + if (be.parenthesized) + methodBody.append("("); + be.left.runPass(this); + methodBody.append(" " + be.operation + " "); + be.right.runPass(this); + if (be.parenthesized) + methodBody.append(")"); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param e The node to process. + **/ + public void run(UnaryExpression e) { + if (e.parenthesized) + methodBody.append("("); + runOnChildren(e); + if (e.parenthesized) + methodBody.append(")"); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param e The node to process. + **/ + public void run(FieldAccess e) { + if (e.parenthesized) + methodBody.append("("); + e.object.runPass(this); + methodBody.append("." 
+ e.name); + if (e.parenthesized) + methodBody.append(")"); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param e The node to process. + **/ + public void run(SubscriptVariable e) { + if (e.parenthesized) + methodBody.append("("); + e.array.runPass(this); + methodBody.append("["); + e.subscript.runPass(this); + methodBody.append("]"); + if (e.parenthesized) + methodBody.append(")"); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param n The node to process. + **/ + public void run(Name n) { + if (n.parenthesized) + methodBody.append("("); + boolean translated = false; + + if (currentCG != null && n.name.length > 1) { + HashSet invoked = SemanticAnalysis.invokedGraph.get(currentCG.getName()); + if (invoked != null) { + String className = n.toString(); + className = className.substring(0, className.lastIndexOf('.')); + String fieldOrMethod = n.name[n.name.length - 1]; + + if (invoked.contains(className)) { + String nameNoDots = className.replace('.', '$'); + methodBody.append("__"); + methodBody.append(nameNoDots); + methodBody.append("."); + methodBody.append(fieldOrMethod); + translated = true; + } + } + } + + if (!translated) + methodBody.append(n); + if (n.parenthesized) + methodBody.append(")"); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param t The node to process. + **/ + public void run(ArrayType t) { + t.type.runPass(this); + methodBody.append("[]"); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param t The node to process. + **/ + public void run(PrimitiveType t) { + methodBody.append(t); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param t The node to process. + **/ + public void run(ReferenceType t) { + methodBody.append(t); + } + + + /** + * Runs this pass on all nodes of the indicated type. + * + * @param o The node to process. 
+ **/ + public void run(Operator o) { + methodBody.append(o); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/Classifier.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/Classifier.java index 0e5511ea..55e1146c 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/Classifier.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/Classifier.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.classify; @@ -15,288 +12,276 @@ /** - * Objects of this class represent functions that make some multi-valued - * decision about an object. - * - * @author Nick Rizzolo + * Objects of this class represent functions that make some multi-valued decision about an object. + * + * @author Nick Rizzolo **/ -public abstract class Classifier implements Cloneable, Serializable -{ - /** - * Measures the performance of a classifier as compared with the values - * produced by an oracle. - * - * @param subject The classifier to test. - * @param oracle A classifier that produces the correct classifications. - * @param o The set of objects to test the subject on. - * @return The accuracy of the subject classifier. 
- **/ - public static double test(Classifier subject, Classifier oracle, Object[] o) - { - int correct = 0; - for (int i = 0; i < o.length; ++i) - if (subject.classify(o[i]).valueEquals(oracle.classify(o[i]))) - ++correct; - return correct / (double) o.length; - } - - - /** The name of the package containing this classifier. */ - public String containingPackage; - /** - * The name of the classifier usually becomes the identifier of produced - * features. - **/ - public String name; - - - /** Does nothing. */ - protected Classifier() { } - - /** - * Initializing constructor. - * - * @param n The name of the classifier, which can be fully qualified. - **/ - protected Classifier(String n) { - int lastDot = n.lastIndexOf('.'); - containingPackage = lastDot == -1 ? "" : n.substring(0, lastDot); - containingPackage = containingPackage.intern(); - name = n.substring(lastDot + 1).intern(); - } - - - /** - * This method makes one or more decisions about a single object, returning - * those decisions as {@link Feature}s in a vector. - * - * @param o The object to make decisions about. - * @return A vector of {@link Feature}s about the input object. - **/ - public abstract FeatureVector classify(Object o); - - - /** - * Use this method to make a batch of classification decisions about - * several objects. This function is implemented in the most naive way - * (simply calling {@link #classify(Object)} repeatedly) and should be - * overridden if there is a more efficient implementation. - * - * @param o The objects to make decisions about. - * @return An array of feature vectors, one per input object. - **/ - public FeatureVector[] classify(Object[] o) { - FeatureVector[] result = new FeatureVector[o.length]; - for (int i = 0; i < o.length; ++i) - result[i] = classify(o[i]); - return result; - } - - - /** - * Returns a string describing the input type of this classifier. The - * type name must be fully specified (i.e. including its package name). 
- * For example, the default return value of this method is:
- * - *
 "java.lang.Object" 
- * - * This method should be overridden by derived classes. - * - * @return A string representation of the expected input type of this - * classifier. - **/ - public String getInputType() { return "java.lang.Object"; } - - - /** - * Returns a string describing the output feature type of this classifier. - * It should either contain the basic type (discrete or - * real) and square brackets or a percent sign if the - * classifier returns an array or is a generator respectively, or simply - * mixed%. In the case that the basic type is - * discrete, the curly braces containing a list of allowable - * values should be omitted, as this list is provided by the - * {@link #allowableValues()} method. The default return value of this - * method is:
- * - *
 "discrete" 
- * - * This method should be overridden by derived classes. - * - * @return A string representation of the output feature type of this - * classifier. - **/ - public String getOutputType() { return "discrete"; } - - - /** - * Returns the array of allowable values that a feature returned by this - * classifier may take. If the array has length 0, it means either that - * the feature has discrete type and allowable values were not specified or - * that the feature has real or mixed type. The default return value of - * this method is a 0 length array. - * - *

This method should be overridden by derived classes. - * - * @return The allowable values that a feature returned by this classifier - * may take. - **/ - public String[] allowableValues() { return new String[0]; } - - - /** - * Locates the specified discrete feature value in the array of allowable - * values defined for this classifier. - * - * @param value The value to locate. - * @return The index of the specified value, or -1 if it wasn't found. - **/ - public short valueIndexOf(String value) { - String[] allowable = allowableValues(); - short result = 0; - while (result < allowable.length && !allowable[result].equals(value)) - ++result; - return result == allowable.length ? -1 : result; - } - - - /** - * Returns the classification of the given example object as a single - * feature instead of a {@link FeatureVector}. By default, this method is - * implemented to simply throw an - * UnsupportedOperationException since some classifiers return - * zero or multiple features at once. - * - * @param o The object to classify. - * @return The classification of o as a feature. - **/ - public Feature featureValue(Object o) { - throw - new UnsupportedOperationException( - "The featureValue(Object) method has not been overridden in class '" - + getClass().getName() + "'."); - } - - - /** - * Returns the value of the discrete feature that would be returned by this - * classifier. This method should only be called when overridden by a - * classifier returning a single discrete feature. - * - * @param o The object to classify. - * @return The value of the feature produced for the input object. - **/ - public String discreteValue(Object o) { - throw - new UnsupportedOperationException( - "The discreteValue(Object) method has not been overridden in class '" - + getClass().getName() + "'."); - } - - - /** - * Returns the value of the real feature that would be returned by this - * classifier. 
This method should only be called when overridden by a - * classifier returning a single real feature. - * - * @param o The object to classify. - * @return The value of the feature produced for the input object. - **/ - public double realValue(Object o) { - throw - new UnsupportedOperationException( - "The realValue(Object) method has not been overridden in class '" - + getClass().getName() + "'."); - } - - /** - * Returns the values of the discrete array of features that would be - * returned by this classifier. This method should only be called when - * overridden by a classifier returning an array of discrete features. - * - * @param o The object to classify. - * @return The values of the array of features produced for the input - * object. - **/ - public String[] discreteValueArray(Object o) { - throw - new UnsupportedOperationException( - "The discreteValueArray(Object) method has not been overridden in " - + "class '" + getClass().getName() + "'."); - } - - - /** - * Returns the values of the real array of features that would be returned - * by this classifier. This method should only be called when overridden - * by a classifier returning an array of real features. - * - * @param o The object to classify. - * @return The value of the array of features produced for the input - * object. - **/ - public double[] realValueArray(Object o) { - throw - new UnsupportedOperationException( - "The realValueArray(Object) method has not been overridden in class '" - + getClass().getName() + "'."); - } - - - /** - * If this classifier is a composite generator, this method will be - * overridden such that it returns all the classifiers it calls on in a - * list. - * - * @return All the classifiers that take part in this composite classifier, - * or null if this classifier is not a composite - * classifier. 
- **/ - public java.util.LinkedList getCompositeChildren() { - throw - new UnsupportedOperationException( - "The getCompositeChildren() method has not been overridden in class '" - + getClass().getName() + "'."); - } - - - /** - * Simply returns the name of the classifier. - * - * @return The name of the classifier. - **/ - public String toString() { return name; } - - - /** - * This method returns a shallow clone. - * - * @return A shallow clone. - **/ - public Object clone() { - Object clone = null; - - try { clone = super.clone(); } - catch (Exception e) { - System.err.println("Error cloning " + getClass().getName() + ":"); - e.printStackTrace(); - System.exit(1); +public abstract class Classifier implements Cloneable, Serializable { + /** + * Measures the performance of a classifier as compared with the values produced by an oracle. + * + * @param subject The classifier to test. + * @param oracle A classifier that produces the correct classifications. + * @param o The set of objects to test the subject on. + * @return The accuracy of the subject classifier. + **/ + public static double test(Classifier subject, Classifier oracle, Object[] o) { + int correct = 0; + for (int i = 0; i < o.length; ++i) + if (subject.classify(o[i]).valueEquals(oracle.classify(o[i]))) + ++correct; + return correct / (double) o.length; } - return clone; - } - - - /** - * Special handling during deserialization to ensure that - * Strings are intern()ed. - * - * @param in The stream to deserialize from. - **/ - private void readObject(java.io.ObjectInputStream in) - throws IOException, ClassNotFoundException { - in.defaultReadObject(); - containingPackage = containingPackage.intern(); - } -} + /** The name of the package containing this classifier. */ + public String containingPackage; + /** + * The name of the classifier usually becomes the identifier of produced features. + **/ + public String name; + + + /** Does nothing. 
*/ + protected Classifier() {} + + /** + * Initializing constructor. + * + * @param n The name of the classifier, which can be fully qualified. + **/ + protected Classifier(String n) { + int lastDot = n.lastIndexOf('.'); + containingPackage = lastDot == -1 ? "" : n.substring(0, lastDot); + containingPackage = containingPackage.intern(); + name = n.substring(lastDot + 1).intern(); + } + + + /** + * This method makes one or more decisions about a single object, returning those decisions as + * {@link Feature}s in a vector. + * + * @param o The object to make decisions about. + * @return A vector of {@link Feature}s about the input object. + **/ + public abstract FeatureVector classify(Object o); + + + /** + * Use this method to make a batch of classification decisions about several objects. This + * function is implemented in the most naive way (simply calling {@link #classify(Object)} + * repeatedly) and should be overridden if there is a more efficient implementation. + * + * @param o The objects to make decisions about. + * @return An array of feature vectors, one per input object. + **/ + public FeatureVector[] classify(Object[] o) { + FeatureVector[] result = new FeatureVector[o.length]; + for (int i = 0; i < o.length; ++i) + result[i] = classify(o[i]); + return result; + } + + + /** + * Returns a string describing the input type of this classifier. The type name must be fully + * specified (i.e. including its package name). For example, the default return value of this + * method is:
+ * + *

+     * "java.lang.Object"
+     * 
+ * + * This method should be overridden by derived classes. + * + * @return A string representation of the expected input type of this classifier. + **/ + public String getInputType() { + return "java.lang.Object"; + } + + + /** + * Returns a string describing the output feature type of this classifier. It should either + * contain the basic type (discrete or real) and square brackets or a + * percent sign if the classifier returns an array or is a generator respectively, or simply + * mixed%. In the case that the basic type is discrete, the curly + * braces containing a list of allowable values should be omitted, as this list is provided by + * the {@link #allowableValues()} method. The default return value of this method is:
+ * + *
+     * "discrete"
+     * 
+ * + * This method should be overridden by derived classes. + * + * @return A string representation of the output feature type of this classifier. + **/ + public String getOutputType() { + return "discrete"; + } + + + /** + * Returns the array of allowable values that a feature returned by this classifier may take. If + * the array has length 0, it means either that the feature has discrete type and allowable + * values were not specified or that the feature has real or mixed type. The default return + * value of this method is a 0 length array. + * + *

+ * This method should be overridden by derived classes. + * + * @return The allowable values that a feature returned by this classifier may take. + **/ + public String[] allowableValues() { + return new String[0]; + } + + + /** + * Locates the specified discrete feature value in the array of allowable values defined for + * this classifier. + * + * @param value The value to locate. + * @return The index of the specified value, or -1 if it wasn't found. + **/ + public short valueIndexOf(String value) { + String[] allowable = allowableValues(); + short result = 0; + while (result < allowable.length && !allowable[result].equals(value)) + ++result; + return result == allowable.length ? -1 : result; + } + + + /** + * Returns the classification of the given example object as a single feature instead of a + * {@link FeatureVector}. By default, this method is implemented to simply throw an + * UnsupportedOperationException since some classifiers return zero or multiple + * features at once. + * + * @param o The object to classify. + * @return The classification of o as a feature. + **/ + public Feature featureValue(Object o) { + throw new UnsupportedOperationException( + "The featureValue(Object) method has not been overridden in class '" + + getClass().getName() + "'."); + } + + + /** + * Returns the value of the discrete feature that would be returned by this classifier. This + * method should only be called when overridden by a classifier returning a single discrete + * feature. + * + * @param o The object to classify. + * @return The value of the feature produced for the input object. + **/ + public String discreteValue(Object o) { + throw new UnsupportedOperationException( + "The discreteValue(Object) method has not been overridden in class '" + + getClass().getName() + "'."); + } + + + /** + * Returns the value of the real feature that would be returned by this classifier. 
This method + * should only be called when overridden by a classifier returning a single real feature. + * + * @param o The object to classify. + * @return The value of the feature produced for the input object. + **/ + public double realValue(Object o) { + throw new UnsupportedOperationException( + "The realValue(Object) method has not been overridden in class '" + + getClass().getName() + "'."); + } + + /** + * Returns the values of the discrete array of features that would be returned by this + * classifier. This method should only be called when overridden by a classifier returning an + * array of discrete features. + * + * @param o The object to classify. + * @return The values of the array of features produced for the input object. + **/ + public String[] discreteValueArray(Object o) { + throw new UnsupportedOperationException( + "The discreteValueArray(Object) method has not been overridden in " + "class '" + + getClass().getName() + "'."); + } + + + /** + * Returns the values of the real array of features that would be returned by this classifier. + * This method should only be called when overridden by a classifier returning an array of real + * features. + * + * @param o The object to classify. + * @return The value of the array of features produced for the input object. + **/ + public double[] realValueArray(Object o) { + throw new UnsupportedOperationException( + "The realValueArray(Object) method has not been overridden in class '" + + getClass().getName() + "'."); + } + + + /** + * If this classifier is a composite generator, this method will be overridden such that it + * returns all the classifiers it calls on in a list. + * + * @return All the classifiers that take part in this composite classifier, or null + * if this classifier is not a composite classifier. 
+ **/ + public java.util.LinkedList getCompositeChildren() { + throw new UnsupportedOperationException( + "The getCompositeChildren() method has not been overridden in class '" + + getClass().getName() + "'."); + } + + + /** + * Simply returns the name of the classifier. + * + * @return The name of the classifier. + **/ + public String toString() { + return name; + } + + + /** + * This method returns a shallow clone. + * + * @return A shallow clone. + **/ + public Object clone() { + Object clone = null; + + try { + clone = super.clone(); + } catch (Exception e) { + System.err.println("Error cloning " + getClass().getName() + ":"); + e.printStackTrace(); + System.exit(1); + } + + return clone; + } + + + /** + * Special handling during deserialization to ensure that Strings are + * intern()ed. + * + * @param in The stream to deserialize from. + **/ + private void readObject(java.io.ObjectInputStream in) throws IOException, + ClassNotFoundException { + in.defaultReadObject(); + containingPackage = containingPackage.intern(); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/DiscreteArrayFeature.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/DiscreteArrayFeature.java index 33972029..311ec8db 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/DiscreteArrayFeature.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/DiscreteArrayFeature.java @@ -1,332 +1,314 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.classify; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream; import edu.illinois.cs.cogcomp.lbjava.learn.Lexicon; import edu.illinois.cs.cogcomp.lbjava.util.ByteString; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessInputStream; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessOutputStream; - /** - * A discrete array feature keeps track of its index in the classifier's - * returned array as well as the total number of features in that array. - * - * @author Nick Rizzolo + * A discrete array feature keeps track of its index in the classifier's returned array as well as + * the total number of features in that array. + * + * @author Nick Rizzolo **/ -public class DiscreteArrayFeature extends DiscretePrimitiveFeature -{ - /** The feature's index in the returned array it is contained in. */ - protected int arrayIndex; - /** The size of the returned array this feature is contained in. */ - protected int arrayLength; - - - /** - * For internal use only. - * - * @see Feature#readFeature(ExceptionlessInputStream) - **/ - protected DiscreteArrayFeature() { } - - /** - * Sets the identifier, value, array index, and size of the containing - * array. The value index and total allowable values, having not been - * specified, default to -1 and 0 respectively. - * - * @param p The new discrete feature's package. - * @param c The name of the classifier that produced this feature. - * @param id The new discrete feature's identifier. - * @param v The new discrete feature's value. - * @param i The index of this feature in the returned array. 
- * @param l The length of the array this feature is contained in. - **/ - public DiscreteArrayFeature(String p, String c, ByteString id, ByteString v, - int i, int l) { - this(p, c, id, v, (short) -1, (short) 0, i, l); - } - - /** - * Sets all member variables. - * - * @param p The new discrete feature's package. - * @param c The name of the classifier that produced this feature. - * @param id The new discrete feature's identifier. - * @param v The new discrete feature's value. - * @param vi The index corresponding to the value. - * @param t The total allowable values for this feature. - * @param i The index of this feature in the returned array. - * @param l The length of the array this feature is contained in. - **/ - public DiscreteArrayFeature(String p, String c, ByteString id, ByteString v, - short vi, short t, int i, int l) { - super(p, c, id, v, vi, t); - arrayIndex = i; - arrayLength = l; - } - - - /** Returns the array index of this feature. */ - public int getArrayIndex() { return arrayIndex; } - - - /** Returns the length of the feature array that this feature comes from. */ - public int getArrayLength() { return arrayLength; } - - - /** - * Determines if this feature comes from an array. - * - * @return true. - **/ - public boolean fromArray() { return true; } - - - /** - * If this feature is an array feature, call this method to set its array - * length; otherwise, this method has no effect. - * - * @param l The new length. - **/ - public void setArrayLength(int l) { arrayLength = l; } - - - /** - * Return the feature that should be used to index this feature into a - * lexicon. If the feature is binary, we return the feature with an empty - * value so that it will be mapped to the same weight whether it is active - * or not. If the feature can take multiple values, then simply return it - * as-is. - * - * @param lexicon The lexicon into which this feature will be indexed. - * @param training Whether or not the learner is currently training. 
- * @param label The label of the example containing this feature, or -1 - * if we aren't doing per class feature counting. - * @return A feature object appropriate for use as the key of a map. - **/ - public Feature getFeatureKey(Lexicon lexicon, boolean training, int label) { - if (totalValues() == 2) - return - new DiscreteArrayFeature( - containingPackage, generatingClassifier, identifier, - ByteString.emptyString, (short) -1, (short) 2, getArrayIndex(), - 0); - return this; - } - - - /** - * Returns a {@link RealArrayFeature} whose - * {@link RealPrimitiveFeature#value value} field is set to the strength of - * the current feature, and whose {@link #identifier} field contains all - * the information necessary to distinguish this feature from other - * features. - **/ - public RealFeature makeReal() { - if (totalValues == 2) - return - new RealArrayFeature(containingPackage, generatingClassifier, - identifier, valueIndex, arrayIndex, arrayLength); - else { - ByteString id = (ByteString) identifier.clone(); - ByteString[] toAppend = - { new ByteString("_", id.getEncoding()), value }; - id.append(toAppend); - return - new RealArrayFeature(containingPackage, generatingClassifier, id, 1, - arrayIndex, arrayLength); +public class DiscreteArrayFeature extends DiscretePrimitiveFeature { + /** The feature's index in the returned array it is contained in. */ + protected int arrayIndex; + /** The size of the returned array this feature is contained in. */ + protected int arrayLength; + + + /** + * For internal use only. + * + * @see Feature#readFeature(ExceptionlessInputStream) + **/ + protected DiscreteArrayFeature() {} + + /** + * Sets the identifier, value, array index, and size of the containing array. The value index + * and total allowable values, having not been specified, default to -1 and 0 respectively. + * + * @param p The new discrete feature's package. + * @param c The name of the classifier that produced this feature. 
+ * @param id The new discrete feature's identifier. + * @param v The new discrete feature's value. + * @param i The index of this feature in the returned array. + * @param l The length of the array this feature is contained in. + **/ + public DiscreteArrayFeature(String p, String c, ByteString id, ByteString v, int i, int l) { + this(p, c, id, v, (short) -1, (short) 0, i, l); } - } - - - /** - * Returns a new feature object that's identical to this feature except its - * strength is given by s. - * - * @param s The strength of the new feature. - * @return A new feature object as above, or null if this - * feature cannot take the specified strength. - **/ - public Feature withStrength(double s) { - if (totalValues != 2 || !(s == 0 || s == 1)) return null; - return - new DiscreteArrayFeature( - containingPackage, generatingClassifier, identifier, - ByteString.emptyString, (short) Math.round(s), (short) 2, - arrayIndex, arrayLength); - } - - - /** - * The hash code of a DiscreteArrayFeature is the sum of the - * hash codes of the containing package, the identifier, the value and the - * array index. - * - * @return The hash code of this Feature. - **/ - public int hashCode() { return 31 * super.hashCode() + arrayIndex; } - - - /** - * Two DiscreteArrayFeatures are equivalent when their - * containing packages, identifiers, indices, and values are equivalent. - * - * @param o The object with which to compare this Feature. - * @return True iff the parameter is an equivalent Feature. - **/ - public boolean equals(Object o) { - return - super.equals(o) - && (o instanceof DiscreteArrayFeature - ? 
arrayIndex == ((DiscreteArrayFeature) o).arrayIndex - : arrayIndex == ((DiscreteArrayStringFeature) o).arrayIndex); - } - - - /** - * Some features are functionally equivalent, differing only in the - * encoding of their values; this method will return true iff - * the class of this feature and f are different, but they - * differ only because they encode their values differently. This method - * does not compare the values themselves, however. - * - * @param f Another feature. - * @return See above. - **/ - public boolean classEquivalent(Feature f) { - return f instanceof DiscreteArrayStringFeature; - } - - - /** - * Used to sort features into an order that is convenient both to page - * through and for the lexicon to read off disk. - * - * @param o An object to compare with. - * @return Integers appropriate for sorting features first by package, then - * by identifier, then by array index, then by value. - **/ - public int compareTo(Object o) { - int d = compareNameStrings(o); - if (d != 0) return d; - DiscreteArrayFeature f = (DiscreteArrayFeature) o; - d = identifier.compareTo(f.identifier); - if (d != 0) return d; - d = arrayIndex - f.arrayIndex; - if (d != 0) return d; - return value.compareTo(f.value); - } - - - /** - * Writes a string representation of this Feature to the - * specified buffer. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - writeNameString(buffer); - buffer.append("["); - buffer.append(arrayIndex); - buffer.append("]("); - buffer.append(value.toString()); - buffer.append(")"); - } - - - /** - * Writes a complete binary representation of the feature. - * - * @param out The output stream. - **/ - public void write(ExceptionlessOutputStream out) { - super.write(out); - out.writeInt(arrayIndex); - out.writeInt(arrayLength); - } - - - /** - * Reads the representation of a feaeture with this object's run-time type - * from the given stream, overwriting the data in this object. 
- * - * @param in The input stream. - **/ - public void read(ExceptionlessInputStream in) { - super.read(in); - arrayIndex = in.readInt(); - arrayLength = in.readInt(); - } - - - /** - * Writes a binary representation of the feature intended for use by a - * lexicon, omitting redundant information when possible. - * - * @param out The output stream. - * @param lex The lexicon out of which this feature is being written. - * @param c The fully qualified name of the assumed class. The runtime - * class of this feature won't be written if it's equivalent to - * c. - * @param p The assumed package string. This feature's package string - * won't be written if it's equivalent to p. - * @param g The assumed classifier name string. This feature's - * classifier name string won't be written if it's equivalent - * to g. - * @param si The assumed identifier as a string. If this feature has a - * string identifier, it won't be written if it's equivalent to - * si. - * @param bi The assumed identifier as a byte string. If this feature - * has a byte string identifier, it won't be written if it's - * equivalent to bi. - * @return The name of the runtime type of this feature. - **/ - public String lexWrite(ExceptionlessOutputStream out, Lexicon lex, String c, - String p, String g, String si, ByteString bi) { - String result = super.lexWrite(out, lex, c, p, g, si, bi); - out.writeInt(arrayIndex); - out.writeInt(arrayLength); - return result; - } - - - /** - * Reads the representation of a feature with this object's run-time type - * as stored by a lexicon, overwriting the data in this object. - * - *

This method is appropriate for reading features as written by - * {@link #lexWrite(ExceptionlessOutputStream,Lexicon,String,String,String,String,ByteString)}. - * - * @param in The input stream. - * @param lex The lexicon we are reading in to. - * @param p The assumed package string. If no package name is given in - * the input stream, the instantiated feature is given this - * package. - * @param g The assumed classifier name string. If no classifier name - * is given in the input stream, the instantiated feature is - * given this classifier name. - * @param si The assumed identifier as a string. If the feature being - * read has a string identifier field and no identifier is - * given in the input stream, the feature is given this - * identifier. - * @param bi The assumed identifier as a byte string. If the feature - * being read has a byte string identifier field and no - * identifier is given in the input stream, the feature is - * given this identifier. - **/ - public void lexRead(ExceptionlessInputStream in, Lexicon lex, String p, - String g, String si, ByteString bi) { - super.lexRead(in, lex, p, g, si, bi); - arrayIndex = in.readInt(); - arrayLength = in.readInt(); - } -} + /** + * Sets all member variables. + * + * @param p The new discrete feature's package. + * @param c The name of the classifier that produced this feature. + * @param id The new discrete feature's identifier. + * @param v The new discrete feature's value. + * @param vi The index corresponding to the value. + * @param t The total allowable values for this feature. + * @param i The index of this feature in the returned array. + * @param l The length of the array this feature is contained in. + **/ + public DiscreteArrayFeature(String p, String c, ByteString id, ByteString v, short vi, short t, + int i, int l) { + super(p, c, id, v, vi, t); + arrayIndex = i; + arrayLength = l; + } + + + /** Returns the array index of this feature. 
*/ + public int getArrayIndex() { + return arrayIndex; + } + + + /** Returns the length of the feature array that this feature comes from. */ + public int getArrayLength() { + return arrayLength; + } + + + /** + * Determines if this feature comes from an array. + * + * @return true. + **/ + public boolean fromArray() { + return true; + } + + + /** + * If this feature is an array feature, call this method to set its array length; otherwise, + * this method has no effect. + * + * @param l The new length. + **/ + public void setArrayLength(int l) { + arrayLength = l; + } + + + /** + * Return the feature that should be used to index this feature into a lexicon. If the feature + * is binary, we return the feature with an empty value so that it will be mapped to the same + * weight whether it is active or not. If the feature can take multiple values, then simply + * return it as-is. + * + * @param lexicon The lexicon into which this feature will be indexed. + * @param training Whether or not the learner is currently training. + * @param label The label of the example containing this feature, or -1 if we aren't doing per + * class feature counting. + * @return A feature object appropriate for use as the key of a map. + **/ + public Feature getFeatureKey(Lexicon lexicon, boolean training, int label) { + if (totalValues() == 2) + return new DiscreteArrayFeature(containingPackage, generatingClassifier, identifier, + ByteString.emptyString, (short) -1, (short) 2, getArrayIndex(), 0); + return this; + } + + + /** + * Returns a {@link RealArrayFeature} whose {@link RealPrimitiveFeature#value value} field is + * set to the strength of the current feature, and whose {@link #identifier} field contains all + * the information necessary to distinguish this feature from other features. 
+ **/ + public RealFeature makeReal() { + if (totalValues == 2) + return new RealArrayFeature(containingPackage, generatingClassifier, identifier, + valueIndex, arrayIndex, arrayLength); + else { + ByteString id = (ByteString) identifier.clone(); + ByteString[] toAppend = {new ByteString("_", id.getEncoding()), value}; + id.append(toAppend); + return new RealArrayFeature(containingPackage, generatingClassifier, id, 1, arrayIndex, + arrayLength); + } + } + + + /** + * Returns a new feature object that's identical to this feature except its strength is given by + * s. + * + * @param s The strength of the new feature. + * @return A new feature object as above, or null if this feature cannot take the + * specified strength. + **/ + public Feature withStrength(double s) { + if (totalValues != 2 || !(s == 0 || s == 1)) + return null; + return new DiscreteArrayFeature(containingPackage, generatingClassifier, identifier, + ByteString.emptyString, (short) Math.round(s), (short) 2, arrayIndex, arrayLength); + } + + + /** + * The hash code of a DiscreteArrayFeature is the sum of the hash codes of the + * containing package, the identifier, the value and the array index. + * + * @return The hash code of this Feature. + **/ + public int hashCode() { + return 31 * super.hashCode() + arrayIndex; + } + + + /** + * Two DiscreteArrayFeatures are equivalent when their containing packages, + * identifiers, indices, and values are equivalent. + * + * @param o The object with which to compare this Feature. + * @return True iff the parameter is an equivalent Feature. + **/ + public boolean equals(Object o) { + return super.equals(o) + && (o instanceof DiscreteArrayFeature ? 
arrayIndex == ((DiscreteArrayFeature) o).arrayIndex + : arrayIndex == ((DiscreteArrayStringFeature) o).arrayIndex); + } + + + /** + * Some features are functionally equivalent, differing only in the encoding of their values; + * this method will return true iff the class of this feature and f + * are different, but they differ only because they encode their values differently. This method + * does not compare the values themselves, however. + * + * @param f Another feature. + * @return See above. + **/ + public boolean classEquivalent(Feature f) { + return f instanceof DiscreteArrayStringFeature; + } + + + /** + * Used to sort features into an order that is convenient both to page through and for the + * lexicon to read off disk. + * + * @param o An object to compare with. + * @return Integers appropriate for sorting features first by package, then by identifier, then + * by array index, then by value. + **/ + public int compareTo(Object o) { + int d = compareNameStrings(o); + if (d != 0) + return d; + DiscreteArrayFeature f = (DiscreteArrayFeature) o; + d = identifier.compareTo(f.identifier); + if (d != 0) + return d; + d = arrayIndex - f.arrayIndex; + if (d != 0) + return d; + return value.compareTo(f.value); + } + + + /** + * Writes a string representation of this Feature to the specified buffer. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + writeNameString(buffer); + buffer.append("["); + buffer.append(arrayIndex); + buffer.append("]("); + buffer.append(value.toString()); + buffer.append(")"); + } + + + /** + * Writes a complete binary representation of the feature. + * + * @param out The output stream. + **/ + public void write(ExceptionlessOutputStream out) { + super.write(out); + out.writeInt(arrayIndex); + out.writeInt(arrayLength); + } + + + /** + * Reads the representation of a feaeture with this object's run-time type from the given + * stream, overwriting the data in this object. 
+ * + * @param in The input stream. + **/ + public void read(ExceptionlessInputStream in) { + super.read(in); + arrayIndex = in.readInt(); + arrayLength = in.readInt(); + } + + + /** + * Writes a binary representation of the feature intended for use by a lexicon, omitting + * redundant information when possible. + * + * @param out The output stream. + * @param lex The lexicon out of which this feature is being written. + * @param c The fully qualified name of the assumed class. The runtime class of this feature + * won't be written if it's equivalent to c. + * @param p The assumed package string. This feature's package string won't be written if it's + * equivalent to p. + * @param g The assumed classifier name string. This feature's classifier name string won't be + * written if it's equivalent to g. + * @param si The assumed identifier as a string. If this feature has a string identifier, it + * won't be written if it's equivalent to si. + * @param bi The assumed identifier as a byte string. If this feature has a byte string + * identifier, it won't be written if it's equivalent to bi. + * @return The name of the runtime type of this feature. + **/ + public String lexWrite(ExceptionlessOutputStream out, Lexicon lex, String c, String p, + String g, String si, ByteString bi) { + String result = super.lexWrite(out, lex, c, p, g, si, bi); + out.writeInt(arrayIndex); + out.writeInt(arrayLength); + return result; + } + + + /** + * Reads the representation of a feature with this object's run-time type as stored by a + * lexicon, overwriting the data in this object. + * + *

+ * This method is appropriate for reading features as written by + * {@link #lexWrite(ExceptionlessOutputStream,Lexicon,String,String,String,String,ByteString)}. + * + * @param in The input stream. + * @param lex The lexicon we are reading in to. + * @param p The assumed package string. If no package name is given in the input stream, the + * instantiated feature is given this package. + * @param g The assumed classifier name string. If no classifier name is given in the input + * stream, the instantiated feature is given this classifier name. + * @param si The assumed identifier as a string. If the feature being read has a string + * identifier field and no identifier is given in the input stream, the feature is given + * this identifier. + * @param bi The assumed identifier as a byte string. If the feature being read has a byte + * string identifier field and no identifier is given in the input stream, the feature is + * given this identifier. + **/ + public void lexRead(ExceptionlessInputStream in, Lexicon lex, String p, String g, String si, + ByteString bi) { + super.lexRead(in, lex, p, g, si, bi); + arrayIndex = in.readInt(); + arrayLength = in.readInt(); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/DiscreteArrayStringFeature.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/DiscreteArrayStringFeature.java index ea50c59f..c73a1693 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/DiscreteArrayStringFeature.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/DiscreteArrayStringFeature.java @@ -1,351 +1,331 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.classify; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream; import edu.illinois.cs.cogcomp.lbjava.learn.Lexicon; import edu.illinois.cs.cogcomp.lbjava.util.ByteString; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessInputStream; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessOutputStream; - /** - * A discrete array feature keeps track of its index in the classifier's - * returned array as well as the total number of features in that array. - * - * @author Nick Rizzolo + * A discrete array feature keeps track of its index in the classifier's returned array as well as + * the total number of features in that array. + * + * @author Nick Rizzolo **/ -public class DiscreteArrayStringFeature extends DiscretePrimitiveStringFeature -{ - /** The feature's index in the returned array it is contained in. */ - protected int arrayIndex; - /** The size of the returned array this feature is contained in. */ - protected int arrayLength; - - - /** - * For internal use only. - * - * @see Feature#readFeature(ExceptionlessInputStream) - **/ - protected DiscreteArrayStringFeature() { } - - /** - * Sets the identifier, value, array index, and size of the containing - * array. The value index and total allowable values, having not been - * specified, default to -1 and 0 respectively. - * - * @param p The new discrete feature's package. - * @param c The name of the classifier that produced this feature. - * @param id The new discrete feature's identifier. - * @param v The new discrete feature's value. 
- * @param i The index of this feature in the returned array. - * @param l The length of the array this feature is contained in. - **/ - public DiscreteArrayStringFeature(String p, String c, String id, - String v, int i, int l) { - this(p, c, id, v, (short) -1, (short) 0, i, l); - } - - /** - * Sets all member variables. - * - * @param p The new discrete feature's package. - * @param c The name of the classifier that produced this feature. - * @param id The new discrete feature's identifier. - * @param v The new discrete feature's value. - * @param vi The index corresponding to the value. - * @param t The total allowable values for this feature. - * @param i The index of this feature in the returned array. - * @param l The length of the array this feature is contained in. - **/ - public DiscreteArrayStringFeature(String p, String c, String id, String v, - short vi, short t, int i, int l) - { - super(p, c, id, v, vi, t); - arrayIndex = i; - arrayLength = l; - } - - - /** Returns the array index of this feature. */ - public int getArrayIndex() { return arrayIndex; } - - - /** Returns the length of the feature array that this feature comes from. */ - public int getArrayLength() { return arrayLength; } - - - /** - * Determines if this feature comes from an array. - * - * @return true. - **/ - public boolean fromArray() { return true; } - - - /** - * If this feature is an array feature, call this method to set its array - * length; otherwise, this method has no effect. - * - * @param l The new length. - **/ - public void setArrayLength(int l) { arrayLength = l; } - - - /** - * Return the feature that should be used to index this feature into a - * lexicon. If the feature is binary, we return the feature with an empty - * value so that it will be mapped to the same weight whether it is active - * or not. If the feature can take multiple values, then simply return it - * as-is. - * - * @param lexicon The lexicon into which this feature will be indexed. 
- * @param training Whether or not the learner is currently training. - * @param label The label of the example containing this feature, or -1 - * if we aren't doing per class feature counting. - * @return A feature object appropriate for use as the key of a map. - **/ - public Feature getFeatureKey(Lexicon lexicon, boolean training, int label) { - if (totalValues() == 2) - return - new DiscreteArrayStringFeature( - containingPackage, generatingClassifier, identifier, "", - (short) -1, (short) 2, getArrayIndex(), 0); - return this; - } - - - /** - * Returns a {@link RealArrayFeature} whose - * {@link RealPrimitiveFeature#value value} field is set to the strength of - * the current feature, and whose {@link #identifier} field contains all - * the information necessary to distinguish this feature from other - * features. - **/ - public RealFeature makeReal() { - if (totalValues == 2) - return - new RealArrayStringFeature( - containingPackage, generatingClassifier, identifier, valueIndex, - arrayIndex, arrayLength); - else { - StringBuffer id = new StringBuffer(identifier); - id.append('_'); - id.append(value); - return - new RealArrayStringFeature( - containingPackage, generatingClassifier, id.toString(), 1, - arrayIndex, arrayLength); +public class DiscreteArrayStringFeature extends DiscretePrimitiveStringFeature { + /** The feature's index in the returned array it is contained in. */ + protected int arrayIndex; + /** The size of the returned array this feature is contained in. */ + protected int arrayLength; + + + /** + * For internal use only. + * + * @see Feature#readFeature(ExceptionlessInputStream) + **/ + protected DiscreteArrayStringFeature() {} + + /** + * Sets the identifier, value, array index, and size of the containing array. The value index + * and total allowable values, having not been specified, default to -1 and 0 respectively. + * + * @param p The new discrete feature's package. + * @param c The name of the classifier that produced this feature. 
+ * @param id The new discrete feature's identifier. + * @param v The new discrete feature's value. + * @param i The index of this feature in the returned array. + * @param l The length of the array this feature is contained in. + **/ + public DiscreteArrayStringFeature(String p, String c, String id, String v, int i, int l) { + this(p, c, id, v, (short) -1, (short) 0, i, l); } - } - - - /** - * Returns a new feature object that's identical to this feature except its - * strength is given by s. - * - * @param s The strength of the new feature. - * @return A new feature object as above, or null if this - * feature cannot take the specified strength. - **/ - public Feature withStrength(double s) { - if (totalValues != 2 || !(s == 0 || s == 1)) return null; - return - new DiscreteArrayStringFeature( - containingPackage, generatingClassifier, identifier, "", - (short) Math.round(s), (short) 2, arrayIndex, arrayLength); - } - - - /** - * Returns a feature object in which any strings that are being used to - * represent an identifier or value have been encoded in byte strings. - * - * @param e The encoding to use. - * @return A feature object as above; possible this object. - **/ - public Feature encode(String e) { - if (e == null || e == "String") return this; - ByteString id = - identifier.length() == 0 ? ByteString.emptyString - : new ByteString(identifier, e); - return - new DiscreteArrayFeature(containingPackage, generatingClassifier, id, - new ByteString(value, e), valueIndex, - totalValues, arrayIndex, arrayLength); - } - - - /** - * The hash code of a DiscreteArrayStringFeature is the sum of - * the hash codes of the containing package, the identifier, the value and - * the array index. - * - * @return The hash code of this Feature. - **/ - public int hashCode() { return 31 * super.hashCode() + arrayIndex; } - - - /** - * Two DiscreteArrayStringFeatures are equivalent when their - * containing packages, identifiers, indices, and values are equivalent. 
- * - * @param o The object with which to compare this Feature. - * @return True iff the parameter is an equivalent Feature. - **/ - public boolean equals(Object o) { - return - super.equals(o) - && (o instanceof DiscreteArrayStringFeature - ? arrayIndex == ((DiscreteArrayStringFeature) o).arrayIndex - : arrayIndex == ((DiscreteArrayFeature) o).arrayIndex); - } - - - /** - * Some features are functionally equivalent, differing only in the - * encoding of their values; this method will return true iff - * the class of this feature and f are different, but they - * differ only because they encode their values differently. This method - * does not compare the values themselves, however. - * - * @param f Another feature. - * @return See above. - **/ - public boolean classEquivalent(Feature f) { - return f instanceof DiscreteArrayFeature; - } - - - /** - * Used to sort features into an order that is convenient both to page - * through and for the lexicon to read off disk. - * - * @param o An object to compare with. - * @return Integers appropriate for sorting features first by package, then - * by identifier, then by array index, then by value. - **/ - public int compareTo(Object o) { - int d = compareNameStrings(o); - if (d != 0) return d; - DiscreteArrayStringFeature f = (DiscreteArrayStringFeature) o; - d = identifier.compareTo(f.identifier); - if (d != 0) return d; - d = arrayIndex - f.arrayIndex; - if (d != 0) return d; - return value.compareTo(f.value); - } - - - /** - * Writes a string representation of this Feature to the - * specified buffer. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - writeNameString(buffer); - buffer.append("["); - buffer.append(arrayIndex); - buffer.append("]("); - buffer.append(value); - buffer.append(")"); - } - - - /** - * Writes a complete binary representation of the feature. - * - * @param out The output stream. 
- **/ - public void write(ExceptionlessOutputStream out) { - super.write(out); - out.writeInt(arrayIndex); - out.writeInt(arrayLength); - } - - - /** - * Reads the representation of a feaeture with this object's run-time type - * from the given stream, overwriting the data in this object. - * - * @param in The input stream. - **/ - public void read(ExceptionlessInputStream in) { - super.read(in); - arrayIndex = in.readInt(); - arrayLength = in.readInt(); - } - - - /** - * Writes a binary representation of the feature intended for use by a - * lexicon, omitting redundant information when possible. - * - * @param out The output stream. - * @param lex The lexicon out of which this feature is being written. - * @param c The fully qualified name of the assumed class. The runtime - * class of this feature won't be written if it's equivalent to - * c. - * @param p The assumed package string. This feature's package string - * won't be written if it's equivalent to p. - * @param g The assumed classifier name string. This feature's - * classifier name string won't be written if it's equivalent - * to g. - * @param si The assumed identifier as a string. If this feature has a - * string identifier, it won't be written if it's equivalent to - * si. - * @param bi The assumed identifier as a byte string. If this feature - * has a byte string identifier, it won't be written if it's - * equivalent to bi. - * @return The name of the runtime type of this feature. - **/ - public String lexWrite(ExceptionlessOutputStream out, Lexicon lex, String c, - String p, String g, String si, ByteString bi) { - String result = super.lexWrite(out, lex, c, p, g, si, bi); - out.writeInt(arrayIndex); - out.writeInt(arrayLength); - return result; - } - - - /** - * Reads the representation of a feature with this object's run-time type - * as stored by a lexicon, overwriting the data in this object. - * - *

This method is appropriate for reading features as written by - * {@link #lexWrite(ExceptionlessOutputStream,Lexicon,String,String,String,String,ByteString)}. - * - * @param in The input stream. - * @param lex The lexicon we are reading in to. - * @param p The assumed package string. If no package name is given in - * the input stream, the instantiated feature is given this - * package. - * @param g The assumed classifier name string. If no classifier name - * is given in the input stream, the instantiated feature is - * given this classifier name. - * @param si The assumed identifier as a string. If the feature being - * read has a string identifier field and no identifier is - * given in the input stream, the feature is given this - * identifier. - * @param bi The assumed identifier as a byte string. If the feature - * being read has a byte string identifier field and no - * identifier is given in the input stream, the feature is - * given this identifier. - **/ - public void lexRead(ExceptionlessInputStream in, Lexicon lex, String p, - String g, String si, ByteString bi) { - super.lexRead(in, lex, p, g, si, bi); - arrayIndex = in.readInt(); - arrayLength = in.readInt(); - } -} + /** + * Sets all member variables. + * + * @param p The new discrete feature's package. + * @param c The name of the classifier that produced this feature. + * @param id The new discrete feature's identifier. + * @param v The new discrete feature's value. + * @param vi The index corresponding to the value. + * @param t The total allowable values for this feature. + * @param i The index of this feature in the returned array. + * @param l The length of the array this feature is contained in. + **/ + public DiscreteArrayStringFeature(String p, String c, String id, String v, short vi, short t, + int i, int l) { + super(p, c, id, v, vi, t); + arrayIndex = i; + arrayLength = l; + } + + + /** Returns the array index of this feature. 
*/ + public int getArrayIndex() { + return arrayIndex; + } + + + /** Returns the length of the feature array that this feature comes from. */ + public int getArrayLength() { + return arrayLength; + } + + + /** + * Determines if this feature comes from an array. + * + * @return true. + **/ + public boolean fromArray() { + return true; + } + + + /** + * If this feature is an array feature, call this method to set its array length; otherwise, + * this method has no effect. + * + * @param l The new length. + **/ + public void setArrayLength(int l) { + arrayLength = l; + } + + + /** + * Return the feature that should be used to index this feature into a lexicon. If the feature + * is binary, we return the feature with an empty value so that it will be mapped to the same + * weight whether it is active or not. If the feature can take multiple values, then simply + * return it as-is. + * + * @param lexicon The lexicon into which this feature will be indexed. + * @param training Whether or not the learner is currently training. + * @param label The label of the example containing this feature, or -1 if we aren't doing per + * class feature counting. + * @return A feature object appropriate for use as the key of a map. + **/ + public Feature getFeatureKey(Lexicon lexicon, boolean training, int label) { + if (totalValues() == 2) + return new DiscreteArrayStringFeature(containingPackage, generatingClassifier, + identifier, "", (short) -1, (short) 2, getArrayIndex(), 0); + return this; + } + + + /** + * Returns a {@link RealArrayFeature} whose {@link RealPrimitiveFeature#value value} field is + * set to the strength of the current feature, and whose {@link #identifier} field contains all + * the information necessary to distinguish this feature from other features. 
+ **/ + public RealFeature makeReal() { + if (totalValues == 2) + return new RealArrayStringFeature(containingPackage, generatingClassifier, identifier, + valueIndex, arrayIndex, arrayLength); + else { + StringBuffer id = new StringBuffer(identifier); + id.append('_'); + id.append(value); + return new RealArrayStringFeature(containingPackage, generatingClassifier, + id.toString(), 1, arrayIndex, arrayLength); + } + } + + + /** + * Returns a new feature object that's identical to this feature except its strength is given by + * s. + * + * @param s The strength of the new feature. + * @return A new feature object as above, or null if this feature cannot take the + * specified strength. + **/ + public Feature withStrength(double s) { + if (totalValues != 2 || !(s == 0 || s == 1)) + return null; + return new DiscreteArrayStringFeature(containingPackage, generatingClassifier, identifier, + "", (short) Math.round(s), (short) 2, arrayIndex, arrayLength); + } + + + /** + * Returns a feature object in which any strings that are being used to represent an identifier + * or value have been encoded in byte strings. + * + * @param e The encoding to use. + * @return A feature object as above; possible this object. + **/ + public Feature encode(String e) { + if (e == null || e == "String") + return this; + ByteString id = + identifier.length() == 0 ? ByteString.emptyString : new ByteString(identifier, e); + return new DiscreteArrayFeature(containingPackage, generatingClassifier, id, + new ByteString(value, e), valueIndex, totalValues, arrayIndex, arrayLength); + } + + + /** + * The hash code of a DiscreteArrayStringFeature is the sum of the hash codes of + * the containing package, the identifier, the value and the array index. + * + * @return The hash code of this Feature. 
+ **/ + public int hashCode() { + return 31 * super.hashCode() + arrayIndex; + } + + + /** + * Two DiscreteArrayStringFeatures are equivalent when their containing packages, + * identifiers, indices, and values are equivalent. + * + * @param o The object with which to compare this Feature. + * @return True iff the parameter is an equivalent Feature. + **/ + public boolean equals(Object o) { + return super.equals(o) + && (o instanceof DiscreteArrayStringFeature ? arrayIndex == ((DiscreteArrayStringFeature) o).arrayIndex + : arrayIndex == ((DiscreteArrayFeature) o).arrayIndex); + } + + + /** + * Some features are functionally equivalent, differing only in the encoding of their values; + * this method will return true iff the class of this feature and f + * are different, but they differ only because they encode their values differently. This method + * does not compare the values themselves, however. + * + * @param f Another feature. + * @return See above. + **/ + public boolean classEquivalent(Feature f) { + return f instanceof DiscreteArrayFeature; + } + + + /** + * Used to sort features into an order that is convenient both to page through and for the + * lexicon to read off disk. + * + * @param o An object to compare with. + * @return Integers appropriate for sorting features first by package, then by identifier, then + * by array index, then by value. + **/ + public int compareTo(Object o) { + int d = compareNameStrings(o); + if (d != 0) + return d; + DiscreteArrayStringFeature f = (DiscreteArrayStringFeature) o; + d = identifier.compareTo(f.identifier); + if (d != 0) + return d; + d = arrayIndex - f.arrayIndex; + if (d != 0) + return d; + return value.compareTo(f.value); + } + + + /** + * Writes a string representation of this Feature to the specified buffer. + * + * @param buffer The buffer to write to. 
+ **/ + public void write(StringBuffer buffer) { + writeNameString(buffer); + buffer.append("["); + buffer.append(arrayIndex); + buffer.append("]("); + buffer.append(value); + buffer.append(")"); + } + + + /** + * Writes a complete binary representation of the feature. + * + * @param out The output stream. + **/ + public void write(ExceptionlessOutputStream out) { + super.write(out); + out.writeInt(arrayIndex); + out.writeInt(arrayLength); + } + + + /** + * Reads the representation of a feaeture with this object's run-time type from the given + * stream, overwriting the data in this object. + * + * @param in The input stream. + **/ + public void read(ExceptionlessInputStream in) { + super.read(in); + arrayIndex = in.readInt(); + arrayLength = in.readInt(); + } + + + /** + * Writes a binary representation of the feature intended for use by a lexicon, omitting + * redundant information when possible. + * + * @param out The output stream. + * @param lex The lexicon out of which this feature is being written. + * @param c The fully qualified name of the assumed class. The runtime class of this feature + * won't be written if it's equivalent to c. + * @param p The assumed package string. This feature's package string won't be written if it's + * equivalent to p. + * @param g The assumed classifier name string. This feature's classifier name string won't be + * written if it's equivalent to g. + * @param si The assumed identifier as a string. If this feature has a string identifier, it + * won't be written if it's equivalent to si. + * @param bi The assumed identifier as a byte string. If this feature has a byte string + * identifier, it won't be written if it's equivalent to bi. + * @return The name of the runtime type of this feature. 
+ **/ + public String lexWrite(ExceptionlessOutputStream out, Lexicon lex, String c, String p, + String g, String si, ByteString bi) { + String result = super.lexWrite(out, lex, c, p, g, si, bi); + out.writeInt(arrayIndex); + out.writeInt(arrayLength); + return result; + } + + + /** + * Reads the representation of a feature with this object's run-time type as stored by a + * lexicon, overwriting the data in this object. + * + *

+ * This method is appropriate for reading features as written by + * {@link #lexWrite(ExceptionlessOutputStream,Lexicon,String,String,String,String,ByteString)}. + * + * @param in The input stream. + * @param lex The lexicon we are reading in to. + * @param p The assumed package string. If no package name is given in the input stream, the + * instantiated feature is given this package. + * @param g The assumed classifier name string. If no classifier name is given in the input + * stream, the instantiated feature is given this classifier name. + * @param si The assumed identifier as a string. If the feature being read has a string + * identifier field and no identifier is given in the input stream, the feature is given + * this identifier. + * @param bi The assumed identifier as a byte string. If the feature being read has a byte + * string identifier field and no identifier is given in the input stream, the feature is + * given this identifier. + **/ + public void lexRead(ExceptionlessInputStream in, Lexicon lex, String p, String g, String si, + ByteString bi) { + super.lexRead(in, lex, p, g, si, bi); + arrayIndex = in.readInt(); + arrayLength = in.readInt(); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/DiscreteConjunctiveFeature.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/DiscreteConjunctiveFeature.java index 818ed3eb..fd6a3650 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/DiscreteConjunctiveFeature.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/DiscreteConjunctiveFeature.java @@ -1,453 +1,441 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.classify; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream; import edu.illinois.cs.cogcomp.lbjava.learn.ChildLexicon; import edu.illinois.cs.cogcomp.lbjava.learn.Lexicon; import edu.illinois.cs.cogcomp.lbjava.util.ByteString; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessInputStream; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessOutputStream; - /** - * Represents the conjunction of two discrete features. - * - * @author Nick Rizzolo + * Represents the conjunction of two discrete features. + * + * @author Nick Rizzolo **/ -public class DiscreteConjunctiveFeature extends DiscreteFeature -{ - /** One feature argument. */ - protected DiscreteFeature left; - /** The other feature argument. */ - protected DiscreteFeature right; - - - /** - * For internal use only. - * - * @see Feature#readFeature(ExceptionlessInputStream) - **/ - public DiscreteConjunctiveFeature() { } - - /** - * Creates a new conjunctive feature. - * - * @param p The new conjunctive feature's package. - * @param c The name of the classifier that produced this feature. - * @param l One feature argument. - * @param r The other feature argument. - **/ - public DiscreteConjunctiveFeature(String p, String c, DiscreteFeature l, - DiscreteFeature r) { - this(p, c, l, r, (short) -1, (short) 0); - } - - /** - * Creates a new conjunctive feature. - * - * @param p The new discrete feature's package. - * @param c The name of the classifier that produced this feature. - * @param l One feature argument. - * @param r The other feature argument. 
- * @param vi The index corresponding to the value. - * @param t The total allowable values for this feature. - **/ - public DiscreteConjunctiveFeature(String p, String c, DiscreteFeature l, - DiscreteFeature r, short vi, short t) { - super(p, c, vi, t); - left = l; - right = r; - } - - /** - * Creates a new conjunctive feature taking the package and name of the - * given classifier. - * - * @param c The classifier from which package and name information is - * taken. - * @param l One feature argument. - * @param r The other feature argument. - **/ - public DiscreteConjunctiveFeature(Classifier c, DiscreteFeature l, - DiscreteFeature r) { - this(c.containingPackage, c.name, l, r); - - if (c.allowableValues().length == 0) valueIndex = -1; - else { - short lTotal = l.totalValues(); - valueIndex = (short) (lTotal * r.getValueIndex() + l.getValueIndex()); - totalValues = (short) (lTotal * r.totalValues()); +public class DiscreteConjunctiveFeature extends DiscreteFeature { + /** One feature argument. */ + protected DiscreteFeature left; + /** The other feature argument. */ + protected DiscreteFeature right; + + + /** + * For internal use only. + * + * @see Feature#readFeature(ExceptionlessInputStream) + **/ + public DiscreteConjunctiveFeature() {} + + /** + * Creates a new conjunctive feature. + * + * @param p The new conjunctive feature's package. + * @param c The name of the classifier that produced this feature. + * @param l One feature argument. + * @param r The other feature argument. + **/ + public DiscreteConjunctiveFeature(String p, String c, DiscreteFeature l, DiscreteFeature r) { + this(p, c, l, r, (short) -1, (short) 0); + } + + /** + * Creates a new conjunctive feature. + * + * @param p The new discrete feature's package. + * @param c The name of the classifier that produced this feature. + * @param l One feature argument. + * @param r The other feature argument. + * @param vi The index corresponding to the value. 
+ * @param t The total allowable values for this feature. + **/ + public DiscreteConjunctiveFeature(String p, String c, DiscreteFeature l, DiscreteFeature r, + short vi, short t) { + super(p, c, vi, t); + left = l; + right = r; + } + + /** + * Creates a new conjunctive feature taking the package and name of the given classifier. + * + * @param c The classifier from which package and name information is taken. + * @param l One feature argument. + * @param r The other feature argument. + **/ + public DiscreteConjunctiveFeature(Classifier c, DiscreteFeature l, DiscreteFeature r) { + this(c.containingPackage, c.name, l, r); + + if (c.allowableValues().length == 0) + valueIndex = -1; + else { + short lTotal = l.totalValues(); + valueIndex = (short) (lTotal * r.getValueIndex() + l.getValueIndex()); + totalValues = (short) (lTotal * r.totalValues()); + } + } + + + /** + * Determines if this feature is conjunctive. + * + * @return true iff this feature is conjunctive. + **/ + public boolean isConjunctive() { + return true; + } + + + /** Returns the value of {@link #left}. */ + public DiscreteFeature getLeft() { + return left; + } + + /** Returns the value of {@link #right}. */ + public DiscreteFeature getRight() { + return right; + } + + + /** + * Retrieves this feature's identifier as a string. + * + * @return The empty string, since conjunctive features don't have identifiers. + **/ + public String getStringIdentifier() { + return ""; + } + + + /** + * Retrieves this feature's identifier as a byte string. + * + * @return {@link ByteString#emptyString}, since conjunctive features don't have identifiers. + **/ + public ByteString getByteStringIdentifier() { + return ByteString.emptyString; + } + + + /** + * Gives a string representation of the value of this feature. + * + * @return A string representation of the value of this feature. 
+ **/ + public String getStringValue() { + return left.getStringValue() + right.getStringValue(); + } + + + /** + * Gives a string representation of the value of this feature. + * + * @return A string representation of the value of this feature. + **/ + public ByteString getByteStringValue() { + ByteString b = left.getByteStringValue(); + ByteString[] toAppend = {new ByteString("&", b.getEncoding()), right.getByteStringValue()}; + return b.append(toAppend); + } + + + /** + * The depth of a feature is one more than the maximum depth of any of its children, or 0 if it + * has no children. + * + * @return The depth of this feature as described above. + **/ + public int depth() { + return Math.max(left.depth(), right.depth()) + 1; + } + + + /** + * Determines whether or not the parameter is equivalent to the string representation of the + * value of this feature. + * + * @param v The string to compare against. + * @return true iff the parameter is equivalent to the string representation of the + * value of this feature. + **/ + public boolean valueEquals(String v) { + return getStringValue().equals(v); + } + + + /** + * Returns the strength of this feature if it were to be placed in a mathematical vector space. + **/ + public double getStrength() { + return 1; + } + + + /** + * Return the feature that should be used to index this feature into a lexicon. + * + * @param lexicon The lexicon into which this feature will be indexed. + * @param training Whether or not the learner is currently training. + * @param label The label of the example containing this feature, or -1 if we aren't doing per + * class feature counting. + * @return A feature object appropriate for use as the key of a map. 
+ **/ + public Feature getFeatureKey(Lexicon lexicon, boolean training, int label) { + if (!training) + return this; + return new DiscreteConjunctiveFeature(containingPackage, generatingClassifier, + getArgumentKey(left, lexicon, label), getArgumentKey(right, lexicon, label), + valueIndex, totalValues); + } + + + /** + * A helper method for {@link #getFeatureKey(Lexicon,boolean,int)}, this method computes the + * feature keys corresponding to the arguments of the conjunction. Here, we lookup the arguments + * to the conjunction in the lexicon so that their counts are never less than the conjunction's, + * and we return the actual feature object that's already a key in the lexicon. + * + * @param f The argument feature for which a key will be computed. + * @param lexicon The lexicon into which this feature will be indexed. + * @param label The label of the example containing this feature, or -1 if we aren't doing per + * class feature counting. + * @return A feature object appropriate for use as the key of a map. + **/ + protected DiscreteFeature getArgumentKey(Feature f, Lexicon lexicon, int label) { + if (!f.isPrimitive()) + f = f.getFeatureKey(lexicon, true, label); + return (DiscreteFeature) lexicon.getChildFeature(f, label); } - } - - - /** - * Determines if this feature is conjunctive. - * - * @return true iff this feature is conjunctive. - **/ - public boolean isConjunctive() { return true; } - - - /** Returns the value of {@link #left}. */ - public DiscreteFeature getLeft() { return left; } - /** Returns the value of {@link #right}. */ - public DiscreteFeature getRight() { return right; } - - - /** - * Retrieves this feature's identifier as a string. - * - * @return The empty string, since conjunctive features don't have - * identifiers. - **/ - public String getStringIdentifier() { return ""; } - - - /** - * Retrieves this feature's identifier as a byte string. 
- * - * @return {@link ByteString#emptyString}, since conjunctive features don't - * have identifiers. - **/ - public ByteString getByteStringIdentifier() { - return ByteString.emptyString; - } - - - /** - * Gives a string representation of the value of this feature. - * - * @return A string representation of the value of this feature. - **/ - public String getStringValue() { - return left.getStringValue() + right.getStringValue(); - } - - - /** - * Gives a string representation of the value of this feature. - * - * @return A string representation of the value of this feature. - **/ - public ByteString getByteStringValue() { - ByteString b = left.getByteStringValue(); - ByteString[] toAppend = - { new ByteString("&", b.getEncoding()), right.getByteStringValue() }; - return b.append(toAppend); - } - - - /** - * The depth of a feature is one more than the maximum depth of any of its - * children, or 0 if it has no children. - * - * @return The depth of this feature as described above. - **/ - public int depth() { return Math.max(left.depth(), right.depth()) + 1; } - - - /** - * Determines whether or not the parameter is equivalent to the string - * representation of the value of this feature. - * - * @param v The string to compare against. - * @return true iff the parameter is equivalent to the string - * representation of the value of this feature. - **/ - public boolean valueEquals(String v) { return getStringValue().equals(v); } - - - /** - * Returns the strength of this feature if it were to be placed in a - * mathematical vector space. - **/ - public double getStrength() { return 1; } - - - /** - * Return the feature that should be used to index this feature into a - * lexicon. - * - * @param lexicon The lexicon into which this feature will be indexed. - * @param training Whether or not the learner is currently training. - * @param label The label of the example containing this feature, or -1 - * if we aren't doing per class feature counting. 
- * @return A feature object appropriate for use as the key of a map. - **/ - public Feature getFeatureKey(Lexicon lexicon, boolean training, int label) { - if (!training) return this; - return - new DiscreteConjunctiveFeature( - containingPackage, generatingClassifier, - getArgumentKey(left, lexicon, label), - getArgumentKey(right, lexicon, label), - valueIndex, totalValues); - } - - - /** - * A helper method for {@link #getFeatureKey(Lexicon,boolean,int)}, this - * method computes the feature keys corresponding to the arguments of the - * conjunction. Here, we lookup the arguments to the conjunction in the - * lexicon so that their counts are never less than the conjunction's, and - * we return the actual feature object that's already a key in the lexicon. - * - * @param f The argument feature for which a key will be computed. - * @param lexicon The lexicon into which this feature will be indexed. - * @param label The label of the example containing this feature, or -1 - * if we aren't doing per class feature counting. - * @return A feature object appropriate for use as the key of a map. - **/ - protected DiscreteFeature getArgumentKey(Feature f, Lexicon lexicon, - int label) { - if (!f.isPrimitive()) f = f.getFeatureKey(lexicon, true, label); - return (DiscreteFeature) lexicon.getChildFeature(f, label); - } - - - /** - * Returns a {@link RealConjunctiveFeature} with exactly the same children - * as this feature. - **/ - public RealFeature makeReal() { - return - new RealConjunctiveFeature(containingPackage, generatingClassifier, - left, right); - } - - - /** - * Returns a new feature object that's identical to this feature except its - * strength is given by s. - * - * @param s The strength of the new feature. - * @return A new feature object as above, or null if this - * feature cannot take the specified strength. - **/ - public Feature withStrength(double s) { return s == 1 ? 
this : null; } - - - /** - * Returns a feature object in which any strings that are being used to - * represent an identifier or value have been encoded in byte strings. - * - * @param e The encoding to use. - * @return A feature object as above; possible this object. - **/ - public Feature encode(String e) { - DiscreteFeature newLeft = (DiscreteFeature) left.encode(e); - DiscreteFeature newRight = (DiscreteFeature) right.encode(e); - if (newLeft == left && newRight == right) return this; - return - new DiscreteConjunctiveFeature( - containingPackage, generatingClassifier, newLeft, newRight, - valueIndex, totalValues); - } - - - /** - * Takes care of any feature-type-specific tasks that need to be taken care - * of when removing a feature of this type from a {@link ChildLexicon}, in - * particular updating parent counts and removing children of this feature - * if necessary. - * - * @param lex The child lexicon this feature is being removed from. - **/ - public void removeFromChildLexicon(ChildLexicon lex) { - lex.decrementParentCounts(left); - lex.decrementParentCounts(right); - } - - - /** - * Does a feature-type-specific lookup of this feature in the given - * {@link ChildLexicon}. - * - * @param lex The child lexicon this feature is being looked up in. - * @param label The label of the example containing this feature, or -1 if - * we aren't doing per class feature counting. - * @return The index of f in this lexicon. - **/ - public int childLexiconLookup(ChildLexicon lex, int label) { - return lex.childLexiconLookup(this, label); - } - - - /** - * Returns a hash code based on the hash codes of {@link #left} and - * {@link #right}. - * - * @return The hash code of this feature. - **/ - public int hashCode() { - return 31 * super.hashCode() + 17 * left.hashCode() + right.hashCode(); - } - - - /** - * Two conjunctions are equivalent when their arguments are equivalent. - * - * @return true iff the argument is an equivalent - * Feature. 
- **/ - public boolean equals(Object o) { - if (!super.equals(o)) return false; - DiscreteConjunctiveFeature c = (DiscreteConjunctiveFeature) o; - return (left == c.left || left.equals(c.left)) - && (right == c.right || right.equals(c.right)); - } - - - /** - * Used to sort features into an order that is convenient both to page - * through and for the lexicon to read off disk. - * - * @param o An object to compare with. - * @return Integers appropriate for sorting features first by {@link #left} - * and then by {@link #right}. - **/ - public int compareTo(Object o) { - int d = compareNameStrings(o); - if (d != 0) return d; - DiscreteConjunctiveFeature c = (DiscreteConjunctiveFeature) o; - d = left.compareTo(c.left); - if (d != 0) return d; - return right.compareTo(c.right); - } - - - /** - * Writes a string representation of this Feature to the - * specified buffer. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - writeNameString(buffer); - buffer.append('{'); - left.write(buffer); - buffer.append(", "); - right.write(buffer); - buffer.append('}'); - } - - - /** - * Writes a string representation of this Feature to the - * specified buffer, omitting the package name. - * - * @param buffer The buffer to write to. - **/ - public void writeNoPackage(StringBuffer buffer) { - String p = containingPackage; - containingPackage = null; - writeNameString(buffer); - buffer.append('{'); - left.writeNoPackage(buffer); - buffer.append(", "); - right.writeNoPackage(buffer); - buffer.append('}'); - containingPackage = p; - } - - - /** - * Writes a complete binary representation of the feature. - * - * @param out The output stream. - **/ - public void write(ExceptionlessOutputStream out) { - super.write(out); - left.write(out); - right.write(out); - } - - - /** - * Reads the representation of a feature with this object's run-time type - * from the given stream, overwriting the data in this object. - * - *

This method is appropriate for reading features as written by - * {@link #write(ExceptionlessOutputStream)}. - * - * @param in The input stream. - **/ - public void read(ExceptionlessInputStream in) { - super.read(in); - left = (DiscreteFeature) Feature.readFeature(in); - right = (DiscreteFeature) Feature.readFeature(in); - } - - - /** - * Writes a binary representation of the feature intended for use by a - * lexicon, omitting redundant information when possible. - * - * @param out The output stream. - * @param lex The lexicon out of which this feature is being written. - * @param c The fully qualified name of the assumed class. The runtime - * class of this feature won't be written if it's equivalent to - * c. - * @param p The assumed package string. This feature's package string - * won't be written if it's equivalent to p. - * @param g The assumed classifier name string. This feature's - * classifier name string won't be written if it's equivalent - * to g. - * @param si The assumed identifier as a string. If this feature has a - * string identifier, it won't be written if it's equivalent to - * si. - * @param bi The assumed identifier as a byte string. If this feature - * has a byte string identifier, it won't be written if it's - * equivalent to bi. - * @return The name of the runtime type of this feature. - **/ - public String lexWrite(ExceptionlessOutputStream out, Lexicon lex, String c, - String p, String g, String si, ByteString bi) { - String result = super.lexWrite(out, lex, c, p, g, si, bi); - out.writeInt(lex.lookupChild(left)); - out.writeInt(lex.lookupChild(right)); - return result; - } - - - /** - * Reads the representation of a feature with this object's run-time type - * as stored by a lexicon, overwriting the data in this object. - * - *

This method is appropriate for reading features as written by - * {@link #lexWrite(ExceptionlessOutputStream,Lexicon,String,String,String,String,ByteString)}. - * - * @param in The input stream. - * @param lex The lexicon we are reading in to. - * @param p The assumed package string. If no package name is given in - * the input stream, the instantiated feature is given this - * package. - * @param g The assumed classifier name string. If no classifier name - * is given in the input stream, the instantiated feature is - * given this classifier name. - * @param si The assumed identifier as a string. If the feature being - * read has a string identifier field and no identifier is - * given in the input stream, the feature is given this - * identifier. - * @param bi The assumed identifier as a byte string. If the feature - * being read has a byte string identifier field and no - * identifier is given in the input stream, the feature is - * given this identifier. - **/ - public void lexRead(ExceptionlessInputStream in, Lexicon lex, String p, - String g, String si, ByteString bi) { - super.lexRead(in, lex, p, g, si, bi); - left = (DiscreteFeature) lex.lookupKey(in.readInt()); - right = (DiscreteFeature) lex.lookupKey(in.readInt()); - } -} + + /** + * Returns a {@link RealConjunctiveFeature} with exactly the same children as this feature. + **/ + public RealFeature makeReal() { + return new RealConjunctiveFeature(containingPackage, generatingClassifier, left, right); + } + + + /** + * Returns a new feature object that's identical to this feature except its strength is given by + * s. + * + * @param s The strength of the new feature. + * @return A new feature object as above, or null if this feature cannot take the + * specified strength. + **/ + public Feature withStrength(double s) { + return s == 1 ? this : null; + } + + + /** + * Returns a feature object in which any strings that are being used to represent an identifier + * or value have been encoded in byte strings. 
+ * + * @param e The encoding to use. + * @return A feature object as above; possible this object. + **/ + public Feature encode(String e) { + DiscreteFeature newLeft = (DiscreteFeature) left.encode(e); + DiscreteFeature newRight = (DiscreteFeature) right.encode(e); + if (newLeft == left && newRight == right) + return this; + return new DiscreteConjunctiveFeature(containingPackage, generatingClassifier, newLeft, + newRight, valueIndex, totalValues); + } + + + /** + * Takes care of any feature-type-specific tasks that need to be taken care of when removing a + * feature of this type from a {@link ChildLexicon}, in particular updating parent counts and + * removing children of this feature if necessary. + * + * @param lex The child lexicon this feature is being removed from. + **/ + public void removeFromChildLexicon(ChildLexicon lex) { + lex.decrementParentCounts(left); + lex.decrementParentCounts(right); + } + + + /** + * Does a feature-type-specific lookup of this feature in the given {@link ChildLexicon}. + * + * @param lex The child lexicon this feature is being looked up in. + * @param label The label of the example containing this feature, or -1 if we aren't doing per + * class feature counting. + * @return The index of f in this lexicon. + **/ + public int childLexiconLookup(ChildLexicon lex, int label) { + return lex.childLexiconLookup(this, label); + } + + + /** + * Returns a hash code based on the hash codes of {@link #left} and {@link #right}. + * + * @return The hash code of this feature. + **/ + public int hashCode() { + return 31 * super.hashCode() + 17 * left.hashCode() + right.hashCode(); + } + + + /** + * Two conjunctions are equivalent when their arguments are equivalent. + * + * @return true iff the argument is an equivalent Feature. 
+ **/ + public boolean equals(Object o) { + if (!super.equals(o)) + return false; + DiscreteConjunctiveFeature c = (DiscreteConjunctiveFeature) o; + return (left == c.left || left.equals(c.left)) + && (right == c.right || right.equals(c.right)); + } + + + /** + * Used to sort features into an order that is convenient both to page through and for the + * lexicon to read off disk. + * + * @param o An object to compare with. + * @return Integers appropriate for sorting features first by {@link #left} and then by + * {@link #right}. + **/ + public int compareTo(Object o) { + int d = compareNameStrings(o); + if (d != 0) + return d; + DiscreteConjunctiveFeature c = (DiscreteConjunctiveFeature) o; + d = left.compareTo(c.left); + if (d != 0) + return d; + return right.compareTo(c.right); + } + + + /** + * Writes a string representation of this Feature to the specified buffer. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + writeNameString(buffer); + buffer.append('{'); + left.write(buffer); + buffer.append(", "); + right.write(buffer); + buffer.append('}'); + } + + + /** + * Writes a string representation of this Feature to the specified buffer, omitting + * the package name. + * + * @param buffer The buffer to write to. + **/ + public void writeNoPackage(StringBuffer buffer) { + String p = containingPackage; + containingPackage = null; + writeNameString(buffer); + buffer.append('{'); + left.writeNoPackage(buffer); + buffer.append(", "); + right.writeNoPackage(buffer); + buffer.append('}'); + containingPackage = p; + } + + + /** + * Writes a complete binary representation of the feature. + * + * @param out The output stream. + **/ + public void write(ExceptionlessOutputStream out) { + super.write(out); + left.write(out); + right.write(out); + } + + + /** + * Reads the representation of a feature with this object's run-time type from the given stream, + * overwriting the data in this object. + * + *

+ * This method is appropriate for reading features as written by + * {@link #write(ExceptionlessOutputStream)}. + * + * @param in The input stream. + **/ + public void read(ExceptionlessInputStream in) { + super.read(in); + left = (DiscreteFeature) Feature.readFeature(in); + right = (DiscreteFeature) Feature.readFeature(in); + } + + + /** + * Writes a binary representation of the feature intended for use by a lexicon, omitting + * redundant information when possible. + * + * @param out The output stream. + * @param lex The lexicon out of which this feature is being written. + * @param c The fully qualified name of the assumed class. The runtime class of this feature + * won't be written if it's equivalent to c. + * @param p The assumed package string. This feature's package string won't be written if it's + * equivalent to p. + * @param g The assumed classifier name string. This feature's classifier name string won't be + * written if it's equivalent to g. + * @param si The assumed identifier as a string. If this feature has a string identifier, it + * won't be written if it's equivalent to si. + * @param bi The assumed identifier as a byte string. If this feature has a byte string + * identifier, it won't be written if it's equivalent to bi. + * @return The name of the runtime type of this feature. + **/ + public String lexWrite(ExceptionlessOutputStream out, Lexicon lex, String c, String p, + String g, String si, ByteString bi) { + String result = super.lexWrite(out, lex, c, p, g, si, bi); + out.writeInt(lex.lookupChild(left)); + out.writeInt(lex.lookupChild(right)); + return result; + } + + + /** + * Reads the representation of a feature with this object's run-time type as stored by a + * lexicon, overwriting the data in this object. + * + *

+ * This method is appropriate for reading features as written by + * {@link #lexWrite(ExceptionlessOutputStream,Lexicon,String,String,String,String,ByteString)}. + * + * @param in The input stream. + * @param lex The lexicon we are reading in to. + * @param p The assumed package string. If no package name is given in the input stream, the + * instantiated feature is given this package. + * @param g The assumed classifier name string. If no classifier name is given in the input + * stream, the instantiated feature is given this classifier name. + * @param si The assumed identifier as a string. If the feature being read has a string + * identifier field and no identifier is given in the input stream, the feature is given + * this identifier. + * @param bi The assumed identifier as a byte string. If the feature being read has a byte + * string identifier field and no identifier is given in the input stream, the feature is + * given this identifier. + **/ + public void lexRead(ExceptionlessInputStream in, Lexicon lex, String p, String g, String si, + ByteString bi) { + super.lexRead(in, lex, p, g, si, bi); + left = (DiscreteFeature) lex.lookupKey(in.readInt()); + right = (DiscreteFeature) lex.lookupKey(in.readInt()); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/DiscreteFeature.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/DiscreteFeature.java index 8205c658..317e6b67 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/DiscreteFeature.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/DiscreteFeature.java @@ -1,209 +1,198 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.classify; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream; import edu.illinois.cs.cogcomp.lbjava.learn.Lexicon; import edu.illinois.cs.cogcomp.lbjava.util.ByteString; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessInputStream; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessOutputStream; - /** - * A discrete feature takes on one value from a set of discontinuous values. - * The set of values that a given DiscreteFeature may take is - * defined in the Classifier that produced the feature. - * - * @author Nick Rizzolo + * A discrete feature takes on one value from a set of discontinuous values. The set of values that + * a given DiscreteFeature may take is defined in the Classifier that + * produced the feature. + * + * @author Nick Rizzolo **/ -public abstract class DiscreteFeature extends Feature -{ - /** Convient access to a common allowable value set. */ - public static final String[] BooleanValues = { "false", "true" }; - - - /** Index into the set of allowable values corresponding to this value. */ - protected short valueIndex; - /** The total number of allowable values for this feature. */ - protected short totalValues; - - - /** - * For internal use only. - * - * @see Feature#readFeature(ExceptionlessInputStream) - **/ - DiscreteFeature() { } - - /** - * Sets the identifier, value, value index, and total allowable values. - * - * @param p The new discrete feature's package. - * @param c The name of the classifier that produced this feature. - * @param vi The index corresponding to the value. 
- * @param t The total allowable values for this feature. - **/ - DiscreteFeature(String p, String c, short vi, short t) { - super(p, c); - valueIndex = vi; - totalValues = t; - } - - /** - * Determines if this feature is discrete. - * - * @return true iff this is discrete. - **/ - public boolean isDiscrete() { return true; } - - - /** - * Returns the index in the generating classifier's value list of this - * feature's value. - * - * @return A non-negative integer index, or -1 if this feature doesn't have - * a value list. - **/ - public short getValueIndex() { return valueIndex; } - - - /** - * Returns the total number of values this feature might possibly be set - * to. - * - * @return Some integer greater than 1 iff this feature is a discrete - * feature with a specified value list, and 0 otherwise. - **/ - public short totalValues() { return totalValues; } - - - /** - * Returns the strength of this feature if it were to be placed in a - * mathematical vector space. - **/ - public double getStrength() { return totalValues == 2 ? valueIndex : 1; } - - - /** - * Create a feature representing the conjunction of this feature with the - * given argument feature. - * - * @param f The feature to conjunct with. - * @param c The classifier producing the resulting feature. - * @return A feature representing the conjunction of this feature and - * f. - **/ - public Feature conjunction(Feature f, Classifier c) { - return f.conjunctWith(this, c); - } - - - /** - * Create a feature representing the conjunction of this feature with the - * given argument feature. - * - * @param f The feature to conjunct with. - * @param c The classifier producing the resulting feature. - * @return A feature representing the conjunction of this feature and - * f. - **/ - protected Feature conjunctWith(DiscreteFeature f, Classifier c) { - return new DiscreteConjunctiveFeature(c, f, this); - } - - - /** - * Writes a complete binary representation of the feature. 
- * - * @param out The output stream. - **/ - public void write(ExceptionlessOutputStream out) { - super.write(out); - out.writeShort(valueIndex); - out.writeShort(totalValues); - } - - - /** - * Reads the representation of a feaeture with this object's run-time type - * from the given stream, overwriting the data in this object. - * - * @param in The input stream. - **/ - public void read(ExceptionlessInputStream in) { - super.read(in); - valueIndex = in.readShort(); - totalValues = in.readShort(); - } - - - /** - * Writes a binary representation of the feature intended for use by a - * lexicon, omitting redundant information when possible. - * - * @param out The output stream. - * @param lex The lexicon out of which this feature is being written. - * @param c The fully qualified name of the assumed class. The runtime - * class of this feature won't be written if it's equivalent to - * c. - * @param p The assumed package string. This feature's package string - * won't be written if it's equivalent to p. - * @param g The assumed classifier name string. This feature's - * classifier name string won't be written if it's equivalent - * to g. - * @param si The assumed identifier as a string. If this feature has a - * string identifier, it won't be written if it's equivalent to - * si. - * @param bi The assumed identifier as a byte string. If this feature - * has a byte string identifier, it won't be written if it's - * equivalent to bi. - * @return The name of the runtime type of this feature. - **/ - public String lexWrite(ExceptionlessOutputStream out, Lexicon lex, String c, - String p, String g, String si, ByteString bi) { - String result = super.lexWrite(out, lex, c, p, g, si, bi); - out.writeShort(valueIndex); - out.writeShort(totalValues); - return result; - } - - - /** - * Reads the representation of a feature with this object's run-time type - * as stored by a lexicon, overwriting the data in this object. - * - *

This method is appropriate for reading features as written by - * {@link #lexWrite(ExceptionlessOutputStream,Lexicon,String,String,String,String,ByteString)}. - * - * @param in The input stream. - * @param lex The lexicon we are reading in to. - * @param p The assumed package string. If no package name is given in - * the input stream, the instantiated feature is given this - * package. - * @param g The assumed classifier name string. If no classifier name - * is given in the input stream, the instantiated feature is - * given this classifier name. - * @param si The assumed identifier as a string. If the feature being - * read has a string identifier field and no identifier is - * given in the input stream, the feature is given this - * identifier. - * @param bi The assumed identifier as a byte string. If the feature - * being read has a byte string identifier field and no - * identifier is given in the input stream, the feature is - * given this identifier. - **/ - public void lexRead(ExceptionlessInputStream in, Lexicon lex, String p, - String g, String si, ByteString bi) { - super.lexRead(in, lex, p, g, si, bi); - valueIndex = in.readShort(); - totalValues = in.readShort(); - } +public abstract class DiscreteFeature extends Feature { + /** Convient access to a common allowable value set. */ + public static final String[] BooleanValues = {"false", "true"}; + + + /** Index into the set of allowable values corresponding to this value. */ + protected short valueIndex; + /** The total number of allowable values for this feature. */ + protected short totalValues; + + + /** + * For internal use only. + * + * @see Feature#readFeature(ExceptionlessInputStream) + **/ + DiscreteFeature() {} + + /** + * Sets the identifier, value, value index, and total allowable values. + * + * @param p The new discrete feature's package. + * @param c The name of the classifier that produced this feature. + * @param vi The index corresponding to the value. 
+ * @param t The total allowable values for this feature. + **/ + DiscreteFeature(String p, String c, short vi, short t) { + super(p, c); + valueIndex = vi; + totalValues = t; + } + + /** + * Determines if this feature is discrete. + * + * @return true iff this is discrete. + **/ + public boolean isDiscrete() { + return true; + } + + + /** + * Returns the index in the generating classifier's value list of this feature's value. + * + * @return A non-negative integer index, or -1 if this feature doesn't have a value list. + **/ + public short getValueIndex() { + return valueIndex; + } + + + /** + * Returns the total number of values this feature might possibly be set to. + * + * @return Some integer greater than 1 iff this feature is a discrete feature with a specified + * value list, and 0 otherwise. + **/ + public short totalValues() { + return totalValues; + } + + + /** + * Returns the strength of this feature if it were to be placed in a mathematical vector space. + **/ + public double getStrength() { + return totalValues == 2 ? valueIndex : 1; + } + + + /** + * Create a feature representing the conjunction of this feature with the given argument + * feature. + * + * @param f The feature to conjunct with. + * @param c The classifier producing the resulting feature. + * @return A feature representing the conjunction of this feature and f. + **/ + public Feature conjunction(Feature f, Classifier c) { + return f.conjunctWith(this, c); + } + + + /** + * Create a feature representing the conjunction of this feature with the given argument + * feature. + * + * @param f The feature to conjunct with. + * @param c The classifier producing the resulting feature. + * @return A feature representing the conjunction of this feature and f. + **/ + protected Feature conjunctWith(DiscreteFeature f, Classifier c) { + return new DiscreteConjunctiveFeature(c, f, this); + } + + + /** + * Writes a complete binary representation of the feature. + * + * @param out The output stream. 
+ **/ + public void write(ExceptionlessOutputStream out) { + super.write(out); + out.writeShort(valueIndex); + out.writeShort(totalValues); + } + + + /** + * Reads the representation of a feaeture with this object's run-time type from the given + * stream, overwriting the data in this object. + * + * @param in The input stream. + **/ + public void read(ExceptionlessInputStream in) { + super.read(in); + valueIndex = in.readShort(); + totalValues = in.readShort(); + } + + + /** + * Writes a binary representation of the feature intended for use by a lexicon, omitting + * redundant information when possible. + * + * @param out The output stream. + * @param lex The lexicon out of which this feature is being written. + * @param c The fully qualified name of the assumed class. The runtime class of this feature + * won't be written if it's equivalent to c. + * @param p The assumed package string. This feature's package string won't be written if it's + * equivalent to p. + * @param g The assumed classifier name string. This feature's classifier name string won't be + * written if it's equivalent to g. + * @param si The assumed identifier as a string. If this feature has a string identifier, it + * won't be written if it's equivalent to si. + * @param bi The assumed identifier as a byte string. If this feature has a byte string + * identifier, it won't be written if it's equivalent to bi. + * @return The name of the runtime type of this feature. + **/ + public String lexWrite(ExceptionlessOutputStream out, Lexicon lex, String c, String p, + String g, String si, ByteString bi) { + String result = super.lexWrite(out, lex, c, p, g, si, bi); + out.writeShort(valueIndex); + out.writeShort(totalValues); + return result; + } + + + /** + * Reads the representation of a feature with this object's run-time type as stored by a + * lexicon, overwriting the data in this object. + * + *

+ * This method is appropriate for reading features as written by + * {@link #lexWrite(ExceptionlessOutputStream,Lexicon,String,String,String,String,ByteString)}. + * + * @param in The input stream. + * @param lex The lexicon we are reading in to. + * @param p The assumed package string. If no package name is given in the input stream, the + * instantiated feature is given this package. + * @param g The assumed classifier name string. If no classifier name is given in the input + * stream, the instantiated feature is given this classifier name. + * @param si The assumed identifier as a string. If the feature being read has a string + * identifier field and no identifier is given in the input stream, the feature is given + * this identifier. + * @param bi The assumed identifier as a byte string. If the feature being read has a byte + * string identifier field and no identifier is given in the input stream, the feature is + * given this identifier. + **/ + public void lexRead(ExceptionlessInputStream in, Lexicon lex, String p, String g, String si, + ByteString bi) { + super.lexRead(in, lex, p, g, si, bi); + valueIndex = in.readShort(); + totalValues = in.readShort(); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/DiscretePrimitiveFeature.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/DiscretePrimitiveFeature.java index 2aa4fece..65ea9ea9 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/DiscretePrimitiveFeature.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/DiscretePrimitiveFeature.java @@ -1,401 +1,384 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.classify; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream; import edu.illinois.cs.cogcomp.lbjava.learn.Lexicon; import edu.illinois.cs.cogcomp.lbjava.util.ByteString; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessInputStream; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessOutputStream; - /** - * A primitive discrete feature is a discrete feature with a string value. - * - * @author Nick Rizzolo + * A primitive discrete feature is a discrete feature with a string value. + * + * @author Nick Rizzolo **/ -public class DiscretePrimitiveFeature extends DiscreteFeature -{ - /** - * The identifier string distinguishes this - * Feature from other Features. - **/ - protected ByteString identifier; - /** The discrete value is represented as a string. */ - protected ByteString value; - - - /** - * For internal use only. - * - * @see Feature#readFeature(ExceptionlessInputStream) - **/ - protected DiscretePrimitiveFeature() { } - - /** - * Sets both the identifier and the value. The value index and total - * allowable values, having not been specified, default to -1 and 0 - * respectively. - * - * @param p The new discrete feature's package. - * @param c The name of the classifier that produced this feature. - * @param i The new discrete feature's identifier. - * @param v The new discrete feature's value. - **/ - public DiscretePrimitiveFeature(String p, String c, ByteString i, - ByteString v) { - this(p, c, i, v, (short) -1, (short) 0); - } - - /** - * Sets the identifier, value, value index, and total allowable values. 
- * - * @param p The new discrete feature's package. - * @param c The name of the classifier that produced this feature. - * @param i The new discrete feature's identifier. - * @param v The new discrete feature's value. - * @param vi The index corresponding to the value. - * @param t The total allowable values for this feature. - **/ - public DiscretePrimitiveFeature(String p, String c, ByteString i, - ByteString v, short vi, short t) { - super(p, c, vi, t); - identifier = i; - value = v; - } - - - /** - * Determines if this feature contains a byte string identifier field. - * - * @return true iff this feature contains a byte string - * identifier field. - **/ - public boolean hasByteStringIdentifier() { return true; } - - - /** - * Determines if this feature is primitive. - * - * @return true iff this is primitive. - **/ - public boolean isPrimitive() { return true; } - - - /** - * Retrieves this feature's identifier as a string. - * - * @return This feature's identifier as a string. - **/ - public String getStringIdentifier() { return identifier.toString(); } - - - /** - * Retrieves this feature's identifier as a byte string. - * - * @return This feature's identifier as a byte string. - **/ - public ByteString getByteStringIdentifier() { - return (ByteString) identifier.clone(); - } - - - /** - * Gives a string representation of the value of this feature. - * - * @return The string decoding of {@link #value}. - **/ - public String getStringValue() { return value.toString(); } - - - /** - * Gives a string representation of the value of this feature. - * - * @return A clone of {@link #value}. - **/ - public ByteString getByteStringValue() { - return (ByteString) value.clone(); - } - - - /** - * Determines whether or not the parameter is equivalent to the string - * representation of the value of this feature. - * - * @param v The string to compare against. 
- * @return true iff the parameter is equivalent to the string - * representation of the value of this feature. - **/ - public boolean valueEquals(String v) { return value.equals(v); } - - - /** - * Return the feature that should be used to index this feature into a - * lexicon. If it is a binary feature, we return the feature with an empty - * value so that the feature will be mapped to the same weight whether it - * is active or not. If the feature can take multiple values, then simply - * return the feature object as-is. - * - * @param lexicon The lexicon into which this feature will be indexed. - * @param training Whether or not the learner is currently training. - * @param label The label of the example containing this feature, or -1 - * if we aren't doing per class feature counting. - * @return A feature object appropriate for use as the key of a map. - **/ - public Feature getFeatureKey(Lexicon lexicon, boolean training, int label) { - if (totalValues() == 2) - return - new DiscretePrimitiveFeature( - containingPackage, generatingClassifier, identifier, - ByteString.emptyString, (short) -1, (short) 2); - return this; - } - - - /** - * Returns a {@link RealPrimitiveFeature} whose - * {@link RealPrimitiveFeature#value value} field is set to the strength of - * the current feature, and whose {@link #identifier} field contains all - * the information necessary to distinguish this feature from other - * features. 
- **/ - public RealFeature makeReal() { - if (totalValues == 2) - return - new RealPrimitiveFeature(containingPackage, generatingClassifier, - identifier, valueIndex); - else { - ByteString id = (ByteString) identifier.clone(); - ByteString[] toAppend = - { new ByteString("_", id.getEncoding()), value }; - id.append(toAppend); - return - new RealPrimitiveFeature(containingPackage, generatingClassifier, id, - 1); +public class DiscretePrimitiveFeature extends DiscreteFeature { + /** + * The identifier string distinguishes this Feature from other + * Features. + **/ + protected ByteString identifier; + /** The discrete value is represented as a string. */ + protected ByteString value; + + + /** + * For internal use only. + * + * @see Feature#readFeature(ExceptionlessInputStream) + **/ + protected DiscretePrimitiveFeature() {} + + /** + * Sets both the identifier and the value. The value index and total allowable values, having + * not been specified, default to -1 and 0 respectively. + * + * @param p The new discrete feature's package. + * @param c The name of the classifier that produced this feature. + * @param i The new discrete feature's identifier. + * @param v The new discrete feature's value. + **/ + public DiscretePrimitiveFeature(String p, String c, ByteString i, ByteString v) { + this(p, c, i, v, (short) -1, (short) 0); } - } - - - /** - * Returns a new feature object that's identical to this feature except its - * strength is given by s. - * - * @param s The strength of the new feature. - * @return A new feature object as above, or null if this - * feature cannot take the specified strength. 
- **/ - public Feature withStrength(double s) { - if (totalValues != 2 || !(s == 0 || s == 1)) return null; - return - new DiscretePrimitiveFeature( - containingPackage, generatingClassifier, identifier, - ByteString.emptyString, (short) Math.round(s), (short) 2); - } - - - /** - * Returns a feature object in which any strings that are being used to - * represent an identifier or value have been encoded in byte strings. - * - * @param e The encoding to use. - * @return A feature object as above; possible this object. - **/ - public Feature encode(String e) { return this; } - - - /** - * The hash code of a DiscretePrimitiveFeature is the sum of - * the hash codes of its containing package, identifier, and value. - * - * @return The hash code of this feature. - **/ - public int hashCode() { - return 31 * super.hashCode() + 17 * identifier.hashCode() - + value.hashCode(); - } - - - /** - * Two DiscretePrimitive(String)Features are equivalent when - * their containing packages, identifiers, and values are equivalent. - * - * @param o The object with which to compare this feature. - * @return true iff the parameter is an equivalent feature. - **/ - public boolean equals(Object o) { - if (!super.equals(o)) return false; - if (o instanceof DiscretePrimitiveFeature) { - DiscretePrimitiveFeature f = (DiscretePrimitiveFeature) o; - return identifier.equals(f.identifier) - && valueIndex > -1 ? valueIndex == f.valueIndex - : value.equals(f.value); + + /** + * Sets the identifier, value, value index, and total allowable values. + * + * @param p The new discrete feature's package. + * @param c The name of the classifier that produced this feature. + * @param i The new discrete feature's identifier. + * @param v The new discrete feature's value. + * @param vi The index corresponding to the value. + * @param t The total allowable values for this feature. 
+ **/ + public DiscretePrimitiveFeature(String p, String c, ByteString i, ByteString v, short vi, + short t) { + super(p, c, vi, t); + identifier = i; + value = v; } - DiscretePrimitiveStringFeature f = (DiscretePrimitiveStringFeature) o; - return identifier.equals(f.identifier) - && valueIndex > -1 ? valueIndex == f.valueIndex - : value.equals(f.value); - } - - - /** - * Some features are functionally equivalent, differing only in the - * encoding of their values; this method will return true iff - * the class of this feature and f are different, but they - * differ only because they encode their values differently. This method - * does not compare the values themselves, however. - * - * @param f Another feature. - * @return See above. - **/ - public boolean classEquivalent(Feature f) { - return f instanceof DiscretePrimitiveStringFeature; - } - - - /** - * Used to sort features into an order that is convenient both to page - * through and for the lexicon to read off disk. - * - * @param o An object to compare with. - * @return Integers appropriate for sorting features first by package, then - * by identifier, then by value. - **/ - public int compareTo(Object o) { - int d = compareNameStrings(o); - if (d != 0) return d; - DiscretePrimitiveFeature f = (DiscretePrimitiveFeature) o; - d = identifier.compareTo(f.identifier); - if (d != 0) return d; - return value.compareTo(f.value); - } - - - /** - * Writes a string representation of this Feature to the - * specified buffer. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - writeNameString(buffer); - buffer.append("("); - buffer.append(value.toString()); - buffer.append(")"); - } - - - /** - * Writes a string representation of this Feature's package, - * generating classifier, and identifier information to the specified - * buffer. - * - * @param buffer The buffer to write to. 
- **/ - public void writeNameString(StringBuffer buffer) { - super.writeNameString(buffer); - buffer.append(":"); - buffer.append(identifier.toString()); - } - - - /** - * Writes a complete binary representation of the feature. - * - * @param out The output stream. - **/ - public void write(ExceptionlessOutputStream out) { - super.write(out); - identifier.write(out); - value.write(out); - } - - - /** - * Reads the representation of a feaeture with this object's run-time type - * from the given stream, overwriting the data in this object. - * - * @param in The input stream. - **/ - public void read(ExceptionlessInputStream in) { - super.read(in); - identifier = ByteString.readByteString(in); - value = ByteString.readByteString(in); - } - - - /** - * Writes a binary representation of the feature intended for use by a - * lexicon, omitting redundant information when possible. - * - * @param out The output stream. - * @param lex The lexicon out of which this feature is being written. - * @param c The fully qualified name of the assumed class. The runtime - * class of this feature won't be written if it's equivalent to - * c. - * @param p The assumed package string. This feature's package string - * won't be written if it's equivalent to p. - * @param g The assumed classifier name string. This feature's - * classifier name string won't be written if it's equivalent - * to g. - * @param si The assumed identifier as a string. If this feature has a - * string identifier, it won't be written if it's equivalent to - * si. - * @param bi The assumed identifier as a byte string. If this feature - * has a byte string identifier, it won't be written if it's - * equivalent to bi. - * @return The name of the runtime type of this feature. 
- **/ - public String lexWrite(ExceptionlessOutputStream out, Lexicon lex, String c, - String p, String g, String si, ByteString bi) { - String result = super.lexWrite(out, lex, c, p, g, si, bi); - identifier.lexWrite(out, bi); - // This method does not have an "assumed value" parameter because we don't - // expect the value of the current feature to be the same as the value of - // the previous feature very often. However, it should always be the case - // that the identifier and value of this feature have the same encoding. - // So, the line below uses the identifier as the "assumed value". - value.lexWrite(out, identifier); - return result; - } - - - /** - * Reads the representation of a feature with this object's run-time type - * as stored by a lexicon, overwriting the data in this object. - * - *

This method is appropriate for reading features as written by - * {@link #lexWrite(ExceptionlessOutputStream,Lexicon,String,String,String,String,ByteString)}. - * - * @param in The input stream. - * @param lex The lexicon we are reading in to. - * @param p The assumed package string. If no package name is given in - * the input stream, the instantiated feature is given this - * package. - * @param g The assumed classifier name string. If no classifier name - * is given in the input stream, the instantiated feature is - * given this classifier name. - * @param si The assumed identifier as a string. If the feature being - * read has a string identifier field and no identifier is - * given in the input stream, the feature is given this - * identifier. - * @param bi The assumed identifier as a byte string. If the feature - * being read has a byte string identifier field and no - * identifier is given in the input stream, the feature is - * given this identifier. - **/ - public void lexRead(ExceptionlessInputStream in, Lexicon lex, String p, - String g, String si, ByteString bi) { - super.lexRead(in, lex, p, g, si, bi); - identifier = ByteString.lexReadByteString(in, bi); - value = ByteString.lexReadByteString(in, identifier); - } -} + /** + * Determines if this feature contains a byte string identifier field. + * + * @return true iff this feature contains a byte string identifier field. + **/ + public boolean hasByteStringIdentifier() { + return true; + } + + + /** + * Determines if this feature is primitive. + * + * @return true iff this is primitive. + **/ + public boolean isPrimitive() { + return true; + } + + + /** + * Retrieves this feature's identifier as a string. + * + * @return This feature's identifier as a string. + **/ + public String getStringIdentifier() { + return identifier.toString(); + } + + + /** + * Retrieves this feature's identifier as a byte string. + * + * @return This feature's identifier as a byte string. 
+ **/ + public ByteString getByteStringIdentifier() { + return (ByteString) identifier.clone(); + } + + + /** + * Gives a string representation of the value of this feature. + * + * @return The string decoding of {@link #value}. + **/ + public String getStringValue() { + return value.toString(); + } + + + /** + * Gives a string representation of the value of this feature. + * + * @return A clone of {@link #value}. + **/ + public ByteString getByteStringValue() { + return (ByteString) value.clone(); + } + + + /** + * Determines whether or not the parameter is equivalent to the string representation of the + * value of this feature. + * + * @param v The string to compare against. + * @return true iff the parameter is equivalent to the string representation of the + * value of this feature. + **/ + public boolean valueEquals(String v) { + return value.equals(v); + } + + + /** + * Return the feature that should be used to index this feature into a lexicon. If it is a + * binary feature, we return the feature with an empty value so that the feature will be mapped + * to the same weight whether it is active or not. If the feature can take multiple values, then + * simply return the feature object as-is. + * + * @param lexicon The lexicon into which this feature will be indexed. + * @param training Whether or not the learner is currently training. + * @param label The label of the example containing this feature, or -1 if we aren't doing per + * class feature counting. + * @return A feature object appropriate for use as the key of a map. 
+ **/ + public Feature getFeatureKey(Lexicon lexicon, boolean training, int label) { + if (totalValues() == 2) + return new DiscretePrimitiveFeature(containingPackage, generatingClassifier, + identifier, ByteString.emptyString, (short) -1, (short) 2); + return this; + } + + + /** + * Returns a {@link RealPrimitiveFeature} whose {@link RealPrimitiveFeature#value value} field + * is set to the strength of the current feature, and whose {@link #identifier} field contains + * all the information necessary to distinguish this feature from other features. + **/ + public RealFeature makeReal() { + if (totalValues == 2) + return new RealPrimitiveFeature(containingPackage, generatingClassifier, identifier, + valueIndex); + else { + ByteString id = (ByteString) identifier.clone(); + ByteString[] toAppend = {new ByteString("_", id.getEncoding()), value}; + id.append(toAppend); + return new RealPrimitiveFeature(containingPackage, generatingClassifier, id, 1); + } + } + + + /** + * Returns a new feature object that's identical to this feature except its strength is given by + * s. + * + * @param s The strength of the new feature. + * @return A new feature object as above, or null if this feature cannot take the + * specified strength. + **/ + public Feature withStrength(double s) { + if (totalValues != 2 || !(s == 0 || s == 1)) + return null; + return new DiscretePrimitiveFeature(containingPackage, generatingClassifier, identifier, + ByteString.emptyString, (short) Math.round(s), (short) 2); + } + + + /** + * Returns a feature object in which any strings that are being used to represent an identifier + * or value have been encoded in byte strings. + * + * @param e The encoding to use. + * @return A feature object as above; possible this object. + **/ + public Feature encode(String e) { + return this; + } + + + /** + * The hash code of a DiscretePrimitiveFeature is the sum of the hash codes of its + * containing package, identifier, and value. 
+ * + * @return The hash code of this feature. + **/ + public int hashCode() { + return 31 * super.hashCode() + 17 * identifier.hashCode() + value.hashCode(); + } + + + /** + * Two DiscretePrimitive(String)Features are equivalent when their containing + * packages, identifiers, and values are equivalent. + * + * @param o The object with which to compare this feature. + * @return true iff the parameter is an equivalent feature. + **/ + public boolean equals(Object o) { + if (!super.equals(o)) + return false; + if (o instanceof DiscretePrimitiveFeature) { + DiscretePrimitiveFeature f = (DiscretePrimitiveFeature) o; + return identifier.equals(f.identifier) && valueIndex > -1 ? valueIndex == f.valueIndex + : value.equals(f.value); + } + + DiscretePrimitiveStringFeature f = (DiscretePrimitiveStringFeature) o; + return identifier.equals(f.identifier) && valueIndex > -1 ? valueIndex == f.valueIndex + : value.equals(f.value); + } + + + /** + * Some features are functionally equivalent, differing only in the encoding of their values; + * this method will return true iff the class of this feature and f + * are different, but they differ only because they encode their values differently. This method + * does not compare the values themselves, however. + * + * @param f Another feature. + * @return See above. + **/ + public boolean classEquivalent(Feature f) { + return f instanceof DiscretePrimitiveStringFeature; + } + + + /** + * Used to sort features into an order that is convenient both to page through and for the + * lexicon to read off disk. + * + * @param o An object to compare with. + * @return Integers appropriate for sorting features first by package, then by identifier, then + * by value. 
+ **/ + public int compareTo(Object o) { + int d = compareNameStrings(o); + if (d != 0) + return d; + DiscretePrimitiveFeature f = (DiscretePrimitiveFeature) o; + d = identifier.compareTo(f.identifier); + if (d != 0) + return d; + return value.compareTo(f.value); + } + + + /** + * Writes a string representation of this Feature to the specified buffer. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + writeNameString(buffer); + buffer.append("("); + buffer.append(value.toString()); + buffer.append(")"); + } + + + /** + * Writes a string representation of this Feature's package, generating classifier, + * and identifier information to the specified buffer. + * + * @param buffer The buffer to write to. + **/ + public void writeNameString(StringBuffer buffer) { + super.writeNameString(buffer); + buffer.append(":"); + buffer.append(identifier.toString()); + } + + + /** + * Writes a complete binary representation of the feature. + * + * @param out The output stream. + **/ + public void write(ExceptionlessOutputStream out) { + super.write(out); + identifier.write(out); + value.write(out); + } + + + /** + * Reads the representation of a feaeture with this object's run-time type from the given + * stream, overwriting the data in this object. + * + * @param in The input stream. + **/ + public void read(ExceptionlessInputStream in) { + super.read(in); + identifier = ByteString.readByteString(in); + value = ByteString.readByteString(in); + } + + + /** + * Writes a binary representation of the feature intended for use by a lexicon, omitting + * redundant information when possible. + * + * @param out The output stream. + * @param lex The lexicon out of which this feature is being written. + * @param c The fully qualified name of the assumed class. The runtime class of this feature + * won't be written if it's equivalent to c. + * @param p The assumed package string. 
This feature's package string won't be written if it's + * equivalent to p. + * @param g The assumed classifier name string. This feature's classifier name string won't be + * written if it's equivalent to g. + * @param si The assumed identifier as a string. If this feature has a string identifier, it + * won't be written if it's equivalent to si. + * @param bi The assumed identifier as a byte string. If this feature has a byte string + * identifier, it won't be written if it's equivalent to bi. + * @return The name of the runtime type of this feature. + **/ + public String lexWrite(ExceptionlessOutputStream out, Lexicon lex, String c, String p, + String g, String si, ByteString bi) { + String result = super.lexWrite(out, lex, c, p, g, si, bi); + identifier.lexWrite(out, bi); + // This method does not have an "assumed value" parameter because we don't + // expect the value of the current feature to be the same as the value of + // the previous feature very often. However, it should always be the case + // that the identifier and value of this feature have the same encoding. + // So, the line below uses the identifier as the "assumed value". + value.lexWrite(out, identifier); + return result; + } + + + /** + * Reads the representation of a feature with this object's run-time type as stored by a + * lexicon, overwriting the data in this object. + * + *

+ * This method is appropriate for reading features as written by + * {@link #lexWrite(ExceptionlessOutputStream,Lexicon,String,String,String,String,ByteString)}. + * + * @param in The input stream. + * @param lex The lexicon we are reading in to. + * @param p The assumed package string. If no package name is given in the input stream, the + * instantiated feature is given this package. + * @param g The assumed classifier name string. If no classifier name is given in the input + * stream, the instantiated feature is given this classifier name. + * @param si The assumed identifier as a string. If the feature being read has a string + * identifier field and no identifier is given in the input stream, the feature is given + * this identifier. + * @param bi The assumed identifier as a byte string. If the feature being read has a byte + * string identifier field and no identifier is given in the input stream, the feature is + * given this identifier. + **/ + public void lexRead(ExceptionlessInputStream in, Lexicon lex, String p, String g, String si, + ByteString bi) { + super.lexRead(in, lex, p, g, si, bi); + identifier = ByteString.lexReadByteString(in, bi); + value = ByteString.lexReadByteString(in, identifier); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/DiscretePrimitiveStringFeature.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/DiscretePrimitiveStringFeature.java index 29e28030..792d5032 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/DiscretePrimitiveStringFeature.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/DiscretePrimitiveStringFeature.java @@ -1,411 +1,391 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. 
See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.classify; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream; import edu.illinois.cs.cogcomp.lbjava.learn.Lexicon; import edu.illinois.cs.cogcomp.lbjava.util.ByteString; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessInputStream; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessOutputStream; - /** - * This feature is functionally equivalent to - * {@link DiscretePrimitiveFeature}, however its {@link #value} is stored as - * a String instead of a {@link ByteString}. Discrete - * classifiers return features of this type (or - * {@link DiscreteConjunctiveFeature}s or {@link DiscreteReferringFeature}s - * that contain features of this type). Before storing these features in a - * lexicon, however, they are converted to {@link DiscretePrimitiveFeature}s - * using the specified encoding. - * - * @author Nick Rizzolo + * This feature is functionally equivalent to {@link DiscretePrimitiveFeature}, however its + * {@link #value} is stored as a String instead of a {@link ByteString}. Discrete + * classifiers return features of this type (or {@link DiscreteConjunctiveFeature}s or + * {@link DiscreteReferringFeature}s that contain features of this type). Before storing these + * features in a lexicon, however, they are converted to {@link DiscretePrimitiveFeature}s using the + * specified encoding. + * + * @author Nick Rizzolo **/ -public class DiscretePrimitiveStringFeature extends DiscreteFeature -{ - /** - * The identifier string distinguishes this - * Feature from other Features. 
- **/ - protected String identifier; - /** The discrete value is represented as a string. */ - protected String value; - - - /** - * For internal use only. - * - * @see Feature#readFeature(ExceptionlessInputStream) - **/ - protected DiscretePrimitiveStringFeature() { } - - /** - * Sets both the identifier and the value. The value index and total - * allowable values, having not been specified, default to -1 and 0 - * respectively. - * - * @param p The new discrete feature's package. - * @param c The name of the classifier that produced this feature. - * @param i The new discrete feature's identifier. - * @param v The new discrete feature's value. - **/ - public DiscretePrimitiveStringFeature(String p, String c, String i, - String v) { - this(p, c, i, v, (short) -1, (short) 0); - } - - /** - * Sets the identifier, value, value index, and total allowable values. - * - * @param p The new discrete feature's package. - * @param c The name of the classifier that produced this feature. - * @param i The new discrete feature's identifier. - * @param v The new discrete feature's value. - * @param vi The index corresponding to the value. - * @param t The total allowable values for this feature. - **/ - public DiscretePrimitiveStringFeature(String p, String c, String i, - String v, short vi, short t) { - super(p, c, vi, t); - identifier = i; - value = v; - } - - - /** - * Determines if this feature contains a string identifier field. - * - * @return true iff this feature contains a string identifier - * field. - **/ - public boolean hasStringIdentifier() { return true; } - - - /** - * Determines if this feature is primitive. - * - * @return true iff this is primitive. - **/ - public boolean isPrimitive() { return true; } - - - /** - * Retrieves this feature's identifier as a string. - * - * @return This feature's identifier as a string. - **/ - public String getStringIdentifier() { return identifier; } - - - /** - * Retrieves this feature's identifier as a byte string. 
- * - * @return This feature's identifier as a byte string. - **/ - public ByteString getByteStringIdentifier() { - return new ByteString(identifier); - } - - - /** - * Gives a string representation of the value of this feature. - * - * @return {@link #value}. - **/ - public String getStringValue() { return value; } - - - /** - * Gives a string representation of the value of this feature. - * - * @return The byte string encoding of {@link #value}. - **/ - public ByteString getByteStringValue() { return new ByteString(value); } - - - /** - * Determines whether or not the parameter is equivalent to the string - * representation of the value of this feature. - * - * @param v The string to compare against. - * @return true iff the parameter is equivalent to the string - * representation of the value of this feature. - **/ - public boolean valueEquals(String v) { return v.equals(value); } - - - /** - * Return the feature that should be used to index this feature into a - * lexicon. If it is a binary feature, we return the feature with an empty - * value so that the feature will be mapped to the same weight whether it - * is active or not. If the feature can take multiple values, then simply - * return the feature object as-is. - * - * @param lexicon The lexicon into which this feature will be indexed. - * @param training Whether or not the learner is currently training. - * @param label The label of the example containing this feature, or -1 - * if we aren't doing per class feature counting. - * @return A feature object appropriate for use as the key of a map. 
- **/ - public Feature getFeatureKey(Lexicon lexicon, boolean training, int label) { - if (totalValues() == 2) - return - new DiscretePrimitiveStringFeature( - containingPackage, generatingClassifier, identifier, "", - (short) -1, (short) 2); - return this; - } - - - /** - * Returns a {@link RealPrimitiveFeature} whose - * {@link RealPrimitiveFeature#value value} field is set to the strength of - * the current feature, and whose {@link #identifier} field contains all - * the information necessary to distinguish this feature from other - * features. - **/ - public RealFeature makeReal() { - if (totalValues == 2) - return - new RealPrimitiveStringFeature( - containingPackage, generatingClassifier, identifier, - valueIndex); - else { - StringBuffer id = new StringBuffer(identifier); - id.append('_'); - id.append(value); - return - new RealPrimitiveStringFeature( - containingPackage, generatingClassifier, id.toString(), 1); +public class DiscretePrimitiveStringFeature extends DiscreteFeature { + /** + * The identifier string distinguishes this Feature from other + * Features. + **/ + protected String identifier; + /** The discrete value is represented as a string. */ + protected String value; + + + /** + * For internal use only. + * + * @see Feature#readFeature(ExceptionlessInputStream) + **/ + protected DiscretePrimitiveStringFeature() {} + + /** + * Sets both the identifier and the value. The value index and total allowable values, having + * not been specified, default to -1 and 0 respectively. + * + * @param p The new discrete feature's package. + * @param c The name of the classifier that produced this feature. + * @param i The new discrete feature's identifier. + * @param v The new discrete feature's value. + **/ + public DiscretePrimitiveStringFeature(String p, String c, String i, String v) { + this(p, c, i, v, (short) -1, (short) 0); } - } - - - /** - * Returns a new feature object that's identical to this feature except its - * strength is given by s. 
- * - * @param s The strength of the new feature. - * @return A new feature object as above, or null if this - * feature cannot take the specified strength. - **/ - public Feature withStrength(double s) { - if (totalValues != 2 || !(s == 0 || s == 1)) return null; - return - new DiscretePrimitiveStringFeature( - containingPackage, generatingClassifier, identifier, "", - (short) Math.round(s), (short) 2); - } - - - /** - * Returns a feature object in which any strings that are being used to - * represent an identifier or value have been encoded in byte strings. - * - * @param e The encoding to use. - * @return A feature object as above; possible this object. - **/ - public Feature encode(String e) { - if (e == null || e == "String") return this; - ByteString id = - identifier.length() == 0 ? ByteString.emptyString - : new ByteString(identifier, e); - return - new DiscretePrimitiveFeature(containingPackage, generatingClassifier, - id, new ByteString(value, e), valueIndex, - totalValues); - } - - - /** - * The hash code of a DiscretePrimitiveStringFeature is the - * sum of the hash codes of its containing package, identifier, and value. - * - * @return The hash code of this feature. - **/ - public int hashCode() { - return 31 * super.hashCode() + 17 * identifier.hashCode() - + value.hashCode(); - } - - - /** - * Two DiscretePrimitiveStringFeatures are equivalent when - * their containing packages, identifiers, and values are equivalent. - * - * @param o The object with which to compare this feature. - * @return true iff the parameter is an equivalent feature. - **/ - public boolean equals(Object o) { - if (!super.equals(o)) return false; - if (o instanceof DiscretePrimitiveStringFeature) { - DiscretePrimitiveStringFeature f = (DiscretePrimitiveStringFeature) o; - return identifier.equals(f.identifier) - && valueIndex > -1 ? valueIndex == f.valueIndex - : value.equals(f.value); + + /** + * Sets the identifier, value, value index, and total allowable values. 
+ * + * @param p The new discrete feature's package. + * @param c The name of the classifier that produced this feature. + * @param i The new discrete feature's identifier. + * @param v The new discrete feature's value. + * @param vi The index corresponding to the value. + * @param t The total allowable values for this feature. + **/ + public DiscretePrimitiveStringFeature(String p, String c, String i, String v, short vi, short t) { + super(p, c, vi, t); + identifier = i; + value = v; } - DiscretePrimitiveFeature f = (DiscretePrimitiveFeature) o; - return f.identifier.equals(identifier) - && valueIndex > -1 ? valueIndex == f.valueIndex - : f.value.equals(value); - } - - - /** - * Some features are functionally equivalent, differing only in the - * encoding of their values; this method will return true iff - * the class of this feature and f are different, but they - * differ only because they encode their values differently. This method - * does not compare the values themselves, however. - * - * @param f Another feature. - * @return See above. - **/ - public boolean classEquivalent(Feature f) { - return f instanceof DiscretePrimitiveFeature; - } - - - /** - * Used to sort features into an order that is convenient both to page - * through and for the lexicon to read off disk. - * - * @param o An object to compare with. - * @return Integers appropriate for sorting features first by package, then - * by identifier, then by value. - **/ - public int compareTo(Object o) { - int d = compareNameStrings(o); - if (d != 0) return d; - DiscretePrimitiveStringFeature f = (DiscretePrimitiveStringFeature) o; - d = identifier.compareTo(f.identifier); - if (d != 0) return d; - return value.compareTo(f.value); - } - - - /** - * Writes a string representation of this Feature to the - * specified buffer. - * - * @param buffer The buffer to write to. 
- **/ - public void write(StringBuffer buffer) { - writeNameString(buffer); - buffer.append("("); - buffer.append(value); - buffer.append(")"); - } - - - /** - * Writes a string representation of this Feature's package, - * generating classifier, and identifier information to the specified - * buffer. - * - * @param buffer The buffer to write to. - **/ - public void writeNameString(StringBuffer buffer) { - super.writeNameString(buffer); - buffer.append(":"); - buffer.append(identifier); - } - - - /** - * Writes a complete binary representation of the feature. - * - * @param out The output stream. - **/ - public void write(ExceptionlessOutputStream out) { - super.write(out); - out.writeString(identifier); - out.writeString(value); - } - - - /** - * Reads the representation of a feaeture with this object's run-time type - * from the given stream, overwriting the data in this object. - * - * @param in The input stream. - **/ - public void read(ExceptionlessInputStream in) { - super.read(in); - identifier = in.readString(); - value = in.readString(); - } - - - /** - * Writes a binary representation of the feature intended for use by a - * lexicon, omitting redundant information when possible. - * - * @param out The output stream. - * @param lex The lexicon out of which this feature is being written. - * @param c The fully qualified name of the assumed class. The runtime - * class of this feature won't be written if it's equivalent to - * c. - * @param p The assumed package string. This feature's package string - * won't be written if it's equivalent to p. - * @param g The assumed classifier name string. This feature's - * classifier name string won't be written if it's equivalent - * to g. - * @param si The assumed identifier as a string. If this feature has a - * string identifier, it won't be written if it's equivalent to - * si. - * @param bi The assumed identifier as a byte string. 
If this feature - * has a byte string identifier, it won't be written if it's - * equivalent to bi. - * @return The name of the runtime type of this feature. - **/ - public String lexWrite(ExceptionlessOutputStream out, Lexicon lex, String c, - String p, String g, String si, ByteString bi) { - String result = super.lexWrite(out, lex, c, p, g, si, bi); - out.writeString(identifier.equals(si) ? null : identifier); - out.writeString(value); - return result; - } - - - /** - * Reads the representation of a feature with this object's run-time type - * as stored by a lexicon, overwriting the data in this object. - * - *

This method is appropriate for reading features as written by - * {@link #lexWrite(ExceptionlessOutputStream,Lexicon,String,String,String,String,ByteString)}. - * - * @param in The input stream. - * @param lex The lexicon we are reading in to. - * @param p The assumed package string. If no package name is given in - * the input stream, the instantiated feature is given this - * package. - * @param g The assumed classifier name string. If no classifier name - * is given in the input stream, the instantiated feature is - * given this classifier name. - * @param si The assumed identifier as a string. If the feature being - * read has a string identifier field and no identifier is - * given in the input stream, the feature is given this - * identifier. - * @param bi The assumed identifier as a byte string. If the feature - * being read has a byte string identifier field and no - * identifier is given in the input stream, the feature is - * given this identifier. - **/ - public void lexRead(ExceptionlessInputStream in, Lexicon lex, String p, - String g, String si, ByteString bi) { - super.lexRead(in, lex, p, g, si, bi); - identifier = in.readString(); - if (identifier == null) identifier = si; - value = in.readString(); - } -} + /** + * Determines if this feature contains a string identifier field. + * + * @return true iff this feature contains a string identifier field. + **/ + public boolean hasStringIdentifier() { + return true; + } + + + /** + * Determines if this feature is primitive. + * + * @return true iff this is primitive. + **/ + public boolean isPrimitive() { + return true; + } + + + /** + * Retrieves this feature's identifier as a string. + * + * @return This feature's identifier as a string. + **/ + public String getStringIdentifier() { + return identifier; + } + + + /** + * Retrieves this feature's identifier as a byte string. + * + * @return This feature's identifier as a byte string. 
+ **/ + public ByteString getByteStringIdentifier() { + return new ByteString(identifier); + } + + + /** + * Gives a string representation of the value of this feature. + * + * @return {@link #value}. + **/ + public String getStringValue() { + return value; + } + + + /** + * Gives a string representation of the value of this feature. + * + * @return The byte string encoding of {@link #value}. + **/ + public ByteString getByteStringValue() { + return new ByteString(value); + } + + + /** + * Determines whether or not the parameter is equivalent to the string representation of the + * value of this feature. + * + * @param v The string to compare against. + * @return true iff the parameter is equivalent to the string representation of the + * value of this feature. + **/ + public boolean valueEquals(String v) { + return v.equals(value); + } + + + /** + * Return the feature that should be used to index this feature into a lexicon. If it is a + * binary feature, we return the feature with an empty value so that the feature will be mapped + * to the same weight whether it is active or not. If the feature can take multiple values, then + * simply return the feature object as-is. + * + * @param lexicon The lexicon into which this feature will be indexed. + * @param training Whether or not the learner is currently training. + * @param label The label of the example containing this feature, or -1 if we aren't doing per + * class feature counting. + * @return A feature object appropriate for use as the key of a map. 
+ **/ + public Feature getFeatureKey(Lexicon lexicon, boolean training, int label) { + if (totalValues() == 2) + return new DiscretePrimitiveStringFeature(containingPackage, generatingClassifier, + identifier, "", (short) -1, (short) 2); + return this; + } + + + /** + * Returns a {@link RealPrimitiveFeature} whose {@link RealPrimitiveFeature#value value} field + * is set to the strength of the current feature, and whose {@link #identifier} field contains + * all the information necessary to distinguish this feature from other features. + **/ + public RealFeature makeReal() { + if (totalValues == 2) + return new RealPrimitiveStringFeature(containingPackage, generatingClassifier, + identifier, valueIndex); + else { + StringBuffer id = new StringBuffer(identifier); + id.append('_'); + id.append(value); + return new RealPrimitiveStringFeature(containingPackage, generatingClassifier, + id.toString(), 1); + } + } + + + /** + * Returns a new feature object that's identical to this feature except its strength is given by + * s. + * + * @param s The strength of the new feature. + * @return A new feature object as above, or null if this feature cannot take the + * specified strength. + **/ + public Feature withStrength(double s) { + if (totalValues != 2 || !(s == 0 || s == 1)) + return null; + return new DiscretePrimitiveStringFeature(containingPackage, generatingClassifier, + identifier, "", (short) Math.round(s), (short) 2); + } + + + /** + * Returns a feature object in which any strings that are being used to represent an identifier + * or value have been encoded in byte strings. + * + * @param e The encoding to use. + * @return A feature object as above; possible this object. + **/ + public Feature encode(String e) { + if (e == null || e == "String") + return this; + ByteString id = + identifier.length() == 0 ? 
ByteString.emptyString : new ByteString(identifier, e); + return new DiscretePrimitiveFeature(containingPackage, generatingClassifier, id, + new ByteString(value, e), valueIndex, totalValues); + } + + + /** + * The hash code of a DiscretePrimitiveStringFeature is the sum of the hash codes + * of its containing package, identifier, and value. + * + * @return The hash code of this feature. + **/ + public int hashCode() { + return 31 * super.hashCode() + 17 * identifier.hashCode() + value.hashCode(); + } + + + /** + * Two DiscretePrimitiveStringFeatures are equivalent when their containing + * packages, identifiers, and values are equivalent. + * + * @param o The object with which to compare this feature. + * @return true iff the parameter is an equivalent feature. + **/ + public boolean equals(Object o) { + if (!super.equals(o)) + return false; + if (o instanceof DiscretePrimitiveStringFeature) { + DiscretePrimitiveStringFeature f = (DiscretePrimitiveStringFeature) o; + return identifier.equals(f.identifier) && valueIndex > -1 ? valueIndex == f.valueIndex + : value.equals(f.value); + } + + DiscretePrimitiveFeature f = (DiscretePrimitiveFeature) o; + return f.identifier.equals(identifier) && valueIndex > -1 ? valueIndex == f.valueIndex + : f.value.equals(value); + } + + + /** + * Some features are functionally equivalent, differing only in the encoding of their values; + * this method will return true iff the class of this feature and f + * are different, but they differ only because they encode their values differently. This method + * does not compare the values themselves, however. + * + * @param f Another feature. + * @return See above. + **/ + public boolean classEquivalent(Feature f) { + return f instanceof DiscretePrimitiveFeature; + } + + + /** + * Used to sort features into an order that is convenient both to page through and for the + * lexicon to read off disk. + * + * @param o An object to compare with. 
+ * @return Integers appropriate for sorting features first by package, then by identifier, then + * by value. + **/ + public int compareTo(Object o) { + int d = compareNameStrings(o); + if (d != 0) + return d; + DiscretePrimitiveStringFeature f = (DiscretePrimitiveStringFeature) o; + d = identifier.compareTo(f.identifier); + if (d != 0) + return d; + return value.compareTo(f.value); + } + + + /** + * Writes a string representation of this Feature to the specified buffer. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + writeNameString(buffer); + buffer.append("("); + buffer.append(value); + buffer.append(")"); + } + + + /** + * Writes a string representation of this Feature's package, generating classifier, + * and identifier information to the specified buffer. + * + * @param buffer The buffer to write to. + **/ + public void writeNameString(StringBuffer buffer) { + super.writeNameString(buffer); + buffer.append(":"); + buffer.append(identifier); + } + + + /** + * Writes a complete binary representation of the feature. + * + * @param out The output stream. + **/ + public void write(ExceptionlessOutputStream out) { + super.write(out); + out.writeString(identifier); + out.writeString(value); + } + + + /** + * Reads the representation of a feaeture with this object's run-time type from the given + * stream, overwriting the data in this object. + * + * @param in The input stream. + **/ + public void read(ExceptionlessInputStream in) { + super.read(in); + identifier = in.readString(); + value = in.readString(); + } + + + /** + * Writes a binary representation of the feature intended for use by a lexicon, omitting + * redundant information when possible. + * + * @param out The output stream. + * @param lex The lexicon out of which this feature is being written. + * @param c The fully qualified name of the assumed class. The runtime class of this feature + * won't be written if it's equivalent to c. 
+ * @param p The assumed package string. This feature's package string won't be written if it's + * equivalent to p. + * @param g The assumed classifier name string. This feature's classifier name string won't be + * written if it's equivalent to g. + * @param si The assumed identifier as a string. If this feature has a string identifier, it + * won't be written if it's equivalent to si. + * @param bi The assumed identifier as a byte string. If this feature has a byte string + * identifier, it won't be written if it's equivalent to bi. + * @return The name of the runtime type of this feature. + **/ + public String lexWrite(ExceptionlessOutputStream out, Lexicon lex, String c, String p, + String g, String si, ByteString bi) { + String result = super.lexWrite(out, lex, c, p, g, si, bi); + out.writeString(identifier.equals(si) ? null : identifier); + out.writeString(value); + return result; + } + + + /** + * Reads the representation of a feature with this object's run-time type as stored by a + * lexicon, overwriting the data in this object. + * + *

+ * This method is appropriate for reading features as written by + * {@link #lexWrite(ExceptionlessOutputStream,Lexicon,String,String,String,String,ByteString)}. + * + * @param in The input stream. + * @param lex The lexicon we are reading in to. + * @param p The assumed package string. If no package name is given in the input stream, the + * instantiated feature is given this package. + * @param g The assumed classifier name string. If no classifier name is given in the input + * stream, the instantiated feature is given this classifier name. + * @param si The assumed identifier as a string. If the feature being read has a string + * identifier field and no identifier is given in the input stream, the feature is given + * this identifier. + * @param bi The assumed identifier as a byte string. If the feature being read has a byte + * string identifier field and no identifier is given in the input stream, the feature is + * given this identifier. + **/ + public void lexRead(ExceptionlessInputStream in, Lexicon lex, String p, String g, String si, + ByteString bi) { + super.lexRead(in, lex, p, g, si, bi); + identifier = in.readString(); + if (identifier == null) + identifier = si; + value = in.readString(); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/DiscreteReferrer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/DiscreteReferrer.java index b0260ba2..6369d53f 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/DiscreteReferrer.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/DiscreteReferrer.java @@ -1,296 +1,287 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.classify; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream; import edu.illinois.cs.cogcomp.lbjava.learn.ChildLexicon; import edu.illinois.cs.cogcomp.lbjava.learn.Lexicon; import edu.illinois.cs.cogcomp.lbjava.util.ByteString; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessInputStream; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessOutputStream; - /** - * A referring discrete feature is one that has its own identifier, but whose - * value comes from a separate feature that it refers to. - * - * @author Nick Rizzolo + * A referring discrete feature is one that has its own identifier, but whose value comes from a + * separate feature that it refers to. + * + * @author Nick Rizzolo **/ -public abstract class DiscreteReferrer extends DiscreteFeature -{ - /** The feature being referred to. */ - protected DiscreteFeature referent; - - - /** - * For internal use only. - * - * @see Feature#readFeature(ExceptionlessInputStream) - **/ - protected DiscreteReferrer() { } - - /** - * Sets both the identifier and the referent. - * - * @param c The classifier that produced this feature. - * @param r The discrete feature referred to by this new feature. - **/ - public DiscreteReferrer(Classifier c, DiscreteFeature r) { - this(c.containingPackage, c.name, r, r.getValueIndex(), r.totalValues()); - } - - /** - * Sets both the identifier and the referent. - * - * @param c The classifier that produced this feature. - * @param r The discrete feature referred to by this new feature. - * @param av The allowable values of the classifier that produced - * r. 
- **/ - public DiscreteReferrer(Classifier c, DiscreteFeature r, String[] av) { - this(c.containingPackage, c.name, r, - c.valueIndexOf(av[r.getValueIndex()]), - (short) c.allowableValues().length); - } - - /** - * Constructs a new referring feature. - * - * @param p The new discrete feature's package. - * @param c The name of the classifier that produced this feature. - * @param r The discrete feature referred to by this new feature. - * @param vi The index corresponding to the value. - * @param t The total allowable values for this feature. - **/ - public DiscreteReferrer(String p, String c, DiscreteFeature r, short vi, - short t) { - super(p, c, vi, t); - referent = r; - } - - - /** - * Determines if this feature is a referring feature. - * - * @return true iff this feature is a referring feature. - **/ - public boolean isReferrer() { return true; } - - - /** Returns the value of {@link #referent}. */ - public DiscreteFeature getReferent() { return referent; } - - - /** - * The depth of a feature is one more than the maximum depth of any of its - * children, or 0 if it has no children. - * - * @return The depth of this feature as described above. - **/ - public int depth() { return referent.depth() + 1; } - - - /** - * Gives a string representation of the value of this feature. - * - * @return Whatever is returned by this method on {@link #referent}. - **/ - public String getStringValue() { return referent.getStringValue(); } - - - /** - * Gives a string representation of the value of this feature. - * - * @return Whatever is returned by this method on {@link #referent}. - **/ - public ByteString getByteStringValue() { - return referent.getByteStringValue(); - } - - - /** - * Determines whether or not the parameter is equivalent to the string - * representation of the value of this feature. - * - * @param v The string to compare against. - * @return true iff v is equivalent to the string - * representation of the value of this feature. 
- **/ - public boolean valueEquals(String v) { return referent.valueEquals(v); } - - - /** - * Takes care of any feature-type-specific tasks that need to be taken care - * of when removing a feature of this type from a {@link ChildLexicon}, in - * particular updating parent counts and removing children of this feature - * if necessary. - * - * @param lex The child lexicon this feature is being removed from. - **/ - public void removeFromChildLexicon(ChildLexicon lex) { - lex.decrementParentCounts(referent); - } - - - /** - * Does a feature-type-specific lookup of this feature in the given - * {@link ChildLexicon}. - * - * @param lex The child lexicon this feature is being looked up in. - * @param label The label of the example containing this feature, or -1 if - * we aren't doing per class feature counting. - * @return The index of f in this lexicon. - **/ - public int childLexiconLookup(ChildLexicon lex, int label) { - return lex.childLexiconLookup(this, label); - } - - - /** - * The hash code of a DiscreteReferrer is the sum of - * the hash codes of its containing package, identifier, and the referent - * feature. - * - * @return The hash code of this feature. - **/ - public int hashCode() { - return 17 * super.hashCode() + referent.hashCode(); - } - - - /** - * Used to sort features into an order that is convenient both to page - * through and for the lexicon to read off disk. - * - * @param o An object to compare with. - * @return Integers appropriate for sorting features first by package, then - * by identifier, then by value. - **/ - public int compareTo(Object o) { - int d = compareNameStrings(o); - if (d != 0) return d; - DiscreteReferrer r = (DiscreteReferrer) o; - return referent.compareTo(r.referent); - } - - - /** - * Writes a string representation of this Feature to the - * specified buffer. - * - * @param buffer The buffer to write to. 
- **/ - public void write(StringBuffer buffer) { - writeNameString(buffer); - buffer.append("->"); - referent.write(buffer); - } - - - /** - * Writes a string representation of this Feature to the - * specified buffer, omitting the package name. - * - * @param buffer The buffer to write to. - **/ - public void writeNoPackage(StringBuffer buffer) { - String p = containingPackage; - containingPackage = null; - writeNameString(buffer); - buffer.append("->"); - referent.writeNoPackage(buffer); - containingPackage = p; - } - - - /** - * Writes a complete binary representation of the feature. - * - * @param out The output stream. - **/ - public void write(ExceptionlessOutputStream out) { - super.write(out); - referent.write(out); - } - - - /** - * Reads the representation of a feaeture with this object's run-time type - * from the given stream, overwriting the data in this object. - * - * @param in The input stream. - **/ - public void read(ExceptionlessInputStream in) { - super.read(in); - referent = (DiscreteFeature) Feature.readFeature(in); - } - - - /** - * Writes a binary representation of the feature intended for use by a - * lexicon, omitting redundant information when possible. - * - * @param out The output stream. - * @param lex The lexicon out of which this feature is being written. - * @param c The fully qualified name of the assumed class. The runtime - * class of this feature won't be written if it's equivalent to - * c. - * @param p The assumed package string. This feature's package string - * won't be written if it's equivalent to p. - * @param g The assumed classifier name string. This feature's - * classifier name string won't be written if it's equivalent - * to g. - * @param si The assumed identifier as a string. If this feature has a - * string identifier, it won't be written if it's equivalent to - * si. - * @param bi The assumed identifier as a byte string. 
If this feature - * has a byte string identifier, it won't be written if it's - * equivalent to bi. - * @return The name of the runtime type of this feature. - **/ - public String lexWrite(ExceptionlessOutputStream out, Lexicon lex, String c, - String p, String g, String si, ByteString bi) { - String result = super.lexWrite(out, lex, c, p, g, si, bi); - out.writeInt(lex.lookupChild(referent)); - return result; - } - - - /** - * Reads the representation of a feature with this object's run-time type - * as stored by a lexicon, overwriting the data in this object. - * - *

This method is appropriate for reading features as written by - * {@link #lexWrite(ExceptionlessOutputStream,Lexicon,String,String,String,String,ByteString)}. - * - * @param in The input stream. - * @param lex The lexicon we are reading in to. - * @param p The assumed package string. If no package name is given in - * the input stream, the instantiated feature is given this - * package. - * @param g The assumed classifier name string. If no classifier name - * is given in the input stream, the instantiated feature is - * given this classifier name. - * @param si The assumed identifier as a string. If the feature being - * read has a string identifier field and no identifier is - * given in the input stream, the feature is given this - * identifier. - * @param bi The assumed identifier as a byte string. If the feature - * being read has a byte string identifier field and no - * identifier is given in the input stream, the feature is - * given this identifier. - **/ - public void lexRead(ExceptionlessInputStream in, Lexicon lex, String p, - String g, String si, ByteString bi) { - super.lexRead(in, lex, p, g, si, bi); - referent = (DiscreteFeature) lex.lookupKey(in.readInt()); - } +public abstract class DiscreteReferrer extends DiscreteFeature { + /** The feature being referred to. */ + protected DiscreteFeature referent; + + + /** + * For internal use only. + * + * @see Feature#readFeature(ExceptionlessInputStream) + **/ + protected DiscreteReferrer() {} + + /** + * Sets both the identifier and the referent. + * + * @param c The classifier that produced this feature. + * @param r The discrete feature referred to by this new feature. + **/ + public DiscreteReferrer(Classifier c, DiscreteFeature r) { + this(c.containingPackage, c.name, r, r.getValueIndex(), r.totalValues()); + } + + /** + * Sets both the identifier and the referent. + * + * @param c The classifier that produced this feature. + * @param r The discrete feature referred to by this new feature. 
+ * @param av The allowable values of the classifier that produced r. + **/ + public DiscreteReferrer(Classifier c, DiscreteFeature r, String[] av) { + this(c.containingPackage, c.name, r, c.valueIndexOf(av[r.getValueIndex()]), (short) c + .allowableValues().length); + } + + /** + * Constructs a new referring feature. + * + * @param p The new discrete feature's package. + * @param c The name of the classifier that produced this feature. + * @param r The discrete feature referred to by this new feature. + * @param vi The index corresponding to the value. + * @param t The total allowable values for this feature. + **/ + public DiscreteReferrer(String p, String c, DiscreteFeature r, short vi, short t) { + super(p, c, vi, t); + referent = r; + } + + + /** + * Determines if this feature is a referring feature. + * + * @return true iff this feature is a referring feature. + **/ + public boolean isReferrer() { + return true; + } + + + /** Returns the value of {@link #referent}. */ + public DiscreteFeature getReferent() { + return referent; + } + + + /** + * The depth of a feature is one more than the maximum depth of any of its children, or 0 if it + * has no children. + * + * @return The depth of this feature as described above. + **/ + public int depth() { + return referent.depth() + 1; + } + + + /** + * Gives a string representation of the value of this feature. + * + * @return Whatever is returned by this method on {@link #referent}. + **/ + public String getStringValue() { + return referent.getStringValue(); + } + + + /** + * Gives a string representation of the value of this feature. + * + * @return Whatever is returned by this method on {@link #referent}. + **/ + public ByteString getByteStringValue() { + return referent.getByteStringValue(); + } + + + /** + * Determines whether or not the parameter is equivalent to the string representation of the + * value of this feature. + * + * @param v The string to compare against. 
+ * @return true iff v is equivalent to the string representation of + * the value of this feature. + **/ + public boolean valueEquals(String v) { + return referent.valueEquals(v); + } + + + /** + * Takes care of any feature-type-specific tasks that need to be taken care of when removing a + * feature of this type from a {@link ChildLexicon}, in particular updating parent counts and + * removing children of this feature if necessary. + * + * @param lex The child lexicon this feature is being removed from. + **/ + public void removeFromChildLexicon(ChildLexicon lex) { + lex.decrementParentCounts(referent); + } + + + /** + * Does a feature-type-specific lookup of this feature in the given {@link ChildLexicon}. + * + * @param lex The child lexicon this feature is being looked up in. + * @param label The label of the example containing this feature, or -1 if we aren't doing per + * class feature counting. + * @return The index of f in this lexicon. + **/ + public int childLexiconLookup(ChildLexicon lex, int label) { + return lex.childLexiconLookup(this, label); + } + + + /** + * The hash code of a DiscreteReferrer is the sum of the hash codes of its + * containing package, identifier, and the referent feature. + * + * @return The hash code of this feature. + **/ + public int hashCode() { + return 17 * super.hashCode() + referent.hashCode(); + } + + + /** + * Used to sort features into an order that is convenient both to page through and for the + * lexicon to read off disk. + * + * @param o An object to compare with. + * @return Integers appropriate for sorting features first by package, then by identifier, then + * by value. + **/ + public int compareTo(Object o) { + int d = compareNameStrings(o); + if (d != 0) + return d; + DiscreteReferrer r = (DiscreteReferrer) o; + return referent.compareTo(r.referent); + } + + + /** + * Writes a string representation of this Feature to the specified buffer. + * + * @param buffer The buffer to write to. 
+ **/ + public void write(StringBuffer buffer) { + writeNameString(buffer); + buffer.append("->"); + referent.write(buffer); + } + + + /** + * Writes a string representation of this Feature to the specified buffer, omitting + * the package name. + * + * @param buffer The buffer to write to. + **/ + public void writeNoPackage(StringBuffer buffer) { + String p = containingPackage; + containingPackage = null; + writeNameString(buffer); + buffer.append("->"); + referent.writeNoPackage(buffer); + containingPackage = p; + } + + + /** + * Writes a complete binary representation of the feature. + * + * @param out The output stream. + **/ + public void write(ExceptionlessOutputStream out) { + super.write(out); + referent.write(out); + } + + + /** + * Reads the representation of a feaeture with this object's run-time type from the given + * stream, overwriting the data in this object. + * + * @param in The input stream. + **/ + public void read(ExceptionlessInputStream in) { + super.read(in); + referent = (DiscreteFeature) Feature.readFeature(in); + } + + + /** + * Writes a binary representation of the feature intended for use by a lexicon, omitting + * redundant information when possible. + * + * @param out The output stream. + * @param lex The lexicon out of which this feature is being written. + * @param c The fully qualified name of the assumed class. The runtime class of this feature + * won't be written if it's equivalent to c. + * @param p The assumed package string. This feature's package string won't be written if it's + * equivalent to p. + * @param g The assumed classifier name string. This feature's classifier name string won't be + * written if it's equivalent to g. + * @param si The assumed identifier as a string. If this feature has a string identifier, it + * won't be written if it's equivalent to si. + * @param bi The assumed identifier as a byte string. If this feature has a byte string + * identifier, it won't be written if it's equivalent to bi. 
+ * @return The name of the runtime type of this feature. + **/ + public String lexWrite(ExceptionlessOutputStream out, Lexicon lex, String c, String p, + String g, String si, ByteString bi) { + String result = super.lexWrite(out, lex, c, p, g, si, bi); + out.writeInt(lex.lookupChild(referent)); + return result; + } + + + /** + * Reads the representation of a feature with this object's run-time type as stored by a + * lexicon, overwriting the data in this object. + * + *

+ * This method is appropriate for reading features as written by + * {@link #lexWrite(ExceptionlessOutputStream,Lexicon,String,String,String,String,ByteString)}. + * + * @param in The input stream. + * @param lex The lexicon we are reading in to. + * @param p The assumed package string. If no package name is given in the input stream, the + * instantiated feature is given this package. + * @param g The assumed classifier name string. If no classifier name is given in the input + * stream, the instantiated feature is given this classifier name. + * @param si The assumed identifier as a string. If the feature being read has a string + * identifier field and no identifier is given in the input stream, the feature is given + * this identifier. + * @param bi The assumed identifier as a byte string. If the feature being read has a byte + * string identifier field and no identifier is given in the input stream, the feature is + * given this identifier. + **/ + public void lexRead(ExceptionlessInputStream in, Lexicon lex, String p, String g, String si, + ByteString bi) { + super.lexRead(in, lex, p, g, si, bi); + referent = (DiscreteFeature) lex.lookupKey(in.readInt()); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/DiscreteReferringFeature.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/DiscreteReferringFeature.java index f934c610..0ddb73bb 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/DiscreteReferringFeature.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/DiscreteReferringFeature.java @@ -1,350 +1,327 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.classify; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream; import edu.illinois.cs.cogcomp.lbjava.learn.Lexicon; import edu.illinois.cs.cogcomp.lbjava.util.ByteString; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessInputStream; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessOutputStream; - /** - * A referring discrete feature is one that has its own identifier, but whose - * value comes from a separate feature that it refers to. - * - * @author Nick Rizzolo + * A referring discrete feature is one that has its own identifier, but whose value comes from a + * separate feature that it refers to. + * + * @author Nick Rizzolo **/ -public class DiscreteReferringFeature extends DiscreteReferrer -{ - /** - * The identifier string distinguishes this - * Feature from other Features. - **/ - protected ByteString identifier; - - - /** - * For internal use only. - * - * @see Feature#readFeature(ExceptionlessInputStream) - **/ - protected DiscreteReferringFeature() { } - - /** - * Sets both the identifier and the referent. - * - * @param c The classifier that produced this feature. - * @param i The new referring feature's identifier. - * @param r The discrete feature referred to by this new feature. - **/ - public DiscreteReferringFeature(Classifier c, ByteString i, - DiscreteFeature r) { - this(c.containingPackage, c.name, i, r, r.getValueIndex(), - r.totalValues()); - } - - /** - * Sets both the identifier and the referent. - * - * @param c The classifier that produced this feature. - * @param i The new referring feature's identifier. 
- * @param r The discrete feature referred to by this new feature. - * @param av The allowable values of the classifier that produced - * r. - **/ - public DiscreteReferringFeature(Classifier c, ByteString i, - DiscreteFeature r, String[] av) { - super(c, r, av); - identifier = i; - } - - /** - * Constructs a new referring feature. - * - * @param p The new discrete feature's package. - * @param c The name of the classifier that produced this feature. - * @param i The new discrete feature's identifier. - * @param r The discrete feature referred to by this new feature. - * @param vi The index corresponding to the value. - * @param t The total allowable values for this feature. - **/ - public DiscreteReferringFeature(String p, String c, ByteString i, - DiscreteFeature r, short vi, short t) { - super(p, c, r, vi, t); - identifier = i; - } - - - /** - * Determines if this feature contains a byte string identifier field. - * - * @return true iff this feature contains a byte string - * identifier field. - **/ - public boolean hasByteStringIdentifier() { return true; } - - - /** - * Retrieves this feature's identifier as a string. - * - * @return This feature's identifier as a string. - **/ - public String getStringIdentifier() { return identifier.toString(); } - - - /** - * Retrieves this feature's identifier as a byte string. - * - * @return This feature's identifier as a byte string. - **/ - public ByteString getByteStringIdentifier() { - return (ByteString) identifier.clone(); - } - - - /** - * Return the feature that should be used to index this feature into a - * lexicon. If it is a binary feature, we return the feature with an empty - * value so that the feature will be mapped to the same weight whether it - * is active or not. If the feature can take multiple values, then simply - * return the feature object as-is. - * - * @param lexicon The lexicon into which this feature will be indexed. - * @param training Whether or not the learner is currently training. 
- * @param label The label of the example containing this feature, or -1 - * if we aren't doing per class feature counting. - * @return A feature object appropriate for use as the key of a map. - **/ - public Feature getFeatureKey(Lexicon lexicon, boolean training, int label) { - DiscreteFeature f = - (DiscreteFeature) referent.getFeatureKey(lexicon, training, label); - if (training) f = (DiscreteFeature) lexicon.getChildFeature(f, label); - else if (f == referent) return this; - return - new DiscreteReferringFeature( - containingPackage, generatingClassifier, identifier, f, - valueIndex, totalValues); - } - - - /** - * Returns a {@link RealPrimitiveFeature} whose - * {@link RealPrimitiveFeature#value value} field is set to the strength of - * the current feature, and whose {@link #identifier} field contains all - * the information necessary to distinguish this feature from other - * features. - **/ - public RealFeature makeReal() { - return - new RealReferringFeature(containingPackage, generatingClassifier, - identifier, referent.makeReal()); - } - - - /** - * Returns the strength of this feature if it were to be placed in a - * mathematical vector space. - **/ - public double getStrength() { return referent.getStrength(); } - - - /** - * Returns a new feature object that's identical to this feature except its - * strength is given by s. - * - * @param s The strength of the new feature. - * @return A new feature object as above, or null if this - * feature cannot take the specified strength. - **/ - public Feature withStrength(double s) { - DiscreteFeature f = (DiscreteFeature) referent.withStrength(s); - if (f == null) return null; - return - new DiscreteReferringFeature( - containingPackage, generatingClassifier, identifier, f, - (short) Math.round(f.getStrength()), totalValues); - } - - - /** - * Returns a feature object in which any strings that are being used to - * represent an identifier or value have been encoded in byte strings. 
- * - * @param e The encoding to use. - * @return A feature object as above; possible this object. - **/ - public Feature encode(String e) { - DiscreteFeature newReferent = (DiscreteFeature) referent.encode(e); - if (newReferent == referent) return this; - return - new DiscreteReferringFeature( - containingPackage, generatingClassifier, identifier, newReferent, - valueIndex, totalValues); - } - - - /** - * The hash code of a DiscreteReferringFeature is the sum of - * the hash codes of its containing package, identifier, and the referent - * feature. - * - * @return The hash code of this feature. - **/ - public int hashCode() { - return 17 * super.hashCode() + identifier.hashCode(); - } - - - /** - * Two DiscreteReferringFeatures are equivalent when their - * containing packages, identifiers, and referent features are equivalent. - * - * @param o The object with which to compare this feature. - * @return true iff the parameter is an equivalent feature. - **/ - public boolean equals(Object o) { - return - super.equals(o) - && (o instanceof DiscreteReferringStringFeature - ? identifier.equals(((DiscreteReferringStringFeature) o).identifier) - : identifier.equals(((DiscreteReferringFeature) o).identifier)) - && referent.equals(((DiscreteReferrer) o).referent); - } - - - /** - * Some features are functionally equivalent, differing only in the - * encoding of their values; this method will return true iff - * the class of this feature and f are different, but they - * differ only because they encode their values differently. This method - * does not compare the values themselves, however. - * - * @param f Another feature. - * @return See above. - **/ - public boolean classEquivalent(Feature f) { - return f instanceof DiscreteReferringStringFeature; - } - - - /** - * Compares only the run-time types, packages, classifier names, and - * identifiers of the features. - * - * @param o An object to compare with. 
- * @return Integers appropriate for sorting features first by package, then - * by classifier name, and then by identifier. - **/ - public int compareNameStrings(Object o) { - int d = super.compareNameStrings(o); - if (d != 0) return d; - return identifier.compareTo(((DiscreteReferringFeature) o).identifier); - } - - - /** - * Writes a string representation of this Feature's package, - * generating classifier, and identifier information to the specified - * buffer. - * - * @param buffer The buffer to write to. - **/ - public void writeNameString(StringBuffer buffer) { - super.writeNameString(buffer); - buffer.append(":"); - buffer.append(identifier.toString()); - } - - - /** - * Writes a complete binary representation of the feature. - * - * @param out The output stream. - **/ - public void write(ExceptionlessOutputStream out) { - super.write(out); - identifier.write(out); - } - - - /** - * Reads the representation of a feaeture with this object's run-time type - * from the given stream, overwriting the data in this object. - * - * @param in The input stream. - **/ - public void read(ExceptionlessInputStream in) { - super.read(in); - identifier = ByteString.readByteString(in); - } - - - /** - * Writes a binary representation of the feature intended for use by a - * lexicon, omitting redundant information when possible. - * - * @param out The output stream. - * @param lex The lexicon out of which this feature is being written. - * @param c The fully qualified name of the assumed class. The runtime - * class of this feature won't be written if it's equivalent to - * c. - * @param p The assumed package string. This feature's package string - * won't be written if it's equivalent to p. - * @param g The assumed classifier name string. This feature's - * classifier name string won't be written if it's equivalent - * to g. - * @param si The assumed identifier as a string. If this feature has a - * string identifier, it won't be written if it's equivalent to - * si. 
- * @param bi The assumed identifier as a byte string. If this feature - * has a byte string identifier, it won't be written if it's - * equivalent to bi. - * @return The name of the runtime type of this feature. - **/ - public String lexWrite(ExceptionlessOutputStream out, Lexicon lex, String c, - String p, String g, String si, ByteString bi) { - String result = super.lexWrite(out, lex, c, p, g, si, bi); - identifier.lexWrite(out, bi); - return result; - } - - - /** - * Reads the representation of a feature with this object's run-time type - * as stored by a lexicon, overwriting the data in this object. - * - *

This method is appropriate for reading features as written by - * {@link #lexWrite(ExceptionlessOutputStream,Lexicon,String,String,String,String,ByteString)}. - * - * @param in The input stream. - * @param lex The lexicon we are reading in to. - * @param p The assumed package string. If no package name is given in - * the input stream, the instantiated feature is given this - * package. - * @param g The assumed classifier name string. If no classifier name - * is given in the input stream, the instantiated feature is - * given this classifier name. - * @param si The assumed identifier as a string. If the feature being - * read has a string identifier field and no identifier is - * given in the input stream, the feature is given this - * identifier. - * @param bi The assumed identifier as a byte string. If the feature - * being read has a byte string identifier field and no - * identifier is given in the input stream, the feature is - * given this identifier. - **/ - public void lexRead(ExceptionlessInputStream in, Lexicon lex, String p, - String g, String si, ByteString bi) { - super.lexRead(in, lex, p, g, si, bi); - identifier = ByteString.lexReadByteString(in, bi); - } +public class DiscreteReferringFeature extends DiscreteReferrer { + /** + * The identifier string distinguishes this Feature from other + * Features. + **/ + protected ByteString identifier; + + + /** + * For internal use only. + * + * @see Feature#readFeature(ExceptionlessInputStream) + **/ + protected DiscreteReferringFeature() {} + + /** + * Sets both the identifier and the referent. + * + * @param c The classifier that produced this feature. + * @param i The new referring feature's identifier. + * @param r The discrete feature referred to by this new feature. + **/ + public DiscreteReferringFeature(Classifier c, ByteString i, DiscreteFeature r) { + this(c.containingPackage, c.name, i, r, r.getValueIndex(), r.totalValues()); + } + + /** + * Sets both the identifier and the referent. 
+ * + * @param c The classifier that produced this feature. + * @param i The new referring feature's identifier. + * @param r The discrete feature referred to by this new feature. + * @param av The allowable values of the classifier that produced r. + **/ + public DiscreteReferringFeature(Classifier c, ByteString i, DiscreteFeature r, String[] av) { + super(c, r, av); + identifier = i; + } + + /** + * Constructs a new referring feature. + * + * @param p The new discrete feature's package. + * @param c The name of the classifier that produced this feature. + * @param i The new discrete feature's identifier. + * @param r The discrete feature referred to by this new feature. + * @param vi The index corresponding to the value. + * @param t The total allowable values for this feature. + **/ + public DiscreteReferringFeature(String p, String c, ByteString i, DiscreteFeature r, short vi, + short t) { + super(p, c, r, vi, t); + identifier = i; + } + + + /** + * Determines if this feature contains a byte string identifier field. + * + * @return true iff this feature contains a byte string identifier field. + **/ + public boolean hasByteStringIdentifier() { + return true; + } + + + /** + * Retrieves this feature's identifier as a string. + * + * @return This feature's identifier as a string. + **/ + public String getStringIdentifier() { + return identifier.toString(); + } + + + /** + * Retrieves this feature's identifier as a byte string. + * + * @return This feature's identifier as a byte string. + **/ + public ByteString getByteStringIdentifier() { + return (ByteString) identifier.clone(); + } + + + /** + * Return the feature that should be used to index this feature into a lexicon. If it is a + * binary feature, we return the feature with an empty value so that the feature will be mapped + * to the same weight whether it is active or not. If the feature can take multiple values, then + * simply return the feature object as-is. 
+ * + * @param lexicon The lexicon into which this feature will be indexed. + * @param training Whether or not the learner is currently training. + * @param label The label of the example containing this feature, or -1 if we aren't doing per + * class feature counting. + * @return A feature object appropriate for use as the key of a map. + **/ + public Feature getFeatureKey(Lexicon lexicon, boolean training, int label) { + DiscreteFeature f = (DiscreteFeature) referent.getFeatureKey(lexicon, training, label); + if (training) + f = (DiscreteFeature) lexicon.getChildFeature(f, label); + else if (f == referent) + return this; + return new DiscreteReferringFeature(containingPackage, generatingClassifier, identifier, f, + valueIndex, totalValues); + } + + + /** + * Returns a {@link RealPrimitiveFeature} whose {@link RealPrimitiveFeature#value value} field + * is set to the strength of the current feature, and whose {@link #identifier} field contains + * all the information necessary to distinguish this feature from other features. + **/ + public RealFeature makeReal() { + return new RealReferringFeature(containingPackage, generatingClassifier, identifier, + referent.makeReal()); + } + + + /** + * Returns the strength of this feature if it were to be placed in a mathematical vector space. + **/ + public double getStrength() { + return referent.getStrength(); + } + + + /** + * Returns a new feature object that's identical to this feature except its strength is given by + * s. + * + * @param s The strength of the new feature. + * @return A new feature object as above, or null if this feature cannot take the + * specified strength. 
+ **/ + public Feature withStrength(double s) { + DiscreteFeature f = (DiscreteFeature) referent.withStrength(s); + if (f == null) + return null; + return new DiscreteReferringFeature(containingPackage, generatingClassifier, identifier, f, + (short) Math.round(f.getStrength()), totalValues); + } + + + /** + * Returns a feature object in which any strings that are being used to represent an identifier + * or value have been encoded in byte strings. + * + * @param e The encoding to use. + * @return A feature object as above; possible this object. + **/ + public Feature encode(String e) { + DiscreteFeature newReferent = (DiscreteFeature) referent.encode(e); + if (newReferent == referent) + return this; + return new DiscreteReferringFeature(containingPackage, generatingClassifier, identifier, + newReferent, valueIndex, totalValues); + } + + + /** + * The hash code of a DiscreteReferringFeature is the sum of the hash codes of its + * containing package, identifier, and the referent feature. + * + * @return The hash code of this feature. + **/ + public int hashCode() { + return 17 * super.hashCode() + identifier.hashCode(); + } + + + /** + * Two DiscreteReferringFeatures are equivalent when their containing packages, + * identifiers, and referent features are equivalent. + * + * @param o The object with which to compare this feature. + * @return true iff the parameter is an equivalent feature. + **/ + public boolean equals(Object o) { + return super.equals(o) + && (o instanceof DiscreteReferringStringFeature ? identifier + .equals(((DiscreteReferringStringFeature) o).identifier) : identifier + .equals(((DiscreteReferringFeature) o).identifier)) + && referent.equals(((DiscreteReferrer) o).referent); + } + + + /** + * Some features are functionally equivalent, differing only in the encoding of their values; + * this method will return true iff the class of this feature and f + * are different, but they differ only because they encode their values differently. 
This method + * does not compare the values themselves, however. + * + * @param f Another feature. + * @return See above. + **/ + public boolean classEquivalent(Feature f) { + return f instanceof DiscreteReferringStringFeature; + } + + + /** + * Compares only the run-time types, packages, classifier names, and identifiers of the + * features. + * + * @param o An object to compare with. + * @return Integers appropriate for sorting features first by package, then by classifier name, + * and then by identifier. + **/ + public int compareNameStrings(Object o) { + int d = super.compareNameStrings(o); + if (d != 0) + return d; + return identifier.compareTo(((DiscreteReferringFeature) o).identifier); + } + + + /** + * Writes a string representation of this Feature's package, generating classifier, + * and identifier information to the specified buffer. + * + * @param buffer The buffer to write to. + **/ + public void writeNameString(StringBuffer buffer) { + super.writeNameString(buffer); + buffer.append(":"); + buffer.append(identifier.toString()); + } + + + /** + * Writes a complete binary representation of the feature. + * + * @param out The output stream. + **/ + public void write(ExceptionlessOutputStream out) { + super.write(out); + identifier.write(out); + } + + + /** + * Reads the representation of a feaeture with this object's run-time type from the given + * stream, overwriting the data in this object. + * + * @param in The input stream. + **/ + public void read(ExceptionlessInputStream in) { + super.read(in); + identifier = ByteString.readByteString(in); + } + + + /** + * Writes a binary representation of the feature intended for use by a lexicon, omitting + * redundant information when possible. + * + * @param out The output stream. + * @param lex The lexicon out of which this feature is being written. + * @param c The fully qualified name of the assumed class. The runtime class of this feature + * won't be written if it's equivalent to c. 
+ * @param p The assumed package string. This feature's package string won't be written if it's + * equivalent to p. + * @param g The assumed classifier name string. This feature's classifier name string won't be + * written if it's equivalent to g. + * @param si The assumed identifier as a string. If this feature has a string identifier, it + * won't be written if it's equivalent to si. + * @param bi The assumed identifier as a byte string. If this feature has a byte string + * identifier, it won't be written if it's equivalent to bi. + * @return The name of the runtime type of this feature. + **/ + public String lexWrite(ExceptionlessOutputStream out, Lexicon lex, String c, String p, + String g, String si, ByteString bi) { + String result = super.lexWrite(out, lex, c, p, g, si, bi); + identifier.lexWrite(out, bi); + return result; + } + + + /** + * Reads the representation of a feature with this object's run-time type as stored by a + * lexicon, overwriting the data in this object. + * + *

+ * This method is appropriate for reading features as written by + * {@link #lexWrite(ExceptionlessOutputStream,Lexicon,String,String,String,String,ByteString)}. + * + * @param in The input stream. + * @param lex The lexicon we are reading in to. + * @param p The assumed package string. If no package name is given in the input stream, the + * instantiated feature is given this package. + * @param g The assumed classifier name string. If no classifier name is given in the input + * stream, the instantiated feature is given this classifier name. + * @param si The assumed identifier as a string. If the feature being read has a string + * identifier field and no identifier is given in the input stream, the feature is given + * this identifier. + * @param bi The assumed identifier as a byte string. If the feature being read has a byte + * string identifier field and no identifier is given in the input stream, the feature is + * given this identifier. + **/ + public void lexRead(ExceptionlessInputStream in, Lexicon lex, String p, String g, String si, + ByteString bi) { + super.lexRead(in, lex, p, g, si, bi); + identifier = ByteString.lexReadByteString(in, bi); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/DiscreteReferringStringFeature.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/DiscreteReferringStringFeature.java index 70eaf8d2..5eb959e0 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/DiscreteReferringStringFeature.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/DiscreteReferringStringFeature.java @@ -1,358 +1,331 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.classify; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream; import edu.illinois.cs.cogcomp.lbjava.learn.Lexicon; import edu.illinois.cs.cogcomp.lbjava.util.ByteString; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessInputStream; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessOutputStream; - /** - * A referring discrete feature is one that has its own identifier, but whose - * value comes from a separate feature that it refers to. - * - * @author Nick Rizzolo + * A referring discrete feature is one that has its own identifier, but whose value comes from a + * separate feature that it refers to. + * + * @author Nick Rizzolo **/ -public class DiscreteReferringStringFeature extends DiscreteReferrer -{ - /** - * The identifier string distinguishes this - * Feature from other Features. - **/ - protected String identifier; - - - /** - * For internal use only. - * - * @see Feature#readFeature(ExceptionlessInputStream) - **/ - protected DiscreteReferringStringFeature() { } - - /** - * Sets both the identifier and the referent. - * - * @param c The classifier that produced this feature. - * @param i The new referring feature's identifier. - * @param r The discrete feature referred to by this new feature. - **/ - public DiscreteReferringStringFeature(Classifier c, String i, - DiscreteFeature r) { - this(c.containingPackage, c.name, i, r, r.getValueIndex(), - r.totalValues()); - } - - /** - * Sets both the identifier and the referent. - * - * @param c The classifier that produced this feature. - * @param i The new referring feature's identifier. 
- * @param r The discrete feature referred to by this new feature. - * @param av The allowable values of the classifier that produced - * r. - **/ - public DiscreteReferringStringFeature(Classifier c, String i, - DiscreteFeature r, String[] av) { - super(c, r, av); - identifier = i; - } - - /** - * Constructs a new referring feature. - * - * @param p The new discrete feature's package. - * @param c The name of the classifier that produced this feature. - * @param i The new discrete feature's identifier. - * @param r The discrete feature referred to by this new feature. - * @param vi The index corresponding to the value. - * @param t The total allowable values for this feature. - **/ - public DiscreteReferringStringFeature(String p, String c, String i, - DiscreteFeature r, short vi, short t) - { - super(p, c, r, vi, t); - identifier = i; - } - - - /** - * Determines if this feature contains a string identifier field. - * - * @return true iff this feature contains a string identifier - * field. - **/ - public boolean hasStringIdentifier() { return true; } - - - /** - * Retrieves this feature's identifier as a string. - * - * @return This feature's identifier as a string. - **/ - public String getStringIdentifier() { return identifier; } - - - /** - * Retrieves this feature's identifier as a byte string. - * - * @return This feature's identifier as a byte string. - **/ - public ByteString getByteStringIdentifier() { - return new ByteString(identifier); - } - - - /** - * Return the feature that should be used to index this feature into a - * lexicon. If it is a binary feature, we return the feature with an empty - * value so that the feature will be mapped to the same weight whether it - * is active or not. If the feature can take multiple values, then simply - * return the feature object as-is. - * - * @param lexicon The lexicon into which this feature will be indexed. - * @param training Whether or not the learner is currently training. 
- * @param label The label of the example containing this feature, or -1 - * if we aren't doing per class feature counting. - * @return A feature object appropriate for use as the key of a map. - **/ - public Feature getFeatureKey(Lexicon lexicon, boolean training, int label) { - DiscreteFeature f = - (DiscreteFeature) referent.getFeatureKey(lexicon, training, label); - if (training) f = (DiscreteFeature) lexicon.getChildFeature(f, label); - else if (f == referent) return this; - return - new DiscreteReferringStringFeature( - containingPackage, generatingClassifier, identifier, f, - valueIndex, totalValues); - } - - - /** - * Returns a {@link RealPrimitiveFeature} whose - * {@link RealPrimitiveFeature#value value} field is set to the strength of - * the current feature, and whose {@link #identifier} field contains all - * the information necessary to distinguish this feature from other - * features. - **/ - public RealFeature makeReal() { - return - new RealReferringStringFeature( - containingPackage, generatingClassifier, identifier, - referent.makeReal()); - } - - - /** - * Returns the strength of this feature if it were to be placed in a - * mathematical vector space. - **/ - public double getStrength() { return referent.getStrength(); } - - - /** - * Returns a new feature object that's identical to this feature except its - * strength is given by s. - * - * @param s The strength of the new feature. - * @return A new feature object as above, or null if this - * feature cannot take the specified strength. - **/ - public Feature withStrength(double s) { - DiscreteFeature f = (DiscreteFeature) referent.withStrength(s); - if (f == null) return null; - return - new DiscreteReferringStringFeature( - containingPackage, generatingClassifier, identifier, f, - (short) Math.round(f.getStrength()), totalValues); - } - - - /** - * Returns a feature object in which any strings that are being used to - * represent an identifier or value have been encoded in byte strings. 
- * - * @param e The encoding to use. - * @return A feature object as above; possible this object. - **/ - public Feature encode(String e) { - DiscreteFeature newReferent = (DiscreteFeature) referent.encode(e); - if (referent == newReferent && (e == null || e == "String")) return this; - ByteString id = - identifier.length() == 0 ? ByteString.emptyString - : new ByteString(identifier, e); - return - new DiscreteReferringFeature( - containingPackage, generatingClassifier, id, newReferent, - valueIndex, totalValues); - } - - - /** - * The hash code of a DiscreteReferringStringFeature is the - * sum of the hash codes of its containing package, identifier, and the - * referent feature. - * - * @return The hash code of this feature. - **/ - public int hashCode() { - return 17 * super.hashCode() + identifier.hashCode(); - } - - - /** - * Two DiscreteReferringStringFeatures are equivalent when - * their containing packages, identifiers, and referent features are - * equivalent. - * - * @param o The object with which to compare this feature. - * @return true iff the parameter is an equivalent feature. - **/ - public boolean equals(Object o) { - return - super.equals(o) - && (o instanceof DiscreteReferringStringFeature - ? identifier.equals(((DiscreteReferringStringFeature) o).identifier) - : identifier.equals(((DiscreteReferringFeature) o).identifier)) - && referent.equals(((DiscreteReferrer) o).referent); - } - - - /** - * Some features are functionally equivalent, differing only in the - * encoding of their values; this method will return true iff - * the class of this feature and f are different, but they - * differ only because they encode their values differently. This method - * does not compare the values themselves, however. - * - * @param f Another feature. - * @return See above. 
- **/ - public boolean classEquivalent(Feature f) { - return f instanceof DiscreteReferringFeature; - } - - - /** - * Compares only the run-time types, packages, classifier names, and - * identifiers of the features. - * - * @param o An object to compare with. - * @return Integers appropriate for sorting features first by package, then - * by classifier name, and then by identifier. - **/ - public int compareNameStrings(Object o) { - int d = super.compareNameStrings(o); - if (d != 0) return d; - return - identifier.compareTo(((DiscreteReferringStringFeature) o).identifier); - } - - - /** - * Writes a string representation of this Feature's package, - * generating classifier, and identifier information to the specified - * buffer. - * - * @param buffer The buffer to write to. - **/ - public void writeNameString(StringBuffer buffer) { - super.writeNameString(buffer); - buffer.append(":"); - buffer.append(identifier); - } - - - /** - * Writes a complete binary representation of the feature. - * - * @param out The output stream. - **/ - public void write(ExceptionlessOutputStream out) { - super.write(out); - out.writeString(identifier); - } - - - /** - * Reads the representation of a feaeture with this object's run-time type - * from the given stream, overwriting the data in this object. - * - * @param in The input stream. - **/ - public void read(ExceptionlessInputStream in) { - super.read(in); - identifier = in.readString(); - } - - - /** - * Writes a binary representation of the feature intended for use by a - * lexicon, omitting redundant information when possible. - * - * @param out The output stream. - * @param lex The lexicon out of which this feature is being written. - * @param c The fully qualified name of the assumed class. The runtime - * class of this feature won't be written if it's equivalent to - * c. - * @param p The assumed package string. This feature's package string - * won't be written if it's equivalent to p. 
- * @param g The assumed classifier name string. This feature's - * classifier name string won't be written if it's equivalent - * to g. - * @param si The assumed identifier as a string. If this feature has a - * string identifier, it won't be written if it's equivalent to - * si. - * @param bi The assumed identifier as a byte string. If this feature - * has a byte string identifier, it won't be written if it's - * equivalent to bi. - * @return The name of the runtime type of this feature. - **/ - public String lexWrite(ExceptionlessOutputStream out, Lexicon lex, String c, - String p, String g, String si, ByteString bi) { - String result = super.lexWrite(out, lex, c, p, g, si, bi); - out.writeString(identifier.equals(si) ? null : identifier); - return result; - } - - - /** - * Reads the representation of a feature with this object's run-time type - * as stored by a lexicon, overwriting the data in this object. - * - *

This method is appropriate for reading features as written by - * {@link #lexWrite(ExceptionlessOutputStream,Lexicon,String,String,String,String,ByteString)}. - * - * @param in The input stream. - * @param lex The lexicon we are reading in to. - * @param p The assumed package string. If no package name is given in - * the input stream, the instantiated feature is given this - * package. - * @param g The assumed classifier name string. If no classifier name - * is given in the input stream, the instantiated feature is - * given this classifier name. - * @param si The assumed identifier as a string. If the feature being - * read has a string identifier field and no identifier is - * given in the input stream, the feature is given this - * identifier. - * @param bi The assumed identifier as a byte string. If the feature - * being read has a byte string identifier field and no - * identifier is given in the input stream, the feature is - * given this identifier. - **/ - public void lexRead(ExceptionlessInputStream in, Lexicon lex, String p, - String g, String si, ByteString bi) { - super.lexRead(in, lex, p, g, si, bi); - identifier = in.readString(); - if (identifier == null) identifier = si; - } +public class DiscreteReferringStringFeature extends DiscreteReferrer { + /** + * The identifier string distinguishes this Feature from other + * Features. + **/ + protected String identifier; + + + /** + * For internal use only. + * + * @see Feature#readFeature(ExceptionlessInputStream) + **/ + protected DiscreteReferringStringFeature() {} + + /** + * Sets both the identifier and the referent. + * + * @param c The classifier that produced this feature. + * @param i The new referring feature's identifier. + * @param r The discrete feature referred to by this new feature. 
+ **/ + public DiscreteReferringStringFeature(Classifier c, String i, DiscreteFeature r) { + this(c.containingPackage, c.name, i, r, r.getValueIndex(), r.totalValues()); + } + + /** + * Sets both the identifier and the referent. + * + * @param c The classifier that produced this feature. + * @param i The new referring feature's identifier. + * @param r The discrete feature referred to by this new feature. + * @param av The allowable values of the classifier that produced r. + **/ + public DiscreteReferringStringFeature(Classifier c, String i, DiscreteFeature r, String[] av) { + super(c, r, av); + identifier = i; + } + + /** + * Constructs a new referring feature. + * + * @param p The new discrete feature's package. + * @param c The name of the classifier that produced this feature. + * @param i The new discrete feature's identifier. + * @param r The discrete feature referred to by this new feature. + * @param vi The index corresponding to the value. + * @param t The total allowable values for this feature. + **/ + public DiscreteReferringStringFeature(String p, String c, String i, DiscreteFeature r, + short vi, short t) { + super(p, c, r, vi, t); + identifier = i; + } + + + /** + * Determines if this feature contains a string identifier field. + * + * @return true iff this feature contains a string identifier field. + **/ + public boolean hasStringIdentifier() { + return true; + } + + + /** + * Retrieves this feature's identifier as a string. + * + * @return This feature's identifier as a string. + **/ + public String getStringIdentifier() { + return identifier; + } + + + /** + * Retrieves this feature's identifier as a byte string. + * + * @return This feature's identifier as a byte string. + **/ + public ByteString getByteStringIdentifier() { + return new ByteString(identifier); + } + + + /** + * Return the feature that should be used to index this feature into a lexicon. 
If it is a + * binary feature, we return the feature with an empty value so that the feature will be mapped + * to the same weight whether it is active or not. If the feature can take multiple values, then + * simply return the feature object as-is. + * + * @param lexicon The lexicon into which this feature will be indexed. + * @param training Whether or not the learner is currently training. + * @param label The label of the example containing this feature, or -1 if we aren't doing per + * class feature counting. + * @return A feature object appropriate for use as the key of a map. + **/ + public Feature getFeatureKey(Lexicon lexicon, boolean training, int label) { + DiscreteFeature f = (DiscreteFeature) referent.getFeatureKey(lexicon, training, label); + if (training) + f = (DiscreteFeature) lexicon.getChildFeature(f, label); + else if (f == referent) + return this; + return new DiscreteReferringStringFeature(containingPackage, generatingClassifier, + identifier, f, valueIndex, totalValues); + } + + + /** + * Returns a {@link RealPrimitiveFeature} whose {@link RealPrimitiveFeature#value value} field + * is set to the strength of the current feature, and whose {@link #identifier} field contains + * all the information necessary to distinguish this feature from other features. + **/ + public RealFeature makeReal() { + return new RealReferringStringFeature(containingPackage, generatingClassifier, identifier, + referent.makeReal()); + } + + + /** + * Returns the strength of this feature if it were to be placed in a mathematical vector space. + **/ + public double getStrength() { + return referent.getStrength(); + } + + + /** + * Returns a new feature object that's identical to this feature except its strength is given by + * s. + * + * @param s The strength of the new feature. + * @return A new feature object as above, or null if this feature cannot take the + * specified strength. 
+ **/ + public Feature withStrength(double s) { + DiscreteFeature f = (DiscreteFeature) referent.withStrength(s); + if (f == null) + return null; + return new DiscreteReferringStringFeature(containingPackage, generatingClassifier, + identifier, f, (short) Math.round(f.getStrength()), totalValues); + } + + + /** + * Returns a feature object in which any strings that are being used to represent an identifier + * or value have been encoded in byte strings. + * + * @param e The encoding to use. + * @return A feature object as above; possible this object. + **/ + public Feature encode(String e) { + DiscreteFeature newReferent = (DiscreteFeature) referent.encode(e); + if (referent == newReferent && (e == null || e == "String")) + return this; + ByteString id = + identifier.length() == 0 ? ByteString.emptyString : new ByteString(identifier, e); + return new DiscreteReferringFeature(containingPackage, generatingClassifier, id, + newReferent, valueIndex, totalValues); + } + + + /** + * The hash code of a DiscreteReferringStringFeature is the sum of the hash codes + * of its containing package, identifier, and the referent feature. + * + * @return The hash code of this feature. + **/ + public int hashCode() { + return 17 * super.hashCode() + identifier.hashCode(); + } + + + /** + * Two DiscreteReferringStringFeatures are equivalent when their containing + * packages, identifiers, and referent features are equivalent. + * + * @param o The object with which to compare this feature. + * @return true iff the parameter is an equivalent feature. + **/ + public boolean equals(Object o) { + return super.equals(o) + && (o instanceof DiscreteReferringStringFeature ? 
identifier + .equals(((DiscreteReferringStringFeature) o).identifier) : identifier + .equals(((DiscreteReferringFeature) o).identifier)) + && referent.equals(((DiscreteReferrer) o).referent); + } + + + /** + * Some features are functionally equivalent, differing only in the encoding of their values; + * this method will return true iff the class of this feature and f + * are different, but they differ only because they encode their values differently. This method + * does not compare the values themselves, however. + * + * @param f Another feature. + * @return See above. + **/ + public boolean classEquivalent(Feature f) { + return f instanceof DiscreteReferringFeature; + } + + + /** + * Compares only the run-time types, packages, classifier names, and identifiers of the + * features. + * + * @param o An object to compare with. + * @return Integers appropriate for sorting features first by package, then by classifier name, + * and then by identifier. + **/ + public int compareNameStrings(Object o) { + int d = super.compareNameStrings(o); + if (d != 0) + return d; + return identifier.compareTo(((DiscreteReferringStringFeature) o).identifier); + } + + + /** + * Writes a string representation of this Feature's package, generating classifier, + * and identifier information to the specified buffer. + * + * @param buffer The buffer to write to. + **/ + public void writeNameString(StringBuffer buffer) { + super.writeNameString(buffer); + buffer.append(":"); + buffer.append(identifier); + } + + + /** + * Writes a complete binary representation of the feature. + * + * @param out The output stream. + **/ + public void write(ExceptionlessOutputStream out) { + super.write(out); + out.writeString(identifier); + } + + + /** + * Reads the representation of a feaeture with this object's run-time type from the given + * stream, overwriting the data in this object. + * + * @param in The input stream. 
+ **/ + public void read(ExceptionlessInputStream in) { + super.read(in); + identifier = in.readString(); + } + + + /** + * Writes a binary representation of the feature intended for use by a lexicon, omitting + * redundant information when possible. + * + * @param out The output stream. + * @param lex The lexicon out of which this feature is being written. + * @param c The fully qualified name of the assumed class. The runtime class of this feature + * won't be written if it's equivalent to c. + * @param p The assumed package string. This feature's package string won't be written if it's + * equivalent to p. + * @param g The assumed classifier name string. This feature's classifier name string won't be + * written if it's equivalent to g. + * @param si The assumed identifier as a string. If this feature has a string identifier, it + * won't be written if it's equivalent to si. + * @param bi The assumed identifier as a byte string. If this feature has a byte string + * identifier, it won't be written if it's equivalent to bi. + * @return The name of the runtime type of this feature. + **/ + public String lexWrite(ExceptionlessOutputStream out, Lexicon lex, String c, String p, + String g, String si, ByteString bi) { + String result = super.lexWrite(out, lex, c, p, g, si, bi); + out.writeString(identifier.equals(si) ? null : identifier); + return result; + } + + + /** + * Reads the representation of a feature with this object's run-time type as stored by a + * lexicon, overwriting the data in this object. + * + *

+ * This method is appropriate for reading features as written by + * {@link #lexWrite(ExceptionlessOutputStream,Lexicon,String,String,String,String,ByteString)}. + * + * @param in The input stream. + * @param lex The lexicon we are reading in to. + * @param p The assumed package string. If no package name is given in the input stream, the + * instantiated feature is given this package. + * @param g The assumed classifier name string. If no classifier name is given in the input + * stream, the instantiated feature is given this classifier name. + * @param si The assumed identifier as a string. If the feature being read has a string + * identifier field and no identifier is given in the input stream, the feature is given + * this identifier. + * @param bi The assumed identifier as a byte string. If the feature being read has a byte + * string identifier field and no identifier is given in the input stream, the feature is + * given this identifier. + **/ + public void lexRead(ExceptionlessInputStream in, Lexicon lex, String p, String g, String si, + ByteString bi) { + super.lexRead(in, lex, p, g, si, bi); + identifier = in.readString(); + if (identifier == null) + identifier = si; + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/Feature.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/Feature.java index 745979a9..3ed8d54b 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/Feature.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/Feature.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.classify; @@ -13,707 +10,710 @@ import java.io.IOException; import java.io.Serializable; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream; import edu.illinois.cs.cogcomp.lbjava.learn.ChildLexicon; import edu.illinois.cs.cogcomp.lbjava.learn.Lexicon; import edu.illinois.cs.cogcomp.lbjava.util.ByteString; import edu.illinois.cs.cogcomp.lbjava.util.ClassUtils; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessInputStream; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessOutputStream; - /** - * Objects of this class represent the value of a Classifier's - * decision. - * - * @author Nick Rizzolo + * Objects of this class represent the value of a Classifier's decision. + * + * @author Nick Rizzolo **/ -public abstract class Feature implements Cloneable, Comparable, Serializable -{ - /** - * The Java package containing the classifier that produced - * this feature. - **/ - protected String containingPackage; - /** The name of the LBJava classifier that produced this feature. */ - protected String generatingClassifier; - - - /** - * For internal use only. - * - * @see #readFeature(ExceptionlessInputStream) - **/ - protected Feature() { } - - /** - * Initializing constructor. - * - * @param p The package containing the classifier that produced this - * feature. - * @param c The name of the classifier that produced this feature. - **/ - public Feature(String p, String c) { - containingPackage = p; - generatingClassifier = c; - } - - - /** Retrieves this feature's package. 
*/ - public String getPackage() { return containingPackage; } - - - /** Retrieves the name of the classifier that produced this feature. */ - public String getGeneratingClassifier() { return generatingClassifier; } - - - /** - * Retrieves this feature's identifier as a string. - * - * @return This feature's identifier as a string. - **/ - public abstract String getStringIdentifier(); - - - /** - * Retrieves this feature's identifier as a byte string. - * - * @return This feature's identifier as a byte string. - **/ - public abstract ByteString getByteStringIdentifier(); - - - /** - * Gives a string representation of the value of this feature. - * - * @return A string representation of the value of this feature. - **/ - public abstract String getStringValue(); - - - /** - * Gives a string representation of the value of this feature. - * - * @return A string representation of the value of this feature. - **/ - public abstract ByteString getByteStringValue(); - - - /** - * Determines whether or not the parameter is equivalent to the string - * representation of the value of this feature. - * - * @param v The string to compare against. - * @return true iff the parameter is equivalent to the string - * representation of the value of this feature. - **/ - public abstract boolean valueEquals(String v); - - - /** - * Determines if this feature is discrete. - * - * @return true iff this is discrete. - **/ - public abstract boolean isDiscrete(); - - - /** - * Determines if this feature contains a byte string identifier field. - * - * @return true iff this feature contains a byte string - * identifier field. - **/ - public boolean hasByteStringIdentifier() { return false; } - - - /** - * Determines if this feature contains a string identifier field. - * - * @return true iff this feature contains a string identifier - * field. - **/ - public boolean hasStringIdentifier() { return false; } - - - /** - * Determines if this feature is primitive. 
- * - * @return true iff this is primitive. - **/ - public boolean isPrimitive() { return false; } - - - /** - * Determines if this feature is conjunctive. - * - * @return true iff this feature is conjunctive. - **/ - public boolean isConjunctive() { return false; } - - - /** - * Determines if this feature is a referring feature. - * - * @return true iff this feature is a referring feature. - **/ - public boolean isReferrer() { return false; } - - - /** - * Determines if this feature comes from an array. - * - * @return true iff this feature comes from an array. - **/ - public boolean fromArray() { return false; } - - - /** - * The depth of a feature is one more than the maximum depth of any of its - * children, or 0 if it has no children. - * - * @return The depth of this feature as described above. - **/ - public int depth() { return 0; } - - - /** - * Returns the index in the generating classifier's value list of this - * feature's value. - * - * @return A non-negative integer index, or -1 if this feature is real or - * doesn't have a value list. - **/ - public short getValueIndex() { return -1; } - - - /** - * Returns the total number of values this feature might possibly be set - * to. - * - * @return Some integer greater than 1 iff this feature is a discrete - * feature with a specified value list or a conjunctive feature - * whose arguments have value lists, and 0 otherwise. - **/ - public short totalValues() { return 0; } - - - /** - * If this feature is an array feature, call this method to set its array - * length; otherwise, this method has no effect. - * - * @param l The new length. - **/ - public void setArrayLength(int l) { } - - - /** - * Returns the strength of this feature if it were to be placed in a - * mathematical vector space. - **/ - public abstract double getStrength(); - - - /** - * Return the feature that should be used to index this feature into a - * lexicon. This method simply calls getFeatureKey(lexicon, true, - * -1). 
- * - * @see #getFeatureKey(Lexicon,boolean,int) - * @param lexicon The lexicon into which this feature will be indexed. - * @return A feature object appropriate for use as the key of a map. - **/ - public Feature getFeatureKey(Lexicon lexicon) { - return getFeatureKey(lexicon, true, -1); - } - - - /** - * Return the feature that should be used to index this feature into a - * lexicon. - * - * @param lexicon The lexicon into which this feature will be indexed. - * @param training Whether or not the learner is currently training. - * @param label The label of the example containing this feature, or -1 - * if we aren't doing per class feature counting. - * @return A feature object appropriate for use as the key of a map. - **/ - public abstract Feature getFeatureKey(Lexicon lexicon, boolean training, - int label); - - - /** - * Returns a {@link RealFeature} whose value is the strength of the current - * feature, and whose identifier field contains all the - * information necessary to distinguish this feature from other features. - * When defining this method, RealFeatures may simply return - * themselves. - **/ - public abstract RealFeature makeReal(); - - - /** - * Create a feature representing the conjunction of this feature with the - * given argument feature. - * - * @param f The feature to conjunct with. - * @param c The classifier producing the resulting feature. - * @return A feature representing the conjunction of this feature and - * f. - **/ - public abstract Feature conjunction(Feature f, Classifier c); - - - /** - * Create a feature representing the conjunction of this feature with the - * given argument feature. - * - * @param f The feature to conjunct with. - * @param c The classifier producing the resulting feature. - * @return A feature representing the conjunction of this feature and - * f. 
- **/ - protected Feature conjunctWith(DiscreteFeature f, Classifier c) { - return new RealConjunctiveFeature(c, f, this); - } - - - /** - * Create a feature representing the conjunction of this feature with the - * given argument feature. - * - * @param f The feature to conjunct with. - * @param c The classifier producing the resulting feature. - * @return A feature representing the conjunction of this feature and - * f. - **/ - protected Feature conjunctWith(RealFeature f, Classifier c) { - return new RealConjunctiveFeature(c, f, this); - } - - - /** - * Returns a new feature object that's identical to this feature except its - * strength is given by s. - * - * @param s The strength of the new feature. - * @return A new feature object as above, or null if this - * feature cannot take the specified strength. - **/ - public abstract Feature withStrength(double s); - - - /** - * Returns a feature object in which any strings that are being used to - * represent an identifier or value have been encoded in byte strings. - * - * @param e The encoding to use. - * @return A feature object as above; possible this object. - **/ - public abstract Feature encode(String e); - - - /** - * Takes care of any feature-type-specific tasks that need to be taken care - * of when removing a feature of this type from a {@link ChildLexicon}, in - * particular updating parent counts and removing children of this feature - * if necessary. - * - * @param lex The child lexicon this feature is being removed from. - **/ - public void removeFromChildLexicon(ChildLexicon lex) { - } - - - /** - * Does a feature-type-specific lookup of this feature in the given - * {@link ChildLexicon}. - * - * @param lex The child lexicon this feature is being looked up in. - * @param label The label of the example containing this feature, or -1 if - * we aren't doing per class feature counting. - * @return The index of f in this lexicon. 
- **/ - public int childLexiconLookup(ChildLexicon lex, int label) { - return lex.childLexiconLookup(this, label); - } - - - /** - * The hash code of a Feature is a function of the hash codes - * of {@link #containingPackage} and {@link #generatingClassifier}. - * - * @return The hash code of this Feature. - **/ - public int hashCode() { - return 31 * containingPackage.hashCode() - + generatingClassifier.hashCode(); - } - - - /** - * Two Features are equal when their packages and generating - * classifiers are equivalent. - * - * @return true iff the argument is an equivalent - * Feature. - **/ - public boolean equals(Object o) { - assert (getClass() == o.getClass()) - == (getClass().getName().equals(o.getClass().getName())) - : "getClass() doesn't behave as expected."; - if (!(o instanceof Feature)) return false; - Feature f = (Feature) o; - if (getClass() != o.getClass() && !classEquivalent(f)) return false; - - assert !(f.containingPackage.equals(containingPackage) - && f.containingPackage != containingPackage) - : "Features \"" + f + "\" and \"" + this - + " have equivalent package strings in different objects."; - - return f.containingPackage == containingPackage - && f.generatingClassifier.equals(generatingClassifier); - } - - - /** - * Some features are functionally equivalent, differing only in the - * encoding of their values; this method will return true iff - * the class of this feature and f are different, but they - * differ only because they encode their values differently. This method - * does not compare the values themselves, however. - * - * @param f Another feature. - * @return See above. - **/ - public boolean classEquivalent(Feature f) { return false; } - - - /** - * Used to sort features into an order that is convenient both to page - * through and for the lexicon to read off disk. - * - * @param o An object to compare with. 
- * @return Integers appropriate for sorting features first by package, then - * by classifier name, and then by identifier. - **/ - public abstract int compareTo(Object o); - - /** - * Compares only the run-time types, packages, classifier names, and - * identifiers of the features. This method must be overridden in order to - * accomplish the comparison of identifiers, but the overriding method will - * still have the convenience of calling this method to accomplish the - * majority of the work. - * - * @param o An object to compare with. - * @return Integers appropriate for sorting features first by package, then - * by classifier name, and then by identifier. - **/ - public int compareNameStrings(Object o) { - int d = compareTypes(o); - if (d != 0) return d; - - Feature f = (Feature) o; - d = containingPackage.compareTo(f.containingPackage); - if (d != 0) return d; - return generatingClassifier.compareTo(f.generatingClassifier); - } - - - /** - * Compares only the run-time types of the features. - * - * @param o An object to compare with. - * @return Integers appropriate for sorting features by run-time type. - **/ - private int compareTypes(Object o) { - if (!(o instanceof Feature)) return -1; - Feature f = (Feature) o; - - boolean b1 = isDiscrete(); - boolean b2 = f.isDiscrete(); - int d = (b2 ? 1 : 0) - (b1 ? 1 : 0); - if (d != 0) return d; - - int i1 = depth(); - int i2 = f.depth(); - d = i1 - i2; - if (d != 0) return d; - - b1 = isReferrer(); - b2 = f.isReferrer(); - d = (b2 ? 1 : 0) - (b1 ? 1 : 0); - if (d != 0) return d; - - b1 = fromArray(); - b2 = f.fromArray(); - d = (b1 ? 1 : 0) - (b2 ? 1 : 0); - if (d != 0) return d; - - b1 = hasStringIdentifier(); - b2 = f.hasStringIdentifier(); - return (b1 ? 1 : 0) - (b2 ? 1 : 0); - } - - - /** - * Writes a string representation of this Feature to the - * specified buffer. - * - * @param buffer The buffer to write to. 
- **/ - public abstract void write(StringBuffer buffer); - - - /** - * Writes a string representation of this Feature's package, - * generating classifier, and sometimes identifier information to the - * specified buffer. This method will need to be overridden to write the - * identifier information, but at least the overriding method will have the - * convenience of calling this method to accomplish most of the work first. - * - * @param buffer The buffer to write to. - **/ - public void writeNameString(StringBuffer buffer) { - if (containingPackage != null && containingPackage.length() > 0) { - buffer.append(containingPackage); - buffer.append("."); - } - buffer.append(generatingClassifier); - if (hasByteStringIdentifier()) buffer.append("|B|"); - } - - - /** - * Writes a complete binary representation of the feature. - * - * @param out The output stream. - **/ - public void write(ExceptionlessOutputStream out) { - out.writeString(getClass().getName()); - out.writeString(containingPackage); - out.writeString(generatingClassifier); - } - - - /** - * Writes a string representation of this Feature to the - * specified buffer, omitting the package name. - * - * @param buffer The buffer to write to. - **/ - public void writeNoPackage(StringBuffer buffer) { - String p = containingPackage; - containingPackage = null; - write(buffer); - containingPackage = p; - } - - - /** - * Writes a binary representation of the feature intended for use by a - * lexicon, omitting redundant information when possible. - * - * @param out The output stream. - * @param lex The lexicon out of which this feature is being written. - * @param c The fully qualified name of the assumed class. The runtime - * class of this feature won't be written if it's equivalent to - * c. - * @param p The assumed package string. This feature's package string - * won't be written if it's equivalent to p. - * @param g The assumed classifier name string. 
This feature's - * classifier name string won't be written if it's equivalent - * to g. - * @param si The assumed identifier as a string. If this feature has a - * string identifier, it won't be written if it's equivalent to - * si. - * @param bi The assumed identifier as a byte string. If this feature - * has a byte string identifier, it won't be written if it's - * equivalent to bi. - * @return The name of the runtime type of this feature. - **/ - public String lexWrite(ExceptionlessOutputStream out, Lexicon lex, String c, - String p, String g, String si, ByteString bi) { - String clazz = getClass().getName(); - out.writeString(clazz.equals(c) ? null : clazz); - out.writeString(containingPackage == p ? null : containingPackage); - out.writeString(generatingClassifier == g ? null : generatingClassifier); - return clazz; - } - - - /** - * Reads the binary representation of a feature of any type from the given - * stream. The stream is expected to first return a string containing the - * fully qualified class name of the feature. If the short value - * -1 appears instead, this method returns null. - * - *

This method is appropriate for reading features as written by - * {@link #write(ExceptionlessOutputStream)}. - * - * @param in The input stream. - * @return The feature read from the stream. - **/ - public static Feature readFeature(ExceptionlessInputStream in) { - String name = in.readString(); - if (name == null) return null; - Class c = ClassUtils.getClass(name); - Feature result = null; - - try { result = (Feature) c.newInstance(); } - catch (Exception e) { - System.err.println("Error instantiating feature '" + name + "':"); - e.printStackTrace(); - in.close(); - System.exit(1); - } - - result.read(in); - return result; - } - - - /** - * Reads the representation of a feature with this object's run-time type - * from the given stream, overwriting the data in this object. - * - *

This method is appropriate for reading features as written by - * {@link #write(ExceptionlessOutputStream)}. - * - * @param in The input stream. - **/ - public void read(ExceptionlessInputStream in) { - containingPackage = in.readString().intern(); - generatingClassifier = in.readString().intern(); - } - - - /** - * Reads the representation of a feature of any type as stored by a - * lexicon, omitting redundant information. - * - *

This method is appropriate for reading features as written by - * {@link #lexWrite(ExceptionlessOutputStream,Lexicon,String,String,String,String,ByteString)}. - * - * @param in The input stream. - * @param lex The lexicon we are reading in to. - * @param c The assumed class. If no class name is given in the input - * stream, a feature of this type is instantiated. - * @param p The assumed package string. If no package name is given in - * the input stream, the instantiated feature is given this - * package. - * @param g The assumed classifier name string. If no classifier name - * is given in the input stream, the instantiated feature is - * given this classifier name. - * @param si The assumed identifier as a string. If the feature being - * read has a string identifier field and no identifier is - * given in the input stream, the feature is given this - * identifier. - * @param bi The assumed identifier as a byte string. If the feature - * being read has a byte string identifier field and no - * identifier is given in the input stream, the feature is - * given this identifier. - * @return The feature read from the stream. - **/ - public static Feature lexReadFeature(ExceptionlessInputStream in, - Lexicon lex, Class c, String p, - String g, String si, ByteString bi) { - String name = in.readString(); - if (name != null) c = ClassUtils.getClass(name); - Feature result = null; - - try { result = (Feature) c.newInstance(); } - catch (Exception e) { - System.err.println("Error instantiating feature '" + name + "':"); - e.printStackTrace(); - in.close(); - System.exit(1); - } - - result.lexRead(in, lex, p, g, si, bi); - return result; - } - - - /** - * Reads the representation of a feature with this object's run-time type - * as stored by a lexicon, overwriting the data in this object. - * - *

This method is appropriate for reading features as written by - * {@link #lexWrite(ExceptionlessOutputStream,Lexicon,String,String,String,String,ByteString)}. - * - * @param in The input stream. - * @param lex The lexicon we are reading in to. - * @param p The assumed package string. If no package name is given in - * the input stream, the instantiated feature is given this - * package. - * @param g The assumed classifier name string. If no classifier name - * is given in the input stream, the instantiated feature is - * given this classifier name. - * @param si The assumed identifier as a string. If the feature being - * read has a string identifier field and no identifier is - * given in the input stream, the feature is given this - * identifier. - * @param bi The assumed identifier as a byte string. If the feature - * being read has a byte string identifier field and no - * identifier is given in the input stream, the feature is - * given this identifier. - **/ - public void lexRead(ExceptionlessInputStream in, Lexicon lex, String p, - String g, String si, ByteString bi) { - containingPackage = in.readString(); - if (containingPackage == null) containingPackage = p; - else containingPackage = containingPackage.intern(); - generatingClassifier = in.readString(); - if (generatingClassifier == null) generatingClassifier = g; - else generatingClassifier = generatingClassifier.intern(); - } - - - /** Returns a string representation of this Feature. */ - public String toString() { - StringBuffer result = new StringBuffer(); - write(result); - return result.toString(); - } - - - /** - * Returns a string representation of this Feature omitting - * the package. - **/ - public String toStringNoPackage() { - StringBuffer result = new StringBuffer(); - writeNoPackage(result); - return result.toString(); - } - - - /** Returns a shallow clone of this Feature. 
*/ - public Object clone() { - Object result = null; - - try { result = super.clone(); } - catch (Exception e) { - System.err.println("Can't clone feature '" + this + "':"); - e.printStackTrace(); - } - - return result; - } - - - /** - * Special handling during deserialization to ensure that - * Strings are intern()ed. - * - * @param in The stream to deserialize from. - **/ - private void readObject(java.io.ObjectInputStream in) - throws IOException, ClassNotFoundException { - in.defaultReadObject(); - containingPackage = containingPackage.intern(); - generatingClassifier = generatingClassifier.intern(); - } -} +public abstract class Feature implements Cloneable, Comparable, Serializable { + /** + * The Java package containing the classifier that produced this feature. + **/ + protected String containingPackage; + /** The name of the LBJava classifier that produced this feature. */ + protected String generatingClassifier; + + + /** + * For internal use only. + * + * @see #readFeature(ExceptionlessInputStream) + **/ + protected Feature() {} + + /** + * Initializing constructor. + * + * @param p The package containing the classifier that produced this feature. + * @param c The name of the classifier that produced this feature. + **/ + public Feature(String p, String c) { + containingPackage = p; + generatingClassifier = c; + } + + + /** Retrieves this feature's package. */ + public String getPackage() { + return containingPackage; + } + + + /** Retrieves the name of the classifier that produced this feature. */ + public String getGeneratingClassifier() { + return generatingClassifier; + } + + + /** + * Retrieves this feature's identifier as a string. + * + * @return This feature's identifier as a string. + **/ + public abstract String getStringIdentifier(); + + + /** + * Retrieves this feature's identifier as a byte string. + * + * @return This feature's identifier as a byte string. 
+ **/ + public abstract ByteString getByteStringIdentifier(); + + + /** + * Gives a string representation of the value of this feature. + * + * @return A string representation of the value of this feature. + **/ + public abstract String getStringValue(); + + + /** + * Gives a string representation of the value of this feature. + * + * @return A string representation of the value of this feature. + **/ + public abstract ByteString getByteStringValue(); + + + /** + * Determines whether or not the parameter is equivalent to the string representation of the + * value of this feature. + * + * @param v The string to compare against. + * @return true iff the parameter is equivalent to the string representation of the + * value of this feature. + **/ + public abstract boolean valueEquals(String v); + + + /** + * Determines if this feature is discrete. + * + * @return true iff this is discrete. + **/ + public abstract boolean isDiscrete(); + + + /** + * Determines if this feature contains a byte string identifier field. + * + * @return true iff this feature contains a byte string identifier field. + **/ + public boolean hasByteStringIdentifier() { + return false; + } + + + /** + * Determines if this feature contains a string identifier field. + * + * @return true iff this feature contains a string identifier field. + **/ + public boolean hasStringIdentifier() { + return false; + } + + + /** + * Determines if this feature is primitive. + * + * @return true iff this is primitive. + **/ + public boolean isPrimitive() { + return false; + } + + + /** + * Determines if this feature is conjunctive. + * + * @return true iff this feature is conjunctive. + **/ + public boolean isConjunctive() { + return false; + } + + + /** + * Determines if this feature is a referring feature. + * + * @return true iff this feature is a referring feature. + **/ + public boolean isReferrer() { + return false; + } + + + /** + * Determines if this feature comes from an array. 
+ * + * @return true iff this feature comes from an array. + **/ + public boolean fromArray() { + return false; + } + + + /** + * The depth of a feature is one more than the maximum depth of any of its children, or 0 if it + * has no children. + * + * @return The depth of this feature as described above. + **/ + public int depth() { + return 0; + } + + + /** + * Returns the index in the generating classifier's value list of this feature's value. + * + * @return A non-negative integer index, or -1 if this feature is real or doesn't have a value + * list. + **/ + public short getValueIndex() { + return -1; + } + + + /** + * Returns the total number of values this feature might possibly be set to. + * + * @return Some integer greater than 1 iff this feature is a discrete feature with a specified + * value list or a conjunctive feature whose arguments have value lists, and 0 + * otherwise. + **/ + public short totalValues() { + return 0; + } + + + /** + * If this feature is an array feature, call this method to set its array length; otherwise, + * this method has no effect. + * + * @param l The new length. + **/ + public void setArrayLength(int l) {} + + + /** + * Returns the strength of this feature if it were to be placed in a mathematical vector space. + **/ + public abstract double getStrength(); + + + /** + * Return the feature that should be used to index this feature into a lexicon. This method + * simply calls getFeatureKey(lexicon, true, + * -1). + * + * @see #getFeatureKey(Lexicon,boolean,int) + * @param lexicon The lexicon into which this feature will be indexed. + * @return A feature object appropriate for use as the key of a map. + **/ + public Feature getFeatureKey(Lexicon lexicon) { + return getFeatureKey(lexicon, true, -1); + } + + + /** + * Return the feature that should be used to index this feature into a lexicon. + * + * @param lexicon The lexicon into which this feature will be indexed. 
+ * @param training Whether or not the learner is currently training. + * @param label The label of the example containing this feature, or -1 if we aren't doing per + * class feature counting. + * @return A feature object appropriate for use as the key of a map. + **/ + public abstract Feature getFeatureKey(Lexicon lexicon, boolean training, int label); + + + /** + * Returns a {@link RealFeature} whose value is the strength of the current feature, and whose + * identifier field contains all the information necessary to distinguish this + * feature from other features. When defining this method, RealFeatures may simply + * return themselves. + **/ + public abstract RealFeature makeReal(); + + + /** + * Create a feature representing the conjunction of this feature with the given argument + * feature. + * + * @param f The feature to conjunct with. + * @param c The classifier producing the resulting feature. + * @return A feature representing the conjunction of this feature and f. + **/ + public abstract Feature conjunction(Feature f, Classifier c); + + + /** + * Create a feature representing the conjunction of this feature with the given argument + * feature. + * + * @param f The feature to conjunct with. + * @param c The classifier producing the resulting feature. + * @return A feature representing the conjunction of this feature and f. + **/ + protected Feature conjunctWith(DiscreteFeature f, Classifier c) { + return new RealConjunctiveFeature(c, f, this); + } + + + /** + * Create a feature representing the conjunction of this feature with the given argument + * feature. + * + * @param f The feature to conjunct with. + * @param c The classifier producing the resulting feature. + * @return A feature representing the conjunction of this feature and f. 
+ **/ + protected Feature conjunctWith(RealFeature f, Classifier c) { + return new RealConjunctiveFeature(c, f, this); + } + + + /** + * Returns a new feature object that's identical to this feature except its strength is given by + * s. + * + * @param s The strength of the new feature. + * @return A new feature object as above, or null if this feature cannot take the + * specified strength. + **/ + public abstract Feature withStrength(double s); + + + /** + * Returns a feature object in which any strings that are being used to represent an identifier + * or value have been encoded in byte strings. + * + * @param e The encoding to use. + * @return A feature object as above; possible this object. + **/ + public abstract Feature encode(String e); + + + /** + * Takes care of any feature-type-specific tasks that need to be taken care of when removing a + * feature of this type from a {@link ChildLexicon}, in particular updating parent counts and + * removing children of this feature if necessary. + * + * @param lex The child lexicon this feature is being removed from. + **/ + public void removeFromChildLexicon(ChildLexicon lex) {} + + + /** + * Does a feature-type-specific lookup of this feature in the given {@link ChildLexicon}. + * + * @param lex The child lexicon this feature is being looked up in. + * @param label The label of the example containing this feature, or -1 if we aren't doing per + * class feature counting. + * @return The index of f in this lexicon. + **/ + public int childLexiconLookup(ChildLexicon lex, int label) { + return lex.childLexiconLookup(this, label); + } + + + /** + * The hash code of a Feature is a function of the hash codes of + * {@link #containingPackage} and {@link #generatingClassifier}. + * + * @return The hash code of this Feature. 
+ **/ + public int hashCode() { + return 31 * containingPackage.hashCode() + generatingClassifier.hashCode(); + } + + + /** + * Two Features are equal when their packages and generating classifiers are + * equivalent. + * + * @return true iff the argument is an equivalent Feature. + **/ + public boolean equals(Object o) { + assert (getClass() == o.getClass()) == (getClass().getName().equals(o.getClass().getName())) : "getClass() doesn't behave as expected."; + if (!(o instanceof Feature)) + return false; + Feature f = (Feature) o; + if (getClass() != o.getClass() && !classEquivalent(f)) + return false; + + assert !(f.containingPackage.equals(containingPackage) && f.containingPackage != containingPackage) : "Features \"" + + f + "\" and \"" + this + " have equivalent package strings in different objects."; + + return f.containingPackage == containingPackage + && f.generatingClassifier.equals(generatingClassifier); + } + + + /** + * Some features are functionally equivalent, differing only in the encoding of their values; + * this method will return true iff the class of this feature and f + * are different, but they differ only because they encode their values differently. This method + * does not compare the values themselves, however. + * + * @param f Another feature. + * @return See above. + **/ + public boolean classEquivalent(Feature f) { + return false; + } + + + /** + * Used to sort features into an order that is convenient both to page through and for the + * lexicon to read off disk. + * + * @param o An object to compare with. + * @return Integers appropriate for sorting features first by package, then by classifier name, + * and then by identifier. + **/ + public abstract int compareTo(Object o); + + /** + * Compares only the run-time types, packages, classifier names, and identifiers of the + * features. 
This method must be overridden in order to accomplish the comparison of + * identifiers, but the overriding method will still have the convenience of calling this method + * to accomplish the majority of the work. + * + * @param o An object to compare with. + * @return Integers appropriate for sorting features first by package, then by classifier name, + * and then by identifier. + **/ + public int compareNameStrings(Object o) { + int d = compareTypes(o); + if (d != 0) + return d; + + Feature f = (Feature) o; + d = containingPackage.compareTo(f.containingPackage); + if (d != 0) + return d; + return generatingClassifier.compareTo(f.generatingClassifier); + } + + + /** + * Compares only the run-time types of the features. + * + * @param o An object to compare with. + * @return Integers appropriate for sorting features by run-time type. + **/ + private int compareTypes(Object o) { + if (!(o instanceof Feature)) + return -1; + Feature f = (Feature) o; + + boolean b1 = isDiscrete(); + boolean b2 = f.isDiscrete(); + int d = (b2 ? 1 : 0) - (b1 ? 1 : 0); + if (d != 0) + return d; + + int i1 = depth(); + int i2 = f.depth(); + d = i1 - i2; + if (d != 0) + return d; + + b1 = isReferrer(); + b2 = f.isReferrer(); + d = (b2 ? 1 : 0) - (b1 ? 1 : 0); + if (d != 0) + return d; + + b1 = fromArray(); + b2 = f.fromArray(); + d = (b1 ? 1 : 0) - (b2 ? 1 : 0); + if (d != 0) + return d; + + b1 = hasStringIdentifier(); + b2 = f.hasStringIdentifier(); + return (b1 ? 1 : 0) - (b2 ? 1 : 0); + } + + + /** + * Writes a string representation of this Feature to the specified buffer. + * + * @param buffer The buffer to write to. + **/ + public abstract void write(StringBuffer buffer); + + + /** + * Writes a string representation of this Feature's package, generating classifier, + * and sometimes identifier information to the specified buffer. 
This method will need to be + * overridden to write the identifier information, but at least the overriding method will have + * the convenience of calling this method to accomplish most of the work first. + * + * @param buffer The buffer to write to. + **/ + public void writeNameString(StringBuffer buffer) { + if (containingPackage != null && containingPackage.length() > 0) { + buffer.append(containingPackage); + buffer.append("."); + } + buffer.append(generatingClassifier); + if (hasByteStringIdentifier()) + buffer.append("|B|"); + } + + + /** + * Writes a complete binary representation of the feature. + * + * @param out The output stream. + **/ + public void write(ExceptionlessOutputStream out) { + out.writeString(getClass().getName()); + out.writeString(containingPackage); + out.writeString(generatingClassifier); + } + + + /** + * Writes a string representation of this Feature to the specified buffer, omitting + * the package name. + * + * @param buffer The buffer to write to. + **/ + public void writeNoPackage(StringBuffer buffer) { + String p = containingPackage; + containingPackage = null; + write(buffer); + containingPackage = p; + } + + + /** + * Writes a binary representation of the feature intended for use by a lexicon, omitting + * redundant information when possible. + * + * @param out The output stream. + * @param lex The lexicon out of which this feature is being written. + * @param c The fully qualified name of the assumed class. The runtime class of this feature + * won't be written if it's equivalent to c. + * @param p The assumed package string. This feature's package string won't be written if it's + * equivalent to p. + * @param g The assumed classifier name string. This feature's classifier name string won't be + * written if it's equivalent to g. + * @param si The assumed identifier as a string. If this feature has a string identifier, it + * won't be written if it's equivalent to si. + * @param bi The assumed identifier as a byte string. 
If this feature has a byte string + * identifier, it won't be written if it's equivalent to bi. + * @return The name of the runtime type of this feature. + **/ + public String lexWrite(ExceptionlessOutputStream out, Lexicon lex, String c, String p, + String g, String si, ByteString bi) { + String clazz = getClass().getName(); + out.writeString(clazz.equals(c) ? null : clazz); + out.writeString(containingPackage == p ? null : containingPackage); + out.writeString(generatingClassifier == g ? null : generatingClassifier); + return clazz; + } + + /** + * Reads the binary representation of a feature of any type from the given stream. The stream is + * expected to first return a string containing the fully qualified class name of the feature. + * If the short value -1 appears instead, this method returns + * null. + * + *

+ * This method is appropriate for reading features as written by + * {@link #write(ExceptionlessOutputStream)}. + * + * @param in The input stream. + * @return The feature read from the stream. + **/ + public static Feature readFeature(ExceptionlessInputStream in) { + String name = in.readString(); + if (name == null) + return null; + Class c = ClassUtils.getClass(name); + Feature result = null; + + try { + result = (Feature) c.newInstance(); + } catch (Exception e) { + System.err.println("Error instantiating feature '" + name + "':"); + e.printStackTrace(); + in.close(); + System.exit(1); + } + + result.read(in); + return result; + } + + + /** + * Reads the representation of a feature with this object's run-time type from the given stream, + * overwriting the data in this object. + * + *

+ * This method is appropriate for reading features as written by + * {@link #write(ExceptionlessOutputStream)}. + * + * @param in The input stream. + **/ + public void read(ExceptionlessInputStream in) { + containingPackage = in.readString().intern(); + generatingClassifier = in.readString().intern(); + } + + + /** + * Reads the representation of a feature of any type as stored by a lexicon, omitting redundant + * information. + * + *

+ * This method is appropriate for reading features as written by + * {@link #lexWrite(ExceptionlessOutputStream,Lexicon,String,String,String,String,ByteString)}. + * + * @param in The input stream. + * @param lex The lexicon we are reading in to. + * @param c The assumed class. If no class name is given in the input stream, a feature of this + * type is instantiated. + * @param p The assumed package string. If no package name is given in the input stream, the + * instantiated feature is given this package. + * @param g The assumed classifier name string. If no classifier name is given in the input + * stream, the instantiated feature is given this classifier name. + * @param si The assumed identifier as a string. If the feature being read has a string + * identifier field and no identifier is given in the input stream, the feature is given + * this identifier. + * @param bi The assumed identifier as a byte string. If the feature being read has a byte + * string identifier field and no identifier is given in the input stream, the feature is + * given this identifier. + * @return The feature read from the stream. + **/ + public static Feature lexReadFeature(ExceptionlessInputStream in, Lexicon lex, Class c, + String p, String g, String si, ByteString bi) { + String name = in.readString(); + if (name != null) + c = ClassUtils.getClass(name); + Feature result = null; + + try { + result = (Feature) c.newInstance(); + } catch (Exception e) { + System.err.println("Error instantiating feature '" + name + "':"); + e.printStackTrace(); + in.close(); + System.exit(1); + } + + result.lexRead(in, lex, p, g, si, bi); + return result; + } + + + /** + * Reads the representation of a feature with this object's run-time type as stored by a + * lexicon, overwriting the data in this object. + * + *

+ * This method is appropriate for reading features as written by + * {@link #lexWrite(ExceptionlessOutputStream,Lexicon,String,String,String,String,ByteString)}. + * + * @param in The input stream. + * @param lex The lexicon we are reading in to. + * @param p The assumed package string. If no package name is given in the input stream, the + * instantiated feature is given this package. + * @param g The assumed classifier name string. If no classifier name is given in the input + * stream, the instantiated feature is given this classifier name. + * @param si The assumed identifier as a string. If the feature being read has a string + * identifier field and no identifier is given in the input stream, the feature is given + * this identifier. + * @param bi The assumed identifier as a byte string. If the feature being read has a byte + * string identifier field and no identifier is given in the input stream, the feature is + * given this identifier. + **/ + public void lexRead(ExceptionlessInputStream in, Lexicon lex, String p, String g, String si, + ByteString bi) { + containingPackage = in.readString(); + if (containingPackage == null) + containingPackage = p; + else + containingPackage = containingPackage.intern(); + generatingClassifier = in.readString(); + if (generatingClassifier == null) + generatingClassifier = g; + else + generatingClassifier = generatingClassifier.intern(); + } + + + /** Returns a string representation of this Feature. */ + public String toString() { + StringBuffer result = new StringBuffer(); + write(result); + return result.toString(); + } + + + /** + * Returns a string representation of this Feature omitting the package. + **/ + public String toStringNoPackage() { + StringBuffer result = new StringBuffer(); + writeNoPackage(result); + return result.toString(); + } + + + /** Returns a shallow clone of this Feature. 
*/ + public Object clone() { + Object result = null; + + try { + result = super.clone(); + } catch (Exception e) { + System.err.println("Can't clone feature '" + this + "':"); + e.printStackTrace(); + } + + return result; + } + + + /** + * Special handling during deserialization to ensure that Strings are + * intern()ed. + * + * @param in The stream to deserialize from. + **/ + private void readObject(java.io.ObjectInputStream in) throws IOException, + ClassNotFoundException { + in.defaultReadObject(); + containingPackage = containingPackage.intern(); + generatingClassifier = generatingClassifier.intern(); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/FeatureVector.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/FeatureVector.java index 561eb4ff..19c64f62 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/FeatureVector.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/FeatureVector.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.classify; @@ -18,586 +15,608 @@ import java.util.SortedMap; import java.util.TreeMap; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream; import edu.illinois.cs.cogcomp.lbjava.learn.Lexicon; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessInputStream; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessOutputStream; import edu.illinois.cs.cogcomp.lbjava.util.FVector; - /** - * Objects of this class are returned by classifiers that have been applied - * to an object. - * - * @author Nick Rizzolo + * Objects of this class are returned by classifiers that have been applied to an object. + * + * @author Nick Rizzolo **/ -public class FeatureVector implements Cloneable, Serializable -{ - /** Stores non-label features. */ - protected FVector features; - /** Stores labels. */ - protected FVector labels; - /** With this variable, the user can weight the entire vector. */ - protected double weight; - /** Caches the result of the {@link #makeReal()} method. */ - protected FeatureVector realCache; - - - /** Simply instantiates the member variables. */ - public FeatureVector() { - features = new FVector(); - labels = new FVector(); - weight = 1; - } - - /** - * Creates the vector and adds the given feature to it. - * - * @param f A feature to start this vector off with. - **/ - public FeatureVector(Feature f) { - this(); - addFeature(f); - } - - /** - * Creates the vector and adds the given features to it. - * - * @param features A feature array to start this vector off with. 
- **/ - public FeatureVector(Feature[] features) { - this(); - for (int f = 0; f < features.length; f++) - addFeature(features[f]); - } - - /** - * Instantiates a feature vector from example arrays and lexicons. - * - * @param ex The example array. - * @param lex The feature lexicon. - * @param llex The label lexicon. - **/ - public FeatureVector(Object[] ex, Lexicon lex, Lexicon llex) { - this(); - int[] fs = (int[]) ex[0]; - double[] vs = (double[]) ex[1]; - - for (int i = 0; i < fs.length; ++i) { - Feature f = lex.lookupKey(fs[i]); - Feature ff = f.withStrength(vs[i]); - addFeature(ff == null ? f : ff); - } - - if (ex.length > 2) { - int[] ls = (int[]) ex[2]; - double[] lvs = (double[]) ex[3]; - for (int i = 0; i < ls.length; ++i) { - Feature f = llex.lookupKey(ls[i]); - if (!f.isDiscrete()) f = f.withStrength(lvs[i]); - addLabel(f); - } - } - } - - - /** - * The size of this vector is defined as the size of {@link #features} plus - * the size of {@link #labels}. - * - * @return The size of this vector. - **/ - public int size() { return features.size() + labels.size(); } - /** Returns the size of just the {@link #features} list. */ - public int featuresSize() { return features.size(); } - /** Returns the size of just the {@link #labels} list. */ - public int labelsSize() { return labels.size(); } - - - /** - * Returns the feature at the specified index. - * - * @param index The index of the requested feature. - * @return The feature. - **/ - public Feature getFeature(int index) { return features.get(index); } - - - /** - * Returns the label at the specified index. - * - * @param index The index of the requested label. - * @return The label. - **/ - public Feature getLabel(int index) { return labels.get(index); } - - - /** Returns the value of {@link #weight}. */ - public double getWeight() { return weight; } - - - /** Removes all elements from both {@link #features} and {@link #labels}. 
*/ - public void clear() { - features = new FVector(); - labels = new FVector(); - realCache = null; - } - - - /** Removes all elements from just the {@link #labels} list. */ - public void clearLabels() { labels = new FVector(); } - - - /** Sorts both of the feature lists. */ - public void sort() { - features.sort(); - labels.sort(); - } - - - /** - * Adds a feature to the vector. - * - * @param f The features to be added. - **/ - public void addFeature(Feature f) { - features.add(f); - realCache = null; - } - - - /** - * Adds all the features in another vector to this vector. - * - * @param v The vector whose features are to be added. - **/ - public void addFeatures(FeatureVector v) { - features.addAll(v.features); - realCache = null; - } - - - /** - * Adds a label to the vector. - * - * @param l The label to be added. - **/ - public void addLabel(Feature l) { labels.add(l); } - - - /** - * Adds all the features in another vector (but not the labels in that - * vector) to the labels of this vector. - * - * @param v The vector whose features will become this vector's labels. - **/ - public void addLabels(FeatureVector v) { labels.addAll(v.features); } - - - /** - * Determines whether this vector has any labels. - * - * @return true iff this vector has at least one label. - **/ - public boolean isLabeled() { return labels.size() > 0; } - - - /** - * Converts all of the features in the {@link #features} list to - * {@link RealFeature}s with appropriate strengths. Otherwise, the - * returned feature vector is the same as this one. In particular, the - * {@link #labels} list of the returned vector is a shallow clone of this - * vector's {@link #labels} list. - * - * @return A new feature vector which is the same as this one, except all - * features have been converted to {@link RealFeature}s. 
- **/ - public FeatureVector makeReal() { - if (realCache == null) { - realCache = (FeatureVector) clone(); - int N = realCache.labels.size(); - for (int i = 0; i < N; ++i) - realCache.labels.set(i, realCache.labels.get(i).makeReal()); - N = realCache.features.size(); - for (int i = 0; i < N; ++i) - realCache.features.set(i, realCache.features.get(i).makeReal()); - } - - return realCache; - } - - - /** - * Returns all the values of the features in this vector (not labels) - * arranged in a String array. - * - * @return An array of Strings with all the feature values - * from this vector, or null if there are any - * {@link RealFeature}s in this vector. - **/ - public String[] discreteValueArray() { - String[] result = new String[features.size()]; - for (int i = 0; i < result.length; ++i) - result[i] = features.get(i).getStringValue(); - return result; - } - - - /** - * Returns all the values of the features in this vector (not labels) - * arranged in a double array. - * - * @return An array of doubles with all the feature values - * from this vector, or null if there are any - * {@link DiscreteFeature}s in this vector. - **/ - public double[] realValueArray() { - double[] result = new double[features.size()]; - for (int i = 0; i < result.length; ++i) - result[i] = features.get(i).getStrength(); - return result; - } - - - /** - * Returns the first feature in {@link #features}. - * - * @return The first feature, or null if there aren't any. - **/ - public Feature firstFeature() { return features.get(0); } - - - /** Removes and returns the first feature in {@link #features}. * / - public Feature removeFirstFeature() { - realCache = null; - return (Feature) features.removeFirst(); - } - */ - - - /** - * Returns the first feature in {@link #labels}. - * - * @return The first label, or null if there aren't any. - **/ - public Feature firstLabel() { return labels.get(0); } - - - /** - * Returns the square of the magnitude of the feature vector. 
- * - * @return The square of the magnitude of the feature vector. - **/ - public double L2NormSquared() { - double sum = 0; - int N = features.size(); - - for (int i = 0; i < N; ++i) { - double val = features.get(i).getStrength(); - sum += val * val; - } - - return sum; - } - - - /** - * Returns the square of the magnitude of the given vector. - * - * @param exampleValues A vector. - * @return The square of the magnitude of the given vector. - **/ - public static double L2NormSquared(double[] exampleValues) { - double sum = 0; - for (int i = 0; i < exampleValues.length; i++) - sum += exampleValues[i] * exampleValues[i]; - return sum; - } - - - /** - * The hash code for a FeatureVector is simply the sum of the - * hash codes of the features and the labels. - * - * @return The hash code of this vector. - **/ - public int hashCode() { - int result = 0; - int N = features.size(); - for (int i = 0; i < N; ++i) - result = 17 * result + features.get(i).hashCode(); - N = labels.size(); - for (int i = 0; i < N; ++i) - result = 31 * result + labels.get(i).hashCode(); - return result; - } - - - /** - * Two FeatureVectors are equivalent if they contain the same - * features and labels, as defined by {@link Feature} equivalence. - * - * @param o The object to compare with this FeatureVector for - * equality. - * @return True iff o is a FeatureVector - * equivalent with this vector as defined above. - **/ - public boolean equals(Object o) { - if (!(o instanceof FeatureVector)) return false; - FeatureVector v = (FeatureVector) o; - return features.equals(v.features) && labels.equals(v.labels); - } - - - /** - * Returns a sorted map where the key is the feature index and the value is - * the feature value. If there are multiple occurrences of the same - * feature, then the corresponding values are summed up. - * - * @param features The feature indices. - * @param values The feature values. - * @return The sorted map. 
- **/ - public static SortedMap getSortedMap(int[] features, double[] values) { - SortedMap map = new TreeMap(); - - for (int i = 0; i < features.length; i++) { - Integer key = Integer.valueOf(features[i]); - - Object value = map.get(key); - if (value == null) map.put(key, Double.valueOf(values[i])); - else - map.put(key, - Double.valueOf(((Double) value).doubleValue() + values[i])); - } - - return map; - } - - - /** - * Computes the dot product of the 2 argument vectors. - * - * @param firstFeatures The first feature vector's indices. - * @param firstValues The first feature vector's values. - * @param secondFeatures The second feature vector's indices. - * @param secondValues The second feature vector's values. - * @return The dot product. - **/ - public static double dot(int[] firstFeatures, double[] firstValues, - int[] secondFeatures, double[] secondValues) { - Set firstFeatureValueSet = - getSortedMap(firstFeatures, firstValues).entrySet(); - Set secondFeatureValueSet = - getSortedMap(secondFeatures, secondValues).entrySet(); - - double result = 0.0; - - try { - Iterator firstIterator = firstFeatureValueSet.iterator(); - Iterator secondIterator = secondFeatureValueSet.iterator(); - - Map.Entry firstEntry = (Map.Entry) firstIterator.next(); - Map.Entry secondEntry = (Map.Entry) secondIterator.next(); - - while(true) { - int firstEntryKey = ((Integer) firstEntry.getKey()).intValue(); - int secondEntryKey = ((Integer) secondEntry.getKey()).intValue(); - - if (firstEntryKey == secondEntryKey) { - result += ((Double) firstEntry.getValue()).doubleValue() - * ((Double) secondEntry.getValue()).doubleValue(); - - firstEntry = (Map.Entry) firstIterator.next(); - secondEntry = (Map.Entry) secondIterator.next(); +public class FeatureVector implements Cloneable, Serializable { + /** Stores non-label features. */ + protected FVector features; + /** Stores labels. */ + protected FVector labels; + /** With this variable, the user can weight the entire vector. 
*/ + protected double weight; + /** Caches the result of the {@link #makeReal()} method. */ + protected FeatureVector realCache; + + + /** Simply instantiates the member variables. */ + public FeatureVector() { + features = new FVector(); + labels = new FVector(); + weight = 1; + } + + /** + * Creates the vector and adds the given feature to it. + * + * @param f A feature to start this vector off with. + **/ + public FeatureVector(Feature f) { + this(); + addFeature(f); + } + + /** + * Creates the vector and adds the given features to it. + * + * @param features A feature array to start this vector off with. + **/ + public FeatureVector(Feature[] features) { + this(); + for (int f = 0; f < features.length; f++) + addFeature(features[f]); + } + /** + * Instantiates a feature vector from example arrays and lexicons. + * + * @param ex The example array. + * @param lex The feature lexicon. + * @param llex The label lexicon. + **/ + public FeatureVector(Object[] ex, Lexicon lex, Lexicon llex) { + this(); + int[] fs = (int[]) ex[0]; + double[] vs = (double[]) ex[1]; + + for (int i = 0; i < fs.length; ++i) { + Feature f = lex.lookupKey(fs[i]); + Feature ff = f.withStrength(vs[i]); + addFeature(ff == null ? f : ff); } - else if (firstEntryKey < secondEntryKey) - firstEntry = (Map.Entry) firstIterator.next(); - else - secondEntry = (Map.Entry) secondIterator.next(); - } - } - catch (NoSuchElementException nsee) { - // Program reaches here when one of the iterator.next() in the above - // try catch block leads to this exception, and so we are done - // computing the dot product. - } - - return result; - } - - - /** - * Take the dot product of two feature vectors. - * - * @param vector The feature vector to take the dot product with. - * @return The dot product of this feature vector and vector. 
- **/ - public double dot(FeatureVector vector) { - if (features.size() == 0 || vector.features.size() == 0) return 0; - FVector v1 = (FVector) features.clone(); - FVector v2 = (FVector) vector.features.clone(); - - v1.sort(); - v2.sort(); - - double res = 0; - int i = 0, j = 0; - - Feature f1 = v1.get(0); - Feature f2 = v2.get(0); - - while (f1 != null && f2 != null) { - if (f1.equals(f2)) { - res += f1.getStrength() * f2.getStrength(); - f1 = v1.get(++i); - f2 = v2.get(++j); - } - else if (f1.compareTo(f2) < 0) f1 = v1.get(++i); - else f2 = v2.get(++j); - } - - return res; - } - - - /** - * Two FeatureVectors have equal value if they contain the - * same number of {@link Feature}s and if the values of those - * {@link Feature}s are pair-wise equivalent according to the - * {@link Feature#valueEquals(String)} method. - * - * @param vector The vector with which to test equivalence. - * @return true iff the two vectors are "value equivalent" as - * defined above. - **/ - public boolean valueEquals(FeatureVector vector) { - if (features.size() != vector.features.size() - || labels.size() != vector.labels.size()) - return false; - int N = features.size(); - for (int i = 0; i < N; ++i) - if (!features.get(i) - .valueEquals(vector.features.get(i).getStringValue())) - return false; - N = labels.size(); - for (int i = 0; i < N; ++i) - if (!labels.get(i).valueEquals(vector.labels.get(i).getStringValue())) - return false; - return true; - } - - - /** - * Creates a string representation of this FeatureVector. A - * comma separated list of labels appears first, surrounded by square - * brackets. Then follows a comma separated list of features. - * - * @param buffer The buffer in which to create the representation. - **/ - public void write(StringBuffer buffer) { write(buffer, true); } - - - /** - * Creates a string representation of this FeatureVector. A - * comma separated list of labels appears first, surrounded by square - * brackets. 
Then follows a comma separated list of features. - * - * @param buffer The buffer in which to create the representation. - * @param packages Whether or not to print package names. - **/ - public void write(StringBuffer buffer, boolean packages) { - buffer.append("["); - int N = labels.size(); - - if (N > 0) { - if (packages) labels.get(0).write(buffer); - else labels.get(0).writeNoPackage(buffer); - - for (int i = 1; i < N; ++i) { - buffer.append(", "); - if (packages) labels.get(i).write(buffer); - else labels.get(i).writeNoPackage(buffer); - } - } - - buffer.append("]"); - N = features.size(); - - if (N > 0) { - buffer.append(" "); - if (packages) features.get(0).write(buffer); - else features.get(0).writeNoPackage(buffer); - - for (int i = 1; i < N; ++i) { - buffer.append(", "); - if (packages) features.get(i).write(buffer); - else features.get(i).writeNoPackage(buffer); - } - } - } - - - /** - * Returns the string representation of this FeatureVector as - * created by {@link #write(StringBuffer)}. - **/ - public String toString() { - StringBuffer result = new StringBuffer(); - write(result); - return result.toString(); - } - - - /** - * Returns the string representation of this FeatureVector - * like {@link #toString()} except without package names. - **/ - public String toStringNoPackage() { - StringBuffer result = new StringBuffer(); - write(result, false); - return result.toString(); - } - - - /** - * Writes a binary representation of the feature vector. - * - * @param out The output stream. - **/ - public void write(ExceptionlessOutputStream out) { - out.writeDouble(weight); - features.write(out); - labels.write(out); - } - - - /** - * Reads the binary representation of a feature vector from the specified - * stream, overwriting the contents of this vector. - * - * @param in The input stream. 
- **/ - public void read(ExceptionlessInputStream in) { - realCache = null; - weight = in.readDouble(); - features = new FVector(); - features.read(in); - labels = new FVector(); - labels.read(in); - } - - - /** - * Returns a shallow clone of this vector; the vectors are cloned, but - * their elements aren't. - **/ - public Object clone() { - FeatureVector clone = new FeatureVector(); - clone.features = (FVector) features.clone(); - clone.labels = (FVector) labels.clone(); - clone.weight = weight; - return clone; - } -} + if (ex.length > 2) { + int[] ls = (int[]) ex[2]; + double[] lvs = (double[]) ex[3]; + for (int i = 0; i < ls.length; ++i) { + Feature f = llex.lookupKey(ls[i]); + if (!f.isDiscrete()) + f = f.withStrength(lvs[i]); + addLabel(f); + } + } + } + + + /** + * The size of this vector is defined as the size of {@link #features} plus the size of + * {@link #labels}. + * + * @return The size of this vector. + **/ + public int size() { + return features.size() + labels.size(); + } + + /** Returns the size of just the {@link #features} list. */ + public int featuresSize() { + return features.size(); + } + + /** Returns the size of just the {@link #labels} list. */ + public int labelsSize() { + return labels.size(); + } + + + /** + * Returns the feature at the specified index. + * + * @param index The index of the requested feature. + * @return The feature. + **/ + public Feature getFeature(int index) { + return features.get(index); + } + + + /** + * Returns the label at the specified index. + * + * @param index The index of the requested label. + * @return The label. + **/ + public Feature getLabel(int index) { + return labels.get(index); + } + + + /** Returns the value of {@link #weight}. */ + public double getWeight() { + return weight; + } + + + /** Removes all elements from both {@link #features} and {@link #labels}. 
*/ + public void clear() { + features = new FVector(); + labels = new FVector(); + realCache = null; + } + + + /** Removes all elements from just the {@link #labels} list. */ + public void clearLabels() { + labels = new FVector(); + } + + + /** Sorts both of the feature lists. */ + public void sort() { + features.sort(); + labels.sort(); + } + + + /** + * Adds a feature to the vector. + * + * @param f The features to be added. + **/ + public void addFeature(Feature f) { + features.add(f); + realCache = null; + } + + + /** + * Adds all the features in another vector to this vector. + * + * @param v The vector whose features are to be added. + **/ + public void addFeatures(FeatureVector v) { + features.addAll(v.features); + realCache = null; + } + + + /** + * Adds a label to the vector. + * + * @param l The label to be added. + **/ + public void addLabel(Feature l) { + labels.add(l); + } + + + /** + * Adds all the features in another vector (but not the labels in that vector) to the labels of + * this vector. + * + * @param v The vector whose features will become this vector's labels. + **/ + public void addLabels(FeatureVector v) { + labels.addAll(v.features); + } + + + /** + * Determines whether this vector has any labels. + * + * @return true iff this vector has at least one label. + **/ + public boolean isLabeled() { + return labels.size() > 0; + } + + + /** + * Converts all of the features in the {@link #features} list to {@link RealFeature}s with + * appropriate strengths. Otherwise, the returned feature vector is the same as this one. In + * particular, the {@link #labels} list of the returned vector is a shallow clone of this + * vector's {@link #labels} list. + * + * @return A new feature vector which is the same as this one, except all features have been + * converted to {@link RealFeature}s. 
+ **/ + public FeatureVector makeReal() { + if (realCache == null) { + realCache = (FeatureVector) clone(); + int N = realCache.labels.size(); + for (int i = 0; i < N; ++i) + realCache.labels.set(i, realCache.labels.get(i).makeReal()); + N = realCache.features.size(); + for (int i = 0; i < N; ++i) + realCache.features.set(i, realCache.features.get(i).makeReal()); + } + + return realCache; + } + + + /** + * Returns all the values of the features in this vector (not labels) arranged in a + * String array. + * + * @return An array of Strings with all the feature values from this vector, or + * null if there are any {@link RealFeature}s in this vector. + **/ + public String[] discreteValueArray() { + String[] result = new String[features.size()]; + for (int i = 0; i < result.length; ++i) + result[i] = features.get(i).getStringValue(); + return result; + } + + + /** + * Returns all the values of the features in this vector (not labels) arranged in a + * double array. + * + * @return An array of doubles with all the feature values from this vector, or + * null if there are any {@link DiscreteFeature}s in this vector. + **/ + public double[] realValueArray() { + double[] result = new double[features.size()]; + for (int i = 0; i < result.length; ++i) + result[i] = features.get(i).getStrength(); + return result; + } + + + /** + * Returns the first feature in {@link #features}. + * + * @return The first feature, or null if there aren't any. + **/ + public Feature firstFeature() { + return features.get(0); + } + + + /** + * Removes and returns the first feature in {@link #features}. * / public Feature + * removeFirstFeature() { realCache = null; return (Feature) features.removeFirst(); } + */ + + + /** + * Returns the first feature in {@link #labels}. + * + * @return The first label, or null if there aren't any. + **/ + public Feature firstLabel() { + return labels.get(0); + } + + + /** + * Returns the square of the magnitude of the feature vector. 
+ * + * @return The square of the magnitude of the feature vector. + **/ + public double L2NormSquared() { + double sum = 0; + int N = features.size(); + + for (int i = 0; i < N; ++i) { + double val = features.get(i).getStrength(); + sum += val * val; + } + + return sum; + } + + + /** + * Returns the square of the magnitude of the given vector. + * + * @param exampleValues A vector. + * @return The square of the magnitude of the given vector. + **/ + public static double L2NormSquared(double[] exampleValues) { + double sum = 0; + for (int i = 0; i < exampleValues.length; i++) + sum += exampleValues[i] * exampleValues[i]; + return sum; + } + + + /** + * The hash code for a FeatureVector is simply the sum of the hash codes of the + * features and the labels. + * + * @return The hash code of this vector. + **/ + public int hashCode() { + int result = 0; + int N = features.size(); + for (int i = 0; i < N; ++i) + result = 17 * result + features.get(i).hashCode(); + N = labels.size(); + for (int i = 0; i < N; ++i) + result = 31 * result + labels.get(i).hashCode(); + return result; + } + + + /** + * Two FeatureVectors are equivalent if they contain the same features and labels, + * as defined by {@link Feature} equivalence. + * + * @param o The object to compare with this FeatureVector for equality. + * @return True iff o is a FeatureVector equivalent with this vector + * as defined above. + **/ + public boolean equals(Object o) { + if (!(o instanceof FeatureVector)) + return false; + FeatureVector v = (FeatureVector) o; + return features.equals(v.features) && labels.equals(v.labels); + } + + + /** + * Returns a sorted map where the key is the feature index and the value is the feature value. + * If there are multiple occurrences of the same feature, then the corresponding values are + * summed up. + * + * @param features The feature indices. + * @param values The feature values. + * @return The sorted map. 
+ **/ + public static SortedMap getSortedMap(int[] features, double[] values) { + SortedMap map = new TreeMap(); + + for (int i = 0; i < features.length; i++) { + Integer key = Integer.valueOf(features[i]); + + Object value = map.get(key); + if (value == null) + map.put(key, Double.valueOf(values[i])); + else + map.put(key, Double.valueOf(((Double) value).doubleValue() + values[i])); + } + + return map; + } + + + /** + * Computes the dot product of the 2 argument vectors. + * + * @param firstFeatures The first feature vector's indices. + * @param firstValues The first feature vector's values. + * @param secondFeatures The second feature vector's indices. + * @param secondValues The second feature vector's values. + * @return The dot product. + **/ + public static double dot(int[] firstFeatures, double[] firstValues, int[] secondFeatures, + double[] secondValues) { + Set firstFeatureValueSet = getSortedMap(firstFeatures, firstValues).entrySet(); + Set secondFeatureValueSet = getSortedMap(secondFeatures, secondValues).entrySet(); + + double result = 0.0; + + try { + Iterator firstIterator = firstFeatureValueSet.iterator(); + Iterator secondIterator = secondFeatureValueSet.iterator(); + + Map.Entry firstEntry = (Map.Entry) firstIterator.next(); + Map.Entry secondEntry = (Map.Entry) secondIterator.next(); + + while (true) { + int firstEntryKey = ((Integer) firstEntry.getKey()).intValue(); + int secondEntryKey = ((Integer) secondEntry.getKey()).intValue(); + + if (firstEntryKey == secondEntryKey) { + result += + ((Double) firstEntry.getValue()).doubleValue() + * ((Double) secondEntry.getValue()).doubleValue(); + + firstEntry = (Map.Entry) firstIterator.next(); + secondEntry = (Map.Entry) secondIterator.next(); + + } else if (firstEntryKey < secondEntryKey) + firstEntry = (Map.Entry) firstIterator.next(); + else + secondEntry = (Map.Entry) secondIterator.next(); + } + } catch (NoSuchElementException nsee) { + // Program reaches here when one of the iterator.next() in the 
above + // try catch block leads to this exception, and so we are done + // computing the dot product. + } + + return result; + } + + + /** + * Take the dot product of two feature vectors. + * + * @param vector The feature vector to take the dot product with. + * @return The dot product of this feature vector and vector. + **/ + public double dot(FeatureVector vector) { + if (features.size() == 0 || vector.features.size() == 0) + return 0; + FVector v1 = (FVector) features.clone(); + FVector v2 = (FVector) vector.features.clone(); + + v1.sort(); + v2.sort(); + + double res = 0; + int i = 0, j = 0; + + Feature f1 = v1.get(0); + Feature f2 = v2.get(0); + + while (f1 != null && f2 != null) { + if (f1.equals(f2)) { + res += f1.getStrength() * f2.getStrength(); + f1 = v1.get(++i); + f2 = v2.get(++j); + } else if (f1.compareTo(f2) < 0) + f1 = v1.get(++i); + else + f2 = v2.get(++j); + } + + return res; + } + + + /** + * Two FeatureVectors have equal value if they contain the same number of + * {@link Feature}s and if the values of those {@link Feature}s are pair-wise equivalent + * according to the {@link Feature#valueEquals(String)} method. + * + * @param vector The vector with which to test equivalence. + * @return true iff the two vectors are "value equivalent" as defined above. + **/ + public boolean valueEquals(FeatureVector vector) { + if (features.size() != vector.features.size() || labels.size() != vector.labels.size()) + return false; + int N = features.size(); + for (int i = 0; i < N; ++i) + if (!features.get(i).valueEquals(vector.features.get(i).getStringValue())) + return false; + N = labels.size(); + for (int i = 0; i < N; ++i) + if (!labels.get(i).valueEquals(vector.labels.get(i).getStringValue())) + return false; + return true; + } + + + /** + * Creates a string representation of this FeatureVector. A comma separated list of + * labels appears first, surrounded by square brackets. Then follows a comma separated list of + * features. 
+ * + * @param buffer The buffer in which to create the representation. + **/ + public void write(StringBuffer buffer) { + write(buffer, true); + } + + + /** + * Creates a string representation of this FeatureVector. A comma separated list of + * labels appears first, surrounded by square brackets. Then follows a comma separated list of + * features. + * + * @param buffer The buffer in which to create the representation. + * @param packages Whether or not to print package names. + **/ + public void write(StringBuffer buffer, boolean packages) { + buffer.append("["); + int N = labels.size(); + + if (N > 0) { + if (packages) + labels.get(0).write(buffer); + else + labels.get(0).writeNoPackage(buffer); + + for (int i = 1; i < N; ++i) { + buffer.append(", "); + if (packages) + labels.get(i).write(buffer); + else + labels.get(i).writeNoPackage(buffer); + } + } + + buffer.append("]"); + N = features.size(); + + if (N > 0) { + buffer.append(" "); + if (packages) + features.get(0).write(buffer); + else + features.get(0).writeNoPackage(buffer); + + for (int i = 1; i < N; ++i) { + buffer.append(", "); + if (packages) + features.get(i).write(buffer); + else + features.get(i).writeNoPackage(buffer); + } + } + } + + + /** + * Returns the string representation of this FeatureVector as created by + * {@link #write(StringBuffer)}. + **/ + public String toString() { + StringBuffer result = new StringBuffer(); + write(result); + return result.toString(); + } + + + /** + * Returns the string representation of this FeatureVector like {@link #toString()} + * except without package names. + **/ + public String toStringNoPackage() { + StringBuffer result = new StringBuffer(); + write(result, false); + return result.toString(); + } + + + /** + * Writes a binary representation of the feature vector. + * + * @param out The output stream. 
+ **/ + public void write(ExceptionlessOutputStream out) { + out.writeDouble(weight); + features.write(out); + labels.write(out); + } + + + /** + * Reads the binary representation of a feature vector from the specified stream, overwriting + * the contents of this vector. + * + * @param in The input stream. + **/ + public void read(ExceptionlessInputStream in) { + realCache = null; + weight = in.readDouble(); + features = new FVector(); + features.read(in); + labels = new FVector(); + labels.read(in); + } + + + /** + * Returns a shallow clone of this vector; the vectors are cloned, but their elements aren't. + **/ + public Object clone() { + FeatureVector clone = new FeatureVector(); + clone.features = (FVector) features.clone(); + clone.labels = (FVector) labels.clone(); + clone.weight = weight; + return clone; + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/FeatureVectorReturner.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/FeatureVectorReturner.java index dc8bac0f..9e9974f6 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/FeatureVectorReturner.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/FeatureVectorReturner.java @@ -1,66 +1,70 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.classify; /** - * This classifier expects FeatureVectors as input, and it - * simply returns them as output. 
- * - * @author Nick Rizzolo + * This classifier expects FeatureVectors as input, and it simply returns them as + * output. + * + * @author Nick Rizzolo **/ -public class FeatureVectorReturner extends Classifier -{ - /** Default constructor. */ - public FeatureVectorReturner() { super("FeatureVectorReturner"); } +public class FeatureVectorReturner extends Classifier { + /** Default constructor. */ + public FeatureVectorReturner() { + super("FeatureVectorReturner"); + } - /** - * This method makes one or more decisions about a single object, returning - * those decisions as Features in a vector. - * - * @param o The object to make decisions about. - * @return A vector of Features about the input object. - **/ - public FeatureVector classify(Object o) { - FeatureVector result = null; + /** + * This method makes one or more decisions about a single object, returning those decisions as + * Features in a vector. + * + * @param o The object to make decisions about. + * @return A vector of Features about the input object. + **/ + public FeatureVector classify(Object o) { + FeatureVector result = null; - try { result = (FeatureVector) o; } - catch (ClassCastException e) { - System.err.println("LBJava ERROR: FeatureVectorReturner received a '" - + o.getClass().getName() + "' as input."); - System.exit(1); - } + try { + result = (FeatureVector) o; + } catch (ClassCastException e) { + System.err.println("LBJava ERROR: FeatureVectorReturner received a '" + + o.getClass().getName() + "' as input."); + System.exit(1); + } - return result; - } + return result; + } - /** - * Returns a string describing the input type of this classifier. - * - * @return A string describing the input type of this classifier. - **/ - public String getInputType() { return "edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector"; } + /** + * Returns a string describing the input type of this classifier. + * + * @return A string describing the input type of this classifier. 
+ **/ + public String getInputType() { + return "edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector"; + } - /** - * Returns a string describing the output type of this classifier. - * - * @return A string describing the output type of this classifier. - **/ - public String getOutputType() { return "mixed%"; } + /** + * Returns a string describing the output type of this classifier. + * + * @return A string describing the output type of this classifier. + **/ + public String getOutputType() { + return "mixed%"; + } - /** Simply returns the string "FeatureVectorReturner". */ - public String toString() { return "FeatureVectorReturner"; } + /** Simply returns the string "FeatureVectorReturner". */ + public String toString() { + return "FeatureVectorReturner"; + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/LabelVectorReturner.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/LabelVectorReturner.java index c987b61d..1d4c8a33 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/LabelVectorReturner.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/LabelVectorReturner.java @@ -1,71 +1,74 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.classify; /** - * This classifier expects a {@link FeatureVector} as input, and it returns - * the contents of its {@link FeatureVector#labels labels} list in a new - * {@link FeatureVector} as output. - * - * @author Nick Rizzolo + * This classifier expects a {@link FeatureVector} as input, and it returns the contents of its + * {@link FeatureVector#labels labels} list in a new {@link FeatureVector} as output. + * + * @author Nick Rizzolo **/ -public class LabelVectorReturner extends Classifier -{ - /** Default constructor. */ - public LabelVectorReturner() { super("LabelVectorReturner"); } +public class LabelVectorReturner extends Classifier { + /** Default constructor. */ + public LabelVectorReturner() { + super("LabelVectorReturner"); + } - /** - * This method makes one or more decisions about a single object, returning - * those decisions as Features in a vector. - * - * @param o The object to make decisions about. - * @return A vector of Features about the input object. - **/ - public FeatureVector classify(Object o) { - FeatureVector vector = null; + /** + * This method makes one or more decisions about a single object, returning those decisions as + * Features in a vector. + * + * @param o The object to make decisions about. + * @return A vector of Features about the input object. 
+ **/ + public FeatureVector classify(Object o) { + FeatureVector vector = null; - try { vector = (FeatureVector) o; } - catch (ClassCastException e) { - System.err.println("LBJava ERROR: LabelVectorReturner received a '" - + o.getClass().getName() + "' as input."); - System.exit(1); - } + try { + vector = (FeatureVector) o; + } catch (ClassCastException e) { + System.err.println("LBJava ERROR: LabelVectorReturner received a '" + + o.getClass().getName() + "' as input."); + System.exit(1); + } - FeatureVector result = new FeatureVector(); - int N = vector.labelsSize(); - for (int i = 0; i < N; ++i) - result.addFeature(vector.getLabel(i)); - return result; - } + FeatureVector result = new FeatureVector(); + int N = vector.labelsSize(); + for (int i = 0; i < N; ++i) + result.addFeature(vector.getLabel(i)); + return result; + } - /** - * Returns a string describing the input type of this classifier. - * - * @return A string describing the input type of this classifier. - **/ - public String getInputType() { return "edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector"; } + /** + * Returns a string describing the input type of this classifier. + * + * @return A string describing the input type of this classifier. + **/ + public String getInputType() { + return "edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector"; + } - /** - * Returns a string describing the output type of this classifier. - * - * @return A string describing the output type of this classifier. - **/ - public String getOutputType() { return "mixed%"; } + /** + * Returns a string describing the output type of this classifier. + * + * @return A string describing the output type of this classifier. + **/ + public String getOutputType() { + return "mixed%"; + } - /** Simply returns the string "LabelVectorReturner". */ - public String toString() { return "LabelVectorReturner"; } + /** Simply returns the string "LabelVectorReturner". 
*/ + public String toString() { + return "LabelVectorReturner"; + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/MultiValueComparer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/MultiValueComparer.java index f7faecdb..ee89589c 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/MultiValueComparer.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/MultiValueComparer.java @@ -1,107 +1,98 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.classify; /** - * This classifier applies another classifier to the example object and - * returns a Boolean feature (with value "true" or "false") indicating - * whether a given feature value appeared in the output of the classifier. - * This behavior differs from that of {@link ValueComparer} because it does - * not assume that the given classifier will return only a single feature. - * - * @author Nick Rizzolo + * This classifier applies another classifier to the example object and returns a Boolean feature + * (with value "true" or "false") indicating whether a given feature value appeared in the output of + * the classifier. This behavior differs from that of {@link ValueComparer} because it does not + * assume that the given classifier will return only a single feature. 
+ * + * @author Nick Rizzolo **/ -public class MultiValueComparer extends ValueComparer -{ - /** - * Constructor. - * - * @param c The classifier whose value will be compared. - * @param v The value to compare with. - **/ - public MultiValueComparer(Classifier c, String v) { super(c, v); } +public class MultiValueComparer extends ValueComparer { + /** + * Constructor. + * + * @param c The classifier whose value will be compared. + * @param v The value to compare with. + **/ + public MultiValueComparer(Classifier c, String v) { + super(c, v); + } - /** - * Returns a Boolean feature (with value "true" or "false") indicating - * whether the output of {@link ValueComparer#labeler} applied to the - * argument object contained the feature value referenced by - * {@link ValueComparer#value}. - * - * @param o The object to make decisions about. - * @return A feature vector containing the feature described above. - **/ - public FeatureVector classify(Object o) { - return new FeatureVector(featureValue(o)); - } + /** + * Returns a Boolean feature (with value "true" or "false") indicating whether the output of + * {@link ValueComparer#labeler} applied to the argument object contained the feature value + * referenced by {@link ValueComparer#value}. + * + * @param o The object to make decisions about. + * @return A feature vector containing the feature described above. + **/ + public FeatureVector classify(Object o) { + return new FeatureVector(featureValue(o)); + } - /** - * Returns the classification of the given example object as a single - * feature instead of a {@link FeatureVector}. - * - * @param o The object to classify. - * @return The classification of o as a feature. 
- **/ - public Feature featureValue(Object o) { - short p = shortValue(o); - return - new DiscretePrimitiveStringFeature( - "", "MultiValueComparer", "", DiscreteFeature.BooleanValues[p], p, - (short) 2); - } + /** + * Returns the classification of the given example object as a single feature instead of a + * {@link FeatureVector}. + * + * @param o The object to classify. + * @return The classification of o as a feature. + **/ + public Feature featureValue(Object o) { + short p = shortValue(o); + return new DiscretePrimitiveStringFeature("", "MultiValueComparer", "", + DiscreteFeature.BooleanValues[p], p, (short) 2); + } - /** - * Returns the value of the discrete feature that would be returned by this - * classifier. - * - * @param o The object to classify. - * @return The value of the feature produced for the input object. - **/ - public String discreteValue(Object o) { - return DiscreteFeature.BooleanValues[shortValue(o)]; - } + /** + * Returns the value of the discrete feature that would be returned by this classifier. + * + * @param o The object to classify. + * @return The value of the feature produced for the input object. + **/ + public String discreteValue(Object o) { + return DiscreteFeature.BooleanValues[shortValue(o)]; + } - /** - * Returns the prediction of this classifier as a short that - * acts as a pointer into {@link DiscreteFeature#BooleanValues}. - * - * @param o The object to classify. - * @return The classification of o as a short. - **/ - public short shortValue(Object o) { - boolean prediction = false; - FeatureVector v = labeler.classify(o); - int N = v.featuresSize(); + /** + * Returns the prediction of this classifier as a short that acts as a pointer into + * {@link DiscreteFeature#BooleanValues}. + * + * @param o The object to classify. + * @return The classification of o as a short. 
+ **/ + public short shortValue(Object o) { + boolean prediction = false; + FeatureVector v = labeler.classify(o); + int N = v.featuresSize(); - for (int i = 0; i < N && !prediction; ++i) - prediction = v.getFeature(i).getStringValue().equals(value); + for (int i = 0; i < N && !prediction; ++i) + prediction = v.getFeature(i).getStringValue().equals(value); - return prediction ? (short) 1 : (short) 0; - } + return prediction ? (short) 1 : (short) 0; + } - /** - * The String representation of a ValueComparer - * has the form "ValueComparer(child), - * where child is the String representation of the - * classifier whose value is being compared. - * - * @return A string of the form described above. - **/ - public String toString() { - return "MultiValueComparer(" + labeler + ")"; - } + /** + * The String representation of a ValueComparer has the form + * "ValueComparer(child), where child is the + * String representation of the classifier whose value is being compared. + * + * @return A string of the form described above. + **/ + public String toString() { + return "MultiValueComparer(" + labeler + ")"; + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/RealArrayFeature.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/RealArrayFeature.java index 8eff5572..afffd767 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/RealArrayFeature.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/RealArrayFeature.java @@ -1,299 +1,289 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.classify; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream; import edu.illinois.cs.cogcomp.lbjava.learn.Lexicon; import edu.illinois.cs.cogcomp.lbjava.util.ByteString; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessInputStream; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessOutputStream; - /** - * A real array feature keeps track of its index in the classifier's returned - * array. - * - * @author Nick Rizzolo + * A real array feature keeps track of its index in the classifier's returned array. + * + * @author Nick Rizzolo **/ -public class RealArrayFeature extends RealPrimitiveFeature -{ - /** The feature's index in the returned array it is contained in. */ - protected int arrayIndex; - /** The size of the returned array this feature is contained in. */ - protected int arrayLength; - - - /** - * For internal use only. - * - * @see Feature#readFeature(ExceptionlessInputStream) - **/ - protected RealArrayFeature() { } - - /** - * Sets all member variables. - * - * @param p The new real feature's package. - * @param c The name of the classifier that produced this feature. - * @param id The new real feature's identifier. - * @param v The new real feature's value. - * @param i The index of this feature in the returned array. - * @param l The length of the array this feature is contained in. - **/ - public RealArrayFeature(String p, String c, ByteString id, double v, int i, - int l) { - super(p, c, id, v); - arrayIndex = i; - arrayLength = l; - } - - - /** - * Determines if this feature comes from an array. - * - * @return true. 
- **/ - public boolean fromArray() { return true; } - - - /** - * Return the feature that should be used to index this feature into a - * lexicon. Specifically, we return this feature with a value of 0 so that - * the same features with different real values will map to the same - * object. - * - * @param lexicon The lexicon into which this feature will be indexed. - * @param training Whether or not the learner is currently training. - * @param label The label of the example containing this feature, or -1 - * if we aren't doing per class feature counting. - * @return A feature object appropriate for use as the key of a map. - **/ - public Feature getFeatureKey(Lexicon lexicon, boolean training, int label) { - return - new RealArrayFeature(containingPackage, generatingClassifier, identifier, - 0, arrayIndex, 0); - } - - - /** - * If this feature is an array feature, call this method to set its array - * length; otherwise, this method has no effect. - * - * @param l The new length. - **/ - public void setArrayLength(int l) { arrayLength = l; } - - - /** Returns the array index of this feature. */ - public int getArrayIndex() { return arrayIndex; } - - - /** Returns the length of the feature array that this feature comes from. */ - public int getArrayLength() { return arrayLength; } - - - /** - * Returns a new feature object, the same as this one in all respects - * except the {@link #value} field has been multiplied by the specified - * number. - * - * @param m The multiplier. - * @return A new real feature whose value is the product of this feature's - * value and the specified multiplier. - **/ - public RealFeature multiply(double m) { - return - new RealArrayFeature(containingPackage, generatingClassifier, - identifier, value * m, arrayIndex, arrayLength); - } - - - /** - * Returns a new feature object that's identical to this feature except its - * strength is given by s. - * - * @param s The strength of the new feature. 
- * @return A new feature object as above, or null if this - * feature cannot take the specified strength. - **/ - public Feature withStrength(double s) { - return - new RealArrayFeature(containingPackage, generatingClassifier, - identifier, s, arrayIndex, arrayLength); - } - - - /** - * The hash code of a RealArrayFeature is the sum of the hash - * codes of the containing package, the identifier, the value, and the - * array index. - * - * @return The hash code for this Feature. - **/ - public int hashCode() { return 17 * super.hashCode() + arrayIndex; } - - - /** - * Two RealArrayFeatures are equivalent when their containing - * packages, identifiers, indices, and values are equivalent. - * - * @param o The object with which to compare this Feature. - * @return True iff the parameter is an equivalent Feature. - **/ - public boolean equals(Object o) { - return - super.equals(o) - && (o instanceof RealArrayFeature - ? arrayIndex == ((RealArrayFeature) o).arrayIndex - : arrayIndex == ((RealArrayStringFeature) o).arrayIndex); - } - - - /** - * Some features are functionally equivalent, differing only in the - * encoding of their values; this method will return true iff - * the class of this feature and f are different, but they - * differ only because they encode their values differently. This method - * does not compare the values themselves, however. - * - * @param f Another feature. - * @return See above. - **/ - public boolean classEquivalent(Feature f) { - return f instanceof RealArrayStringFeature; - } - - - /** - * Used to sort features into an order that is convenient both to page - * through and for the lexicon to read off disk. - * - * @param o An object to compare with. - * @return Integers appropriate for sorting features first by package, then - * by identifier, then by array index, then by value. 
- **/ - public int compareTo(Object o) { - int d = compareNameStrings(o); - if (d != 0) return d; - RealArrayFeature f = (RealArrayFeature) o; - d = identifier.compareTo(f.identifier); - if (d != 0) return d; - d = arrayIndex - f.arrayIndex; - if (d != 0) return d; - double difference = value - f.value; - if (difference < 0) return -1; - if (difference > 0) return 1; - return 0; - } - - - /** - * Writes a string representation of this Feature to the - * specified buffer. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - writeNameString(buffer); - buffer.append("["); - buffer.append(arrayIndex); - buffer.append("]("); - buffer.append(value); - buffer.append(")"); - } - - - /** - * Writes a complete binary representation of the feature. - * - * @param out The output stream. - **/ - public void write(ExceptionlessOutputStream out) { - super.write(out); - out.writeInt(arrayIndex); - out.writeInt(arrayLength); - } - - - /** - * Reads the representation of a feaeture with this object's run-time type - * from the given stream, overwriting the data in this object. - * - * @param in The input stream. - **/ - public void read(ExceptionlessInputStream in) { - super.read(in); - arrayIndex = in.readInt(); - arrayLength = in.readInt(); - } - - - /** - * Writes a binary representation of the feature intended for use by a - * lexicon, omitting redundant information when possible. - * - * @param out The output stream. - * @param lex The lexicon out of which this feature is being written. - * @param c The fully qualified name of the assumed class. The runtime - * class of this feature won't be written if it's equivalent to - * c. - * @param p The assumed package string. This feature's package string - * won't be written if it's equivalent to p. - * @param g The assumed classifier name string. This feature's - * classifier name string won't be written if it's equivalent - * to g. - * @param si The assumed identifier as a string. 
If this feature has a - * string identifier, it won't be written if it's equivalent to - * si. - * @param bi The assumed identifier as a byte string. If this feature - * has a byte string identifier, it won't be written if it's - * equivalent to bi. - * @return The name of the runtime type of this feature. - **/ - public String lexWrite(ExceptionlessOutputStream out, Lexicon lex, String c, - String p, String g, String si, ByteString bi) { - String result = super.lexWrite(out, lex, c, p, g, si, bi); - out.writeInt(arrayIndex); - return result; - } - - - /** - * Reads the representation of a feature with this object's run-time type - * as stored by a lexicon, overwriting the data in this object. - * - *

This method is appropriate for reading features as written by - * {@link #lexWrite(ExceptionlessOutputStream,Lexicon,String,String,String,String,ByteString)}. - * - * @param in The input stream. - * @param lex The lexicon we are reading in to. - * @param p The assumed package string. If no package name is given in - * the input stream, the instantiated feature is given this - * package. - * @param g The assumed classifier name string. If no classifier name - * is given in the input stream, the instantiated feature is - * given this classifier name. - * @param si The assumed identifier as a string. If the feature being - * read has a string identifier field and no identifier is - * given in the input stream, the feature is given this - * identifier. - * @param bi The assumed identifier as a byte string. If the feature - * being read has a byte string identifier field and no - * identifier is given in the input stream, the feature is - * given this identifier. - **/ - public void lexRead(ExceptionlessInputStream in, Lexicon lex, String p, - String g, String si, ByteString bi) { - super.lexRead(in, lex, p, g, si, bi); - arrayIndex = in.readInt(); - arrayLength = 0; - } +public class RealArrayFeature extends RealPrimitiveFeature { + /** The feature's index in the returned array it is contained in. */ + protected int arrayIndex; + /** The size of the returned array this feature is contained in. */ + protected int arrayLength; + + + /** + * For internal use only. + * + * @see Feature#readFeature(ExceptionlessInputStream) + **/ + protected RealArrayFeature() {} + + /** + * Sets all member variables. + * + * @param p The new real feature's package. + * @param c The name of the classifier that produced this feature. + * @param id The new real feature's identifier. + * @param v The new real feature's value. + * @param i The index of this feature in the returned array. + * @param l The length of the array this feature is contained in. 
+ **/ + public RealArrayFeature(String p, String c, ByteString id, double v, int i, int l) { + super(p, c, id, v); + arrayIndex = i; + arrayLength = l; + } + + + /** + * Determines if this feature comes from an array. + * + * @return true. + **/ + public boolean fromArray() { + return true; + } + + + /** + * Return the feature that should be used to index this feature into a lexicon. Specifically, we + * return this feature with a value of 0 so that the same features with different real values + * will map to the same object. + * + * @param lexicon The lexicon into which this feature will be indexed. + * @param training Whether or not the learner is currently training. + * @param label The label of the example containing this feature, or -1 if we aren't doing per + * class feature counting. + * @return A feature object appropriate for use as the key of a map. + **/ + public Feature getFeatureKey(Lexicon lexicon, boolean training, int label) { + return new RealArrayFeature(containingPackage, generatingClassifier, identifier, 0, + arrayIndex, 0); + } + + + /** + * If this feature is an array feature, call this method to set its array length; otherwise, + * this method has no effect. + * + * @param l The new length. + **/ + public void setArrayLength(int l) { + arrayLength = l; + } + + + /** Returns the array index of this feature. */ + public int getArrayIndex() { + return arrayIndex; + } + + + /** Returns the length of the feature array that this feature comes from. */ + public int getArrayLength() { + return arrayLength; + } + + + /** + * Returns a new feature object, the same as this one in all respects except the {@link #value} + * field has been multiplied by the specified number. + * + * @param m The multiplier. + * @return A new real feature whose value is the product of this feature's value and the + * specified multiplier. 
+ **/ + public RealFeature multiply(double m) { + return new RealArrayFeature(containingPackage, generatingClassifier, identifier, value * m, + arrayIndex, arrayLength); + } + + + /** + * Returns a new feature object that's identical to this feature except its strength is given by + * s. + * + * @param s The strength of the new feature. + * @return A new feature object as above, or null if this feature cannot take the + * specified strength. + **/ + public Feature withStrength(double s) { + return new RealArrayFeature(containingPackage, generatingClassifier, identifier, s, + arrayIndex, arrayLength); + } + + + /** + * The hash code of a RealArrayFeature is the sum of the hash codes of the + * containing package, the identifier, the value, and the array index. + * + * @return The hash code for this Feature. + **/ + public int hashCode() { + return 17 * super.hashCode() + arrayIndex; + } + + + /** + * Two RealArrayFeatures are equivalent when their containing packages, + * identifiers, indices, and values are equivalent. + * + * @param o The object with which to compare this Feature. + * @return True iff the parameter is an equivalent Feature. + **/ + public boolean equals(Object o) { + return super.equals(o) + && (o instanceof RealArrayFeature ? arrayIndex == ((RealArrayFeature) o).arrayIndex + : arrayIndex == ((RealArrayStringFeature) o).arrayIndex); + } + + + /** + * Some features are functionally equivalent, differing only in the encoding of their values; + * this method will return true iff the class of this feature and f + * are different, but they differ only because they encode their values differently. This method + * does not compare the values themselves, however. + * + * @param f Another feature. + * @return See above. + **/ + public boolean classEquivalent(Feature f) { + return f instanceof RealArrayStringFeature; + } + + + /** + * Used to sort features into an order that is convenient both to page through and for the + * lexicon to read off disk. 
+ * + * @param o An object to compare with. + * @return Integers appropriate for sorting features first by package, then by identifier, then + * by array index, then by value. + **/ + public int compareTo(Object o) { + int d = compareNameStrings(o); + if (d != 0) + return d; + RealArrayFeature f = (RealArrayFeature) o; + d = identifier.compareTo(f.identifier); + if (d != 0) + return d; + d = arrayIndex - f.arrayIndex; + if (d != 0) + return d; + double difference = value - f.value; + if (difference < 0) + return -1; + if (difference > 0) + return 1; + return 0; + } + + + /** + * Writes a string representation of this Feature to the specified buffer. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + writeNameString(buffer); + buffer.append("["); + buffer.append(arrayIndex); + buffer.append("]("); + buffer.append(value); + buffer.append(")"); + } + + + /** + * Writes a complete binary representation of the feature. + * + * @param out The output stream. + **/ + public void write(ExceptionlessOutputStream out) { + super.write(out); + out.writeInt(arrayIndex); + out.writeInt(arrayLength); + } + + + /** + * Reads the representation of a feaeture with this object's run-time type from the given + * stream, overwriting the data in this object. + * + * @param in The input stream. + **/ + public void read(ExceptionlessInputStream in) { + super.read(in); + arrayIndex = in.readInt(); + arrayLength = in.readInt(); + } + + + /** + * Writes a binary representation of the feature intended for use by a lexicon, omitting + * redundant information when possible. + * + * @param out The output stream. + * @param lex The lexicon out of which this feature is being written. + * @param c The fully qualified name of the assumed class. The runtime class of this feature + * won't be written if it's equivalent to c. + * @param p The assumed package string. This feature's package string won't be written if it's + * equivalent to p. 
+ * @param g The assumed classifier name string. This feature's classifier name string won't be + * written if it's equivalent to g. + * @param si The assumed identifier as a string. If this feature has a string identifier, it + * won't be written if it's equivalent to si. + * @param bi The assumed identifier as a byte string. If this feature has a byte string + * identifier, it won't be written if it's equivalent to bi. + * @return The name of the runtime type of this feature. + **/ + public String lexWrite(ExceptionlessOutputStream out, Lexicon lex, String c, String p, + String g, String si, ByteString bi) { + String result = super.lexWrite(out, lex, c, p, g, si, bi); + out.writeInt(arrayIndex); + return result; + } + + + /** + * Reads the representation of a feature with this object's run-time type as stored by a + * lexicon, overwriting the data in this object. + * + *

+ * This method is appropriate for reading features as written by + * {@link #lexWrite(ExceptionlessOutputStream,Lexicon,String,String,String,String,ByteString)}. + * + * @param in The input stream. + * @param lex The lexicon we are reading in to. + * @param p The assumed package string. If no package name is given in the input stream, the + * instantiated feature is given this package. + * @param g The assumed classifier name string. If no classifier name is given in the input + * stream, the instantiated feature is given this classifier name. + * @param si The assumed identifier as a string. If the feature being read has a string + * identifier field and no identifier is given in the input stream, the feature is given + * this identifier. + * @param bi The assumed identifier as a byte string. If the feature being read has a byte + * string identifier field and no identifier is given in the input stream, the feature is + * given this identifier. + **/ + public void lexRead(ExceptionlessInputStream in, Lexicon lex, String p, String g, String si, + ByteString bi) { + super.lexRead(in, lex, p, g, si, bi); + arrayIndex = in.readInt(); + arrayLength = 0; + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/RealArrayStringFeature.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/RealArrayStringFeature.java index d554c4dd..43669a90 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/RealArrayStringFeature.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/RealArrayStringFeature.java @@ -1,320 +1,306 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.classify; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream; import edu.illinois.cs.cogcomp.lbjava.learn.Lexicon; import edu.illinois.cs.cogcomp.lbjava.util.ByteString; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessInputStream; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessOutputStream; - /** - * A real array feature keeps track of its index in the classifier's returned - * array. - * - * @author Nick Rizzolo + * A real array feature keeps track of its index in the classifier's returned array. + * + * @author Nick Rizzolo **/ -public class RealArrayStringFeature extends RealPrimitiveStringFeature -{ - /** The feature's index in the returned array it is contained in. */ - protected int arrayIndex; - /** The size of the returned array this feature is contained in. */ - protected int arrayLength; - - - /** - * For internal use only. - * - * @see Feature#readFeature(ExceptionlessInputStream) - **/ - protected RealArrayStringFeature() { } - - /** - * Sets all member variables. - * - * @param p The new real feature's package. - * @param c The name of the classifier that produced this feature. - * @param id The new real feature's identifier. - * @param v The new real feature's value. - * @param i The index of this feature in the returned array. - * @param l The length of the array this feature is contained in. - **/ - public RealArrayStringFeature(String p, String c, String id, double v, - int i, int l) { - super(p, c, id, v); - arrayIndex = i; - arrayLength = l; - } - - - /** - * Determines if this feature comes from an array. 
- * - * @return true. - **/ - public boolean fromArray() { return true; } - - - /** - * Return the feature that should be used to index this feature into a - * lexicon. Specifically, we return this feature with a value of 0 so that - * the same features with different real values will map to the same - * object. - * - * @param lexicon The lexicon into which this feature will be indexed. - * @param training Whether or not the learner is currently training. - * @param label The label of the example containing this feature, or -1 - * if we aren't doing per class feature counting. - * @return A feature object appropriate for use as the key of a map. - **/ - public Feature getFeatureKey(Lexicon lexicon, boolean training, int label) { - return - new RealArrayStringFeature( - containingPackage, generatingClassifier, identifier, 0, arrayIndex, - 0); - } - - - /** - * If this feature is an array feature, call this method to set its array - * length; otherwise, this method has no effect. - * - * @param l The new length. - **/ - public void setArrayLength(int l) { arrayLength = l; } - - - /** Returns the array index of this feature. */ - public int getArrayIndex() { return arrayIndex; } - - - /** Returns the length of the feature array that this feature comes from. */ - public int getArrayLength() { return arrayLength; } - - - /** - * Returns a new feature object, the same as this one in all respects - * except the {@link #value} field has been multiplied by the specified - * number. - * - * @param m The multiplier. - * @return A new real feature whose value is the product of this feature's - * value and the specified multiplier. - **/ - public RealFeature multiply(double m) { - return - new RealArrayStringFeature( - containingPackage, generatingClassifier, identifier, value * m, - arrayIndex, arrayLength); - } - - - /** - * Returns a new feature object that's identical to this feature except its - * strength is given by s. - * - * @param s The strength of the new feature. 
- * @return A new feature object as above, or null if this - * feature cannot take the specified strength. - **/ - public Feature withStrength(double s) { - return - new RealArrayStringFeature( - containingPackage, generatingClassifier, identifier, s, - arrayIndex, arrayLength); - } - - - /** - * Returns a feature object in which any strings that are being used to - * represent an identifier or value have been encoded in byte strings. - * - * @param e The encoding to use. - * @return A feature object as above; possible this object. - **/ - public Feature encode(String e) { - if (e == null || e == "String") return this; - ByteString id = - identifier.length() == 0 ? ByteString.emptyString - : new ByteString(identifier, e); - return - new RealArrayFeature(containingPackage, generatingClassifier, id, value, - arrayIndex, arrayLength); - } - - - /** - * The hash code of a RealArrayStringFeature is the sum of the - * hash codes of the containing package, the identifier, the value, and the - * array index. - * - * @return The hash code for this Feature. - **/ - public int hashCode() { return 17 * super.hashCode() + arrayIndex; } - - - /** - * Two RealArrayStringFeatures are equivalent when their - * containing packages, identifiers, indices, and values are equivalent. - * - * @param o The object with which to compare this Feature. - * @return True iff the parameter is an equivalent Feature. - **/ - public boolean equals(Object o) { - return - super.equals(o) - && (o instanceof RealArrayStringFeature - ? arrayIndex == ((RealArrayStringFeature) o).arrayIndex - : arrayIndex == ((RealArrayFeature) o).arrayIndex); - } - - - /** - * Some features are functionally equivalent, differing only in the - * encoding of their values; this method will return true iff - * the class of this feature and f are different, but they - * differ only because they encode their values differently. This method - * does not compare the values themselves, however. - * - * @param f Another feature. 
- * @return See above. - **/ - public boolean classEquivalent(Feature f) { - return f instanceof RealArrayFeature; - } - - - /** - * Used to sort features into an order that is convenient both to page - * through and for the lexicon to read off disk. - * - * @param o An object to compare with. - * @return Integers appropriate for sorting features first by package, then - * by identifier, then by array index, then by value. - **/ - public int compareTo(Object o) { - int d = compareNameStrings(o); - if (d != 0) return d; - RealArrayStringFeature f = (RealArrayStringFeature) o; - d = identifier.compareTo(f.identifier); - if (d != 0) return d; - d = arrayIndex - f.arrayIndex; - if (d != 0) return d; - double difference = value - f.value; - if (difference < 0) return -1; - if (difference > 0) return 1; - return 0; - } - - - /** - * Writes a string representation of this Feature to the - * specified buffer. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - writeNameString(buffer); - buffer.append("["); - buffer.append(arrayIndex); - buffer.append("]("); - buffer.append(value); - buffer.append(")"); - } - - - /** - * Writes a complete binary representation of the feature. - * - * @param out The output stream. - **/ - public void write(ExceptionlessOutputStream out) { - super.write(out); - out.writeInt(arrayIndex); - out.writeInt(arrayLength); - } - - - /** - * Reads the representation of a feaeture with this object's run-time type - * from the given stream, overwriting the data in this object. - * - * @param in The input stream. - **/ - public void read(ExceptionlessInputStream in) { - super.read(in); - arrayIndex = in.readInt(); - arrayLength = in.readInt(); - } - - - /** - * Writes a binary representation of the feature intended for use by a - * lexicon, omitting redundant information when possible. - * - * @param out The output stream. - * @param lex The lexicon out of which this feature is being written. 
- * @param c The fully qualified name of the assumed class. The runtime - * class of this feature won't be written if it's equivalent to - * c. - * @param p The assumed package string. This feature's package string - * won't be written if it's equivalent to p. - * @param g The assumed classifier name string. This feature's - * classifier name string won't be written if it's equivalent - * to g. - * @param si The assumed identifier as a string. If this feature has a - * string identifier, it won't be written if it's equivalent to - * si. - * @param bi The assumed identifier as a byte string. If this feature - * has a byte string identifier, it won't be written if it's - * equivalent to bi. - * @return The name of the runtime type of this feature. - **/ - public String lexWrite(ExceptionlessOutputStream out, Lexicon lex, String c, - String p, String g, String si, ByteString bi) { - String result = super.lexWrite(out, lex, c, p, g, si, bi); - out.writeInt(arrayIndex); - return result; - } - - - /** - * Reads the representation of a feature with this object's run-time type - * as stored by a lexicon, overwriting the data in this object. - * - *

This method is appropriate for reading features as written by - * {@link #lexWrite(ExceptionlessOutputStream,Lexicon,String,String,String,String,ByteString)}. - * - * @param in The input stream. - * @param lex The lexicon we are reading in to. - * @param p The assumed package string. If no package name is given in - * the input stream, the instantiated feature is given this - * package. - * @param g The assumed classifier name string. If no classifier name - * is given in the input stream, the instantiated feature is - * given this classifier name. - * @param si The assumed identifier as a string. If the feature being - * read has a string identifier field and no identifier is - * given in the input stream, the feature is given this - * identifier. - * @param bi The assumed identifier as a byte string. If the feature - * being read has a byte string identifier field and no - * identifier is given in the input stream, the feature is - * given this identifier. - **/ - public void lexRead(ExceptionlessInputStream in, Lexicon lex, String p, - String g, String si, ByteString bi) { - super.lexRead(in, lex, p, g, si, bi); - arrayIndex = in.readInt(); - arrayLength = 0; - } +public class RealArrayStringFeature extends RealPrimitiveStringFeature { + /** The feature's index in the returned array it is contained in. */ + protected int arrayIndex; + /** The size of the returned array this feature is contained in. */ + protected int arrayLength; + + + /** + * For internal use only. + * + * @see Feature#readFeature(ExceptionlessInputStream) + **/ + protected RealArrayStringFeature() {} + + /** + * Sets all member variables. + * + * @param p The new real feature's package. + * @param c The name of the classifier that produced this feature. + * @param id The new real feature's identifier. + * @param v The new real feature's value. + * @param i The index of this feature in the returned array. + * @param l The length of the array this feature is contained in. 
+ **/ + public RealArrayStringFeature(String p, String c, String id, double v, int i, int l) { + super(p, c, id, v); + arrayIndex = i; + arrayLength = l; + } + + + /** + * Determines if this feature comes from an array. + * + * @return true. + **/ + public boolean fromArray() { + return true; + } + + + /** + * Return the feature that should be used to index this feature into a lexicon. Specifically, we + * return this feature with a value of 0 so that the same features with different real values + * will map to the same object. + * + * @param lexicon The lexicon into which this feature will be indexed. + * @param training Whether or not the learner is currently training. + * @param label The label of the example containing this feature, or -1 if we aren't doing per + * class feature counting. + * @return A feature object appropriate for use as the key of a map. + **/ + public Feature getFeatureKey(Lexicon lexicon, boolean training, int label) { + return new RealArrayStringFeature(containingPackage, generatingClassifier, identifier, 0, + arrayIndex, 0); + } + + + /** + * If this feature is an array feature, call this method to set its array length; otherwise, + * this method has no effect. + * + * @param l The new length. + **/ + public void setArrayLength(int l) { + arrayLength = l; + } + + + /** Returns the array index of this feature. */ + public int getArrayIndex() { + return arrayIndex; + } + + + /** Returns the length of the feature array that this feature comes from. */ + public int getArrayLength() { + return arrayLength; + } + + + /** + * Returns a new feature object, the same as this one in all respects except the {@link #value} + * field has been multiplied by the specified number. + * + * @param m The multiplier. + * @return A new real feature whose value is the product of this feature's value and the + * specified multiplier. 
+ **/ + public RealFeature multiply(double m) { + return new RealArrayStringFeature(containingPackage, generatingClassifier, identifier, + value * m, arrayIndex, arrayLength); + } + + + /** + * Returns a new feature object that's identical to this feature except its strength is given by + * s. + * + * @param s The strength of the new feature. + * @return A new feature object as above, or null if this feature cannot take the + * specified strength. + **/ + public Feature withStrength(double s) { + return new RealArrayStringFeature(containingPackage, generatingClassifier, identifier, s, + arrayIndex, arrayLength); + } + + + /** + * Returns a feature object in which any strings that are being used to represent an identifier + * or value have been encoded in byte strings. + * + * @param e The encoding to use. + * @return A feature object as above; possible this object. + **/ + public Feature encode(String e) { + if (e == null || e == "String") + return this; + ByteString id = + identifier.length() == 0 ? ByteString.emptyString : new ByteString(identifier, e); + return new RealArrayFeature(containingPackage, generatingClassifier, id, value, arrayIndex, + arrayLength); + } + + + /** + * The hash code of a RealArrayStringFeature is the sum of the hash codes of the + * containing package, the identifier, the value, and the array index. + * + * @return The hash code for this Feature. + **/ + public int hashCode() { + return 17 * super.hashCode() + arrayIndex; + } + + + /** + * Two RealArrayStringFeatures are equivalent when their containing packages, + * identifiers, indices, and values are equivalent. + * + * @param o The object with which to compare this Feature. + * @return True iff the parameter is an equivalent Feature. + **/ + public boolean equals(Object o) { + return super.equals(o) + && (o instanceof RealArrayStringFeature ? 
arrayIndex == ((RealArrayStringFeature) o).arrayIndex + : arrayIndex == ((RealArrayFeature) o).arrayIndex); + } + + + /** + * Some features are functionally equivalent, differing only in the encoding of their values; + * this method will return true iff the class of this feature and f + * are different, but they differ only because they encode their values differently. This method + * does not compare the values themselves, however. + * + * @param f Another feature. + * @return See above. + **/ + public boolean classEquivalent(Feature f) { + return f instanceof RealArrayFeature; + } + + + /** + * Used to sort features into an order that is convenient both to page through and for the + * lexicon to read off disk. + * + * @param o An object to compare with. + * @return Integers appropriate for sorting features first by package, then by identifier, then + * by array index, then by value. + **/ + public int compareTo(Object o) { + int d = compareNameStrings(o); + if (d != 0) + return d; + RealArrayStringFeature f = (RealArrayStringFeature) o; + d = identifier.compareTo(f.identifier); + if (d != 0) + return d; + d = arrayIndex - f.arrayIndex; + if (d != 0) + return d; + double difference = value - f.value; + if (difference < 0) + return -1; + if (difference > 0) + return 1; + return 0; + } + + + /** + * Writes a string representation of this Feature to the specified buffer. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + writeNameString(buffer); + buffer.append("["); + buffer.append(arrayIndex); + buffer.append("]("); + buffer.append(value); + buffer.append(")"); + } + + + /** + * Writes a complete binary representation of the feature. + * + * @param out The output stream. 
+ **/ + public void write(ExceptionlessOutputStream out) { + super.write(out); + out.writeInt(arrayIndex); + out.writeInt(arrayLength); + } + + + /** + * Reads the representation of a feaeture with this object's run-time type from the given + * stream, overwriting the data in this object. + * + * @param in The input stream. + **/ + public void read(ExceptionlessInputStream in) { + super.read(in); + arrayIndex = in.readInt(); + arrayLength = in.readInt(); + } + + + /** + * Writes a binary representation of the feature intended for use by a lexicon, omitting + * redundant information when possible. + * + * @param out The output stream. + * @param lex The lexicon out of which this feature is being written. + * @param c The fully qualified name of the assumed class. The runtime class of this feature + * won't be written if it's equivalent to c. + * @param p The assumed package string. This feature's package string won't be written if it's + * equivalent to p. + * @param g The assumed classifier name string. This feature's classifier name string won't be + * written if it's equivalent to g. + * @param si The assumed identifier as a string. If this feature has a string identifier, it + * won't be written if it's equivalent to si. + * @param bi The assumed identifier as a byte string. If this feature has a byte string + * identifier, it won't be written if it's equivalent to bi. + * @return The name of the runtime type of this feature. + **/ + public String lexWrite(ExceptionlessOutputStream out, Lexicon lex, String c, String p, + String g, String si, ByteString bi) { + String result = super.lexWrite(out, lex, c, p, g, si, bi); + out.writeInt(arrayIndex); + return result; + } + + + /** + * Reads the representation of a feature with this object's run-time type as stored by a + * lexicon, overwriting the data in this object. + * + *

+ * This method is appropriate for reading features as written by + * {@link #lexWrite(ExceptionlessOutputStream,Lexicon,String,String,String,String,ByteString)}. + * + * @param in The input stream. + * @param lex The lexicon we are reading in to. + * @param p The assumed package string. If no package name is given in the input stream, the + * instantiated feature is given this package. + * @param g The assumed classifier name string. If no classifier name is given in the input + * stream, the instantiated feature is given this classifier name. + * @param si The assumed identifier as a string. If the feature being read has a string + * identifier field and no identifier is given in the input stream, the feature is given + * this identifier. + * @param bi The assumed identifier as a byte string. If the feature being read has a byte + * string identifier field and no identifier is given in the input stream, the feature is + * given this identifier. + **/ + public void lexRead(ExceptionlessInputStream in, Lexicon lex, String p, String g, String si, + ByteString bi) { + super.lexRead(in, lex, p, g, si, bi); + arrayIndex = in.readInt(); + arrayLength = 0; + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/RealConjunctiveFeature.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/RealConjunctiveFeature.java index b389a2d9..f962d2ec 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/RealConjunctiveFeature.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/RealConjunctiveFeature.java @@ -1,418 +1,408 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.classify; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream; import edu.illinois.cs.cogcomp.lbjava.learn.ChildLexicon; import edu.illinois.cs.cogcomp.lbjava.learn.Lexicon; import edu.illinois.cs.cogcomp.lbjava.util.ByteString; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessInputStream; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessOutputStream; - /** - * Represents the conjunction of two features. - * - * @author Nick Rizzolo + * Represents the conjunction of two features. + * + * @author Nick Rizzolo **/ -public class RealConjunctiveFeature extends RealFeature -{ - /** One feature argument. */ - protected Feature left; - /** The other feature argument. */ - protected Feature right; - - - /** - * For internal use only. - * - * @see Feature#readFeature(ExceptionlessInputStream) - **/ - public RealConjunctiveFeature() { } - - /** - * Creates a new conjunctive feature taking the package and name of the - * given classifier. - * - * @param c The classifier from which package and name information is - * taken. - * @param l One feature argument. - * @param r The other feature argument. - **/ - public RealConjunctiveFeature(Classifier c, Feature l, Feature r) { - this(c.containingPackage, c.name, l, r); - } - - /** - * Creates a new conjunctive feature. - * - * @param p The new discrete feature's package. - * @param c The name of the classifier that produced this feature. - * @param l One feature argument. - * @param r The other feature argument. 
- **/ - public RealConjunctiveFeature(String p, String c, Feature l, Feature r) { - super(p, c); - left = l; - right = r; - } - - - /** - * Determines if this feature is conjunctive. - * - * @return true iff this feature is conjunctive. - **/ - public boolean isConjunctive() { return true; } - - - /** Returns the value of {@link #left}. */ - public Feature getLeft() { return left; } - /** Returns the value of {@link #right}. */ - public Feature getRight() { return right; } - - - /** - * Retrieves this feature's identifier as a string. - * - * @return The empty string, since conjunctive features don't have - * identifiers. - **/ - public String getStringIdentifier() { return ""; } - - - /** - * Retrieves this feature's identifier as a byte string. - * - * @return {@link ByteString#emptyString}, since conjunctive features don't - * have identifiers. - **/ - public ByteString getByteStringIdentifier() { - return ByteString.emptyString; - } - - - /** - * The depth of a feature is one more than the maximum depth of any of its - * children, or 0 if it has no children. - * - * @return The depth of this feature as described above. - **/ - public int depth() { return Math.max(left.depth(), right.depth()) + 1; } - - - /** - * Returns the strength of this feature if it were to be placed in a - * mathematical vector space. - **/ - public double getStrength() { - return - (left.isDiscrete() ? 1 : left.getStrength()) - * (right.isDiscrete() ? 1 : right.getStrength()); - } - - - /** - * Return the feature that should be used to index this feature into a - * lexicon. - * - * @param lexicon The lexicon into which this feature will be indexed. - * @param training Whether or not the learner is currently training. - * @param label The label of the example containing this feature, or -1 - * if we aren't doing per class feature counting. - * @return A feature object appropriate for use as the key of a map. 
- **/ - public Feature getFeatureKey(Lexicon lexicon, boolean training, int label) { - return - new RealConjunctiveFeature( - containingPackage, generatingClassifier, - getArgumentKey(left, lexicon, training, label), - getArgumentKey(right, lexicon, training, label)); - } - - - /** - * A helper method for {@link #getFeatureKey(Lexicon,boolean,int)}, this - * method computes the feature keys corresponding to the arguments of the - * conjunction. Here, we lookup the arguments to the conjunction in the - * lexicon so that their counts are never less than the conjunction's, and - * we return the actual feature object that's already a key in the lexicon. - * - * @param f The argument feature for which a key will be computed. - * @param lexicon The lexicon into which this feature will be indexed. - * @param training Whether or not the learner is currently training. - * @param label The label of the example containing this feature, or -1 - * if we aren't doing per class feature counting. - * @return A feature object appropriate for use as the key of a map. - **/ - protected Feature getArgumentKey(Feature f, Lexicon lexicon, - boolean training, int label) { - if (f.isDiscrete()) { - if (!training) return f; - if (!f.isPrimitive()) f = f.getFeatureKey(lexicon, true, label); +public class RealConjunctiveFeature extends RealFeature { + /** One feature argument. */ + protected Feature left; + /** The other feature argument. */ + protected Feature right; + + + /** + * For internal use only. + * + * @see Feature#readFeature(ExceptionlessInputStream) + **/ + public RealConjunctiveFeature() {} + + /** + * Creates a new conjunctive feature taking the package and name of the given classifier. + * + * @param c The classifier from which package and name information is taken. + * @param l One feature argument. + * @param r The other feature argument. 
+ **/ + public RealConjunctiveFeature(Classifier c, Feature l, Feature r) { + this(c.containingPackage, c.name, l, r); } - else { - f = f.getFeatureKey(lexicon, training, label); - if (!training) return f; + + /** + * Creates a new conjunctive feature. + * + * @param p The new discrete feature's package. + * @param c The name of the classifier that produced this feature. + * @param l One feature argument. + * @param r The other feature argument. + **/ + public RealConjunctiveFeature(String p, String c, Feature l, Feature r) { + super(p, c); + left = l; + right = r; } - return lexicon.getChildFeature(f, label); - } - - - /** - * Returns a new feature object, the same as this one in all respects - * except the value has been multiplied by the specified number. - * - * @param m The multiplier. - * @return A new real feature whose value is the product of this feature's - * value and the specified multiplier. - **/ - public RealFeature multiply(double m) { - RealConjunctiveFeature result = (RealConjunctiveFeature) clone(); - if (!left.isDiscrete()) result.left = ((RealFeature) left).multiply(m); - else result.right = ((RealFeature) right).multiply(m); - return result; - } - - - /** - * Returns a new feature object that's identical to this feature except its - * strength is given by s. - * - * @param s The strength of the new feature. - * @return A new feature object as above, or null if this - * feature cannot take the specified strength. - **/ - public Feature withStrength(double s) { - RealConjunctiveFeature result = (RealConjunctiveFeature) clone(); - if (!left.isDiscrete()) { - result.left = left.withStrength(s); - if (!right.isDiscrete()) result.right = right.withStrength(1); + + /** + * Determines if this feature is conjunctive. + * + * @return true iff this feature is conjunctive. 
+ **/ + public boolean isConjunctive() { + return true; } - else result.right = right.withStrength(s); - return result; - } - - - /** - * Returns a feature object in which any strings that are being used to - * represent an identifier or value have been encoded in byte strings. - * - * @param e The encoding to use. - * @return A feature object as above; possible this object. - **/ - public Feature encode(String e) { - RealFeature newLeft = (RealFeature) left.encode(e); - RealFeature newRight = (RealFeature) right.encode(e); - if (newLeft == left && newRight == right) return this; - return - new RealConjunctiveFeature(containingPackage, generatingClassifier, - newLeft, newRight); - } - - - /** - * Takes care of any feature-type-specific tasks that need to be taken care - * of when removing a feature of this type from a {@link ChildLexicon}, in - * particular updating parent counts and removing children of this feature - * if necessary. - * - * @param lex The child lexicon this feature is being removed from. - **/ - public void removeFromChildLexicon(ChildLexicon lex) { - lex.decrementParentCounts(left); - lex.decrementParentCounts(right); - } - - - /** - * Does a feature-type-specific lookup of this feature in the given - * {@link ChildLexicon}. - * - * @param lex The child lexicon this feature is being looked up in. - * @param label The label of the example containing this feature, or -1 if - * we aren't doing per class feature counting. - * @return The index of f in this lexicon. - **/ - public int childLexiconLookup(ChildLexicon lex, int label) { - return lex.childLexiconLookup(this, label); - } - - - /** - * Returns a hash code based on the hash codes of {@link #left} and - * {@link #right}. - * - * @return The hash code of this feature. - **/ - public int hashCode() { - return 31 * super.hashCode() + 17 * left.hashCode() + right.hashCode(); - } - - - /** - * Two conjunctions are equivalent when their arguments are equivalent. 
- * - * @return true iff the argument is an equivalent - * Feature. - **/ - public boolean equals(Object o) { - if (!super.equals(o)) return false; - RealConjunctiveFeature c = (RealConjunctiveFeature) o; - return (left == c.left || left.equals(c.left)) - && (right == c.right || right.equals(c.right)); - } - - - /** - * Used to sort features into an order that is convenient both to page - * through and for the lexicon to read off disk. - * - * @param o An object to compare with. - * @return Integers appropriate for sorting features first by {@link #left} - * and then by {@link #right}. - **/ - public int compareTo(Object o) { - int d = compareNameStrings(o); - if (d != 0) return d; - RealConjunctiveFeature c = (RealConjunctiveFeature) o; - d = left.compareTo(c.left); - if (d != 0) return d; - return right.compareTo(c.right); - } - - - /** - * Writes a string representation of this Feature to the - * specified buffer. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - writeNameString(buffer); - buffer.append('{'); - left.write(buffer); - buffer.append(", "); - right.write(buffer); - buffer.append('}'); - } - - - /** - * Writes a string representation of this Feature to the - * specified buffer, omitting the package name. - * - * @param buffer The buffer to write to. - **/ - public void writeNoPackage(StringBuffer buffer) { - String p = containingPackage; - containingPackage = null; - writeNameString(buffer); - buffer.append('{'); - left.writeNoPackage(buffer); - buffer.append(", "); - right.writeNoPackage(buffer); - buffer.append('}'); - containingPackage = p; - } - - - /** - * Writes a complete binary representation of the feature. - * - * @param out The output stream. 
- **/ - public void write(ExceptionlessOutputStream out) { - super.write(out); - left.write(out); - right.write(out); - } - - - /** - * Reads the representation of a feature with this object's run-time type - * from the given stream, overwriting the data in this object. - * - *

This method is appropriate for reading features as written by - * {@link #write(ExceptionlessOutputStream)}. - * - * @param in The input stream. - **/ - public void read(ExceptionlessInputStream in) { - super.read(in); - left = Feature.readFeature(in); - right = Feature.readFeature(in); - } - - - /** - * Writes a binary representation of the feature intended for use by a - * lexicon, omitting redundant information when possible. - * - * @param out The output stream. - * @param lex The lexicon out of which this feature is being written. - * @param c The fully qualified name of the assumed class. The runtime - * class of this feature won't be written if it's equivalent to - * c. - * @param p The assumed package string. This feature's package string - * won't be written if it's equivalent to p. - * @param g The assumed classifier name string. This feature's - * classifier name string won't be written if it's equivalent - * to g. - * @param si The assumed identifier as a string. If this feature has a - * string identifier, it won't be written if it's equivalent to - * si. - * @param bi The assumed identifier as a byte string. If this feature - * has a byte string identifier, it won't be written if it's - * equivalent to bi. - * @return The name of the runtime type of this feature. - **/ - public String lexWrite(ExceptionlessOutputStream out, Lexicon lex, String c, - String p, String g, String si, ByteString bi) { - String result = super.lexWrite(out, lex, c, p, g, si, bi); - out.writeInt(lex.lookupChild(left)); - out.writeInt(lex.lookupChild(right)); - return result; - } - - - /** - * Reads the representation of a feature with this object's run-time type - * as stored by a lexicon, overwriting the data in this object. - * - *

This method is appropriate for reading features as written by - * {@link #lexWrite(ExceptionlessOutputStream,Lexicon,String,String,String,String,ByteString)}. - * - * @param in The input stream. - * @param lex The lexicon we are reading in to. - * @param p The assumed package string. If no package name is given in - * the input stream, the instantiated feature is given this - * package. - * @param g The assumed classifier name string. If no classifier name - * is given in the input stream, the instantiated feature is - * given this classifier name. - * @param si The assumed identifier as a string. If the feature being - * read has a string identifier field and no identifier is - * given in the input stream, the feature is given this - * identifier. - * @param bi The assumed identifier as a byte string. If the feature - * being read has a byte string identifier field and no - * identifier is given in the input stream, the feature is - * given this identifier. - **/ - public void lexRead(ExceptionlessInputStream in, Lexicon lex, String p, - String g, String si, ByteString bi) { - super.lexRead(in, lex, p, g, si, bi); - left = lex.lookupKey(in.readInt()); - right = lex.lookupKey(in.readInt()); - } -} + + /** Returns the value of {@link #left}. */ + public Feature getLeft() { + return left; + } + + /** Returns the value of {@link #right}. */ + public Feature getRight() { + return right; + } + + + /** + * Retrieves this feature's identifier as a string. + * + * @return The empty string, since conjunctive features don't have identifiers. + **/ + public String getStringIdentifier() { + return ""; + } + + + /** + * Retrieves this feature's identifier as a byte string. + * + * @return {@link ByteString#emptyString}, since conjunctive features don't have identifiers. 
+ **/ + public ByteString getByteStringIdentifier() { + return ByteString.emptyString; + } + + + /** + * The depth of a feature is one more than the maximum depth of any of its children, or 0 if it + * has no children. + * + * @return The depth of this feature as described above. + **/ + public int depth() { + return Math.max(left.depth(), right.depth()) + 1; + } + + + /** + * Returns the strength of this feature if it were to be placed in a mathematical vector space. + **/ + public double getStrength() { + return (left.isDiscrete() ? 1 : left.getStrength()) + * (right.isDiscrete() ? 1 : right.getStrength()); + } + + + /** + * Return the feature that should be used to index this feature into a lexicon. + * + * @param lexicon The lexicon into which this feature will be indexed. + * @param training Whether or not the learner is currently training. + * @param label The label of the example containing this feature, or -1 if we aren't doing per + * class feature counting. + * @return A feature object appropriate for use as the key of a map. + **/ + public Feature getFeatureKey(Lexicon lexicon, boolean training, int label) { + return new RealConjunctiveFeature(containingPackage, generatingClassifier, getArgumentKey( + left, lexicon, training, label), getArgumentKey(right, lexicon, training, label)); + } + + + /** + * A helper method for {@link #getFeatureKey(Lexicon,boolean,int)}, this method computes the + * feature keys corresponding to the arguments of the conjunction. Here, we lookup the arguments + * to the conjunction in the lexicon so that their counts are never less than the conjunction's, + * and we return the actual feature object that's already a key in the lexicon. + * + * @param f The argument feature for which a key will be computed. + * @param lexicon The lexicon into which this feature will be indexed. + * @param training Whether or not the learner is currently training. 
+ * @param label The label of the example containing this feature, or -1 if we aren't doing per + * class feature counting. + * @return A feature object appropriate for use as the key of a map. + **/ + protected Feature getArgumentKey(Feature f, Lexicon lexicon, boolean training, int label) { + if (f.isDiscrete()) { + if (!training) + return f; + if (!f.isPrimitive()) + f = f.getFeatureKey(lexicon, true, label); + } else { + f = f.getFeatureKey(lexicon, training, label); + if (!training) + return f; + } + + return lexicon.getChildFeature(f, label); + } + + + /** + * Returns a new feature object, the same as this one in all respects except the value has been + * multiplied by the specified number. + * + * @param m The multiplier. + * @return A new real feature whose value is the product of this feature's value and the + * specified multiplier. + **/ + public RealFeature multiply(double m) { + RealConjunctiveFeature result = (RealConjunctiveFeature) clone(); + if (!left.isDiscrete()) + result.left = ((RealFeature) left).multiply(m); + else + result.right = ((RealFeature) right).multiply(m); + return result; + } + + + /** + * Returns a new feature object that's identical to this feature except its strength is given by + * s. + * + * @param s The strength of the new feature. + * @return A new feature object as above, or null if this feature cannot take the + * specified strength. + **/ + public Feature withStrength(double s) { + RealConjunctiveFeature result = (RealConjunctiveFeature) clone(); + if (!left.isDiscrete()) { + result.left = left.withStrength(s); + if (!right.isDiscrete()) + result.right = right.withStrength(1); + } else + result.right = right.withStrength(s); + return result; + } + + + /** + * Returns a feature object in which any strings that are being used to represent an identifier + * or value have been encoded in byte strings. + * + * @param e The encoding to use. + * @return A feature object as above; possible this object. 
+ **/ + public Feature encode(String e) { + RealFeature newLeft = (RealFeature) left.encode(e); + RealFeature newRight = (RealFeature) right.encode(e); + if (newLeft == left && newRight == right) + return this; + return new RealConjunctiveFeature(containingPackage, generatingClassifier, newLeft, + newRight); + } + + + /** + * Takes care of any feature-type-specific tasks that need to be taken care of when removing a + * feature of this type from a {@link ChildLexicon}, in particular updating parent counts and + * removing children of this feature if necessary. + * + * @param lex The child lexicon this feature is being removed from. + **/ + public void removeFromChildLexicon(ChildLexicon lex) { + lex.decrementParentCounts(left); + lex.decrementParentCounts(right); + } + + + /** + * Does a feature-type-specific lookup of this feature in the given {@link ChildLexicon}. + * + * @param lex The child lexicon this feature is being looked up in. + * @param label The label of the example containing this feature, or -1 if we aren't doing per + * class feature counting. + * @return The index of f in this lexicon. + **/ + public int childLexiconLookup(ChildLexicon lex, int label) { + return lex.childLexiconLookup(this, label); + } + + + /** + * Returns a hash code based on the hash codes of {@link #left} and {@link #right}. + * + * @return The hash code of this feature. + **/ + public int hashCode() { + return 31 * super.hashCode() + 17 * left.hashCode() + right.hashCode(); + } + + + /** + * Two conjunctions are equivalent when their arguments are equivalent. + * + * @return true iff the argument is an equivalent Feature. 
+ **/ + public boolean equals(Object o) { + if (!super.equals(o)) + return false; + RealConjunctiveFeature c = (RealConjunctiveFeature) o; + return (left == c.left || left.equals(c.left)) + && (right == c.right || right.equals(c.right)); + } + + + /** + * Used to sort features into an order that is convenient both to page through and for the + * lexicon to read off disk. + * + * @param o An object to compare with. + * @return Integers appropriate for sorting features first by {@link #left} and then by + * {@link #right}. + **/ + public int compareTo(Object o) { + int d = compareNameStrings(o); + if (d != 0) + return d; + RealConjunctiveFeature c = (RealConjunctiveFeature) o; + d = left.compareTo(c.left); + if (d != 0) + return d; + return right.compareTo(c.right); + } + + + /** + * Writes a string representation of this Feature to the specified buffer. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + writeNameString(buffer); + buffer.append('{'); + left.write(buffer); + buffer.append(", "); + right.write(buffer); + buffer.append('}'); + } + + + /** + * Writes a string representation of this Feature to the specified buffer, omitting + * the package name. + * + * @param buffer The buffer to write to. + **/ + public void writeNoPackage(StringBuffer buffer) { + String p = containingPackage; + containingPackage = null; + writeNameString(buffer); + buffer.append('{'); + left.writeNoPackage(buffer); + buffer.append(", "); + right.writeNoPackage(buffer); + buffer.append('}'); + containingPackage = p; + } + + + /** + * Writes a complete binary representation of the feature. + * + * @param out The output stream. + **/ + public void write(ExceptionlessOutputStream out) { + super.write(out); + left.write(out); + right.write(out); + } + + + /** + * Reads the representation of a feature with this object's run-time type from the given stream, + * overwriting the data in this object. + * + *

+ * This method is appropriate for reading features as written by + * {@link #write(ExceptionlessOutputStream)}. + * + * @param in The input stream. + **/ + public void read(ExceptionlessInputStream in) { + super.read(in); + left = Feature.readFeature(in); + right = Feature.readFeature(in); + } + + + /** + * Writes a binary representation of the feature intended for use by a lexicon, omitting + * redundant information when possible. + * + * @param out The output stream. + * @param lex The lexicon out of which this feature is being written. + * @param c The fully qualified name of the assumed class. The runtime class of this feature + * won't be written if it's equivalent to c. + * @param p The assumed package string. This feature's package string won't be written if it's + * equivalent to p. + * @param g The assumed classifier name string. This feature's classifier name string won't be + * written if it's equivalent to g. + * @param si The assumed identifier as a string. If this feature has a string identifier, it + * won't be written if it's equivalent to si. + * @param bi The assumed identifier as a byte string. If this feature has a byte string + * identifier, it won't be written if it's equivalent to bi. + * @return The name of the runtime type of this feature. + **/ + public String lexWrite(ExceptionlessOutputStream out, Lexicon lex, String c, String p, + String g, String si, ByteString bi) { + String result = super.lexWrite(out, lex, c, p, g, si, bi); + out.writeInt(lex.lookupChild(left)); + out.writeInt(lex.lookupChild(right)); + return result; + } + + + /** + * Reads the representation of a feature with this object's run-time type as stored by a + * lexicon, overwriting the data in this object. + * + *

+ * This method is appropriate for reading features as written by + * {@link #lexWrite(ExceptionlessOutputStream,Lexicon,String,String,String,String,ByteString)}. + * + * @param in The input stream. + * @param lex The lexicon we are reading in to. + * @param p The assumed package string. If no package name is given in the input stream, the + * instantiated feature is given this package. + * @param g The assumed classifier name string. If no classifier name is given in the input + * stream, the instantiated feature is given this classifier name. + * @param si The assumed identifier as a string. If the feature being read has a string + * identifier field and no identifier is given in the input stream, the feature is given + * this identifier. + * @param bi The assumed identifier as a byte string. If the feature being read has a byte + * string identifier field and no identifier is given in the input stream, the feature is + * given this identifier. + **/ + public void lexRead(ExceptionlessInputStream in, Lexicon lex, String p, String g, String si, + ByteString bi) { + super.lexRead(in, lex, p, g, si, bi); + left = lex.lookupKey(in.readInt()); + right = lex.lookupKey(in.readInt()); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/RealFeature.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/RealFeature.java index 20e47149..e167c465 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/RealFeature.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/RealFeature.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.classify; @@ -14,89 +11,97 @@ /** - * A real feature takes on any value representable by a double. - * - * @author Nick Rizzolo + * A real feature takes on any value representable by a double. + * + * @author Nick Rizzolo **/ -public abstract class RealFeature extends Feature -{ - /** - * For internal use only. - * - * @see Feature#readFeature(edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessInputStream) - **/ - protected RealFeature() { } - - /** - * Sets both the identifier and the value. - * - * @param p The new real feature's package. - * @param c The name of the classifier that produced this feature. - **/ - public RealFeature(String p, String c) { super(p, c); } - - - /** - * Determines if this feature is discrete. - * - * @return true iff this is discrete. - **/ - public boolean isDiscrete() { return false; } - - - /** - * Gives a string representation of the value of this feature. - * - * @return null, since real features don't have string values. - **/ - public String getStringValue() { return null; } - - - /** - * Gives a string representation of the value of this feature. - * - * @return null, since real features don't have string values. - **/ - public ByteString getByteStringValue() { return null; } - - - /** - * Determines whether or not the parameter is equivalent to the string - * representation of the value of this feature. - * - * @param v The string to compare against. - * @return false, since real features don't have string - * values. - **/ - public boolean valueEquals(String v) { return false; } - - - /** Simply returns this object. 
*/ - public RealFeature makeReal() { return this; } - - - /** - * Returns a new feature object, the same as this one in all respects - * except the value has been multiplied by the specified number. - * - * @param m The multiplier. - * @return A new real feature whose value is the product of this feature's - * value and the specified multiplier. - **/ - public abstract RealFeature multiply(double m); - - - /** - * Create a feature representing the conjunction of this feature with the - * given argument feature. - * - * @param f The feature to conjunct with. - * @param c The classifier producing the resulting feature. - * @return A feature representing the conjunction of this feature and - * f. - **/ - public Feature conjunction(Feature f, Classifier c) { - return new RealConjunctiveFeature(c, f, this); - } +public abstract class RealFeature extends Feature { + /** + * For internal use only. + * + * @see Feature#readFeature(edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream) + **/ + protected RealFeature() {} + + /** + * Sets both the identifier and the value. + * + * @param p The new real feature's package. + * @param c The name of the classifier that produced this feature. + **/ + public RealFeature(String p, String c) { + super(p, c); + } + + + /** + * Determines if this feature is discrete. + * + * @return true iff this is discrete. + **/ + public boolean isDiscrete() { + return false; + } + + + /** + * Gives a string representation of the value of this feature. + * + * @return null, since real features don't have string values. + **/ + public String getStringValue() { + return null; + } + + + /** + * Gives a string representation of the value of this feature. + * + * @return null, since real features don't have string values. + **/ + public ByteString getByteStringValue() { + return null; + } + + + /** + * Determines whether or not the parameter is equivalent to the string representation of the + * value of this feature. 
+ * + * @param v The string to compare against. + * @return false, since real features don't have string values. + **/ + public boolean valueEquals(String v) { + return false; + } + + + /** Simply returns this object. */ + public RealFeature makeReal() { + return this; + } + + + /** + * Returns a new feature object, the same as this one in all respects except the value has been + * multiplied by the specified number. + * + * @param m The multiplier. + * @return A new real feature whose value is the product of this feature's value and the + * specified multiplier. + **/ + public abstract RealFeature multiply(double m); + + + /** + * Create a feature representing the conjunction of this feature with the given argument + * feature. + * + * @param f The feature to conjunct with. + * @param c The classifier producing the resulting feature. + * @return A feature representing the conjunction of this feature and f. + **/ + public Feature conjunction(Feature f, Classifier c) { + return new RealConjunctiveFeature(c, f, this); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/RealPrimitiveFeature.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/RealPrimitiveFeature.java index e2de85f1..7b9fa8d2 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/RealPrimitiveFeature.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/RealPrimitiveFeature.java @@ -1,339 +1,328 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.classify; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream; import edu.illinois.cs.cogcomp.lbjava.learn.Lexicon; import edu.illinois.cs.cogcomp.lbjava.util.ByteString; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessInputStream; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessOutputStream; - /** - * A real feature takes on any value representable by a double. - * - * @author Nick Rizzolo + * A real feature takes on any value representable by a double. + * + * @author Nick Rizzolo **/ -public class RealPrimitiveFeature extends RealFeature -{ - /** - * The identifier string distinguishes this - * Feature from other Features. - **/ - protected ByteString identifier; - /** The real value is represented as a double. */ - protected double value; - - - /** - * For internal use only. - * - * @see Feature#readFeature(ExceptionlessInputStream) - **/ - protected RealPrimitiveFeature() { } - - /** - * Sets both the identifier and the value. - * - * @param p The new real feature's package. - * @param c The name of the classifier that produced this feature. - * @param i The new RealPrimitiveFeature's identifier. - * @param v The new RealPrimitiveFeature's value. - **/ - public RealPrimitiveFeature(String p, String c, ByteString i, double v) { - super(p, c); - identifier = i; - value = v; - } - - - /** - * Determines if this feature contains a byte string identifier field. - * - * @return true iff this feature contains a byte string - * identifier field. 
- **/ - public boolean hasByteStringIdentifier() { return true; } - - - /** - * Determines if this feature is primitive. - * - * @return true iff this is primitive. - **/ - public boolean isPrimitive() { return true; } - - - /** - * Retrieves this feature's identifier as a string. - * - * @return This feature's identifier as a string. - **/ - public String getStringIdentifier() { return identifier.toString(); } - - - /** - * Retrieves this feature's identifier as a byte string. - * - * @return This feature's identifier as a byte string. - **/ - public ByteString getByteStringIdentifier() { - return (ByteString) identifier.clone(); - } - - - /** Simply returns the value of {@link #value}. */ - public double getStrength() { return value; } - - - /** - * Return the feature that should be used to index this feature into a - * lexicon. Specifically, we return this feature with a value of 0 so that - * the same features with different real values will map to the same - * object. - * - * @param lexicon The lexicon into which this feature will be indexed. - * @param training Whether or not the learner is currently training. - * @param label The label of the example containing this feature, or -1 - * if we aren't doing per class feature counting. - * @return A feature object appropriate for use as the key of a map. - **/ - public Feature getFeatureKey(Lexicon lexicon, boolean training, int label) { - return - new RealPrimitiveFeature(containingPackage, generatingClassifier, - identifier, 0); - } - - - /** - * Returns a new feature object, the same as this one in all respects - * except the {@link #value} field has been multiplied by the specified - * number. - * - * @param m The multiplier. - * @return A new real feature whose value is the product of this feature's - * value and the specified multiplier. 
- **/ - public RealFeature multiply(double m) { - return - new RealPrimitiveFeature(containingPackage, generatingClassifier, - identifier, value * m); - } - - - /** - * Returns a new feature object that's identical to this feature except its - * strength is given by s. - * - * @param s The strength of the new feature. - * @return A new feature object as above, or null if this - * feature cannot take the specified strength. - **/ - public Feature withStrength(double s) { - return - new RealPrimitiveFeature(containingPackage, generatingClassifier, - identifier, s); - } - - - /** - * Returns a feature object in which any strings that are being used to - * represent an identifier or value have been encoded in byte strings. - * - * @param e The encoding to use. - * @return A feature object as above; possible this object. - **/ - public Feature encode(String e) { return this; } - - - /** - * The hash code of a RealPrimitiveFeature is the sum of the - * hash codes of the containing package, the identifier, and the value. - * - * @return The hash code for this feature. - **/ - public int hashCode() { - return 31 * super.hashCode() + 17 * identifier.hashCode() - + new Double(value).hashCode(); - } - - - /** - * Two RealPrimitiveFeatures are equivalent when their - * containing packages and identifiers are equivalent and their values are - * equal. - * - * @param o The object with which to compare this feature. - * @return true iff the parameter is an equivalent feature. - **/ - public boolean equals(Object o) { - if (!super.equals(o)) return false; - if (o instanceof RealPrimitiveFeature) { - RealPrimitiveFeature f = (RealPrimitiveFeature) o; - return identifier.equals(f.identifier) && value == f.value; +public class RealPrimitiveFeature extends RealFeature { + /** + * The identifier string distinguishes this Feature from other + * Features. + **/ + protected ByteString identifier; + /** The real value is represented as a double. 
*/ + protected double value; + + + /** + * For internal use only. + * + * @see Feature#readFeature(ExceptionlessInputStream) + **/ + protected RealPrimitiveFeature() {} + + /** + * Sets both the identifier and the value. + * + * @param p The new real feature's package. + * @param c The name of the classifier that produced this feature. + * @param i The new RealPrimitiveFeature's identifier. + * @param v The new RealPrimitiveFeature's value. + **/ + public RealPrimitiveFeature(String p, String c, ByteString i, double v) { + super(p, c); + identifier = i; + value = v; + } + + + /** + * Determines if this feature contains a byte string identifier field. + * + * @return true iff this feature contains a byte string identifier field. + **/ + public boolean hasByteStringIdentifier() { + return true; + } + + + /** + * Determines if this feature is primitive. + * + * @return true iff this is primitive. + **/ + public boolean isPrimitive() { + return true; + } + + + /** + * Retrieves this feature's identifier as a string. + * + * @return This feature's identifier as a string. + **/ + public String getStringIdentifier() { + return identifier.toString(); + } + + + /** + * Retrieves this feature's identifier as a byte string. + * + * @return This feature's identifier as a byte string. + **/ + public ByteString getByteStringIdentifier() { + return (ByteString) identifier.clone(); + } + + + /** Simply returns the value of {@link #value}. */ + public double getStrength() { + return value; + } + + + /** + * Return the feature that should be used to index this feature into a lexicon. Specifically, we + * return this feature with a value of 0 so that the same features with different real values + * will map to the same object. + * + * @param lexicon The lexicon into which this feature will be indexed. + * @param training Whether or not the learner is currently training. 
+ * @param label The label of the example containing this feature, or -1 if we aren't doing per + * class feature counting. + * @return A feature object appropriate for use as the key of a map. + **/ + public Feature getFeatureKey(Lexicon lexicon, boolean training, int label) { + return new RealPrimitiveFeature(containingPackage, generatingClassifier, identifier, 0); + } + + + /** + * Returns a new feature object, the same as this one in all respects except the {@link #value} + * field has been multiplied by the specified number. + * + * @param m The multiplier. + * @return A new real feature whose value is the product of this feature's value and the + * specified multiplier. + **/ + public RealFeature multiply(double m) { + return new RealPrimitiveFeature(containingPackage, generatingClassifier, identifier, value + * m); + } + + + /** + * Returns a new feature object that's identical to this feature except its strength is given by + * s. + * + * @param s The strength of the new feature. + * @return A new feature object as above, or null if this feature cannot take the + * specified strength. + **/ + public Feature withStrength(double s) { + return new RealPrimitiveFeature(containingPackage, generatingClassifier, identifier, s); + } + + + /** + * Returns a feature object in which any strings that are being used to represent an identifier + * or value have been encoded in byte strings. + * + * @param e The encoding to use. + * @return A feature object as above; possible this object. + **/ + public Feature encode(String e) { + return this; } - RealPrimitiveStringFeature f = (RealPrimitiveStringFeature) o; - return identifier.equals(f.identifier) && value == f.value; - } - - - /** - * Some features are functionally equivalent, differing only in the - * encoding of their values; this method will return true iff - * the class of this feature and f are different, but they - * differ only because they encode their values differently. 
This method - * does not compare the values themselves, however. - * - * @param f Another feature. - * @return See above. - **/ - public boolean classEquivalent(Feature f) { - return f instanceof RealPrimitiveStringFeature; - } - - - /** - * Used to sort features into an order that is convenient both to page - * through and for the lexicon to read off disk. - * - * @param o An object to compare with. - * @return Integers appropriate for sorting features first by package, then - * by identifier, then by value. - **/ - public int compareTo(Object o) { - int d = compareNameStrings(o); - if (d != 0) return d; - RealPrimitiveFeature f = (RealPrimitiveFeature) o; - d = identifier.compareTo(f.identifier); - if (d != 0) return d; - double difference = value - f.value; - if (difference < 0) return -1; - if (difference > 0) return 1; - return 0; - } - - - /** - * Writes a string representation of this Feature to the - * specified buffer. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - writeNameString(buffer); - buffer.append("("); - buffer.append(value); - buffer.append(")"); - } - - - /** - * Writes a string representation of this Feature's package, - * generating classifier, and identifier information to the specified - * buffer. - * - * @param buffer The buffer to write to. - **/ - public void writeNameString(StringBuffer buffer) { - super.writeNameString(buffer); - buffer.append(":"); - buffer.append(identifier.toString()); - } - - - /** - * Writes a complete binary representation of the feature. - * - * @param out The output stream. - **/ - public void write(ExceptionlessOutputStream out) { - super.write(out); - identifier.write(out); - out.writeDouble(value); - } - - - /** - * Reads the representation of a feaeture with this object's run-time type - * from the given stream, overwriting the data in this object. - * - * @param in The input stream. 
- **/ - public void read(ExceptionlessInputStream in) { - super.read(in); - identifier = ByteString.readByteString(in); - value = in.readDouble(); - } - - - /** - * Writes a binary representation of the feature intended for use by a - * lexicon, omitting redundant information when possible. - * - * @param out The output stream. - * @param lex The lexicon out of which this feature is being written. - * @param c The fully qualified name of the assumed class. The runtime - * class of this feature won't be written if it's equivalent to - * c. - * @param p The assumed package string. This feature's package string - * won't be written if it's equivalent to p. - * @param g The assumed classifier name string. This feature's - * classifier name string won't be written if it's equivalent - * to g. - * @param si The assumed identifier as a string. If this feature has a - * string identifier, it won't be written if it's equivalent to - * si. - * @param bi The assumed identifier as a byte string. If this feature - * has a byte string identifier, it won't be written if it's - * equivalent to bi. - * @return The name of the runtime type of this feature. - **/ - public String lexWrite(ExceptionlessOutputStream out, Lexicon lex, String c, - String p, String g, String si, ByteString bi) { - String result = super.lexWrite(out, lex, c, p, g, si, bi); - identifier.lexWrite(out, bi); - // NOTE: The lexicon has no use for a real-valued feature's value. - return result; - } - - - /** - * Reads the representation of a feature with this object's run-time type - * as stored by a lexicon, overwriting the data in this object. - * - *

This method is appropriate for reading features as written by - * {@link #lexWrite(ExceptionlessOutputStream,Lexicon,String,String,String,String,ByteString)}. - * - * @param in The input stream. - * @param lex The lexicon we are reading in to. - * @param p The assumed package string. If no package name is given in - * the input stream, the instantiated feature is given this - * package. - * @param g The assumed classifier name string. If no classifier name - * is given in the input stream, the instantiated feature is - * given this classifier name. - * @param si The assumed identifier as a string. If the feature being - * read has a string identifier field and no identifier is - * given in the input stream, the feature is given this - * identifier. - * @param bi The assumed identifier as a byte string. If the feature - * being read has a byte string identifier field and no - * identifier is given in the input stream, the feature is - * given this identifier. - **/ - public void lexRead(ExceptionlessInputStream in, Lexicon lex, String p, - String g, String si, ByteString bi) { - super.lexRead(in, lex, p, g, si, bi); - identifier = ByteString.lexReadByteString(in, bi); - value = 0; - } -} + /** + * The hash code of a RealPrimitiveFeature is the sum of the hash codes of the + * containing package, the identifier, and the value. + * + * @return The hash code for this feature. + **/ + public int hashCode() { + return 31 * super.hashCode() + 17 * identifier.hashCode() + new Double(value).hashCode(); + } + + + /** + * Two RealPrimitiveFeatures are equivalent when their containing packages and + * identifiers are equivalent and their values are equal. + * + * @param o The object with which to compare this feature. + * @return true iff the parameter is an equivalent feature. 
+ **/ + public boolean equals(Object o) { + if (!super.equals(o)) + return false; + if (o instanceof RealPrimitiveFeature) { + RealPrimitiveFeature f = (RealPrimitiveFeature) o; + return identifier.equals(f.identifier) && value == f.value; + } + + RealPrimitiveStringFeature f = (RealPrimitiveStringFeature) o; + return identifier.equals(f.identifier) && value == f.value; + } + + + /** + * Some features are functionally equivalent, differing only in the encoding of their values; + * this method will return true iff the class of this feature and f + * are different, but they differ only because they encode their values differently. This method + * does not compare the values themselves, however. + * + * @param f Another feature. + * @return See above. + **/ + public boolean classEquivalent(Feature f) { + return f instanceof RealPrimitiveStringFeature; + } + + + /** + * Used to sort features into an order that is convenient both to page through and for the + * lexicon to read off disk. + * + * @param o An object to compare with. + * @return Integers appropriate for sorting features first by package, then by identifier, then + * by value. + **/ + public int compareTo(Object o) { + int d = compareNameStrings(o); + if (d != 0) + return d; + RealPrimitiveFeature f = (RealPrimitiveFeature) o; + d = identifier.compareTo(f.identifier); + if (d != 0) + return d; + double difference = value - f.value; + if (difference < 0) + return -1; + if (difference > 0) + return 1; + return 0; + } + + + /** + * Writes a string representation of this Feature to the specified buffer. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + writeNameString(buffer); + buffer.append("("); + buffer.append(value); + buffer.append(")"); + } + + + /** + * Writes a string representation of this Feature's package, generating classifier, + * and identifier information to the specified buffer. + * + * @param buffer The buffer to write to. 
+ **/ + public void writeNameString(StringBuffer buffer) { + super.writeNameString(buffer); + buffer.append(":"); + buffer.append(identifier.toString()); + } + + + /** + * Writes a complete binary representation of the feature. + * + * @param out The output stream. + **/ + public void write(ExceptionlessOutputStream out) { + super.write(out); + identifier.write(out); + out.writeDouble(value); + } + + + /** + * Reads the representation of a feaeture with this object's run-time type from the given + * stream, overwriting the data in this object. + * + * @param in The input stream. + **/ + public void read(ExceptionlessInputStream in) { + super.read(in); + identifier = ByteString.readByteString(in); + value = in.readDouble(); + } + + + /** + * Writes a binary representation of the feature intended for use by a lexicon, omitting + * redundant information when possible. + * + * @param out The output stream. + * @param lex The lexicon out of which this feature is being written. + * @param c The fully qualified name of the assumed class. The runtime class of this feature + * won't be written if it's equivalent to c. + * @param p The assumed package string. This feature's package string won't be written if it's + * equivalent to p. + * @param g The assumed classifier name string. This feature's classifier name string won't be + * written if it's equivalent to g. + * @param si The assumed identifier as a string. If this feature has a string identifier, it + * won't be written if it's equivalent to si. + * @param bi The assumed identifier as a byte string. If this feature has a byte string + * identifier, it won't be written if it's equivalent to bi. + * @return The name of the runtime type of this feature. 
+ **/ + public String lexWrite(ExceptionlessOutputStream out, Lexicon lex, String c, String p, + String g, String si, ByteString bi) { + String result = super.lexWrite(out, lex, c, p, g, si, bi); + identifier.lexWrite(out, bi); + // NOTE: The lexicon has no use for a real-valued feature's value. + return result; + } + + + /** + * Reads the representation of a feature with this object's run-time type as stored by a + * lexicon, overwriting the data in this object. + * + *

+ * This method is appropriate for reading features as written by + * {@link #lexWrite(ExceptionlessOutputStream,Lexicon,String,String,String,String,ByteString)}. + * + * @param in The input stream. + * @param lex The lexicon we are reading in to. + * @param p The assumed package string. If no package name is given in the input stream, the + * instantiated feature is given this package. + * @param g The assumed classifier name string. If no classifier name is given in the input + * stream, the instantiated feature is given this classifier name. + * @param si The assumed identifier as a string. If the feature being read has a string + * identifier field and no identifier is given in the input stream, the feature is given + * this identifier. + * @param bi The assumed identifier as a byte string. If the feature being read has a byte + * string identifier field and no identifier is given in the input stream, the feature is + * given this identifier. + **/ + public void lexRead(ExceptionlessInputStream in, Lexicon lex, String p, String g, String si, + ByteString bi) { + super.lexRead(in, lex, p, g, si, bi); + identifier = ByteString.lexReadByteString(in, bi); + value = 0; + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/RealPrimitiveStringFeature.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/RealPrimitiveStringFeature.java index 2547dfb6..51ca8a94 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/RealPrimitiveStringFeature.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/RealPrimitiveStringFeature.java @@ -1,348 +1,336 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.classify; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream; import edu.illinois.cs.cogcomp.lbjava.learn.Lexicon; import edu.illinois.cs.cogcomp.lbjava.util.ByteString; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessInputStream; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessOutputStream; - /** - * A real feature takes on any value representable by a double. - * - * @author Nick Rizzolo + * A real feature takes on any value representable by a double. + * + * @author Nick Rizzolo **/ -public class RealPrimitiveStringFeature extends RealFeature -{ - /** - * The identifier string distinguishes this - * Feature from other Features. - **/ - protected String identifier; - /** The real value is represented as a double. */ - protected double value; - - - /** - * For internal use only. - * - * @see Feature#readFeature(ExceptionlessInputStream) - **/ - protected RealPrimitiveStringFeature() { } - - /** - * Sets both the identifier and the value. - * - * @param p The new real feature's package. - * @param c The name of the classifier that produced this feature. - * @param i The new RealPrimitiveStringFeature's identifier. - * @param v The new RealPrimitiveStringFeature's value. - **/ - public RealPrimitiveStringFeature(String p, String c, String i, double v) { - super(p, c); - identifier = i; - value = v; - } - - - /** - * Determines if this feature contains a string identifier field. - * - * @return true iff this feature contains a string identifier - * field. 
- **/ - public boolean hasStringIdentifier() { return true; } - - - /** - * Determines if this feature is primitive. - * - * @return true iff this is primitive. - **/ - public boolean isPrimitive() { return true; } - - - /** - * Retrieves this feature's identifier as a string. - * - * @return This feature's identifier as a string. - **/ - public String getStringIdentifier() { return identifier; } - - - /** - * Retrieves this feature's identifier as a byte string. - * - * @return This feature's identifier as a byte string. - **/ - public ByteString getByteStringIdentifier() { - return new ByteString(identifier); - } - - - /** Simply returns the value of {@link #value}. */ - public double getStrength() { return value; } - - - /** - * Return the feature that should be used to index this feature into a - * lexicon. Specifically, we return this feature with a value of 0 so that - * the same features with different real values will map to the same - * object. - * - * @param lexicon The lexicon into which this feature will be indexed. - * @param training Whether or not the learner is currently training. - * @param label The label of the example containing this feature, or -1 - * if we aren't doing per class feature counting. - * @return A feature object appropriate for use as the key of a map. - **/ - public Feature getFeatureKey(Lexicon lexicon, boolean training, int label) { - return - new RealPrimitiveStringFeature(containingPackage, generatingClassifier, - identifier, 0); - } - - - /** - * Returns a new feature object, the same as this one in all respects - * except the {@link #value} field has been multiplied by the specified - * number. - * - * @param m The multiplier. - * @return A new real feature whose value is the product of this feature's - * value and the specified multiplier. 
- **/ - public RealFeature multiply(double m) { - return - new RealPrimitiveStringFeature(containingPackage, generatingClassifier, - identifier, value * m); - } - - - /** - * Returns a new feature object that's identical to this feature except its - * strength is given by s. - * - * @param s The strength of the new feature. - * @return A new feature object as above, or null if this - * feature cannot take the specified strength. - **/ - public Feature withStrength(double s) { - return - new RealPrimitiveStringFeature(containingPackage, generatingClassifier, - identifier, s); - } - - - /** - * Returns a feature object in which any strings that are being used to - * represent an identifier or value have been encoded in byte strings. - * - * @param e The encoding to use. - * @return A feature object as above; possible this object. - **/ - public Feature encode(String e) { - if (e == null || e == "String") return this; - ByteString id = - identifier.length() == 0 ? ByteString.emptyString - : new ByteString(identifier, e); - return - new RealPrimitiveFeature(containingPackage, generatingClassifier, id, - value); - } - - - /** - * The hash code of a RealPrimitiveStringFeature is the sum of the - * hash codes of the containing package, the identifier, and the value. - * - * @return The hash code for this feature. - **/ - public int hashCode() { - return 31 * super.hashCode() + 17 * identifier.hashCode() - + new Double(value).hashCode(); - } - - - /** - * Two RealPrimitiveStringFeatures are equivalent when their - * containing packages and identifiers are equivalent and their values are - * equal. - * - * @param o The object with which to compare this feature. - * @return true iff the parameter is an equivalent feature. 
- **/ - public boolean equals(Object o) { - if (!super.equals(o)) return false; - if (o instanceof RealPrimitiveStringFeature) { - RealPrimitiveStringFeature f = (RealPrimitiveStringFeature) o; - return identifier.equals(f.identifier) && value == f.value; +public class RealPrimitiveStringFeature extends RealFeature { + /** + * The identifier string distinguishes this Feature from other + * Features. + **/ + protected String identifier; + /** The real value is represented as a double. */ + protected double value; + + + /** + * For internal use only. + * + * @see Feature#readFeature(ExceptionlessInputStream) + **/ + protected RealPrimitiveStringFeature() {} + + /** + * Sets both the identifier and the value. + * + * @param p The new real feature's package. + * @param c The name of the classifier that produced this feature. + * @param i The new RealPrimitiveStringFeature's identifier. + * @param v The new RealPrimitiveStringFeature's value. + **/ + public RealPrimitiveStringFeature(String p, String c, String i, double v) { + super(p, c); + identifier = i; + value = v; + } + + + /** + * Determines if this feature contains a string identifier field. + * + * @return true iff this feature contains a string identifier field. + **/ + public boolean hasStringIdentifier() { + return true; + } + + + /** + * Determines if this feature is primitive. + * + * @return true iff this is primitive. + **/ + public boolean isPrimitive() { + return true; + } + + + /** + * Retrieves this feature's identifier as a string. + * + * @return This feature's identifier as a string. + **/ + public String getStringIdentifier() { + return identifier; + } + + + /** + * Retrieves this feature's identifier as a byte string. + * + * @return This feature's identifier as a byte string. + **/ + public ByteString getByteStringIdentifier() { + return new ByteString(identifier); + } + + + /** Simply returns the value of {@link #value}. 
*/ + public double getStrength() { + return value; + } + + + /** + * Return the feature that should be used to index this feature into a lexicon. Specifically, we + * return this feature with a value of 0 so that the same features with different real values + * will map to the same object. + * + * @param lexicon The lexicon into which this feature will be indexed. + * @param training Whether or not the learner is currently training. + * @param label The label of the example containing this feature, or -1 if we aren't doing per + * class feature counting. + * @return A feature object appropriate for use as the key of a map. + **/ + public Feature getFeatureKey(Lexicon lexicon, boolean training, int label) { + return new RealPrimitiveStringFeature(containingPackage, generatingClassifier, identifier, + 0); + } + + + /** + * Returns a new feature object, the same as this one in all respects except the {@link #value} + * field has been multiplied by the specified number. + * + * @param m The multiplier. + * @return A new real feature whose value is the product of this feature's value and the + * specified multiplier. + **/ + public RealFeature multiply(double m) { + return new RealPrimitiveStringFeature(containingPackage, generatingClassifier, identifier, + value * m); + } + + + /** + * Returns a new feature object that's identical to this feature except its strength is given by + * s. + * + * @param s The strength of the new feature. + * @return A new feature object as above, or null if this feature cannot take the + * specified strength. + **/ + public Feature withStrength(double s) { + return new RealPrimitiveStringFeature(containingPackage, generatingClassifier, identifier, + s); + } + + + /** + * Returns a feature object in which any strings that are being used to represent an identifier + * or value have been encoded in byte strings. + * + * @param e The encoding to use. + * @return A feature object as above; possible this object. 
+ **/ + public Feature encode(String e) { + if (e == null || e == "String") + return this; + ByteString id = + identifier.length() == 0 ? ByteString.emptyString : new ByteString(identifier, e); + return new RealPrimitiveFeature(containingPackage, generatingClassifier, id, value); } - RealPrimitiveFeature f = (RealPrimitiveFeature) o; - return f.identifier.equals(identifier) && value == f.value; - } - - - /** - * Some features are functionally equivalent, differing only in the - * encoding of their values; this method will return true iff - * the class of this feature and f are different, but they - * differ only because they encode their values differently. This method - * does not compare the values themselves, however. - * - * @param f Another feature. - * @return See above. - **/ - public boolean classEquivalent(Feature f) { - return f instanceof RealPrimitiveFeature; - } - - - /** - * Used to sort features into an order that is convenient both to page - * through and for the lexicon to read off disk. - * - * @param o An object to compare with. - * @return Integers appropriate for sorting features first by package, then - * by identifier, then by value. - **/ - public int compareTo(Object o) { - int d = compareNameStrings(o); - if (d != 0) return d; - RealPrimitiveStringFeature f = (RealPrimitiveStringFeature) o; - d = identifier.compareTo(f.identifier); - if (d != 0) return d; - double difference = value - f.value; - if (difference < 0) return -1; - if (difference > 0) return 1; - return 0; - } - - - /** - * Writes a string representation of this Feature to the - * specified buffer. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - writeNameString(buffer); - buffer.append("("); - buffer.append(value); - buffer.append(")"); - } - - - /** - * Writes a string representation of this Feature's package, - * generating classifier, and identifier information to the specified - * buffer. 
- * - * @param buffer The buffer to write to. - **/ - public void writeNameString(StringBuffer buffer) { - super.writeNameString(buffer); - buffer.append(":"); - buffer.append(identifier); - } - - - /** - * Writes a complete binary representation of the feature. - * - * @param out The output stream. - **/ - public void write(ExceptionlessOutputStream out) { - super.write(out); - out.writeString(identifier); - out.writeDouble(value); - } - - - /** - * Reads the representation of a feaeture with this object's run-time type - * from the given stream, overwriting the data in this object. - * - * @param in The input stream. - **/ - public void read(ExceptionlessInputStream in) { - super.read(in); - identifier = in.readString(); - value = in.readDouble(); - } - - - /** - * Writes a binary representation of the feature intended for use by a - * lexicon, omitting redundant information when possible. - * - * @param out The output stream. - * @param lex The lexicon out of which this feature is being written. - * @param c The fully qualified name of the assumed class. The runtime - * class of this feature won't be written if it's equivalent to - * c. - * @param p The assumed package string. This feature's package string - * won't be written if it's equivalent to p. - * @param g The assumed classifier name string. This feature's - * classifier name string won't be written if it's equivalent - * to g. - * @param si The assumed identifier as a string. If this feature has a - * string identifier, it won't be written if it's equivalent to - * si. - * @param bi The assumed identifier as a byte string. If this feature - * has a byte string identifier, it won't be written if it's - * equivalent to bi. - * @return The name of the runtime type of this feature. 
- **/ - public String lexWrite(ExceptionlessOutputStream out, Lexicon lex, String c, - String p, String g, String si, ByteString bi) { - String result = super.lexWrite(out, lex, c, p, g, si, bi); - out.writeString(identifier.equals(si) ? null : identifier); - // NOTE: The lexicon has no use for a real-valued feature's value. - return result; - } - - - /** - * Reads the representation of a feature with this object's run-time type - * as stored by a lexicon, overwriting the data in this object. - * - *

This method is appropriate for reading features as written by - * {@link #lexWrite(ExceptionlessOutputStream,Lexicon,String,String,String,String,ByteString)}. - * - * @param in The input stream. - * @param lex The lexicon we are reading in to. - * @param p The assumed package string. If no package name is given in - * the input stream, the instantiated feature is given this - * package. - * @param g The assumed classifier name string. If no classifier name - * is given in the input stream, the instantiated feature is - * given this classifier name. - * @param si The assumed identifier as a string. If the feature being - * read has a string identifier field and no identifier is - * given in the input stream, the feature is given this - * identifier. - * @param bi The assumed identifier as a byte string. If the feature - * being read has a byte string identifier field and no - * identifier is given in the input stream, the feature is - * given this identifier. - **/ - public void lexRead(ExceptionlessInputStream in, Lexicon lex, String p, - String g, String si, ByteString bi) { - super.lexRead(in, lex, p, g, si, bi); - identifier = in.readString(); - if (identifier == null) identifier = si; - value = 0; - } -} + /** + * The hash code of a RealPrimitiveStringFeature is the sum of the hash codes of + * the containing package, the identifier, and the value. + * + * @return The hash code for this feature. + **/ + public int hashCode() { + return 31 * super.hashCode() + 17 * identifier.hashCode() + new Double(value).hashCode(); + } + + + /** + * Two RealPrimitiveStringFeatures are equivalent when their containing packages + * and identifiers are equivalent and their values are equal. + * + * @param o The object with which to compare this feature. + * @return true iff the parameter is an equivalent feature. 
+ **/ + public boolean equals(Object o) { + if (!super.equals(o)) + return false; + if (o instanceof RealPrimitiveStringFeature) { + RealPrimitiveStringFeature f = (RealPrimitiveStringFeature) o; + return identifier.equals(f.identifier) && value == f.value; + } + + RealPrimitiveFeature f = (RealPrimitiveFeature) o; + return f.identifier.equals(identifier) && value == f.value; + } + + + /** + * Some features are functionally equivalent, differing only in the encoding of their values; + * this method will return true iff the class of this feature and f + * are different, but they differ only because they encode their values differently. This method + * does not compare the values themselves, however. + * + * @param f Another feature. + * @return See above. + **/ + public boolean classEquivalent(Feature f) { + return f instanceof RealPrimitiveFeature; + } + + + /** + * Used to sort features into an order that is convenient both to page through and for the + * lexicon to read off disk. + * + * @param o An object to compare with. + * @return Integers appropriate for sorting features first by package, then by identifier, then + * by value. + **/ + public int compareTo(Object o) { + int d = compareNameStrings(o); + if (d != 0) + return d; + RealPrimitiveStringFeature f = (RealPrimitiveStringFeature) o; + d = identifier.compareTo(f.identifier); + if (d != 0) + return d; + double difference = value - f.value; + if (difference < 0) + return -1; + if (difference > 0) + return 1; + return 0; + } + + + /** + * Writes a string representation of this Feature to the specified buffer. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + writeNameString(buffer); + buffer.append("("); + buffer.append(value); + buffer.append(")"); + } + + + /** + * Writes a string representation of this Feature's package, generating classifier, + * and identifier information to the specified buffer. + * + * @param buffer The buffer to write to. 
+ **/ + public void writeNameString(StringBuffer buffer) { + super.writeNameString(buffer); + buffer.append(":"); + buffer.append(identifier); + } + + + /** + * Writes a complete binary representation of the feature. + * + * @param out The output stream. + **/ + public void write(ExceptionlessOutputStream out) { + super.write(out); + out.writeString(identifier); + out.writeDouble(value); + } + + + /** + * Reads the representation of a feaeture with this object's run-time type from the given + * stream, overwriting the data in this object. + * + * @param in The input stream. + **/ + public void read(ExceptionlessInputStream in) { + super.read(in); + identifier = in.readString(); + value = in.readDouble(); + } + + + /** + * Writes a binary representation of the feature intended for use by a lexicon, omitting + * redundant information when possible. + * + * @param out The output stream. + * @param lex The lexicon out of which this feature is being written. + * @param c The fully qualified name of the assumed class. The runtime class of this feature + * won't be written if it's equivalent to c. + * @param p The assumed package string. This feature's package string won't be written if it's + * equivalent to p. + * @param g The assumed classifier name string. This feature's classifier name string won't be + * written if it's equivalent to g. + * @param si The assumed identifier as a string. If this feature has a string identifier, it + * won't be written if it's equivalent to si. + * @param bi The assumed identifier as a byte string. If this feature has a byte string + * identifier, it won't be written if it's equivalent to bi. + * @return The name of the runtime type of this feature. + **/ + public String lexWrite(ExceptionlessOutputStream out, Lexicon lex, String c, String p, + String g, String si, ByteString bi) { + String result = super.lexWrite(out, lex, c, p, g, si, bi); + out.writeString(identifier.equals(si) ? 
null : identifier); + // NOTE: The lexicon has no use for a real-valued feature's value. + return result; + } + + + /** + * Reads the representation of a feature with this object's run-time type as stored by a + * lexicon, overwriting the data in this object. + * + *

+ * This method is appropriate for reading features as written by + * {@link #lexWrite(ExceptionlessOutputStream,Lexicon,String,String,String,String,ByteString)}. + * + * @param in The input stream. + * @param lex The lexicon we are reading in to. + * @param p The assumed package string. If no package name is given in the input stream, the + * instantiated feature is given this package. + * @param g The assumed classifier name string. If no classifier name is given in the input + * stream, the instantiated feature is given this classifier name. + * @param si The assumed identifier as a string. If the feature being read has a string + * identifier field and no identifier is given in the input stream, the feature is given + * this identifier. + * @param bi The assumed identifier as a byte string. If the feature being read has a byte + * string identifier field and no identifier is given in the input stream, the feature is + * given this identifier. + **/ + public void lexRead(ExceptionlessInputStream in, Lexicon lex, String p, String g, String si, + ByteString bi) { + super.lexRead(in, lex, p, g, si, bi); + identifier = in.readString(); + if (identifier == null) + identifier = si; + value = 0; + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/RealReferrer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/RealReferrer.java index 5a392a0c..80e0ecd6 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/RealReferrer.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/RealReferrer.java @@ -1,255 +1,248 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.classify; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream; import edu.illinois.cs.cogcomp.lbjava.learn.ChildLexicon; import edu.illinois.cs.cogcomp.lbjava.learn.Lexicon; import edu.illinois.cs.cogcomp.lbjava.util.ByteString; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessInputStream; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessOutputStream; - /** - * A referring real feature is one that has its own identifier, but whose - * value comes from a separate feature that it refers to. - * - * @author Nick Rizzolo + * A referring real feature is one that has its own identifier, but whose value comes from a + * separate feature that it refers to. + * + * @author Nick Rizzolo **/ -public abstract class RealReferrer extends RealFeature -{ - /** The feature being referred to. */ - protected RealFeature referent; - - - /** - * For internal use only. - * - * @see Feature#readFeature(ExceptionlessInputStream) - **/ - protected RealReferrer() { } - - /** - * Sets both the identifier and the referent. - * - * @param c The classifier that produced this feature. - * @param r The real feature referred to by this new feature. - **/ - public RealReferrer(Classifier c, RealFeature r) { - this(c.containingPackage, c.name, r); - } - - /** - * Sets both the identifier and the referent. - * - * @param p The new real feature's package. - * @param c The name of the classifier that produced this feature. - * @param r The real feature referred to by this new feature. 
- **/ - public RealReferrer(String p, String c, RealFeature r) { - super(p, c); - referent = r; - } - - - /** - * Determines if this feature is a referring feature. - * - * @return true iff this feature is a referring feature. - **/ - public boolean isReferrer() { return true; } - - - /** Returns the value of {@link #referent}. */ - public RealFeature getReferent() { return referent; } - - - /** - * The depth of a feature is one more than the maximum depth of any of its - * children, or 0 if it has no children. - * - * @return The depth of this feature as described above. - **/ - public int depth() { return referent.depth() + 1; } - - - /** Simply returns the strength of {@link #referent}. */ - public double getStrength() { return referent.getStrength(); } - - - /** - * Takes care of any feature-type-specific tasks that need to be taken care - * of when removing a feature of this type from a {@link ChildLexicon}, in - * particular updating parent counts and removing children of this feature - * if necessary. - * - * @param lex The child lexicon this feature is being removed from. - **/ - public void removeFromChildLexicon(ChildLexicon lex) { - lex.decrementParentCounts(referent); - } - - - /** - * Does a feature-type-specific lookup of this feature in the given - * {@link ChildLexicon}. - * - * @param lex The child lexicon this feature is being looked up in. - * @param label The label of the example containing this feature, or -1 if - * we aren't doing per class feature counting. - * @return The index of f in this lexicon. - **/ - public int childLexiconLookup(ChildLexicon lex, int label) { - return lex.childLexiconLookup(this, label); - } - - - /** - * The hash code of a RealReferrer is the sum of the hash - * codes of the containing package, the identifier, and the referent - * feature. - * - * @return The hash code for this feature. 
- **/ - public int hashCode() { - return 17 * super.hashCode() + referent.hashCode(); - } - - - /** - * Used to sort features into an order that is convenient both to page - * through and for the lexicon to read off disk. - * - * @param o An object to compare with. - * @return Integers appropriate for sorting features first by package, then - * by identifier, then by value. - **/ - public int compareTo(Object o) { - int d = compareNameStrings(o); - if (d != 0) return d; - RealReferrer r = (RealReferrer) o; - if (d != 0) return d; - return referent.compareTo(r.referent); - } - - - /** - * Writes a string representation of this Feature to the - * specified buffer. - * - * @param buffer The buffer to write to. - **/ - public void write(StringBuffer buffer) { - writeNameString(buffer); - buffer.append("->"); - referent.write(buffer); - } - - - /** - * Writes a string representation of this Feature to the - * specified buffer, omitting the package name. - * - * @param buffer The buffer to write to. - **/ - public void writeNoPackage(StringBuffer buffer) { - String p = containingPackage; - containingPackage = null; - writeNameString(buffer); - buffer.append("->"); - referent.writeNoPackage(buffer); - containingPackage = p; - } - - - /** - * Writes a complete binary representation of the feature. - * - * @param out The output stream. - **/ - public void write(ExceptionlessOutputStream out) { - super.write(out); - referent.write(out); - } - - - /** - * Reads the representation of a feaeture with this object's run-time type - * from the given stream, overwriting the data in this object. - * - * @param in The input stream. - **/ - public void read(ExceptionlessInputStream in) { - super.read(in); - referent = (RealFeature) Feature.readFeature(in); - } - - - /** - * Writes a binary representation of the feature intended for use by a - * lexicon, omitting redundant information when possible. - * - * @param out The output stream. 
- * @param lex The lexicon out of which this feature is being written. - * @param c The fully qualified name of the assumed class. The runtime - * class of this feature won't be written if it's equivalent to - * c. - * @param p The assumed package string. This feature's package string - * won't be written if it's equivalent to p. - * @param g The assumed classifier name string. This feature's - * classifier name string won't be written if it's equivalent - * to g. - * @param si The assumed identifier as a string. If this feature has a - * string identifier, it won't be written if it's equivalent to - * si. - * @param bi The assumed identifier as a byte string. If this feature - * has a byte string identifier, it won't be written if it's - * equivalent to bi. - * @return The name of the runtime type of this feature. - **/ - public String lexWrite(ExceptionlessOutputStream out, Lexicon lex, String c, - String p, String g, String si, ByteString bi) { - String result = super.lexWrite(out, lex, c, p, g, si, bi); - out.writeInt(lex.lookupChild(referent)); - return result; - } - - - /** - * Reads the representation of a feature with this object's run-time type - * as stored by a lexicon, overwriting the data in this object. - * - *

This method is appropriate for reading features as written by - * {@link #lexWrite(ExceptionlessOutputStream,Lexicon,String,String,String,String,ByteString)}. - * - * @param in The input stream. - * @param lex The lexicon we are reading in to. - * @param p The assumed package string. If no package name is given in - * the input stream, the instantiated feature is given this - * package. - * @param g The assumed classifier name string. If no classifier name - * is given in the input stream, the instantiated feature is - * given this classifier name. - * @param si The assumed identifier as a string. If the feature being - * read has a string identifier field and no identifier is - * given in the input stream, the feature is given this - * identifier. - * @param bi The assumed identifier as a byte string. If the feature - * being read has a byte string identifier field and no - * identifier is given in the input stream, the feature is - * given this identifier. - **/ - public void lexRead(ExceptionlessInputStream in, Lexicon lex, String p, - String g, String si, ByteString bi) { - super.lexRead(in, lex, p, g, si, bi); - referent = (RealFeature) lex.lookupKey(in.readInt()); - } +public abstract class RealReferrer extends RealFeature { + /** The feature being referred to. */ + protected RealFeature referent; + + + /** + * For internal use only. + * + * @see Feature#readFeature(ExceptionlessInputStream) + **/ + protected RealReferrer() {} + + /** + * Sets both the identifier and the referent. + * + * @param c The classifier that produced this feature. + * @param r The real feature referred to by this new feature. + **/ + public RealReferrer(Classifier c, RealFeature r) { + this(c.containingPackage, c.name, r); + } + + /** + * Sets both the identifier and the referent. + * + * @param p The new real feature's package. + * @param c The name of the classifier that produced this feature. + * @param r The real feature referred to by this new feature. 
+ **/ + public RealReferrer(String p, String c, RealFeature r) { + super(p, c); + referent = r; + } + + + /** + * Determines if this feature is a referring feature. + * + * @return true iff this feature is a referring feature. + **/ + public boolean isReferrer() { + return true; + } + + + /** Returns the value of {@link #referent}. */ + public RealFeature getReferent() { + return referent; + } + + + /** + * The depth of a feature is one more than the maximum depth of any of its children, or 0 if it + * has no children. + * + * @return The depth of this feature as described above. + **/ + public int depth() { + return referent.depth() + 1; + } + + + /** Simply returns the strength of {@link #referent}. */ + public double getStrength() { + return referent.getStrength(); + } + + + /** + * Takes care of any feature-type-specific tasks that need to be taken care of when removing a + * feature of this type from a {@link ChildLexicon}, in particular updating parent counts and + * removing children of this feature if necessary. + * + * @param lex The child lexicon this feature is being removed from. + **/ + public void removeFromChildLexicon(ChildLexicon lex) { + lex.decrementParentCounts(referent); + } + + + /** + * Does a feature-type-specific lookup of this feature in the given {@link ChildLexicon}. + * + * @param lex The child lexicon this feature is being looked up in. + * @param label The label of the example containing this feature, or -1 if we aren't doing per + * class feature counting. + * @return The index of f in this lexicon. + **/ + public int childLexiconLookup(ChildLexicon lex, int label) { + return lex.childLexiconLookup(this, label); + } + + + /** + * The hash code of a RealReferrer is the sum of the hash codes of the containing + * package, the identifier, and the referent feature. + * + * @return The hash code for this feature. 
+ **/ + public int hashCode() { + return 17 * super.hashCode() + referent.hashCode(); + } + + + /** + * Used to sort features into an order that is convenient both to page through and for the + * lexicon to read off disk. + * + * @param o An object to compare with. + * @return Integers appropriate for sorting features first by package, then by identifier, then + * by value. + **/ + public int compareTo(Object o) { + int d = compareNameStrings(o); + if (d != 0) + return d; + RealReferrer r = (RealReferrer) o; + if (d != 0) + return d; + return referent.compareTo(r.referent); + } + + + /** + * Writes a string representation of this Feature to the specified buffer. + * + * @param buffer The buffer to write to. + **/ + public void write(StringBuffer buffer) { + writeNameString(buffer); + buffer.append("->"); + referent.write(buffer); + } + + + /** + * Writes a string representation of this Feature to the specified buffer, omitting + * the package name. + * + * @param buffer The buffer to write to. + **/ + public void writeNoPackage(StringBuffer buffer) { + String p = containingPackage; + containingPackage = null; + writeNameString(buffer); + buffer.append("->"); + referent.writeNoPackage(buffer); + containingPackage = p; + } + + + /** + * Writes a complete binary representation of the feature. + * + * @param out The output stream. + **/ + public void write(ExceptionlessOutputStream out) { + super.write(out); + referent.write(out); + } + + + /** + * Reads the representation of a feaeture with this object's run-time type from the given + * stream, overwriting the data in this object. + * + * @param in The input stream. + **/ + public void read(ExceptionlessInputStream in) { + super.read(in); + referent = (RealFeature) Feature.readFeature(in); + } + + + /** + * Writes a binary representation of the feature intended for use by a lexicon, omitting + * redundant information when possible. + * + * @param out The output stream. 
+ * @param lex The lexicon out of which this feature is being written. + * @param c The fully qualified name of the assumed class. The runtime class of this feature + * won't be written if it's equivalent to c. + * @param p The assumed package string. This feature's package string won't be written if it's + * equivalent to p. + * @param g The assumed classifier name string. This feature's classifier name string won't be + * written if it's equivalent to g. + * @param si The assumed identifier as a string. If this feature has a string identifier, it + * won't be written if it's equivalent to si. + * @param bi The assumed identifier as a byte string. If this feature has a byte string + * identifier, it won't be written if it's equivalent to bi. + * @return The name of the runtime type of this feature. + **/ + public String lexWrite(ExceptionlessOutputStream out, Lexicon lex, String c, String p, + String g, String si, ByteString bi) { + String result = super.lexWrite(out, lex, c, p, g, si, bi); + out.writeInt(lex.lookupChild(referent)); + return result; + } + + + /** + * Reads the representation of a feature with this object's run-time type as stored by a + * lexicon, overwriting the data in this object. + * + *

+ * This method is appropriate for reading features as written by + * {@link #lexWrite(ExceptionlessOutputStream,Lexicon,String,String,String,String,ByteString)}. + * + * @param in The input stream. + * @param lex The lexicon we are reading in to. + * @param p The assumed package string. If no package name is given in the input stream, the + * instantiated feature is given this package. + * @param g The assumed classifier name string. If no classifier name is given in the input + * stream, the instantiated feature is given this classifier name. + * @param si The assumed identifier as a string. If the feature being read has a string + * identifier field and no identifier is given in the input stream, the feature is given + * this identifier. + * @param bi The assumed identifier as a byte string. If the feature being read has a byte + * string identifier field and no identifier is given in the input stream, the feature is + * given this identifier. + **/ + public void lexRead(ExceptionlessInputStream in, Lexicon lex, String p, String g, String si, + ByteString bi) { + super.lexRead(in, lex, p, g, si, bi); + referent = (RealFeature) lex.lookupKey(in.readInt()); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/RealReferringFeature.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/RealReferringFeature.java index 33fe8a9f..fb4480c4 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/RealReferringFeature.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/RealReferringFeature.java @@ -1,324 +1,305 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.classify; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream; import edu.illinois.cs.cogcomp.lbjava.learn.Lexicon; import edu.illinois.cs.cogcomp.lbjava.util.ByteString; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessInputStream; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessOutputStream; - /** - * A referring real feature is one that has its own identifier, but whose - * value comes from a separate feature that it refers to. - * - * @author Nick Rizzolo + * A referring real feature is one that has its own identifier, but whose value comes from a + * separate feature that it refers to. + * + * @author Nick Rizzolo **/ -public class RealReferringFeature extends RealReferrer -{ - /** - * The identifier string distinguishes this - * Feature from other Features. - **/ - protected ByteString identifier; - - - /** - * For internal use only. - * - * @see Feature#readFeature(ExceptionlessInputStream) - **/ - protected RealReferringFeature() { } - - /** - * Sets both the identifier and the referent. - * - * @param c The classifier that produced this feature. - * @param i The new RealReferringFeature's identifier. - * @param r The real feature referred to by this new feature. - **/ - public RealReferringFeature(Classifier c, ByteString i, RealFeature r) { - this(c.containingPackage, c.name, i, r); - } - - /** - * Sets both the identifier and the referent. - * - * @param p The new real feature's package. - * @param c The name of the classifier that produced this feature. - * @param i The new RealReferringFeature's identifier. 
- * @param r The real feature referred to by this new feature. - **/ - public RealReferringFeature(String p, String c, ByteString i, RealFeature r) - { - super(p, c, r); - identifier = i; - } - - - /** - * Determines if this feature contains a byte string identifier field. - * - * @return true iff this feature contains a byte string - * identifier field. - **/ - public boolean hasByteStringIdentifier() { return true; } - - - /** - * Retrieves this feature's identifier as a string. - * - * @return This feature's identifier as a string. - **/ - public String getStringIdentifier() { return identifier.toString(); } - - - /** - * Retrieves this feature's identifier as a byte string. - * - * @return This feature's identifier as a byte string. - **/ - public ByteString getByteStringIdentifier() { - return (ByteString) identifier.clone(); - } - - - /** Simply returns the strength of {@link #referent}. */ - public double getStrength() { return referent.getStrength(); } - - - /** - * Return the feature that should be used to index this feature into a - * lexicon. Specifically, we return this feature with a value of 0 so that - * the same features with different real values will map to the same - * object. - * - * @param lexicon The lexicon into which this feature will be indexed. - * @param training Whether or not the learner is currently training. - * @param label The label of the example containing this feature, or -1 - * if we aren't doing per class feature counting. - * @return A feature object appropriate for use as the key of a map. 
- **/ - public Feature getFeatureKey(Lexicon lexicon, boolean training, int label) { - RealFeature f = - (RealFeature) referent.getFeatureKey(lexicon, training, label); - if (training) f = (RealFeature) lexicon.getChildFeature(f, label); - return - new RealReferringFeature(containingPackage, generatingClassifier, - identifier, f); - } - - - /** - * Returns a new feature object, the same as this one in all respects - * except the {@link #referent} field has been multiplied by the specified - * number. - * - * @param m The multiplier. - * @return A new real feature whose value is the product of this feature's - * value and the specified multiplier. - **/ - public RealFeature multiply(double m) { - return - new RealReferringFeature(containingPackage, generatingClassifier, - identifier, referent.multiply(m)); - } - - - /** - * Returns a new feature object that's identical to this feature except its - * strength is given by s. - * - * @param s The strength of the new feature. - * @return A new feature object as above, or null if this - * feature cannot take the specified strength. - **/ - public Feature withStrength(double s) { - return - new RealReferringFeature( - containingPackage, generatingClassifier, identifier, - (RealFeature) referent.withStrength(s)); - } - - - /** - * Returns a feature object in which any strings that are being used to - * represent an identifier or value have been encoded in byte strings. - * - * @param e The encoding to use. - * @return A feature object as above; possible this object. - **/ - public Feature encode(String e) { - RealFeature newReferent = (RealFeature) referent.encode(e); - if (newReferent == referent) return this; - return - new RealReferringFeature(containingPackage, generatingClassifier, - identifier, newReferent); - } - - - /** - * The hash code of a RealReferringFeature is the sum of the - * hash codes of the containing package, the identifier, and the referent - * feature. - * - * @return The hash code for this feature. 
- **/ - public int hashCode() { - return 17 * super.hashCode() + identifier.hashCode(); - } - - - /** - * Two RealReferringFeatures are equivalent when their - * containing packages, identifiers, and referent features are equivalent. - * - * @param o The object with which to compare this feature. - * @return true iff the parameter is an equivalent feature. - **/ - public boolean equals(Object o) { - return - super.equals(o) - && (o instanceof RealReferringFeature - ? identifier.equals(((RealReferringFeature) o).identifier) - : identifier.equals(((RealReferringStringFeature) o).identifier)) - && referent.equals(((RealReferrer) o).referent); - } - - - /** - * Some features are functionally equivalent, differing only in the - * encoding of their values; this method will return true iff - * the class of this feature and f are different, but they - * differ only because they encode their values differently. This method - * does not compare the values themselves, however. - * - * @param f Another feature. - * @return See above. - **/ - public boolean classEquivalent(Feature f) { - return f instanceof RealReferringStringFeature; - } - - - /** - * Compares only the run-time types, packages, classifier names, and - * identifiers of the features. - * - * @param o An object to compare with. - * @return Integers appropriate for sorting features first by package, then - * by classifier name, and then by identifier. - **/ - public int compareNameStrings(Object o) { - int d = super.compareNameStrings(o); - if (d != 0) return d; - return identifier.compareTo(((RealReferringFeature) o).identifier); - } - - - /** - * Writes a string representation of this Feature's package, - * generating classifier, and identifier information to the specified - * buffer. - * - * @param buffer The buffer to write to. 
- **/ - public void writeNameString(StringBuffer buffer) { - super.writeNameString(buffer); - buffer.append(":"); - buffer.append(identifier.toString()); - } - - - /** - * Writes a complete binary representation of the feature. - * - * @param out The output stream. - **/ - public void write(ExceptionlessOutputStream out) { - super.write(out); - identifier.write(out); - } - - - /** - * Reads the representation of a feaeture with this object's run-time type - * from the given stream, overwriting the data in this object. - * - * @param in The input stream. - **/ - public void read(ExceptionlessInputStream in) { - super.read(in); - identifier = ByteString.readByteString(in); - } - - - /** - * Writes a binary representation of the feature intended for use by a - * lexicon, omitting redundant information when possible. - * - * @param out The output stream. - * @param lex The lexicon out of which this feature is being written. - * @param c The fully qualified name of the assumed class. The runtime - * class of this feature won't be written if it's equivalent to - * c. - * @param p The assumed package string. This feature's package string - * won't be written if it's equivalent to p. - * @param g The assumed classifier name string. This feature's - * classifier name string won't be written if it's equivalent - * to g. - * @param si The assumed identifier as a string. If this feature has a - * string identifier, it won't be written if it's equivalent to - * si. - * @param bi The assumed identifier as a byte string. If this feature - * has a byte string identifier, it won't be written if it's - * equivalent to bi. - * @return The name of the runtime type of this feature. 
- **/ - public String lexWrite(ExceptionlessOutputStream out, Lexicon lex, String c, - String p, String g, String si, ByteString bi) { - String result = super.lexWrite(out, lex, c, p, g, si, bi); - identifier.lexWrite(out, bi); - return result; - } - - - /** - * Reads the representation of a feature with this object's run-time type - * as stored by a lexicon, overwriting the data in this object. - * - *

This method is appropriate for reading features as written by - * {@link #lexWrite(ExceptionlessOutputStream,Lexicon,String,String,String,String,ByteString)}. - * - * @param in The input stream. - * @param lex The lexicon we are reading in to. - * @param p The assumed package string. If no package name is given in - * the input stream, the instantiated feature is given this - * package. - * @param g The assumed classifier name string. If no classifier name - * is given in the input stream, the instantiated feature is - * given this classifier name. - * @param si The assumed identifier as a string. If the feature being - * read has a string identifier field and no identifier is - * given in the input stream, the feature is given this - * identifier. - * @param bi The assumed identifier as a byte string. If the feature - * being read has a byte string identifier field and no - * identifier is given in the input stream, the feature is - * given this identifier. - **/ - public void lexRead(ExceptionlessInputStream in, Lexicon lex, String p, - String g, String si, ByteString bi) { - super.lexRead(in, lex, p, g, si, bi); - identifier = ByteString.lexReadByteString(in, bi); - } +public class RealReferringFeature extends RealReferrer { + /** + * The identifier string distinguishes this Feature from other + * Features. + **/ + protected ByteString identifier; + + + /** + * For internal use only. + * + * @see Feature#readFeature(ExceptionlessInputStream) + **/ + protected RealReferringFeature() {} + + /** + * Sets both the identifier and the referent. + * + * @param c The classifier that produced this feature. + * @param i The new RealReferringFeature's identifier. + * @param r The real feature referred to by this new feature. + **/ + public RealReferringFeature(Classifier c, ByteString i, RealFeature r) { + this(c.containingPackage, c.name, i, r); + } + + /** + * Sets both the identifier and the referent. + * + * @param p The new real feature's package. 
+ * @param c The name of the classifier that produced this feature. + * @param i The new RealReferringFeature's identifier. + * @param r The real feature referred to by this new feature. + **/ + public RealReferringFeature(String p, String c, ByteString i, RealFeature r) { + super(p, c, r); + identifier = i; + } + + + /** + * Determines if this feature contains a byte string identifier field. + * + * @return true iff this feature contains a byte string identifier field. + **/ + public boolean hasByteStringIdentifier() { + return true; + } + + + /** + * Retrieves this feature's identifier as a string. + * + * @return This feature's identifier as a string. + **/ + public String getStringIdentifier() { + return identifier.toString(); + } + + + /** + * Retrieves this feature's identifier as a byte string. + * + * @return This feature's identifier as a byte string. + **/ + public ByteString getByteStringIdentifier() { + return (ByteString) identifier.clone(); + } + + + /** Simply returns the strength of {@link #referent}. */ + public double getStrength() { + return referent.getStrength(); + } + + + /** + * Return the feature that should be used to index this feature into a lexicon. Specifically, we + * return this feature with a value of 0 so that the same features with different real values + * will map to the same object. + * + * @param lexicon The lexicon into which this feature will be indexed. + * @param training Whether or not the learner is currently training. + * @param label The label of the example containing this feature, or -1 if we aren't doing per + * class feature counting. + * @return A feature object appropriate for use as the key of a map. 
+ **/ + public Feature getFeatureKey(Lexicon lexicon, boolean training, int label) { + RealFeature f = (RealFeature) referent.getFeatureKey(lexicon, training, label); + if (training) + f = (RealFeature) lexicon.getChildFeature(f, label); + return new RealReferringFeature(containingPackage, generatingClassifier, identifier, f); + } + + + /** + * Returns a new feature object, the same as this one in all respects except the + * {@link #referent} field has been multiplied by the specified number. + * + * @param m The multiplier. + * @return A new real feature whose value is the product of this feature's value and the + * specified multiplier. + **/ + public RealFeature multiply(double m) { + return new RealReferringFeature(containingPackage, generatingClassifier, identifier, + referent.multiply(m)); + } + + + /** + * Returns a new feature object that's identical to this feature except its strength is given by + * s. + * + * @param s The strength of the new feature. + * @return A new feature object as above, or null if this feature cannot take the + * specified strength. + **/ + public Feature withStrength(double s) { + return new RealReferringFeature(containingPackage, generatingClassifier, identifier, + (RealFeature) referent.withStrength(s)); + } + + + /** + * Returns a feature object in which any strings that are being used to represent an identifier + * or value have been encoded in byte strings. + * + * @param e The encoding to use. + * @return A feature object as above; possible this object. + **/ + public Feature encode(String e) { + RealFeature newReferent = (RealFeature) referent.encode(e); + if (newReferent == referent) + return this; + return new RealReferringFeature(containingPackage, generatingClassifier, identifier, + newReferent); + } + + + /** + * The hash code of a RealReferringFeature is the sum of the hash codes of the + * containing package, the identifier, and the referent feature. + * + * @return The hash code for this feature. 
+ **/ + public int hashCode() { + return 17 * super.hashCode() + identifier.hashCode(); + } + + + /** + * Two RealReferringFeatures are equivalent when their containing packages, + * identifiers, and referent features are equivalent. + * + * @param o The object with which to compare this feature. + * @return true iff the parameter is an equivalent feature. + **/ + public boolean equals(Object o) { + return super.equals(o) + && (o instanceof RealReferringFeature ? identifier + .equals(((RealReferringFeature) o).identifier) : identifier + .equals(((RealReferringStringFeature) o).identifier)) + && referent.equals(((RealReferrer) o).referent); + } + + + /** + * Some features are functionally equivalent, differing only in the encoding of their values; + * this method will return true iff the class of this feature and f + * are different, but they differ only because they encode their values differently. This method + * does not compare the values themselves, however. + * + * @param f Another feature. + * @return See above. + **/ + public boolean classEquivalent(Feature f) { + return f instanceof RealReferringStringFeature; + } + + + /** + * Compares only the run-time types, packages, classifier names, and identifiers of the + * features. + * + * @param o An object to compare with. + * @return Integers appropriate for sorting features first by package, then by classifier name, + * and then by identifier. + **/ + public int compareNameStrings(Object o) { + int d = super.compareNameStrings(o); + if (d != 0) + return d; + return identifier.compareTo(((RealReferringFeature) o).identifier); + } + + + /** + * Writes a string representation of this Feature's package, generating classifier, + * and identifier information to the specified buffer. + * + * @param buffer The buffer to write to. 
+ **/ + public void writeNameString(StringBuffer buffer) { + super.writeNameString(buffer); + buffer.append(":"); + buffer.append(identifier.toString()); + } + + + /** + * Writes a complete binary representation of the feature. + * + * @param out The output stream. + **/ + public void write(ExceptionlessOutputStream out) { + super.write(out); + identifier.write(out); + } + + + /** + * Reads the representation of a feaeture with this object's run-time type from the given + * stream, overwriting the data in this object. + * + * @param in The input stream. + **/ + public void read(ExceptionlessInputStream in) { + super.read(in); + identifier = ByteString.readByteString(in); + } + + + /** + * Writes a binary representation of the feature intended for use by a lexicon, omitting + * redundant information when possible. + * + * @param out The output stream. + * @param lex The lexicon out of which this feature is being written. + * @param c The fully qualified name of the assumed class. The runtime class of this feature + * won't be written if it's equivalent to c. + * @param p The assumed package string. This feature's package string won't be written if it's + * equivalent to p. + * @param g The assumed classifier name string. This feature's classifier name string won't be + * written if it's equivalent to g. + * @param si The assumed identifier as a string. If this feature has a string identifier, it + * won't be written if it's equivalent to si. + * @param bi The assumed identifier as a byte string. If this feature has a byte string + * identifier, it won't be written if it's equivalent to bi. + * @return The name of the runtime type of this feature. 
+ **/ + public String lexWrite(ExceptionlessOutputStream out, Lexicon lex, String c, String p, + String g, String si, ByteString bi) { + String result = super.lexWrite(out, lex, c, p, g, si, bi); + identifier.lexWrite(out, bi); + return result; + } + + + /** + * Reads the representation of a feature with this object's run-time type as stored by a + * lexicon, overwriting the data in this object. + * + *

+ * This method is appropriate for reading features as written by + * {@link #lexWrite(ExceptionlessOutputStream,Lexicon,String,String,String,String,ByteString)}. + * + * @param in The input stream. + * @param lex The lexicon we are reading in to. + * @param p The assumed package string. If no package name is given in the input stream, the + * instantiated feature is given this package. + * @param g The assumed classifier name string. If no classifier name is given in the input + * stream, the instantiated feature is given this classifier name. + * @param si The assumed identifier as a string. If the feature being read has a string + * identifier field and no identifier is given in the input stream, the feature is given + * this identifier. + * @param bi The assumed identifier as a byte string. If the feature being read has a byte + * string identifier field and no identifier is given in the input stream, the feature is + * given this identifier. + **/ + public void lexRead(ExceptionlessInputStream in, Lexicon lex, String p, String g, String si, + ByteString bi) { + super.lexRead(in, lex, p, g, si, bi); + identifier = ByteString.lexReadByteString(in, bi); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/RealReferringStringFeature.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/RealReferringStringFeature.java index b4031fa1..9958eaa2 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/RealReferringStringFeature.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/RealReferringStringFeature.java @@ -1,329 +1,309 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.classify; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream; import edu.illinois.cs.cogcomp.lbjava.learn.Lexicon; import edu.illinois.cs.cogcomp.lbjava.util.ByteString; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessInputStream; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessOutputStream; - /** - * A referring real feature is one that has its own identifier, but whose - * value comes from a separate feature that it refers to. - * - * @author Nick Rizzolo + * A referring real feature is one that has its own identifier, but whose value comes from a + * separate feature that it refers to. + * + * @author Nick Rizzolo **/ -public class RealReferringStringFeature extends RealReferrer -{ - /** - * The identifier string distinguishes this - * Feature from other Features. - **/ - protected String identifier; - - - /** - * For internal use only. - * - * @see Feature#readFeature(ExceptionlessInputStream) - **/ - protected RealReferringStringFeature() { } - - /** - * Sets both the identifier and the referent. - * - * @param c The classifier that produced this feature. - * @param i The new RealReferringStringFeature's identifier. - * @param r The real feature referred to by this new feature. - **/ - public RealReferringStringFeature(Classifier c, String i, RealFeature r) { - this(c.containingPackage, c.name, i, r); - } - - /** - * Sets both the identifier and the referent. - * - * @param p The new real feature's package. - * @param c The name of the classifier that produced this feature. - * @param i The new RealReferringStringFeature's identifier. 
- * @param r The real feature referred to by this new feature. - **/ - public RealReferringStringFeature(String p, String c, String i, - RealFeature r) { - super(p, c, r); - identifier = i; - } - - - /** - * Determines if this feature contains a string identifier field. - * - * @return true iff this feature contains a string identifier - * field. - **/ - public boolean hasStringIdentifier() { return true; } - - - /** - * Retrieves this feature's identifier as a string. - * - * @return This feature's identifier as a string. - **/ - public String getStringIdentifier() { return identifier; } - - - /** - * Retrieves this feature's identifier as a byte string. - * - * @return This feature's identifier as a byte string. - **/ - public ByteString getByteStringIdentifier() { - return new ByteString(identifier); - } - - - /** Simply returns the strength of {@link #referent}. */ - public double getStrength() { return referent.getStrength(); } - - - /** - * Return the feature that should be used to index this feature into a - * lexicon. Specifically, we return this feature with a value of 0 so that - * the same features with different real values will map to the same - * object. - * - * @param lexicon The lexicon into which this feature will be indexed. - * @param training Whether or not the learner is currently training. - * @param label The label of the example containing this feature, or -1 - * if we aren't doing per class feature counting. - * @return A feature object appropriate for use as the key of a map. 
- **/ - public Feature getFeatureKey(Lexicon lexicon, boolean training, int label) { - RealFeature f = - (RealFeature) referent.getFeatureKey(lexicon, training, label); - if (training) f = (RealFeature) lexicon.getChildFeature(f, label); - return - new RealReferringStringFeature(containingPackage, generatingClassifier, - identifier, f); - } - - - /** - * Returns a new feature object, the same as this one in all respects - * except the {@link #referent} field has been multiplied by the specified - * number. - * - * @param m The multiplier. - * @return A new real feature whose value is the product of this feature's - * value and the specified multiplier. - **/ - public RealFeature multiply(double m) { - return - new RealReferringStringFeature( - containingPackage, generatingClassifier, identifier, - referent.multiply(m)); - } - - - /** - * Returns a new feature object that's identical to this feature except its - * strength is given by s. - * - * @param s The strength of the new feature. - * @return A new feature object as above, or null if this - * feature cannot take the specified strength. - **/ - public Feature withStrength(double s) { - return - new RealReferringStringFeature( - containingPackage, generatingClassifier, identifier, - (RealFeature) referent.withStrength(s)); - } - - - /** - * Returns a feature object in which any strings that are being used to - * represent an identifier or value have been encoded in byte strings. - * - * @param e The encoding to use. - * @return A feature object as above; possible this object. - **/ - public Feature encode(String e) { - RealFeature newReferent = (RealFeature) referent.encode(e); - if (referent == newReferent && (e == null || e == "String")) return this; - ByteString id = - identifier.length() == 0 ? 
ByteString.emptyString - : new ByteString(identifier, e); - return - new RealReferringFeature(containingPackage, generatingClassifier, id, - newReferent); - } - - - /** - * The hash code of a RealReferringStringFeature is the sum of the - * hash codes of the containing package, the identifier, and the referent - * feature. - * - * @return The hash code for this feature. - **/ - public int hashCode() { - return 17 * super.hashCode() + identifier.hashCode(); - } - - - /** - * Two RealReferringStringFeatures are equivalent when their - * containing packages, identifiers, and referent features are equivalent. - * - * @param o The object with which to compare this feature. - * @return true iff the parameter is an equivalent feature. - **/ - public boolean equals(Object o) { - return - super.equals(o) - && (o instanceof RealReferringStringFeature - ? identifier.equals(((RealReferringStringFeature) o).identifier) - : identifier.equals(((RealReferringFeature) o).identifier)) - && referent.equals(((RealReferrer) o).referent); - } - - - /** - * Some features are functionally equivalent, differing only in the - * encoding of their values; this method will return true iff - * the class of this feature and f are different, but they - * differ only because they encode their values differently. This method - * does not compare the values themselves, however. - * - * @param f Another feature. - * @return See above. - **/ - public boolean classEquivalent(Feature f) { - return f instanceof RealReferringFeature; - } - - - /** - * Compares only the run-time types, packages, classifier names, and - * identifiers of the features. - * - * @param o An object to compare with. - * @return Integers appropriate for sorting features first by package, then - * by classifier name, and then by identifier. 
- **/ - public int compareNameStrings(Object o) { - int d = super.compareNameStrings(o); - if (d != 0) return d; - return identifier.compareTo(((RealReferringStringFeature) o).identifier); - } - - - /** - * Writes a string representation of this Feature's package, - * generating classifier, and identifier information to the specified - * buffer. - * - * @param buffer The buffer to write to. - **/ - public void writeNameString(StringBuffer buffer) { - super.writeNameString(buffer); - buffer.append(":"); - buffer.append(identifier); - } - - - /** - * Writes a complete binary representation of the feature. - * - * @param out The output stream. - **/ - public void write(ExceptionlessOutputStream out) { - super.write(out); - out.writeString(identifier); - } - - - /** - * Reads the representation of a feaeture with this object's run-time type - * from the given stream, overwriting the data in this object. - * - * @param in The input stream. - **/ - public void read(ExceptionlessInputStream in) { - super.read(in); - identifier = in.readString(); - } - - - /** - * Writes a binary representation of the feature intended for use by a - * lexicon, omitting redundant information when possible. - * - * @param out The output stream. - * @param lex The lexicon out of which this feature is being written. - * @param c The fully qualified name of the assumed class. The runtime - * class of this feature won't be written if it's equivalent to - * c. - * @param p The assumed package string. This feature's package string - * won't be written if it's equivalent to p. - * @param g The assumed classifier name string. This feature's - * classifier name string won't be written if it's equivalent - * to g. - * @param si The assumed identifier as a string. If this feature has a - * string identifier, it won't be written if it's equivalent to - * si. - * @param bi The assumed identifier as a byte string. 
If this feature - * has a byte string identifier, it won't be written if it's - * equivalent to bi. - * @return The name of the runtime type of this feature. - **/ - public String lexWrite(ExceptionlessOutputStream out, Lexicon lex, String c, - String p, String g, String si, ByteString bi) { - String result = super.lexWrite(out, lex, c, p, g, si, bi); - out.writeString(identifier.equals(si) ? null : identifier); - return result; - } - - - /** - * Reads the representation of a feature with this object's run-time type - * as stored by a lexicon, overwriting the data in this object. - * - *

This method is appropriate for reading features as written by - * {@link #lexWrite(ExceptionlessOutputStream,Lexicon,String,String,String,String,ByteString)}. - * - * @param in The input stream. - * @param lex The lexicon we are reading in to. - * @param p The assumed package string. If no package name is given in - * the input stream, the instantiated feature is given this - * package. - * @param g The assumed classifier name string. If no classifier name - * is given in the input stream, the instantiated feature is - * given this classifier name. - * @param si The assumed identifier as a string. If the feature being - * read has a string identifier field and no identifier is - * given in the input stream, the feature is given this - * identifier. - * @param bi The assumed identifier as a byte string. If the feature - * being read has a byte string identifier field and no - * identifier is given in the input stream, the feature is - * given this identifier. - **/ - public void lexRead(ExceptionlessInputStream in, Lexicon lex, String p, - String g, String si, ByteString bi) { - super.lexRead(in, lex, p, g, si, bi); - identifier = in.readString(); - if (identifier == null) identifier = si; - } +public class RealReferringStringFeature extends RealReferrer { + /** + * The identifier string distinguishes this Feature from other + * Features. + **/ + protected String identifier; + + + /** + * For internal use only. + * + * @see Feature#readFeature(ExceptionlessInputStream) + **/ + protected RealReferringStringFeature() {} + + /** + * Sets both the identifier and the referent. + * + * @param c The classifier that produced this feature. + * @param i The new RealReferringStringFeature's identifier. + * @param r The real feature referred to by this new feature. + **/ + public RealReferringStringFeature(Classifier c, String i, RealFeature r) { + this(c.containingPackage, c.name, i, r); + } + + /** + * Sets both the identifier and the referent. 
+ * + * @param p The new real feature's package. + * @param c The name of the classifier that produced this feature. + * @param i The new RealReferringStringFeature's identifier. + * @param r The real feature referred to by this new feature. + **/ + public RealReferringStringFeature(String p, String c, String i, RealFeature r) { + super(p, c, r); + identifier = i; + } + + + /** + * Determines if this feature contains a string identifier field. + * + * @return true iff this feature contains a string identifier field. + **/ + public boolean hasStringIdentifier() { + return true; + } + + + /** + * Retrieves this feature's identifier as a string. + * + * @return This feature's identifier as a string. + **/ + public String getStringIdentifier() { + return identifier; + } + + + /** + * Retrieves this feature's identifier as a byte string. + * + * @return This feature's identifier as a byte string. + **/ + public ByteString getByteStringIdentifier() { + return new ByteString(identifier); + } + + + /** Simply returns the strength of {@link #referent}. */ + public double getStrength() { + return referent.getStrength(); + } + + + /** + * Return the feature that should be used to index this feature into a lexicon. Specifically, we + * return this feature with a value of 0 so that the same features with different real values + * will map to the same object. + * + * @param lexicon The lexicon into which this feature will be indexed. + * @param training Whether or not the learner is currently training. + * @param label The label of the example containing this feature, or -1 if we aren't doing per + * class feature counting. + * @return A feature object appropriate for use as the key of a map. 
+ **/ + public Feature getFeatureKey(Lexicon lexicon, boolean training, int label) { + RealFeature f = (RealFeature) referent.getFeatureKey(lexicon, training, label); + if (training) + f = (RealFeature) lexicon.getChildFeature(f, label); + return new RealReferringStringFeature(containingPackage, generatingClassifier, identifier, + f); + } + + + /** + * Returns a new feature object, the same as this one in all respects except the + * {@link #referent} field has been multiplied by the specified number. + * + * @param m The multiplier. + * @return A new real feature whose value is the product of this feature's value and the + * specified multiplier. + **/ + public RealFeature multiply(double m) { + return new RealReferringStringFeature(containingPackage, generatingClassifier, identifier, + referent.multiply(m)); + } + + + /** + * Returns a new feature object that's identical to this feature except its strength is given by + * s. + * + * @param s The strength of the new feature. + * @return A new feature object as above, or null if this feature cannot take the + * specified strength. + **/ + public Feature withStrength(double s) { + return new RealReferringStringFeature(containingPackage, generatingClassifier, identifier, + (RealFeature) referent.withStrength(s)); + } + + + /** + * Returns a feature object in which any strings that are being used to represent an identifier + * or value have been encoded in byte strings. + * + * @param e The encoding to use. + * @return A feature object as above; possible this object. + **/ + public Feature encode(String e) { + RealFeature newReferent = (RealFeature) referent.encode(e); + if (referent == newReferent && (e == null || e == "String")) + return this; + ByteString id = + identifier.length() == 0 ? 
ByteString.emptyString : new ByteString(identifier, e); + return new RealReferringFeature(containingPackage, generatingClassifier, id, newReferent); + } + + + /** + * The hash code of a RealReferringStringFeature is the sum of the hash codes of + * the containing package, the identifier, and the referent feature. + * + * @return The hash code for this feature. + **/ + public int hashCode() { + return 17 * super.hashCode() + identifier.hashCode(); + } + + + /** + * Two RealReferringStringFeatures are equivalent when their containing packages, + * identifiers, and referent features are equivalent. + * + * @param o The object with which to compare this feature. + * @return true iff the parameter is an equivalent feature. + **/ + public boolean equals(Object o) { + return super.equals(o) + && (o instanceof RealReferringStringFeature ? identifier + .equals(((RealReferringStringFeature) o).identifier) : identifier + .equals(((RealReferringFeature) o).identifier)) + && referent.equals(((RealReferrer) o).referent); + } + + + /** + * Some features are functionally equivalent, differing only in the encoding of their values; + * this method will return true iff the class of this feature and f + * are different, but they differ only because they encode their values differently. This method + * does not compare the values themselves, however. + * + * @param f Another feature. + * @return See above. + **/ + public boolean classEquivalent(Feature f) { + return f instanceof RealReferringFeature; + } + + + /** + * Compares only the run-time types, packages, classifier names, and identifiers of the + * features. + * + * @param o An object to compare with. + * @return Integers appropriate for sorting features first by package, then by classifier name, + * and then by identifier. 
+ **/ + public int compareNameStrings(Object o) { + int d = super.compareNameStrings(o); + if (d != 0) + return d; + return identifier.compareTo(((RealReferringStringFeature) o).identifier); + } + + + /** + * Writes a string representation of this Feature's package, generating classifier, + * and identifier information to the specified buffer. + * + * @param buffer The buffer to write to. + **/ + public void writeNameString(StringBuffer buffer) { + super.writeNameString(buffer); + buffer.append(":"); + buffer.append(identifier); + } + + + /** + * Writes a complete binary representation of the feature. + * + * @param out The output stream. + **/ + public void write(ExceptionlessOutputStream out) { + super.write(out); + out.writeString(identifier); + } + + + /** + * Reads the representation of a feaeture with this object's run-time type from the given + * stream, overwriting the data in this object. + * + * @param in The input stream. + **/ + public void read(ExceptionlessInputStream in) { + super.read(in); + identifier = in.readString(); + } + + + /** + * Writes a binary representation of the feature intended for use by a lexicon, omitting + * redundant information when possible. + * + * @param out The output stream. + * @param lex The lexicon out of which this feature is being written. + * @param c The fully qualified name of the assumed class. The runtime class of this feature + * won't be written if it's equivalent to c. + * @param p The assumed package string. This feature's package string won't be written if it's + * equivalent to p. + * @param g The assumed classifier name string. This feature's classifier name string won't be + * written if it's equivalent to g. + * @param si The assumed identifier as a string. If this feature has a string identifier, it + * won't be written if it's equivalent to si. + * @param bi The assumed identifier as a byte string. If this feature has a byte string + * identifier, it won't be written if it's equivalent to bi. 
+ * @return The name of the runtime type of this feature. + **/ + public String lexWrite(ExceptionlessOutputStream out, Lexicon lex, String c, String p, + String g, String si, ByteString bi) { + String result = super.lexWrite(out, lex, c, p, g, si, bi); + out.writeString(identifier.equals(si) ? null : identifier); + return result; + } + + + /** + * Reads the representation of a feature with this object's run-time type as stored by a + * lexicon, overwriting the data in this object. + * + *

+ * This method is appropriate for reading features as written by + * {@link #lexWrite(ExceptionlessOutputStream,Lexicon,String,String,String,String,ByteString)}. + * + * @param in The input stream. + * @param lex The lexicon we are reading in to. + * @param p The assumed package string. If no package name is given in the input stream, the + * instantiated feature is given this package. + * @param g The assumed classifier name string. If no classifier name is given in the input + * stream, the instantiated feature is given this classifier name. + * @param si The assumed identifier as a string. If the feature being read has a string + * identifier field and no identifier is given in the input stream, the feature is given + * this identifier. + * @param bi The assumed identifier as a byte string. If the feature being read has a byte + * string identifier field and no identifier is given in the input stream, the feature is + * given this identifier. + **/ + public void lexRead(ExceptionlessInputStream in, Lexicon lex, String p, String g, String si, + ByteString bi) { + super.lexRead(in, lex, p, g, si, bi); + identifier = in.readString(); + if (identifier == null) + identifier = si; + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/Score.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/Score.java index f11f757f..bfeaddf7 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/Score.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/Score.java @@ -1,89 +1,86 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.classify; /** - * A score is a number produced by a learner that indicates the degree to - * which a particular discrete classification is appropriate for a given - * object. The scores for all possible discrete classifications given an - * object need not be positive or sum to one. A Score object - * simply contains a score and the associated discrete classification. - * - * @author Nick Rizzolo + * A score is a number produced by a learner that indicates the degree to which a particular + * discrete classification is appropriate for a given object. The scores for all possible discrete + * classifications given an object need not be positive or sum to one. A Score object + * simply contains a score and the associated discrete classification. + * + * @author Nick Rizzolo **/ -public class Score implements Comparable, Cloneable -{ - /** The discrete classification associated with this score. */ - public String value; - /** The score. */ - public double score; +public class Score implements Comparable, Cloneable { + /** The discrete classification associated with this score. */ + public String value; + /** The score. */ + public double score; - /** - * Initializes both member variables. - * - * @param v The discrete classification. - * @param s The score. - **/ - public Score(String v, double s) { - value = v; - score = s; - } + /** + * Initializes both member variables. + * + * @param v The discrete classification. + * @param s The score. + **/ + public Score(String v, double s) { + value = v; + score = s; + } - /** - * This method is implemented so that a collection of Scores - * will be sorted first by value and then by score. - * - * @param o The object to compare against. 
- * @return A negative integer, zero, or a positive integer if this object - * is less than, equal to, or greater than the specified object - * respectively. - **/ - public int compareTo(Object o) { - if (!(o instanceof Score)) return -1; - Score s = (Score) o; - int result = value.compareTo(s.value); - if (result == 0) - result = new Double(score).compareTo(new Double(s.score)); - return result; - } + /** + * This method is implemented so that a collection of Scores will be sorted first + * by value and then by score. + * + * @param o The object to compare against. + * @return A negative integer, zero, or a positive integer if this object is less than, equal + * to, or greater than the specified object respectively. + **/ + public int compareTo(Object o) { + if (!(o instanceof Score)) + return -1; + Score s = (Score) o; + int result = value.compareTo(s.value); + if (result == 0) + result = new Double(score).compareTo(new Double(s.score)); + return result; + } - /** - * The string representation of a Score is the value followed - * by the score separated by a colon. - * - * @return The string representation of a Score. - **/ - public String toString() { return value + " : " + score; } + /** + * The string representation of a Score is the value followed by the score + * separated by a colon. + * + * @return The string representation of a Score. + **/ + public String toString() { + return value + " : " + score; + } - /** - * Produces a deep copy of this object. - * - * @return A deep copy of this object. - **/ - public Object clone() { - Object result = null; + /** + * Produces a deep copy of this object. + * + * @return A deep copy of this object. 
+ **/ + public Object clone() { + Object result = null; - try { result = super.clone(); } - catch (Exception e) { - System.err.println("Error cloning " + getClass().getName() + ":"); - e.printStackTrace(); - System.exit(1); - } + try { + result = super.clone(); + } catch (Exception e) { + System.err.println("Error cloning " + getClass().getName() + ":"); + e.printStackTrace(); + System.exit(1); + } - return result; - } + return result; + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/ScoreSet.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/ScoreSet.java index bc57a2f3..72ce3277 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/ScoreSet.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/ScoreSet.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.classify; @@ -18,149 +15,158 @@ /** - * A score set is simply a set of Scores. - * - * @see Score - * @author Nick Rizzolo + * A score set is simply a set of Scores. + * + * @see Score + * @author Nick Rizzolo **/ -public class ScoreSet implements Cloneable -{ - /** The scores in this set, indexed by the discrete classification. */ - private Map set; - - - /** Default constructor. */ - public ScoreSet() { this(null, null); } - - /** - * The elements of the two argument arrays are assumed to be pair-wise - * associated with each other. 
- * - * @param values The classification values being scored. - * @param scores The scores of the classification values. - **/ - public ScoreSet(String[] values, double[] scores) { - set = new TreeMap(); - if (values == null || scores == null) return; - for (int i = 0; i < values.length && i < scores.length; ++i) - put(values[i], scores[i]); - } - - /** - * The elements of the array are added to the set. - * - * @param scores The scores to add to the set. - **/ - public ScoreSet(Score[] scores) { - set = new TreeMap(); - if (scores == null) return; - for (int i = 0; i < scores.length; ++i) - set.put(scores[i].value, scores[i]); - } - - - /** Returns the number of scores in this set. */ - public int size() { return set.size(); } - - - /** - * Sets the score for a particular classification value. - * - * @param v The classification value. - * @param s The score. - **/ - public void put(String v, double s) { set.put(v, new Score(v, s)); } - - - /** - * Retrieves the set of values that have scores associated with them in - * this score set. - * - * @return A set of Strings. - **/ - public Set values() { return set.keySet(); } - - - /** - * Returns the double precision score for a particular classification - * value. - * - * @param v The classification value. - * @return The associated score. - **/ - public double get(String v) { return ((Score) set.get(v)).score; } - - - /** - * Retrieves the {@link Score} object associated with the given - * classification value. - * - * @param v The classification value. - * @return The associated {@link Score} object. - **/ - public Score getScore(String v) { return (Score) set.get(v); } - - - /** Retrieves the value with the highest score in this set. 
*/ - public String highScoreValue() { - String result = null; - double highScore = Double.NEGATIVE_INFINITY; - - for (Iterator I = set.entrySet().iterator(); I.hasNext(); ) { - Map.Entry e = (Map.Entry) I.next(); - double score = ((Score) e.getValue()).score; - if (score > highScore) { - highScore = score; - result = (String) e.getKey(); - } +public class ScoreSet implements Cloneable { + /** The scores in this set, indexed by the discrete classification. */ + private Map set; + + + /** Default constructor. */ + public ScoreSet() { + this(null, null); } - return result; - } - - - /** - * Returns an array view of the Scores contained in this set. - * - * @return An array of Scores. - **/ - public Score[] toArray() { - return (Score[]) set.values().toArray(new Score[set.size()]); - } - - - /** - * The string representation of a ScoreSet is the - * concatenation of the string representations of each Score - * in the set sorted by value, separated by commas, and surrounded by curly - * braces. - * - * @return The string representation of a ScoreSet. - **/ - public String toString() { - String result = "{"; - - if (set.size() > 0) { - Score[] scores = toArray(); - Arrays.sort(scores); - result += " " + scores[0]; - for (int i = 1; i < scores.length; ++i) result += ", " + scores[i]; + /** + * The elements of the two argument arrays are assumed to be pair-wise associated with each + * other. + * + * @param values The classification values being scored. + * @param scores The scores of the classification values. + **/ + public ScoreSet(String[] values, double[] scores) { + set = new TreeMap(); + if (values == null || scores == null) + return; + for (int i = 0; i < values.length && i < scores.length; ++i) + put(values[i], scores[i]); } - return result + " }"; - } - - - /** - * Produces a deep copy of this object. - * - * @return A deep copy of this object. 
- **/ - public Object clone() { - Score[] scores = toArray(); - for (int i = 0; i < scores.length; ++i) - scores[i] = (Score) scores[i].clone(); - return new ScoreSet(scores); - } -} + /** + * The elements of the array are added to the set. + * + * @param scores The scores to add to the set. + **/ + public ScoreSet(Score[] scores) { + set = new TreeMap(); + if (scores == null) + return; + for (int i = 0; i < scores.length; ++i) + set.put(scores[i].value, scores[i]); + } + + + /** Returns the number of scores in this set. */ + public int size() { + return set.size(); + } + + + /** + * Sets the score for a particular classification value. + * + * @param v The classification value. + * @param s The score. + **/ + public void put(String v, double s) { + set.put(v, new Score(v, s)); + } + + + /** + * Retrieves the set of values that have scores associated with them in this score set. + * + * @return A set of Strings. + **/ + public Set values() { + return set.keySet(); + } + + + /** + * Returns the double precision score for a particular classification value. + * + * @param v The classification value. + * @return The associated score. + **/ + public double get(String v) { + return ((Score) set.get(v)).score; + } + + /** + * Retrieves the {@link Score} object associated with the given classification value. + * + * @param v The classification value. + * @return The associated {@link Score} object. + **/ + public Score getScore(String v) { + return (Score) set.get(v); + } + + + /** Retrieves the value with the highest score in this set. */ + public String highScoreValue() { + String result = null; + double highScore = Double.NEGATIVE_INFINITY; + + for (Iterator I = set.entrySet().iterator(); I.hasNext();) { + Map.Entry e = (Map.Entry) I.next(); + double score = ((Score) e.getValue()).score; + if (score > highScore) { + highScore = score; + result = (String) e.getKey(); + } + } + + return result; + } + + + /** + * Returns an array view of the Scores contained in this set. 
+ * + * @return An array of Scores. + **/ + public Score[] toArray() { + return (Score[]) set.values().toArray(new Score[set.size()]); + } + + + /** + * The string representation of a ScoreSet is the concatenation of the string + * representations of each Score in the set sorted by value, separated by commas, + * and surrounded by curly braces. + * + * @return The string representation of a ScoreSet. + **/ + public String toString() { + String result = "{"; + + if (set.size() > 0) { + Score[] scores = toArray(); + Arrays.sort(scores); + result += " " + scores[0]; + for (int i = 1; i < scores.length; ++i) + result += ", " + scores[i]; + } + + return result + " }"; + } + + + /** + * Produces a deep copy of this object. + * + * @return A deep copy of this object. + **/ + public Object clone() { + Score[] scores = toArray(); + for (int i = 0; i < scores.length; ++i) + scores[i] = (Score) scores[i].clone(); + return new ScoreSet(scores); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/TestDiscrete.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/TestDiscrete.java index 13495861..ab892fb6 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/TestDiscrete.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/TestDiscrete.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.classify; @@ -27,720 +24,677 @@ /** - * This class is a program that can evaluate any Classifier - * against an oracle Classifier on the objects returned from a - * Parser. - * - *

Usage: - *

- * - * java edu.illinois.cs.cogcomp.lbjava.classify.TestDiscrete [-t <n>] <classifier> - * <oracle> <parser> - * <input file> [<null label> - * [<null label> ...]] - * - *
- * - *

Options: The -t <n> option is similar to the - * LBJava compiler's command line option of the same name. When - * <n> is greater than 0, a time stamp is printed to - * STDOUT after every <n> examples are - * processed. - * - *

Input: The first three command line parameters are fully - * qualified class names, e.g. myPackage.myClassifier. - * Next, <input file> is passed (as a String) - * to the constructor of <parser>. The optional parameter - * <null label> identifies one of the possible labels - * produced by <oracle> as representing "no - * classification". It is used during the computation of overall precision, - * recall, and F1 scores. Finally, it is also assumed that - * <classifier> is discrete, and that its - * discreteValue(Object) method is implemented. - * - *

Output: First some timing information is presented. The first - * time reported is the time taken to load the specified classifier's Java - * class into memory. This reflects the time taken for LBJava to load the - * classifier's internal representation if the classifier does - * not make use of the cachedin keyword. Next, the time - * taken to evaluate the first example is reported. It isn't particularly - * informative unless the classifier does make use of the - * cachedin keyword. In this case, it reflects the time LBJava - * takes to load the classifier's internal representation better than the - * first time reported. Finally, the average time taken to execute the - * classifier's discreteValue(Object) method is reported. - * - *

After the timing information, an ASCII table is written to - * STDOUT reporting precision, recall, and F1 scores - * itemized by the values that either the classifier or the oracle produced - * during the test. The two rightmost columns are named - * "LCount" and "PCount" (standing for "labeled - * count" and "predicted count" respectively), and they report the number of - * times the oracle produced each label and the number of times the - * classifier predicted each label respectively. If a "null label" is - * specified, overall precision, recall, and F1 scores and a total - * count of non-null-labeled examples are reported at the bottom of the - * table. In the last row, whether a "null label" is specified or not, - * overall accuracy is reported in the precision column. In the count - * column, the total number of predictions (or labels, equivalently) is - * reported. + * This class is a program that can evaluate any Classifier against an oracle + * Classifier on the objects returned from a Parser. + * + *

+ * Usage:

+ * java edu.illinois.cs.cogcomp.lbjava.classify.TestDiscrete [-t <n>] <classifier> + * <oracle> <parser> + * <input file> [<null label> + * [<null label> ...]] + *
+ * + *

+ * Options: The -t <n> option is similar to the LBJava compiler's command + * line option of the same name. When <n> is greater than 0, a time stamp is + * printed to STDOUT after every <n> examples are processed. + * + *

+ * Input: The first three command line parameters are fully qualified class names, e.g. + * myPackage.myClassifier. Next, <input file> is passed (as a + * String) to the constructor of <parser>. The optional parameter + * <null label> identifies one of the possible labels produced by + * <oracle> as representing "no classification". It is used during the + * computation of overall precision, recall, and F1 scores. Finally, it is also assumed that + * <classifier> is discrete, and that its discreteValue(Object) + * method is implemented. + * + *

+ * Output: First some timing information is presented. The first time reported is the time + * taken to load the specified classifier's Java class into memory. This reflects the time taken for + * LBJava to load the classifier's internal representation if the classifier does not + * make use of the cachedin keyword. Next, the time taken to evaluate the first example + * is reported. It isn't particularly informative unless the classifier does make use of the + * cachedin keyword. In this case, it reflects the time LBJava takes to load the + * classifier's internal representation better than the first time reported. Finally, the average + * time taken to execute the classifier's discreteValue(Object) method is reported. + * + *

+ * After the timing information, an ASCII table is written to STDOUT reporting + * precision, recall, and F1 scores itemized by the values that either the classifier or + * the oracle produced during the test. The two rightmost columns are named "LCount" + * and "PCount" (standing for "labeled count" and "predicted count" respectively), and + * they report the number of times the oracle produced each label and the number of times the + * classifier predicted each label respectively. If a "null label" is specified, overall precision, + * recall, and F1 scores and a total count of non-null-labeled examples are reported at + * the bottom of the table. In the last row, whether a "null label" is specified or not, overall + * accuracy is reported in the precision column. In the count column, the total number of + * predictions (or labels, equivalently) is reported. **/ -public class TestDiscrete -{ - /** References the classifier that is to be tested. */ - private static Classifier classifier; - /** References the oracle classifier to test against. */ - private static Classifier oracle; - /** References the parser supplying the testing objects. */ - private static Parser parser; - /** The number of examples processed in between time stamp messages. */ - private static int outputGranularity; - - - /** - * The entry point of this program. - * - * @param args The command line parameters. - **/ - public static void main(String[] args) { - long totalTime = -System.currentTimeMillis(); - TestDiscrete tester = instantiate(args); - totalTime += System.currentTimeMillis(); - System.out.println("Classifier loaded in " + (totalTime / 1000.0) - + " seconds."); - testDiscrete(tester, classifier, oracle, parser, true, outputGranularity); - } - - - /** - * Tests the given discrete classifier against the given oracle using the - * given parser to provide the labeled testing data. 
This simplified - * interface to - * {@link #testDiscrete(TestDiscrete,Classifier,Classifier,Parser,boolean,int)} - * assumes there are no null predictions and that output should not be - * generated on STDOUT. - * - * @param classifier The classifier to be tested. - * @param oracle The classifier to test against. - * @param parser The parser supplying the labeled example objects. - * @return A new TestDiscrete object filled with testing - * statistics. - **/ - public static TestDiscrete testDiscrete(Classifier classifier, - Classifier oracle, Parser parser) { - return - testDiscrete(new TestDiscrete(), classifier, oracle, parser, false, 0); - } - - - /** - * Tests the given discrete classifier against the given oracle using the - * given parser to provide the labeled testing data. If the parser returns - * examples as Object[]s containing arrays of - * ints and doubles, as would be the case if - * pre-extraction was performed, then it is assumed that this example array - * already includes the label, so this is used directly and the oracle - * classifier is ignored. In this case, it is also assumed that the given - * discrete classifier is an instance of Learner and thus - * a lexicon of label mappings can be retrieved from it. - * - * @param tester An object of this class that has already been told via - * {@link #addNull(String)} which prediction values are - * considered to be null predictions. - * @param classifier The classifier to be tested. - * @param oracle The classifier to test against. - * @param parser The parser supplying the labeled example objects. - * @param output Whether or not to produce output on - * STDOUT. - * @param outputGranularity - * The number of examples processed in between time stamp - * messages. - * @return The same TestDiscrete object passed in the first - * argument, after being filled with statistics. 
- **/ - public static TestDiscrete testDiscrete(TestDiscrete tester, - Classifier classifier, - Classifier oracle, - Parser parser, - boolean output, - int outputGranularity) { - int processed = 1; - long totalTime = 0; - Lexicon labelLexicon = null; - Runtime runtime = null; - boolean preExtraction = false; - - if (output && outputGranularity > 0) { - runtime = Runtime.getRuntime(); - System.out.println("0 examples tested at " + new Date()); - System.out.println("Total memory before first example: " - + runtime.totalMemory()); - Object example = parser.next(); - if (example == null) return tester; - - totalTime -= System.currentTimeMillis(); - String prediction = classifier.discreteValue(example); - totalTime += System.currentTimeMillis(); - System.out.println("First example processed in " + (totalTime / 1000.0) - + " seconds."); - System.out.println("Total memory after first example: " - + runtime.totalMemory()); - - String gold; - if (example instanceof Object[] - && ((Object[]) example)[0] instanceof int[]) { - preExtraction = true; - labelLexicon = ((Learner) classifier).getLabelLexicon(); - gold = - ((Feature) - labelLexicon.lookupKey(((int[]) ((Object[]) example)[2])[0])) - .getStringValue(); - } - else gold = oracle.discreteValue(example); - - tester.reportPrediction(prediction, gold); - - for (example = parser.next(); example != null; - example = parser.next(), ++processed) { - if (processed % outputGranularity == 0) - System.out.println(processed + " examples tested at " + new Date()); - - totalTime -= System.currentTimeMillis(); - prediction = classifier.discreteValue(example); +public class TestDiscrete { + /** References the classifier that is to be tested. */ + private static Classifier classifier; + /** References the oracle classifier to test against. */ + private static Classifier oracle; + /** References the parser supplying the testing objects. 
*/ + private static Parser parser; + /** The number of examples processed in between time stamp messages. */ + private static int outputGranularity; + + + /** + * The entry point of this program. + * + * @param args The command line parameters. + **/ + public static void main(String[] args) { + long totalTime = -System.currentTimeMillis(); + TestDiscrete tester = instantiate(args); totalTime += System.currentTimeMillis(); - assert prediction != null - : "Classifier returned null prediction for example " + example; + System.out.println("Classifier loaded in " + (totalTime / 1000.0) + " seconds."); + testDiscrete(tester, classifier, oracle, parser, true, outputGranularity); + } - if (preExtraction) - gold = - ((Feature) - labelLexicon.lookupKey(((int[]) ((Object[]) example)[2])[0])) - .getStringValue(); - else gold = oracle.discreteValue(example); - tester.reportPrediction(prediction, gold); - } + /** + * Tests the given discrete classifier against the given oracle using the given parser to + * provide the labeled testing data. This simplified interface to + * {@link #testDiscrete(TestDiscrete,Classifier,Classifier,Parser,boolean,int)} assumes there + * are no null predictions and that output should not be generated on STDOUT. + * + * @param classifier The classifier to be tested. + * @param oracle The classifier to test against. + * @param parser The parser supplying the labeled example objects. + * @return A new TestDiscrete object filled with testing statistics. + **/ + public static TestDiscrete testDiscrete(Classifier classifier, Classifier oracle, Parser parser) { + return testDiscrete(new TestDiscrete(), classifier, oracle, parser, false, 0); + } + - System.out.println(processed + " examples tested at " + new Date() - + "\n"); + /** + * Tests the given discrete classifier against the given oracle using the given parser to + * provide the labeled testing data. 
If the parser returns examples as Object[]s + * containing arrays of ints and doubles, as would be the case if + * pre-extraction was performed, then it is assumed that this example array already includes the + * label, so this is used directly and the oracle classifier is ignored. In this case, it is + * also assumed that the given discrete classifier is an instance of Learner and + * thus a lexicon of label mappings can be retrieved from it. + * + * @param tester An object of this class that has already been told via {@link #addNull(String)} + * which prediction values are considered to be null predictions. + * @param classifier The classifier to be tested. + * @param oracle The classifier to test against. + * @param parser The parser supplying the labeled example objects. + * @param output Whether or not to produce output on STDOUT. + * @param outputGranularity The number of examples processed in between time stamp messages. + * @return The same TestDiscrete object passed in the first argument, after being + * filled with statistics. 
+ **/ + public static TestDiscrete testDiscrete(TestDiscrete tester, Classifier classifier, + Classifier oracle, Parser parser, boolean output, int outputGranularity) { + int processed = 1; + long totalTime = 0; + Lexicon labelLexicon = null; + Runtime runtime = null; + boolean preExtraction = false; + + if (output && outputGranularity > 0) { + runtime = Runtime.getRuntime(); + System.out.println("0 examples tested at " + new Date()); + System.out.println("Total memory before first example: " + runtime.totalMemory()); + Object example = parser.next(); + if (example == null) + return tester; + + totalTime -= System.currentTimeMillis(); + String prediction = classifier.discreteValue(example); + totalTime += System.currentTimeMillis(); + System.out.println("First example processed in " + (totalTime / 1000.0) + " seconds."); + System.out.println("Total memory after first example: " + runtime.totalMemory()); + + String gold; + if (example instanceof Object[] && ((Object[]) example)[0] instanceof int[]) { + preExtraction = true; + labelLexicon = ((Learner) classifier).getLabelLexicon(); + gold = + ((Feature) labelLexicon.lookupKey(((int[]) ((Object[]) example)[2])[0])) + .getStringValue(); + } else + gold = oracle.discreteValue(example); + + tester.reportPrediction(prediction, gold); + + for (example = parser.next(); example != null; example = parser.next(), ++processed) { + if (processed % outputGranularity == 0) + System.out.println(processed + " examples tested at " + new Date()); + + totalTime -= System.currentTimeMillis(); + prediction = classifier.discreteValue(example); + totalTime += System.currentTimeMillis(); + assert prediction != null : "Classifier returned null prediction for example " + + example; + + if (preExtraction) + gold = + ((Feature) labelLexicon.lookupKey(((int[]) ((Object[]) example)[2])[0])) + .getStringValue(); + else + gold = oracle.discreteValue(example); + + tester.reportPrediction(prediction, gold); + } + + System.out.println(processed + " 
examples tested at " + new Date() + "\n"); + } else { + if (output) { + runtime = Runtime.getRuntime(); + System.out.println("Total memory before first example: " + runtime.totalMemory()); + } + + Object example = parser.next(); + if (example == null) + return tester; + + totalTime -= System.currentTimeMillis(); + String prediction = classifier.discreteValue(example); + totalTime += System.currentTimeMillis(); + if (output) { + System.out.println("First example processed in " + (totalTime / 1000.0) + + " seconds."); + System.out.println("Total memory after first example: " + runtime.totalMemory()); + } + + String gold; + if (example instanceof Object[] && ((Object[]) example)[0] instanceof int[]) { + preExtraction = true; + labelLexicon = ((Learner) classifier).getLabelLexicon(); + gold = + ((Feature) labelLexicon.lookupKey(((int[]) ((Object[]) example)[2])[0])) + .getStringValue(); + } else + gold = oracle.discreteValue(example); + + tester.reportPrediction(prediction, gold); + + for (example = parser.next(); example != null; example = parser.next(), ++processed) { + totalTime -= System.currentTimeMillis(); + prediction = classifier.discreteValue(example); + totalTime += System.currentTimeMillis(); + assert prediction != null : "Classifier returned null prediction for example " + + example; + + if (preExtraction) + gold = + ((Feature) labelLexicon.lookupKey(((int[]) ((Object[]) example)[2])[0])) + .getStringValue(); + else + gold = oracle.discreteValue(example); + + tester.reportPrediction(prediction, gold); + } + } + + if (output) { + System.out.println("Average evaluation time: " + (totalTime / (1000.0 * processed)) + + " seconds\n"); + tester.printPerformance(System.out); + } + + return tester; } - else { - if (output) { - runtime = Runtime.getRuntime(); - System.out.println("Total memory before first example: " - + runtime.totalMemory()); - } - - Object example = parser.next(); - if (example == null) return tester; - - totalTime -= System.currentTimeMillis(); 
- String prediction = classifier.discreteValue(example); - totalTime += System.currentTimeMillis(); - if (output) { - System.out.println("First example processed in " - + (totalTime / 1000.0) + " seconds."); - System.out.println("Total memory after first example: " - + runtime.totalMemory()); - } - - String gold; - if (example instanceof Object[] - && ((Object[]) example)[0] instanceof int[]) { - preExtraction = true; - labelLexicon = ((Learner) classifier).getLabelLexicon(); - gold = - ((Feature) - labelLexicon.lookupKey(((int[]) ((Object[]) example)[2])[0])) - .getStringValue(); - } - else gold = oracle.discreteValue(example); - - tester.reportPrediction(prediction, gold); - - for (example = parser.next(); example != null; - example = parser.next(), ++processed) { - totalTime -= System.currentTimeMillis(); - prediction = classifier.discreteValue(example); - totalTime += System.currentTimeMillis(); - assert prediction != null - : "Classifier returned null prediction for example " + example; - - if (preExtraction) - gold = - ((Feature) - labelLexicon.lookupKey(((int[]) ((Object[]) example)[2])[0])) - .getStringValue(); - else gold = oracle.discreteValue(example); - - tester.reportPrediction(prediction, gold); - } + + + /** + * Given command line parameters representing the fully qualified names of the classifier to be + * tested, the oracle classifier to test against, the parser supplying the testing objects, and + * the input parameter to the parser's constructor this method instantiates all three objects. + * + * @param args The command line. + * @return A new tester object containing the "null" labels. 
+ **/ + private static TestDiscrete instantiate(String[] args) { + String classifierName = null, oracleName = null, parserName = null; + String inputFile = null; + TestDiscrete result = new TestDiscrete(); + + try { + int offset = 0; + + if (args[0].charAt(0) == '-') { + if (!args[0].equals("-t")) + throw new Exception(); + outputGranularity = Integer.parseInt(args[1]); + offset = 2; + } + + classifierName = args[offset]; + oracleName = args[offset + 1]; + parserName = args[offset + 2]; + inputFile = args[offset + 3]; + for (int i = offset + 4; i < args.length; ++i) + result.addNull(args[i]); + } catch (Exception e) { + System.err + .println("usage:\n" + + " java edu.illinois.cs.cogcomp.lbjava.classify.TestDiscrete [-t ] \\\n" + + " \\\n" + + " [ [ ...]]"); + System.exit(1); + } + + classifier = ClassUtils.getClassifier(classifierName); + oracle = ClassUtils.getClassifier(oracleName); + parser = + ClassUtils.getParser(parserName, new Class[] {String.class}, + new String[] {inputFile}); + + return result; } - if (output) { - System.out.println("Average evaluation time: " - + (totalTime / (1000.0 * processed)) + " seconds\n"); - tester.printPerformance(System.out); + + /** The histogram of correct labels. */ + protected HashMap goldHistogram; + /** The histogram of predictions. */ + protected HashMap predictionHistogram; + /** The histogram of correct predictions. */ + protected HashMap correctHistogram; + /** + * The set of "null" labels whose statistics are not included in overall precision, recall, F1, + * or accuracy. + **/ + protected HashSet nullLabels; + + + /** Default constructor. 
*/ + public TestDiscrete() { + goldHistogram = new HashMap(); + predictionHistogram = new HashMap(); + correctHistogram = new HashMap(); + nullLabels = new HashSet(); } - return tester; - } - - - /** - * Given command line parameters representing the fully qualified names of - * the classifier to be tested, the oracle classifier to test against, the - * parser supplying the testing objects, and the input parameter to the - * parser's constructor this method instantiates all three objects. - * - * @param args The command line. - * @return A new tester object containing the "null" labels. - **/ - private static TestDiscrete instantiate(String[] args) { - String classifierName = null, oracleName = null, parserName = null; - String inputFile = null; - TestDiscrete result = new TestDiscrete(); - - try { - int offset = 0; - - if (args[0].charAt(0) == '-') { - if (!args[0].equals("-t")) throw new Exception(); - outputGranularity = Integer.parseInt(args[1]); - offset = 2; - } - - classifierName = args[offset]; - oracleName = args[offset + 1]; - parserName = args[offset + 2]; - inputFile = args[offset + 3]; - for (int i = offset + 4; i < args.length; ++i) result.addNull(args[i]); + + /** + * Whenever a prediction is made, report that prediction and the correct label with this method. + * + * @param p The prediction. + * @param l The correct label. + **/ + public void reportPrediction(String p, String l) { + histogramAdd(goldHistogram, l, 1); + histogramAdd(predictionHistogram, p, 1); + if (p.equals(l)) + histogramAdd(correctHistogram, p, 1); } - catch (Exception e) { - System.err.println( - "usage:\n" - + " java edu.illinois.cs.cogcomp.lbjava.classify.TestDiscrete [-t ] \\\n" - + " \\\n" - + " [ [ ...]]"); - System.exit(1); + + + /** + * Report all the predictions in the argument's histograms. + * + * @param t Another object of this class. 
+ **/ + public void reportAll(TestDiscrete t) { + histogramAddAll(goldHistogram, t.goldHistogram); + histogramAddAll(predictionHistogram, t.predictionHistogram); + histogramAddAll(correctHistogram, t.correctHistogram); } - classifier = ClassUtils.getClassifier(classifierName); - oracle = ClassUtils.getClassifier(oracleName); - parser = - ClassUtils.getParser(parserName, new Class[]{ String.class }, - new String[]{ inputFile }); - - return result; - } - - - /** The histogram of correct labels. */ - protected HashMap goldHistogram; - /** The histogram of predictions. */ - protected HashMap predictionHistogram; - /** The histogram of correct predictions. */ - protected HashMap correctHistogram; - /** - * The set of "null" labels whose statistics are not included in overall - * precision, recall, F1, or accuracy. - **/ - protected HashSet nullLabels; - - - /** Default constructor. */ - public TestDiscrete() { - goldHistogram = new HashMap(); - predictionHistogram = new HashMap(); - correctHistogram = new HashMap(); - nullLabels = new HashSet(); - } - - - /** - * Whenever a prediction is made, report that prediction and the correct - * label with this method. - * - * @param p The prediction. - * @param l The correct label. - **/ - public void reportPrediction(String p, String l) { - histogramAdd(goldHistogram, l, 1); - histogramAdd(predictionHistogram, p, 1); - if (p.equals(l)) histogramAdd(correctHistogram, p, 1); - } - - - /** - * Report all the predictions in the argument's histograms. - * - * @param t Another object of this class. - **/ - public void reportAll(TestDiscrete t) { - histogramAddAll(goldHistogram, t.goldHistogram); - histogramAddAll(predictionHistogram, t.predictionHistogram); - histogramAddAll(correctHistogram, t.correctHistogram); - } - - - /** - * Returns the set of labels that have been reported so far. - * - * @return An array containing the labels that have been reported so far. 
- **/ - public String[] getLabels() { - return (String[]) goldHistogram.keySet().toArray(new String[0]); - } - - - /** - * Returns the set of predictions that have been reported so far. - * - * @return An array containing the predictions that have been reported so - * far. - **/ - public String[] getPredictions() { - return (String[]) predictionHistogram.keySet().toArray(new String[0]); - } - - - /** - * Returns the set of all classes reported as either predictions or labels. - * - * @return An array containing all classes reported as either predictions - * or labels. - **/ - public String[] getAllClasses() { - HashSet result = new HashSet(goldHistogram.keySet()); - result.addAll(predictionHistogram.keySet()); - return (String[]) result.toArray(new String[0]); - } - - - /** - * Adds a label to the set of "null" labels. - * - * @param n The label to add. - **/ - public void addNull(String n) { nullLabels.add(n); } - - - /** - * Removes a label from the set of "null" labels. - * - * @param n The label to remove. - **/ - public void removeNull(String n) { nullLabels.remove(n); } - - - /** - * Determines if a label is treated as a "null" label. - * - * @param n The label in question. - * @return true iff n is one of the "null" - * labels. - **/ - public boolean isNull(String n) { return nullLabels.contains(n); } - - - /** Returns true iff there exist "null" labels. */ - public boolean hasNulls() { return nullLabels.size() > 0; } - - - /** - * Takes a histogram implemented as a map and increments the count for the - * given key by the given amount. - * - * @param histogram The histogram. - * @param key The key whose count should be incremented. - * @param amount The amount by which to increment. 
- **/ - protected void histogramAdd(HashMap histogram, String key, int amount) { - Integer I = (Integer) histogram.get(key); - if (I == null) I = new Integer(0); - histogram.put(key, new Integer(I.intValue() + amount)); - } - - - /** - * Takes a histogram implemented as a map and retrieves the count for the - * given key. - * - * @param histogram The histogram. - * @param key The key whose count should be retrieved. - * @return The count of the specified key. - **/ - protected int histogramGet(HashMap histogram, String key) { - Integer I = (Integer) histogram.get(key); - if (I == null) I = new Integer(0); - return I.intValue(); - } - - - /** - * Takes two histograms implemented as maps and adds the amounts found in - * the second histogram to the amounts found in the first. - * - * @param h1 The first histogram, whose values will be modified. - * @param h2 The second histogram, whose values will be added into the - * first's. - **/ - protected void histogramAddAll(HashMap h1, HashMap h2) { - for (Iterator I = h2.entrySet().iterator(); I.hasNext(); ) { - Map.Entry e = (Map.Entry) I.next(); - histogramAdd(h1, (String) e.getKey(), - ((Integer) e.getValue()).intValue()); + + /** + * Returns the set of labels that have been reported so far. + * + * @return An array containing the labels that have been reported so far. + **/ + public String[] getLabels() { + return (String[]) goldHistogram.keySet().toArray(new String[0]); } - } - - - /** - * Returns the number of times the requested label was reported. - * - * @param l The label in question. - * @return The number of times l was reported. - **/ - public int getLabeled(String l) { return histogramGet(goldHistogram, l); } - - - /** - * Returns the number of times the requested prediction was reported. - * - * @param p The prediction in question. - * @return The number of times p was reported. 
- **/ - public int getPredicted(String p) { - return histogramGet(predictionHistogram, p); - } - - - /** - * Returns the number of times the requested prediction was reported - * correctly. - * - * @param p The prediction in question. - * @return The number of times p was reported. - **/ - public int getCorrect(String p) { - return histogramGet(correctHistogram, p); - } - - - /** - * Returns the precision associated with the given prediction. - * - * @param p The given prediction. - * @return The precision associated with p. - **/ - public double getPrecision(String p) { - return getCorrect(p) / (double) getPredicted(p); - } - - - /** - * Returns the recall associated with the given label. - * - * @param l The given label. - * @return The precision associated with l. - **/ - public double getRecall(String l) { - return getCorrect(l) / (double) getLabeled(l); - } - - - /** - * Returns the F1 score associated with the given label. - * - * @param l The given label. - * @return The F1 score associated with l. - **/ - public double getF1(String l) { return getF(1, l); } - - - /** - * Returns the Fbeta score associated with the given label. - * Fbeta is defined as: - *

- * Fbeta = (beta2 + 1) * P * R - * / (beta2 * P + R) - *
- * - * @param b The value of beta. - * @param l The given label. - * @return The Fbeta score associated with l. - **/ - public double getF(double b, String l) { - double precision = getPrecision(l); - double recall = getRecall(l); - return (b * b + 1) * precision * recall / (b * b * precision + recall); - } - - - /** - * Computes overall the overall statistics precision, recall, - * F1, and accuracy. Note that these statistics are all - * equivalent unless "null" labels have been added. - * - * @return An array in which the first element represents overall - * precision, the second represents overall recall, then F1, and - * finally accuracy. - **/ - public double[] getOverallStats() { return getOverallStats(1); } - - - /** - * Computes overall the overall statistics precision, recall, - * Fbeta, and accuracy. Note that these statistics are all - * equivalent unless "null" labels have been added. - * - * @param b The value of beta. - * @return An array in which the first element represents overall - * precision, the second represents overall recall, then F1, and - * finally accuracy. - **/ - public double[] getOverallStats(double b) { - String[] allClasses = getAllClasses(); - - int totalCorrect = 0; - int totalPredicted = 0; - int notNullCorrect = 0; - int notNullPredicted = 0; - int notNullLabeled = 0; - - for (int i = 0; i < allClasses.length; ++i) { - int correct = getCorrect(allClasses[i]); - int predicted = getPredicted(allClasses[i]); - int labeled = getLabeled(allClasses[i]); - - totalCorrect += correct; - totalPredicted += predicted; - - if (hasNulls() && !isNull(allClasses[i])) { - notNullCorrect += correct; - notNullPredicted += predicted; - notNullLabeled += labeled; - } + + + /** + * Returns the set of predictions that have been reported so far. + * + * @return An array containing the predictions that have been reported so far. 
+ **/ + public String[] getPredictions() { + return (String[]) predictionHistogram.keySet().toArray(new String[0]); } - double[] result = new double[4]; - result[3] = totalCorrect / (double) totalPredicted; - if (hasNulls()) { - result[0] = notNullCorrect / (double) notNullPredicted; - result[1] = notNullCorrect / (double) notNullLabeled; - result[2] = (b * b + 1) * result[0] * result[1] - / (b * b * result[0] + result[1]); + /** + * Returns the set of all classes reported as either predictions or labels. + * + * @return An array containing all classes reported as either predictions or labels. + **/ + public String[] getAllClasses() { + HashSet result = new HashSet(goldHistogram.keySet()); + result.addAll(predictionHistogram.keySet()); + return (String[]) result.toArray(new String[0]); } - else result[0] = result[1] = result[2] = result[3]; - - return result; - } - - - /** - * Performance results are written to the given stream in the form of - * precision, recall, and F1 statistics. - * - * @param out The stream to write to. - **/ - public void printPerformance(PrintStream out) { - String[] allClasses = getAllClasses(); - final HashSet n = nullLabels; - Arrays.sort(allClasses, - new Comparator() { - public int compare(Object o1, Object o2) { - String s1 = (String) o1; - String s2 = (String) o2; - int n1 = n.contains(s1) ? 1 : 0; - int n2 = n.contains(s2) ? 
1 : 0; - if (n1 != n2) return n1 - n2; - return s1.compareTo(s2); - } - }); - - int rows = allClasses.length + 1; - if (hasNulls()) ++rows; - String[] rowLabels = new String[rows]; - System.arraycopy(allClasses, 0, rowLabels, 0, allClasses.length); - rowLabels[rows - 1] = "Accuracy"; - if (hasNulls()) rowLabels[rows - 2] = "Overall"; - - String[] columnLabels = - new String[]{ "Label", "Precision", "Recall", "F1", "LCount", - "PCount" }; - - int totalCorrect = 0; - int totalPredicted = 0; - int notNullCorrect = 0; - int notNullPredicted = 0; - int notNullLabeled = 0; - - Double[][] table = new Double[rows][]; - Double zero = new Double(0); - - for (int i = 0; i < allClasses.length; ++i) { - int correct = getCorrect(allClasses[i]); - int predicted = getPredicted(allClasses[i]); - int labeled = getLabeled(allClasses[i]); - - totalCorrect += correct; - totalPredicted += predicted; - - if (hasNulls() && !isNull(allClasses[i])) { - notNullCorrect += correct; - notNullPredicted += predicted; - notNullLabeled += labeled; - } - - table[i] = - new Double[]{ zero, zero, zero, new Double(labeled), - new Double(predicted) }; - - if (predicted > 0) - table[i][0] = new Double(100 * correct / (double) predicted); - if (labeled > 0) - table[i][1] = new Double(100 * correct / (double) labeled); - - if (correct > 0) { - double p = table[i][0].doubleValue(); - double r = table[i][1].doubleValue(); - table[i][2] = new Double(2 * p * r / (p + r)); - } + + + /** + * Adds a label to the set of "null" labels. + * + * @param n The label to add. + **/ + public void addNull(String n) { + nullLabels.add(n); } - int[] dashRows = null; - if (hasNulls()) { - table[rows - 2] = - new Double[]{ zero, zero, zero, new Double(notNullLabeled), - new Double(notNullPredicted) }; + /** + * Removes a label from the set of "null" labels. + * + * @param n The label to remove. 
+ **/ + public void removeNull(String n) { + nullLabels.remove(n); + } + - if (notNullPredicted > 0) - table[rows - 2][0] = - new Double(100 * notNullCorrect / (double) notNullPredicted); - if (notNullLabeled > 0) - table[rows - 2][1] = - new Double(100 * notNullCorrect / (double) notNullLabeled); + /** + * Determines if a label is treated as a "null" label. + * + * @param n The label in question. + * @return true iff n is one of the "null" labels. + **/ + public boolean isNull(String n) { + return nullLabels.contains(n); + } - if (notNullCorrect > 0) { - double p = table[rows - 2][0].doubleValue(); - double r = table[rows - 2][1].doubleValue(); - table[rows - 2][2] = new Double(2 * p * r / (p + r)); - } - int nonNullLabels = allClasses.length - nullLabels.size(); - dashRows = new int[]{ 0, nonNullLabels, allClasses.length }; + /** Returns true iff there exist "null" labels. */ + public boolean hasNulls() { + return nullLabels.size() > 0; } - else dashRows = new int[]{ 0, allClasses.length }; - double accuracy = - totalPredicted == 0 ? 0 : 100 * totalCorrect / (double) totalPredicted; - table[rows - 1] = - new Double[]{ new Double(accuracy), null, null, null, - new Double(totalPredicted) }; - TableFormat.printTableFormat(out, columnLabels, rowLabels, table, - new int[]{ 3, 3, 3, 0, 0 }, dashRows); - } -} + /** + * Takes a histogram implemented as a map and increments the count for the given key by the + * given amount. + * + * @param histogram The histogram. + * @param key The key whose count should be incremented. + * @param amount The amount by which to increment. + **/ + protected void histogramAdd(HashMap histogram, String key, int amount) { + Integer I = (Integer) histogram.get(key); + if (I == null) + I = new Integer(0); + histogram.put(key, new Integer(I.intValue() + amount)); + } + + + /** + * Takes a histogram implemented as a map and retrieves the count for the given key. + * + * @param histogram The histogram. 
+ * @param key The key whose count should be retrieved. + * @return The count of the specified key. + **/ + protected int histogramGet(HashMap histogram, String key) { + Integer I = (Integer) histogram.get(key); + if (I == null) + I = new Integer(0); + return I.intValue(); + } + + + /** + * Takes two histograms implemented as maps and adds the amounts found in the second histogram + * to the amounts found in the first. + * + * @param h1 The first histogram, whose values will be modified. + * @param h2 The second histogram, whose values will be added into the first's. + **/ + protected void histogramAddAll(HashMap h1, HashMap h2) { + for (Iterator I = h2.entrySet().iterator(); I.hasNext();) { + Map.Entry e = (Map.Entry) I.next(); + histogramAdd(h1, (String) e.getKey(), ((Integer) e.getValue()).intValue()); + } + } + + /** + * Returns the number of times the requested label was reported. + * + * @param l The label in question. + * @return The number of times l was reported. + **/ + public int getLabeled(String l) { + return histogramGet(goldHistogram, l); + } + + + /** + * Returns the number of times the requested prediction was reported. + * + * @param p The prediction in question. + * @return The number of times p was reported. + **/ + public int getPredicted(String p) { + return histogramGet(predictionHistogram, p); + } + + + /** + * Returns the number of times the requested prediction was reported correctly. + * + * @param p The prediction in question. + * @return The number of times p was reported. + **/ + public int getCorrect(String p) { + return histogramGet(correctHistogram, p); + } + + + /** + * Returns the precision associated with the given prediction. + * + * @param p The given prediction. + * @return The precision associated with p. + **/ + public double getPrecision(String p) { + return getCorrect(p) / (double) getPredicted(p); + } + + + /** + * Returns the recall associated with the given label. + * + * @param l The given label. 
+ * @return The precision associated with l. + **/ + public double getRecall(String l) { + return getCorrect(l) / (double) getLabeled(l); + } + + + /** + * Returns the F1 score associated with the given label. + * + * @param l The given label. + * @return The F1 score associated with l. + **/ + public double getF1(String l) { + return getF(1, l); + } + + + /** + * Returns the Fbeta score associated with the given label. Fbeta is + * defined as:
Fbeta = (beta2 + 1) * P * R / + * (beta2 * P + R)
+ * + * @param b The value of beta. + * @param l The given label. + * @return The Fbeta score associated with l. + **/ + public double getF(double b, String l) { + double precision = getPrecision(l); + double recall = getRecall(l); + return (b * b + 1) * precision * recall / (b * b * precision + recall); + } + + + /** + * Computes overall the overall statistics precision, recall, F1, and accuracy. Note + * that these statistics are all equivalent unless "null" labels have been added. + * + * @return An array in which the first element represents overall precision, the second + * represents overall recall, then F1, and finally accuracy. + **/ + public double[] getOverallStats() { + return getOverallStats(1); + } + + + /** + * Computes overall the overall statistics precision, recall, Fbeta, and accuracy. + * Note that these statistics are all equivalent unless "null" labels have been added. + * + * @param b The value of beta. + * @return An array in which the first element represents overall precision, the second + * represents overall recall, then F1, and finally accuracy. 
+ **/ + public double[] getOverallStats(double b) { + String[] allClasses = getAllClasses(); + + int totalCorrect = 0; + int totalPredicted = 0; + int notNullCorrect = 0; + int notNullPredicted = 0; + int notNullLabeled = 0; + + for (int i = 0; i < allClasses.length; ++i) { + int correct = getCorrect(allClasses[i]); + int predicted = getPredicted(allClasses[i]); + int labeled = getLabeled(allClasses[i]); + + totalCorrect += correct; + totalPredicted += predicted; + + if (hasNulls() && !isNull(allClasses[i])) { + notNullCorrect += correct; + notNullPredicted += predicted; + notNullLabeled += labeled; + } + } + + double[] result = new double[4]; + result[3] = totalCorrect / (double) totalPredicted; + + if (hasNulls()) { + result[0] = notNullCorrect / (double) notNullPredicted; + result[1] = notNullCorrect / (double) notNullLabeled; + result[2] = (b * b + 1) * result[0] * result[1] / (b * b * result[0] + result[1]); + } else + result[0] = result[1] = result[2] = result[3]; + + return result; + } + + + /** + * Performance results are written to the given stream in the form of precision, recall, and F1 + * statistics. + * + * @param out The stream to write to. + **/ + public void printPerformance(PrintStream out) { + String[] allClasses = getAllClasses(); + final HashSet n = nullLabels; + Arrays.sort(allClasses, new Comparator() { + public int compare(Object o1, Object o2) { + String s1 = (String) o1; + String s2 = (String) o2; + int n1 = n.contains(s1) ? 1 : 0; + int n2 = n.contains(s2) ? 
1 : 0; + if (n1 != n2) + return n1 - n2; + return s1.compareTo(s2); + } + }); + + int rows = allClasses.length + 1; + if (hasNulls()) + ++rows; + String[] rowLabels = new String[rows]; + System.arraycopy(allClasses, 0, rowLabels, 0, allClasses.length); + rowLabels[rows - 1] = "Accuracy"; + if (hasNulls()) + rowLabels[rows - 2] = "Overall"; + + String[] columnLabels = + new String[] {"Label", "Precision", "Recall", "F1", "LCount", "PCount"}; + + int totalCorrect = 0; + int totalPredicted = 0; + int notNullCorrect = 0; + int notNullPredicted = 0; + int notNullLabeled = 0; + + Double[][] table = new Double[rows][]; + Double zero = new Double(0); + + for (int i = 0; i < allClasses.length; ++i) { + int correct = getCorrect(allClasses[i]); + int predicted = getPredicted(allClasses[i]); + int labeled = getLabeled(allClasses[i]); + + totalCorrect += correct; + totalPredicted += predicted; + + if (hasNulls() && !isNull(allClasses[i])) { + notNullCorrect += correct; + notNullPredicted += predicted; + notNullLabeled += labeled; + } + + table[i] = new Double[] {zero, zero, zero, new Double(labeled), new Double(predicted)}; + + if (predicted > 0) + table[i][0] = new Double(100 * correct / (double) predicted); + if (labeled > 0) + table[i][1] = new Double(100 * correct / (double) labeled); + + if (correct > 0) { + double p = table[i][0].doubleValue(); + double r = table[i][1].doubleValue(); + table[i][2] = new Double(2 * p * r / (p + r)); + } + } + + int[] dashRows = null; + + if (hasNulls()) { + table[rows - 2] = + new Double[] {zero, zero, zero, new Double(notNullLabeled), + new Double(notNullPredicted)}; + + if (notNullPredicted > 0) + table[rows - 2][0] = new Double(100 * notNullCorrect / (double) notNullPredicted); + if (notNullLabeled > 0) + table[rows - 2][1] = new Double(100 * notNullCorrect / (double) notNullLabeled); + + if (notNullCorrect > 0) { + double p = table[rows - 2][0].doubleValue(); + double r = table[rows - 2][1].doubleValue(); + table[rows - 2][2] = new 
Double(2 * p * r / (p + r)); + } + + int nonNullLabels = allClasses.length - nullLabels.size(); + dashRows = new int[] {0, nonNullLabels, allClasses.length}; + } else + dashRows = new int[] {0, allClasses.length}; + + double accuracy = totalPredicted == 0 ? 0 : 100 * totalCorrect / (double) totalPredicted; + table[rows - 1] = + new Double[] {new Double(accuracy), null, null, null, new Double(totalPredicted)}; + + TableFormat.printTableFormat(out, columnLabels, rowLabels, table, + new int[] {3, 3, 3, 0, 0}, dashRows); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/TestReal.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/TestReal.java index 9f85bb2e..28408284 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/TestReal.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/TestReal.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.classify; @@ -17,12 +14,12 @@ import java.util.Date; /** - * This class is a program that can evaluate any {@link Classifier} - * against an oracle {@link Classifier} on the objects returned from - * a {@link Parser}, with different statistical metrics. + * This class is a program that can evaluate any {@link Classifier} against an oracle + * {@link Classifier} on the objects returned from a {@link Parser}, with different statistical + * metrics. 
* - * In particular, this class is for {@code real} type, i.e. regression. - * For {@code discrete} type, refer class {@link TestDiscrete}. + * In particular, this class is for {@code real} type, i.e. regression. For {@code discrete} type, + * refer class {@link TestDiscrete}. * * @author Yiming Jiang */ @@ -35,14 +32,15 @@ public class TestReal { protected DescriptiveStatistics goldSubtractPredictionStats = new DescriptiveStatistics(); // array of abs(y - y_hat) - protected DescriptiveStatistics goldSubtractPredictionAbsoluteStats = new DescriptiveStatistics(); + protected DescriptiveStatistics goldSubtractPredictionAbsoluteStats = + new DescriptiveStatistics(); // array of y protected DescriptiveStatistics goldStats = new DescriptiveStatistics(); /** - * Tests the given {@code real} classifier against the given oracle - * using the given {@link Parser} to provide the {@code real} labeled testing data. + * Tests the given {@code real} classifier against the given oracle using the given + * {@link Parser} to provide the {@code real} labeled testing data. * * This method uses root-mean-square error as the evaluation criteria. * @@ -53,12 +51,8 @@ public class TestReal { * @param output Whether or not to produce output on {@code stdout}. * @param outputGranularity The number of examples processed in between time stamp messages. 
*/ - public static void testReal(TestReal tester, - Classifier classifier, - Classifier oracle, - Parser parser, - boolean output, - int outputGranularity) { + public static void testReal(TestReal tester, Classifier classifier, Classifier oracle, + Parser parser, boolean output, int outputGranularity) { int processed = 1; long totalTime = 0; @@ -97,8 +91,7 @@ public static void testReal(TestReal tester, } System.out.println(processed + " examples tested at " + new Date() + "\n"); - } - else { + } else { if (output) { runtime = Runtime.getRuntime(); System.out.println("Total memory before first example: " + runtime.totalMemory()); @@ -113,7 +106,8 @@ public static void testReal(TestReal tester, totalTime += System.currentTimeMillis(); if (output) { - System.out.println("First example processed in " + (totalTime / 1000.0) + " seconds."); + System.out.println("First example processed in " + (totalTime / 1000.0) + + " seconds."); System.out.println("Total memory after first example: " + runtime.totalMemory()); } @@ -133,25 +127,27 @@ public static void testReal(TestReal tester, } if (output) { - System.out.println("Average evaluation time: " + (totalTime / (1000.0 * processed)) + " seconds\n"); + System.out.println("Average evaluation time: " + (totalTime / (1000.0 * processed)) + + " seconds\n"); tester.printPerformace(System.out, processed); } } /** * Update internal book keeping of each prediction and gold + * * @param prediction prediction value * @param gold gold value */ public void reportPrediction(double prediction, double gold) { // keep track of root mean squared error sum - sumOfSquareOfGoldSubtractPrediction += Math.pow((prediction-gold), 2); + sumOfSquareOfGoldSubtractPrediction += Math.pow((prediction - gold), 2); // add y-y_hat into list - goldSubtractPredictionStats.addValue(gold-prediction); + goldSubtractPredictionStats.addValue(gold - prediction); // add |y-y_hat| into list - goldSubtractPredictionAbsoluteStats.addValue(Math.abs(gold-prediction)); + 
goldSubtractPredictionAbsoluteStats.addValue(Math.abs(gold - prediction)); // add y into list goldStats.addValue(gold); @@ -159,6 +155,7 @@ public void reportPrediction(double prediction, double gold) { /** * Write to PrintStream, with statistical information + * * @param out printstream * @param processed number of testing examples */ @@ -175,6 +172,7 @@ public void printPerformace(PrintStream out, int processed) { /** * Compute Root Mean Squared Error + * * @param processed number of testing examples * @return RMSE */ @@ -184,6 +182,7 @@ private double getRootMeanSquaredError(int processed) { /** * Compute Mean Squared Error + * * @param processed number of testing examples * @return MSE */ @@ -193,6 +192,7 @@ private double getMeanSquaredError(int processed) { /** * Compute Mean Absolute Error + * * @param processed number of testing examples * @return MAE */ @@ -202,6 +202,7 @@ private double getMeanAbsoluteError(int processed) { /** * Compute Median Absolute Error + * * @return MedAE */ private double getMedianAbsoluteError() { @@ -209,8 +210,8 @@ private double getMedianAbsoluteError() { } /** - * Compute Explained Variance - * The best possible score is 1.0, lower values are worse. + * Compute Explained Variance The best possible score is 1.0, lower values are worse. + * * @return EV */ private double getExplainedVariance() { @@ -220,11 +221,11 @@ private double getExplainedVariance() { } /** - * Compute R2 score, also called Coefficient of Determination - * Best possible score is 1.0 and it can be negative - * (because the model can be arbitrarily worse). - * A constant model that always predicts the expected value of y, - * disregarding the input features, would get a R^2 score of 0.0. + * Compute R2 score, also called Coefficient of Determination Best possible score is 1.0 and it + * can be negative (because the model can be arbitrarily worse). 
A constant model that always + * predicts the expected value of y, disregarding the input features, would get a R^2 score of + * 0.0. + * * @return R2 score */ private double getR2Score() { @@ -237,4 +238,4 @@ private double getR2Score() { return 1 - (sumOfSquareOfGoldSubtractPrediction / sum); } -} \ No newline at end of file +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/ValueComparer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/ValueComparer.java index f3b7b0c8..f72bf3e0 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/ValueComparer.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/classify/ValueComparer.java @@ -1,119 +1,116 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.classify; /** - * This classifier applies another classifier to the example object and - * returns a Boolean feature (with value "true" or "false") representing the - * equality of the argument classifier's feature value to a given value. The - * argument classifier must be discrete. - * - * @see MultiValueComparer - * @author Nick Rizzolo + * This classifier applies another classifier to the example object and returns a Boolean feature + * (with value "true" or "false") representing the equality of the argument classifier's feature + * value to a given value. The argument classifier must be discrete. 
+ * + * @see MultiValueComparer + * @author Nick Rizzolo **/ -public class ValueComparer extends Classifier -{ - /** The classifier whose value will be compared. */ - protected Classifier labeler; - /** The value to compare with. */ - protected String value; - - - /** - * Constructor. - * - * @param c The classifier whose value will be compared. - * @param v The value to compare with. - **/ - public ValueComparer(Classifier c, String v) { - super("ValueComparer"); - labeler = c; - value = v; - } - - - /** Sets the value of {@link #labeler}. */ - public void setLabeler(Classifier l) { labeler = l; } - - - /** - * Returns a Boolean feature (with value "true" or "false") representing - * the equality of the output of {@link #labeler} applied to the argument - * object and {@link #value}. - * - * @param o The object to make decisions about. - * @return A feature vector containing the feature described above. - **/ - public FeatureVector classify(Object o) { - return new FeatureVector(featureValue(o)); - } - - - /** - * Returns the classification of the given example object as a single - * feature instead of a {@link FeatureVector}. - * - * @param o The object to classify. - * @return The classification of o as a feature. - **/ - public Feature featureValue(Object o) { - String label = labeler.discreteValue(o); - short prediction = label.equals(value) ? (short) 1 : (short) 0; - return - new DiscretePrimitiveStringFeature( - "edu.illinois.cs.cogcomp.lbjava.classify", "ValueComparer", "", - DiscreteFeature.BooleanValues[prediction], prediction, (short) 2); - } - - - /** - * Returns the value of the discrete feature that would be returned by this - * classifier. - * - * @param o The object to classify. - * @return The value of the feature produced for the input object. - **/ - public String discreteValue(Object o) { - String label = labeler.discreteValue(o); - return DiscreteFeature.BooleanValues[label.equals(value) ? 
1 : 0]; - } - - - /** - * Returns a string describing the input type of this classifier. - * - * @return A string describing the input type of this classifier. - **/ - public String getInputType() { return labeler.getInputType(); } - - - /** - * Returns the array of allowable values that a feature returned by this - * classifier may take. - * - * @see DiscreteFeature#BooleanValues - * @return DiscreteFeature.BooleanValues - **/ - public String[] allowableValues() { return DiscreteFeature.BooleanValues; } - - - /** - * The String representation of a ValueComparer - * has the form "ValueComparer(child), - * where child is the String representation of the - * classifier whose value is being compared. - * - * @return A string of the form described above. - **/ - public String toString() { return "ValueComparer(" + labeler + ")"; } +public class ValueComparer extends Classifier { + /** The classifier whose value will be compared. */ + protected Classifier labeler; + /** The value to compare with. */ + protected String value; + + + /** + * Constructor. + * + * @param c The classifier whose value will be compared. + * @param v The value to compare with. + **/ + public ValueComparer(Classifier c, String v) { + super("ValueComparer"); + labeler = c; + value = v; + } + + + /** Sets the value of {@link #labeler}. */ + public void setLabeler(Classifier l) { + labeler = l; + } + + + /** + * Returns a Boolean feature (with value "true" or "false") representing the equality of the + * output of {@link #labeler} applied to the argument object and {@link #value}. + * + * @param o The object to make decisions about. + * @return A feature vector containing the feature described above. + **/ + public FeatureVector classify(Object o) { + return new FeatureVector(featureValue(o)); + } + + + /** + * Returns the classification of the given example object as a single feature instead of a + * {@link FeatureVector}. + * + * @param o The object to classify. 
+ * @return The classification of o as a feature. + **/ + public Feature featureValue(Object o) { + String label = labeler.discreteValue(o); + short prediction = label.equals(value) ? (short) 1 : (short) 0; + return new DiscretePrimitiveStringFeature("edu.illinois.cs.cogcomp.lbjava.classify", + "ValueComparer", "", DiscreteFeature.BooleanValues[prediction], prediction, + (short) 2); + } + + + /** + * Returns the value of the discrete feature that would be returned by this classifier. + * + * @param o The object to classify. + * @return The value of the feature produced for the input object. + **/ + public String discreteValue(Object o) { + String label = labeler.discreteValue(o); + return DiscreteFeature.BooleanValues[label.equals(value) ? 1 : 0]; + } + + + /** + * Returns a string describing the input type of this classifier. + * + * @return A string describing the input type of this classifier. + **/ + public String getInputType() { + return labeler.getInputType(); + } + + + /** + * Returns the array of allowable values that a feature returned by this classifier may take. + * + * @see DiscreteFeature#BooleanValues + * @return DiscreteFeature.BooleanValues + **/ + public String[] allowableValues() { + return DiscreteFeature.BooleanValues; + } + + + /** + * The String representation of a ValueComparer has the form + * "ValueComparer(child), where child is the + * String representation of the classifier whose value is being compared. + * + * @return A string of the form described above. 
+ **/ + public String toString() { + return "ValueComparer(" + labeler + ")"; + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/frontend/GenerateParserAndSymbols.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/frontend/GenerateParserAndSymbols.java index c7008b2c..6a91cf6d 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/frontend/GenerateParserAndSymbols.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/frontend/GenerateParserAndSymbols.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.frontend; @@ -21,87 +18,89 @@ import java_cup.Main; /** - * A wrapper for running {@link java_cup.Main} to generate the {@link parser} - * and {@link sym} classes, as well as {@link SymbolNames}. - *
- * This replaces the old automake configuration (and perl scripts) - * with a Maven compatible version. - *
- * NB: java_cup.Main was last updated in 1996 back when people - * didn't capitalize their class names. Maybe an update is due? + * A wrapper for running {@link java_cup.Main} to generate the {@link parser} and {@link sym} + * classes, as well as {@link SymbolNames}. This replaces the old automake configuration (and perl + * scripts) with a Maven compatible version. NB: java_cup.Main was last updated in 1996 back + * when people didn't capitalize their class names. Maybe an update is due? + * * @author Christos Christodoulopoulos * */ public class GenerateParserAndSymbols { - public GenerateParserAndSymbols() { - // Assuming we are in /target/classes - String sourcePath = getClass().getClassLoader().getResource("").getPath() + "../../src/main/"; - String classPath = getClass().getResource("").getPath(); - String packageName = classPath.substring(classPath.indexOf("classes/") + 8); - String packagePath = sourcePath + "java/" + packageName; - String cupFile = sourcePath + "lex/parser.cup"; + public GenerateParserAndSymbols() { + // Assuming we are in /target/classes + String sourcePath = + getClass().getClassLoader().getResource("").getPath() + "../../src/main/"; + String classPath = getClass().getResource("").getPath(); + String packageName = classPath.substring(classPath.indexOf("classes/") + 8); + String packagePath = sourcePath + "java/" + packageName; + String cupFile = sourcePath + "lex/parser.cup"; - assert new File(cupFile).exists(); - try { - String[] javaCupArgs = { cupFile }; - // Step 1: Run CUP to generate parser.java and sys.java - Main.main(javaCupArgs); + assert new File(cupFile).exists(); + try { + String[] javaCupArgs = {cupFile}; + // Step 1: Run CUP to generate parser.java and sys.java + Main.main(javaCupArgs); - // Step 2: Move the generated files to the appropriate directory - File parserFile = new File("parser.java"); - File symFile = new File("sym.java"); - File newSymFile = new File(packagePath + "sym.java"); - assert parserFile.exists() && 
symFile.exists(); - parserFile.renameTo(new File(packagePath + "parser.java")); - symFile.renameTo(newSymFile); + // Step 2: Move the generated files to the appropriate directory + File parserFile = new File("parser.java"); + File symFile = new File("sym.java"); + File newSymFile = new File(packagePath + "sym.java"); + assert parserFile.exists() && symFile.exists(); + parserFile.renameTo(new File(packagePath + "parser.java")); + symFile.renameTo(newSymFile); - // Step 3: Generate the SymbolNames class - File symNamesFile = new File(packagePath + "SymbolNames.java"); - // Simply read the symbol names from sym.java and create a table - BufferedReader in = new BufferedReader(new InputStreamReader( - new FileInputStream(newSymFile))); - String names = ""; - String line; - while ((line = in.readLine()) != null) { - if (!line.trim().startsWith("public static final int")) continue; - String name = line.substring(line.indexOf("int") + 4, line.indexOf('=') - 1); - names += "\"" + name + "\", "; - } - in.close(); - - BufferedWriter out = new BufferedWriter(new OutputStreamWriter( - new FileOutputStream(symNamesFile))); + // Step 3: Generate the SymbolNames class + File symNamesFile = new File(packagePath + "SymbolNames.java"); + // Simply read the symbol names from sym.java and create a table + BufferedReader in = + new BufferedReader(new InputStreamReader(new FileInputStream(newSymFile))); + String names = ""; + String line; + while ((line = in.readLine()) != null) { + if (!line.trim().startsWith("public static final int")) + continue; + String name = line.substring(line.indexOf("int") + 4, line.indexOf('=') - 1); + names += "\"" + name + "\", "; + } + in.close(); + + BufferedWriter out = + new BufferedWriter(new OutputStreamWriter(new FileOutputStream(symNamesFile))); // Add the licence header String licence = "/**\n"; - in = new BufferedReader(new InputStreamReader( - new FileInputStream(sourcePath + "resources/license-header.txt"))); + in = + new BufferedReader(new 
InputStreamReader(new FileInputStream(sourcePath + + "resources/license-header.txt"))); while ((line = in.readLine()) != null) { - if (line.isEmpty()) licence += " *\n"; - else licence += " * " + line + "\n"; + if (line.isEmpty()) + licence += " *\n"; + else + licence += " * " + line + "\n"; } in.close(); licence += " */\n"; - String output = licence + "package " + - packageName.replaceAll("/", ".").substring(0, packageName.length() - 1) + ";\n\n" + - "public class SymbolNames {\n"+ - "\tpublic static String nameTable[] = {\n"+ - "\t\t" + names.substring(0, names.length() - 2) + "\n" + - "\t};\n" + - "}"; - out.write(output); - out.close(); - } - catch (Exception e) { - e.printStackTrace(); - System.exit(1); - } - } + String output = + licence + + "package " + + packageName.replaceAll("/", ".").substring(0, + packageName.length() - 1) + ";\n\n" + + "public class SymbolNames {\n" + + "\tpublic static String nameTable[] = {\n" + "\t\t" + + names.substring(0, names.length() - 2) + "\n" + "\t};\n" + "}"; + out.write(output); + out.close(); + } catch (Exception e) { + e.printStackTrace(); + System.exit(1); + } + } - public static void main(String[] args) { - new GenerateParserAndSymbols(); - } + public static void main(String[] args) { + new GenerateParserAndSymbols(); + } } diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/frontend/ScannerError.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/frontend/ScannerError.java index 3cba8730..c612b089 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/frontend/ScannerError.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/frontend/ScannerError.java @@ -1,53 +1,45 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. 
See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.frontend; /** - * This class prints useful error messages for scanner generated errrors. - * - * @author Nick Rizzolo + * This class prints useful error messages for scanner generated errrors. + * + * @author Nick Rizzolo **/ -class ScannerError -{ - /** - * Signal an error due to an unterminated comment appearing in the token - * stream. - **/ - public static void unterminatedCommentError() { - System.err.println("Unterminated comment."); - } +class ScannerError { + /** + * Signal an error due to an unterminated comment appearing in the token stream. + **/ + public static void unterminatedCommentError() { + System.err.println("Unterminated comment."); + } - /** - * Signal an error due to discovering an end-of-comment marker while not - * scanning a comment. - **/ - public static void commentEndWithoutBegin() { - System.err.println("Comment ending encountered without beginning."); - } + /** + * Signal an error due to discovering an end-of-comment marker while not scanning a comment. + **/ + public static void commentEndWithoutBegin() { + System.err.println("Comment ending encountered without beginning."); + } - /** - * Signal an error due to an invalid character (one which is not specified - * as being allowed by the language definition) in the source text. - **/ - public static void illegalCharacterError() { - System.err.println("Illegal character"); - } + /** + * Signal an error due to an invalid character (one which is not specified as being allowed by + * the language definition) in the source text. 
+ **/ + public static void illegalCharacterError() { + System.err.println("Illegal character"); + } - /** - * Signal an error in scanning which does not fall into any of the above - * categories. - **/ - public static void otherError() { - System.err.println("Other"); - } + /** + * Signal an error in scanning which does not fall into any of the above categories. + **/ + public static void otherError() { + System.err.println("Other"); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/frontend/SymbolNames.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/frontend/SymbolNames.java index 8c3160c6..a6bc3d47 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/frontend/SymbolNames.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/frontend/SymbolNames.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.frontend; diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/frontend/TokenValue.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/frontend/TokenValue.java index fd1f77a4..0e781947 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/frontend/TokenValue.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/frontend/TokenValue.java @@ -1,108 +1,111 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. 
See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.frontend; /** - * Objects of this class are returned by LBJava's scanner to its parser. It - * simply holds some information about the token and provides easy access to - * a few primitive parsing routines. - * - * @author Nick Rizzolo + * Objects of this class are returned by LBJava's scanner to its parser. It simply holds some + * information about the token and provides easy access to a few primitive parsing routines. + * + * @author Nick Rizzolo **/ -public class TokenValue -{ - /** The line on which the token is found in the source file. */ - public int line; - /** - * The byte offset in the file at which the token is found in the source - * file. - **/ - public int byteOffset; - /** The text in the source file that comprises the token. */ - public String text; - /** The name of the source file. */ - public String filename; +public class TokenValue { + /** The line on which the token is found in the source file. */ + public int line; + /** + * The byte offset in the file at which the token is found in the source file. + **/ + public int byteOffset; + /** The text in the source file that comprises the token. */ + public String text; + /** The name of the source file. */ + public String filename; - /** Default constructor. Does nothing. */ - TokenValue() { } + /** Default constructor. Does nothing. */ + TokenValue() {} - /** - * Full constructor. - * - * @param text The text in the source that comprises the token. 
- * @param line The line on which the token is found in the source. - * @param byteOffset The byte offset in the file at which the token is - * found in the source. - * @param filename The name of the source file. - **/ - TokenValue(String text, int line, int byteOffset, String filename) { - this.text = text; - this.line = line; - this.byteOffset = byteOffset; - this.filename = filename; - } + /** + * Full constructor. + * + * @param text The text in the source that comprises the token. + * @param line The line on which the token is found in the source. + * @param byteOffset The byte offset in the file at which the token is found in the source. + * @param filename The name of the source file. + **/ + TokenValue(String text, int line, int byteOffset, String filename) { + this.text = text; + this.line = line; + this.byteOffset = byteOffset; + this.filename = filename; + } - /** - * Return the token's text in a String. - * - * @return The token's text. - **/ - public String toString() { return text; } + /** + * Return the token's text in a String. + * + * @return The token's text. + **/ + public String toString() { + return text; + } - /** - * Attempts to parse the token's text as if it represented an integer. - * - * @return The integer that the token's text represents. - **/ - public int toInt() { return Integer.parseInt(text); } + /** + * Attempts to parse the token's text as if it represented an integer. + * + * @return The integer that the token's text represents. + **/ + public int toInt() { + return Integer.parseInt(text); + } - /** - * Attempts to parse the token's text as if it represented an integer. - * - * @return The integer that the token's text represents. - **/ - public long toLong() { return Long.parseLong(text); } + /** + * Attempts to parse the token's text as if it represented an integer. + * + * @return The integer that the token's text represents. 
+ **/ + public long toLong() { + return Long.parseLong(text); + } - /** - * Attempts to parse the token's text as if it represented a double - * precision floating point value. - * - * @return The double precision floating point value that the token's text - * represents. - **/ - public double toFloat() { return Float.parseFloat(text); } + /** + * Attempts to parse the token's text as if it represented a double precision floating point + * value. + * + * @return The double precision floating point value that the token's text represents. + **/ + public double toFloat() { + return Float.parseFloat(text); + } - /** - * Attempts to parse the token's text as if it represented a double - * precision floating point value. - * - * @return The double precision floating point value that the token's text - * represents. - **/ - public double toDouble() { return Double.parseDouble(text); } + /** + * Attempts to parse the token's text as if it represented a double precision floating point + * value. + * + * @return The double precision floating point value that the token's text represents. + **/ + public double toDouble() { + return Double.parseDouble(text); + } - /** - * Attempts to parse the token's text as if it represented a boolean value. - * - * @return The boolean value that the token's text represents. - **/ - public boolean toBoolean() { return text.equals("true"); } + /** + * Attempts to parse the token's text as if it represented a boolean value. + * + * @return The boolean value that the token's text represents. + **/ + public boolean toBoolean() { + return text.equals("true"); + } - /** - * Attempts to parse the token's text as if it represented a character - * value. - * - * @return The character value that the token's text represents. - **/ - public String toChar() { return text.substring(1, text.length() - 1); } + /** + * Attempts to parse the token's text as if it represented a character value. 
+ * + * @return The character value that the token's text represents. + **/ + public String toChar() { + return text.substring(1, text.length() - 1); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/frontend/Yylex.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/frontend/Yylex.java index 4aed2c9e..faa61713 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/frontend/Yylex.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/frontend/Yylex.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ /// --- scanner.jlex ------------------------------------------ vim:syntax=lex diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/ArgumentReplacer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/ArgumentReplacer.java index 615334f1..852594cb 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/ArgumentReplacer.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/ArgumentReplacer.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.infer; @@ -14,45 +11,41 @@ /** - * Anonymous inner classes extending this class are instantiated by the code - * generated by the LBJava compiler when creating - * FirstOrderConstraint representations. The methods of this - * class are used to compute new values for the arguments of quantified - * constraint expressions. - * - * @see edu.illinois.cs.cogcomp.lbjava.infer.FirstOrderConstraint - * @author Nick Rizzolo + * Anonymous inner classes extending this class are instantiated by the code generated by the LBJava + * compiler when creating FirstOrderConstraint representations. The methods of this + * class are used to compute new values for the arguments of quantified constraint expressions. + * + * @see edu.illinois.cs.cogcomp.lbjava.infer.FirstOrderConstraint + * @author Nick Rizzolo **/ -abstract public class ArgumentReplacer -{ - /** - * The settings of non-quantification variables in context at the equality - * in question. - **/ - protected Object[] context; - /** - * The settings of quantification variables in context at the equality in - * question. - **/ - protected Vector quantificationVariables; - - - /** - * Initializing constructor. - * - * @param c The context of the corresponding quantified constraint - * expression, except for quantification variables. - **/ - public ArgumentReplacer(Object[] c) { context = c; } - - - /** - * Provides the settings of quantification variables. - * - * @param q The settings of quantification variables. 
- **/ - public void setQuantificationVariables(Vector q) { - quantificationVariables = q; - } +abstract public class ArgumentReplacer { + /** + * The settings of non-quantification variables in context at the equality in question. + **/ + protected Object[] context; + /** + * The settings of quantification variables in context at the equality in question. + **/ + protected Vector quantificationVariables; + + + /** + * Initializing constructor. + * + * @param c The context of the corresponding quantified constraint expression, except for + * quantification variables. + **/ + public ArgumentReplacer(Object[] c) { + context = c; + } + + + /** + * Provides the settings of quantification variables. + * + * @param q The settings of quantification variables. + **/ + public void setQuantificationVariables(Vector q) { + quantificationVariables = q; + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/AtLeastQuantifier.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/AtLeastQuantifier.java index 9cef76c8..084526d4 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/AtLeastQuantifier.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/AtLeastQuantifier.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.infer; @@ -16,139 +13,141 @@ /** - * An "at least" quantifier states that the constraint must hold for at least - * m of the objects in the collection. - * - * @author Nick Rizzolo + * An "at least" quantifier states that the constraint must hold for at least m of the + * objects in the collection. + * + * @author Nick Rizzolo **/ -public class AtLeastQuantifier extends Quantifier -{ - /** The number of objects for which the constraint must hold. */ - protected int m; - - - /** - * Initializing constructor. - * - * @param q The name of the quantification variable. - * @param col The collection of objects to iterate over. - * @param con The constraint being quantified. - * @param m The number of objects for which the constraint must hold. - **/ - public AtLeastQuantifier(String q, Collection col, FirstOrderConstraint con, - int m) { - this(q, col, con, m, null); - } - - /** - * This constructor specifies a variable setter for when this quantifier is - * itself quantified. - * - * @param q The name of the quantification variable. - * @param col The collection of objects to iterate over. - * @param con The constraint being quantified. - * @param m The number of objects for which the constraint must hold. - * @param qar The variable setter. - **/ - public AtLeastQuantifier(String q, Collection col, FirstOrderConstraint con, - int m, QuantifierArgumentReplacer qar) { - super(q, col, con, qar); - this.m = Math.max(m, 0); - } - - - /** Determines whether the constraint is satisfied. 
*/ - public boolean evaluate() { - int satisfied = 0; - - int index = initialize(); - for (Iterator I = collection.iterator(); I.hasNext() && satisfied < m; ) { - enclosingQuantificationSettings.set(index, I.next()); - constraint.setQuantificationVariables(enclosingQuantificationSettings); - if (constraint.evaluate()) ++satisfied; +public class AtLeastQuantifier extends Quantifier { + /** The number of objects for which the constraint must hold. */ + protected int m; + + + /** + * Initializing constructor. + * + * @param q The name of the quantification variable. + * @param col The collection of objects to iterate over. + * @param con The constraint being quantified. + * @param m The number of objects for which the constraint must hold. + **/ + public AtLeastQuantifier(String q, Collection col, FirstOrderConstraint con, int m) { + this(q, col, con, m, null); + } + + /** + * This constructor specifies a variable setter for when this quantifier is itself quantified. + * + * @param q The name of the quantification variable. + * @param col The collection of objects to iterate over. + * @param con The constraint being quantified. + * @param m The number of objects for which the constraint must hold. + * @param qar The variable setter. + **/ + public AtLeastQuantifier(String q, Collection col, FirstOrderConstraint con, int m, + QuantifierArgumentReplacer qar) { + super(q, col, con, qar); + this.m = Math.max(m, 0); } - enclosingQuantificationSettings.removeElementAt(index); - return satisfied == m; - } - - - /** - * This method sets the given quantification variables to the given object - * references and evaluates the expressions involving those variables in - * this constraint's children. - * - * @param o The new object references for the enclosing quantification - * variables, in order of nesting. 
- **/ - public void setQuantificationVariables(Vector o) { - enclosingQuantificationSettings = o; - - if (replacer != null) { - replacer.setQuantificationVariables(o); - if (!replacer.collectionConstant) collection = replacer.getCollection(); - if (!replacer.boundConstant) m = replacer.getBound(); + + /** Determines whether the constraint is satisfied. */ + public boolean evaluate() { + int satisfied = 0; + + int index = initialize(); + for (Iterator I = collection.iterator(); I.hasNext() && satisfied < m;) { + enclosingQuantificationSettings.set(index, I.next()); + constraint.setQuantificationVariables(enclosingQuantificationSettings); + if (constraint.evaluate()) + ++satisfied; + } + + enclosingQuantificationSettings.removeElementAt(index); + return satisfied == m; } - } - - - /** - * Transforms this first order constraint into a propositional constraint. - * - * @return The propositionalized constraint. - **/ - public PropositionalConstraint propositionalize() { - if (collection.size() < m) return new PropositionalConstant(false); - if (m == 0) return new PropositionalConstant(true); - - PropositionalConstraint[] pc = - new PropositionalConstraint[collection.size()]; - - int index = initialize(); - int i = 0; - for (Iterator I = collection.iterator(); I.hasNext(); ++i) { - enclosingQuantificationSettings.set(index, I.next()); - constraint.setQuantificationVariables(enclosingQuantificationSettings); - pc[i] = constraint.propositionalize(); + + + /** + * This method sets the given quantification variables to the given object references and + * evaluates the expressions involving those variables in this constraint's children. + * + * @param o The new object references for the enclosing quantification variables, in order of + * nesting. 
+ **/ + public void setQuantificationVariables(Vector o) { + enclosingQuantificationSettings = o; + + if (replacer != null) { + replacer.setQuantificationVariables(o); + if (!replacer.collectionConstant) + collection = replacer.getCollection(); + if (!replacer.boundConstant) + m = replacer.getBound(); + } + } + + + /** + * Transforms this first order constraint into a propositional constraint. + * + * @return The propositionalized constraint. + **/ + public PropositionalConstraint propositionalize() { + if (collection.size() < m) + return new PropositionalConstant(false); + if (m == 0) + return new PropositionalConstant(true); + + PropositionalConstraint[] pc = new PropositionalConstraint[collection.size()]; + + int index = initialize(); + int i = 0; + for (Iterator I = collection.iterator(); I.hasNext(); ++i) { + enclosingQuantificationSettings.set(index, I.next()); + constraint.setQuantificationVariables(enclosingQuantificationSettings); + pc[i] = constraint.propositionalize(); + } + + enclosingQuantificationSettings.removeElementAt(index); + if (pc.length == 1) + return pc[0]; + + return new PropositionalAtLeast(pc, m); + } + + + /** + * The hash code of a AtLeastQuantifier is the sum of the hash codes of its + * children plus one. + * + * @return The hash code for this AtLeastQuantifier. + **/ + public int hashCode() { + return super.hashCode() + m + 1; } - enclosingQuantificationSettings.removeElementAt(index); - if (pc.length == 1) return pc[0]; - - return new PropositionalAtLeast(pc, m); - } - - - /** - * The hash code of a AtLeastQuantifier is the sum of the hash - * codes of its children plus one. - * - * @return The hash code for this AtLeastQuantifier. - **/ - public int hashCode() { return super.hashCode() + m + 1; } - - - /** - * Two AtLeastQuantifiers are equivalent when their children - * are equivalent. - * - * @return true iff the argument is an equivalent - * AtLeastQuantifier. 
- **/ - public boolean equals(Object o) { - if (!(o instanceof AtLeastQuantifier)) return false; - AtLeastQuantifier q = (AtLeastQuantifier) o; - return super.equals(q) && m == q.m; - } - - - /** - * Calls the appropriate visit(·) method of the given - * Inference for this Constraint, as per the - * visitor pattern. - * - * @param infer The inference visiting this constraint. - **/ - public void runVisit(Inference infer) { infer.visit(this); } -} + /** + * Two AtLeastQuantifiers are equivalent when their children are equivalent. + * + * @return true iff the argument is an equivalent AtLeastQuantifier. + **/ + public boolean equals(Object o) { + if (!(o instanceof AtLeastQuantifier)) + return false; + AtLeastQuantifier q = (AtLeastQuantifier) o; + return super.equals(q) && m == q.m; + } + + + /** + * Calls the appropriate visit(·) method of the given Inference + * for this Constraint, as per the visitor pattern. + * + * @param infer The inference visiting this constraint. + **/ + public void runVisit(Inference infer) { + infer.visit(this); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/AtMostQuantifier.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/AtMostQuantifier.java index 1a11a3a7..52fb90ee 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/AtMostQuantifier.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/AtMostQuantifier.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.infer; @@ -16,127 +13,123 @@ /** - * An "at most" quantifier states that the constraint must hold for no more - * than m of the objects in the collection. - * - * @author Nick Rizzolo + * An "at most" quantifier states that the constraint must hold for no more than m of the + * objects in the collection. + * + * @author Nick Rizzolo **/ -public class AtMostQuantifier extends Quantifier -{ - /** The maximum number of objects for which the constraint must hold. */ - protected int m; - - - /** - * Initializing constructor. - * - * @param q The name of the quantification variable. - * @param col The collection of objects to iterate over. - * @param con The constraint being quantified. - * @param m The number of objects for which the constraint must hold. - **/ - public AtMostQuantifier(String q, Collection col, FirstOrderConstraint con, - int m) { - this(q, col, con, m, null); - } - - /** - * This constructor specifies a variable setter for when this quantifier is - * itself quantified. - * - * @param q The name of the quantification variable. - * @param col The collection of objects to iterate over. - * @param con The constraint being quantified. - * @param m The number of objects for which the constraint must hold. - * @param qar The variable setter. - **/ - public AtMostQuantifier(String q, Collection col, FirstOrderConstraint con, - int m, QuantifierArgumentReplacer qar) { - super(q, col, con, qar); - this.m = Math.max(m, 0); - } - - - /** Determines whether the constraint is satisfied. 
*/ - public boolean evaluate() { - int satisfied = 0; - - int index = initialize(); - for (Iterator I = collection.iterator(); I.hasNext() && satisfied <= m; ) - { - enclosingQuantificationSettings.set(index, I.next()); - constraint.setQuantificationVariables(enclosingQuantificationSettings); - if (constraint.evaluate()) ++satisfied; +public class AtMostQuantifier extends Quantifier { + /** The maximum number of objects for which the constraint must hold. */ + protected int m; + + + /** + * Initializing constructor. + * + * @param q The name of the quantification variable. + * @param col The collection of objects to iterate over. + * @param con The constraint being quantified. + * @param m The number of objects for which the constraint must hold. + **/ + public AtMostQuantifier(String q, Collection col, FirstOrderConstraint con, int m) { + this(q, col, con, m, null); } - enclosingQuantificationSettings.removeElementAt(index); - return satisfied <= m; - } - - - /** - * This method sets the given quantification variables to the given object - * references and evaluates the expressions involving those variables in - * this constraint's children. - * - * @param o The new object references for the enclosing quantification - * variables, in order of nesting. - **/ - public void setQuantificationVariables(Vector o) { - enclosingQuantificationSettings = o; - - if (replacer != null) { - replacer.setQuantificationVariables(o); - if (!replacer.collectionConstant) collection = replacer.getCollection(); - if (!replacer.boundConstant) m = replacer.getBound(); + /** + * This constructor specifies a variable setter for when this quantifier is itself quantified. + * + * @param q The name of the quantification variable. + * @param col The collection of objects to iterate over. + * @param con The constraint being quantified. + * @param m The number of objects for which the constraint must hold. + * @param qar The variable setter. 
+ **/ + public AtMostQuantifier(String q, Collection col, FirstOrderConstraint con, int m, + QuantifierArgumentReplacer qar) { + super(q, col, con, qar); + this.m = Math.max(m, 0); } - } - - - /** - * Transforms this first order constraint into a propositional constraint. - * - * @return The propositionalized constraint. - **/ - public PropositionalConstraint propositionalize() { - return - new AtLeastQuantifier(quantificationVariable, collection, - new FirstOrderNegation(constraint), - collection.size() - m) - .propositionalize(); - } - - - /** - * The hash code of a AtMostQuantifier is the sum of the hash - * codes of its children. - * - * @return The hash code for this AtMostQuantifier. - **/ - public int hashCode() { return super.hashCode() + m; } - - - /** - * Two AtMostQuantifiers are equivalent when their children - * are equivalent. - * - * @return true iff the argument is an equivalent - * AtMostQuantifier. - **/ - public boolean equals(Object o) { - if (!(o instanceof AtMostQuantifier)) return false; - AtMostQuantifier q = (AtMostQuantifier) o; - return super.equals(q) && m == q.m; - } - - - /** - * Calls the appropriate visit(·) method of the given - * Inference for this Constraint, as per the - * visitor pattern. - * - * @param infer The inference visiting this constraint. - **/ - public void runVisit(Inference infer) { infer.visit(this); } -} + + /** Determines whether the constraint is satisfied. 
*/ + public boolean evaluate() { + int satisfied = 0; + + int index = initialize(); + for (Iterator I = collection.iterator(); I.hasNext() && satisfied <= m;) { + enclosingQuantificationSettings.set(index, I.next()); + constraint.setQuantificationVariables(enclosingQuantificationSettings); + if (constraint.evaluate()) + ++satisfied; + } + + enclosingQuantificationSettings.removeElementAt(index); + return satisfied <= m; + } + + + /** + * This method sets the given quantification variables to the given object references and + * evaluates the expressions involving those variables in this constraint's children. + * + * @param o The new object references for the enclosing quantification variables, in order of + * nesting. + **/ + public void setQuantificationVariables(Vector o) { + enclosingQuantificationSettings = o; + + if (replacer != null) { + replacer.setQuantificationVariables(o); + if (!replacer.collectionConstant) + collection = replacer.getCollection(); + if (!replacer.boundConstant) + m = replacer.getBound(); + } + } + + + /** + * Transforms this first order constraint into a propositional constraint. + * + * @return The propositionalized constraint. + **/ + public PropositionalConstraint propositionalize() { + return new AtLeastQuantifier(quantificationVariable, collection, new FirstOrderNegation( + constraint), collection.size() - m).propositionalize(); + } + + + /** + * The hash code of a AtMostQuantifier is the sum of the hash codes of its + * children. + * + * @return The hash code for this AtMostQuantifier. + **/ + public int hashCode() { + return super.hashCode() + m; + } + + + /** + * Two AtMostQuantifiers are equivalent when their children are equivalent. + * + * @return true iff the argument is an equivalent AtMostQuantifier. 
+ **/ + public boolean equals(Object o) { + if (!(o instanceof AtMostQuantifier)) + return false; + AtMostQuantifier q = (AtMostQuantifier) o; + return super.equals(q) && m == q.m; + } + + + /** + * Calls the appropriate visit(·) method of the given Inference + * for this Constraint, as per the visitor pattern. + * + * @param infer The inference visiting this constraint. + **/ + public void runVisit(Inference infer) { + infer.visit(this); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/BalasHook.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/BalasHook.java index d28bd7a9..b6806082 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/BalasHook.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/BalasHook.java @@ -1,793 +1,795 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.infer; -import edu.illinois.cs.cogcomp.lbjava.util.IVector; -import edu.illinois.cs.cogcomp.lbjava.util.Sort; - +import edu.illinois.cs.cogcomp.core.datastructures.vectors.IVector; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.Sort; +import edu.illinois.cs.cogcomp.infer.ilp.ILPSolver; /** - * This {@link ILPSolver} implements Egon Balas' zero-one ILP solving - * algorithm. It is a branch and bound algorithm that can return the best - * solution found so far if stopped early. 
For more information on the - * original algorithm, see
- * - *
- * E. Balas. 1965. An Additive Algorithm for Solving Linear Programs with - * Zero-One Variables. Operations Research, 13(4):517–546. - *
- * - * @author Nick Rizzolo + * This {@link ILPSolver} implements Egon Balas' zero-one ILP solving algorithm. It is a branch and + * bound algorithm that can return the best solution found so far if stopped early. For more + * information on the original algorithm, see
+ * + *
E. Balas. 1965. An Additive Algorithm for Solving Linear Programs with Zero-One + * Variables. Operations Research, 13(4):517–546.
+ * + * @author Nick Rizzolo **/ -public class BalasHook extends ZeroOneILPProblem implements ILPSolver -{ - private static boolean debug = false; - - - /** - * Whether or not the algorithm will halt upon finding its first feasible - * solution. - **/ - protected boolean first; - /** - * Verbosity level. {@link ILPInference#VERBOSITY_NONE} produces no - * incidental output. If set to {@link ILPInference#VERBOSITY_LOW}, only - * variable and constraint counts are reported on STDOUT. If - * set to {@link ILPInference#VERBOSITY_HIGH}, a textual representation of - * the entire optimization problem is also generated on - * STDOUT. - **/ - protected int verbosity; - /** The solution to the optimization problem. */ - private int[] solution; - /** The value of the objective function at {@link #solution}. */ - private double objectiveValue; - /** - * Each element is true iff the corresponding inference - * variable's value in {@link #solution} has been negated (which happens - * iff that variable initially had a negative objective function - * coefficient). - **/ - private boolean[] negated; - - /** - * The current solution being evaluated in the intermediate stages of the - * algorithm. - **/ - private int[] x; - /** - * The current values that, when added to the left hand sides of the - * corresponding constraints, cause all constraints to be satisfied at - * equality during the intermediate stages of the algorithm. - **/ - private double[] slack; - /** - * A set of variables which must retain their current settings in - * x as the algorithm continues processing. - **/ - private boolean[] cancelled; - - - /** Default constructor. */ - public BalasHook() { this(ILPInference.VERBOSITY_NONE); } - - /** - * Creates a new ILP solver with the specified verbosity. - * - * @param v Setting for the {@link #verbosity} level. 
- **/ - public BalasHook(int v) { this(false, v); } - - /** - * Creates a new ILP solver that halts at the first feasible solution - * found, if the parameter to this constructor is true. - * - * @param f Whether or not to stop at the first feasible solution. - **/ - public BalasHook(boolean f) { - this(f, ILPInference.VERBOSITY_NONE); - } - - /** - * Creates a new ILP solver that halts at the first feasible solution - * found, if the first parameter to this constructor is true. - * - * @param f Whether or not to stop at the first feasible solution. - * @param v Setting for the {@link #verbosity} level. - **/ - public BalasHook(boolean f, int v) { - first = f; - verbosity = v; - } - - /** - * Creates a new ILP solver with the problem represented in the named file - * loaded and ready to solve. The constraints in the problem are assumed - * to all be "less than or equal to" constraints, and the actual - * (in)equality symbol is ignored during parsing. - * - * @param name The name of the file containing the textual representation - * of a 0-1 ILP problem. - **/ - public BalasHook(String name) { this(name, ILPInference.VERBOSITY_NONE); } - - /** - * Creates a new ILP solver with the problem represented in the named file - * loaded and ready to solve. The constraints in the problem are assumed - * to all be "less than or equal to" constraints, and the actual - * (in)equality symbol is ignored during parsing. - * - * @param name The name of the file containing the textual representation - * of a 0-1 ILP problem. - * @param f Whether or not to stop at the first feasible solution. - **/ - public BalasHook(String name, boolean f) { - this(name, f, ILPInference.VERBOSITY_NONE); - } - - /** - * Creates a new ILP solver with the problem represented in the named file - * loaded and ready to solve. The constraints in the problem are assumed - * to all be "less than or equal to" constraints, and the actual - * (in)equality symbol is ignored during parsing. 
- * - * @param name The name of the file containing the textual representation - * of a 0-1 ILP problem. - * @param v Setting for the {@link #verbosity} level. - **/ - public BalasHook(String name, int v) { - this(name, false, v); - } - - /** - * Creates a new ILP solver with the problem represented in the named file - * loaded and ready to solve. The constraints in the problem are assumed - * to all be "less than or equal to" constraints, and the actual - * (in)equality symbol is ignored during parsing. - * - * @param name The name of the file containing the textual representation - * of a 0-1 ILP problem. - * @param f Whether or not to stop at the first feasible solution. - * @param v Setting for the {@link #verbosity} level. - **/ - public BalasHook(String name, boolean f, int v) { - super(name); - first = f; - verbosity = v; - } - - - /** Sets the value of {@link #first}. */ - public void setFirst(boolean f) { first = f; } - - - /** - * This method clears the all constraints and variables out of the ILP - * solver's problem representation, bringing the ILPSolver - * instance back to the state it was in when first constructed. - **/ - public void reset() { - super.reset(); - solution = x = null; - negated = null; - slack = null; - objectiveValue = Double.POSITIVE_INFINITY; - } - - - /** - * Simply overrides - * {@link ZeroOneILPProblem#addConstraint(int[],double[],int,double)} so - * that it calls - * {@link ZeroOneILPProblem#addConstraint(int[],double[],double)} thereby - * ignoring the constraint's type. Overriding this method in this way - * ensures that types are not stored when reading in a textual problem - * representation, as happens when constructing an instance with - * {@link #BalasHook(String)}. - * - * @param i The indexes of the variables with non-zero coefficients. - * @param a The coefficients of the variables with the given indexes. - * @param t The type of comparison in this constraint. 
- * @param b The new constraint will enforce equality with this constant. - **/ - protected void addConstraint(int[] i, double[] a, int t, double b) { - addConstraint(i, a, b); - } - - - /** - * Adds a new fixed constraint to the problem. The two array arguments - * must be the same length, as their elements correspond to each other. - * Variables whose coefficients are zero need not be mentioned. Variables - * that are mentioned must have previously been added via - * {@link #addBooleanVariable(double)} or - * {@link #addDiscreteVariable(double[])}. The resulting constraint has - * the form: - *
xi * a = b
- * where xi represents the inference variables - * whose indexes are contained in the array i and - * * represents dot product. - * - * @param i The indexes of the variables with non-zero coefficients. - * @param a The coefficients of the variables with the given indexes. - * @param b The new constraint will enforce equality with this constant. - **/ - public void addEqualityConstraint(int[] i, double[] a, double b) { - addLessThanConstraint(i, a, b); - addGreaterThanConstraint(i, a, b); - } - - - /** - * Adds a new lower bounded constraint to the problem. The two array - * arguments must be the same length, as their elements correspond to each - * other. Variables whose coefficients are zero need not be mentioned. - * Variables that are mentioned must have previously been added via - * {@link #addBooleanVariable(double)} or - * {@link #addDiscreteVariable(double[])}. The resulting constraint has - * the form: - *
xi * a >= b
- * where xi represents the inference variables - * whose indexes are contained in the array i and - * * represents dot product. - * - * @param I The indexes of the variables with non-zero coefficients. - * @param a The coefficients of the variables with the given indexes. - * @param b The lower bound for the new constraint. - **/ - public void addGreaterThanConstraint(int[] I, double[] a, double b) { - for (int i = 0; i < a.length; ++i) a[i] = -a[i]; - addLessThanConstraint(I, a, -b); - } - - - /** - * Adds a new upper bounded constraint to the problem. The two array - * arguments must be the same length, as their elements correspond to each - * other. Variables whose coefficients are zero need not be mentioned. - * Variables that are mentioned must have previously been added via - * {@link #addBooleanVariable(double)} or - * {@link #addDiscreteVariable(double[])}. The resulting constraint has - * the form: - *
xi * a <= b
- * where xi represents the inference variables - * whose indexes are contained in the array i and - * * represents dot product. - * - * @param i The indexes of the variables with non-zero coefficients. - * @param a The coefficients of the variables with the given indexes. - * @param b The upper bound for the new constraint. - **/ - public void addLessThanConstraint(int[] i, double[] a, double b) { - addConstraint(i, a, b); - } - - - /** - * Solves the ILP problem, saving the solution internally. - * - * @return true iff a solution was found successfully. - **/ - public boolean solve() throws Exception { - int variables = objectiveCoefficients.size(); - int constraints = Ac.size(); - - if (verbosity > ILPInference.VERBOSITY_NONE) { - System.out.println(" variables: " + variables); - System.out.println(" constraints: " + constraints); +public class BalasHook extends ZeroOneILPProblem implements ILPSolver { + private static boolean debug = false; + + + /** + * Whether or not the algorithm will halt upon finding its first feasible solution. + **/ + protected boolean first; + /** + * Verbosity level. {@link ILPInference#VERBOSITY_NONE} produces no incidental output. If set to + * {@link ILPInference#VERBOSITY_LOW}, only variable and constraint counts are reported on + * STDOUT. If set to {@link ILPInference#VERBOSITY_HIGH}, a textual representation + * of the entire optimization problem is also generated on STDOUT. + **/ + protected int verbosity; + /** The solution to the optimization problem. */ + private int[] solution; + /** The value of the objective function at {@link #solution}. */ + private double objectiveValue; + /** + * Each element is true iff the corresponding inference variable's value in + * {@link #solution} has been negated (which happens iff that variable initially had a negative + * objective function coefficient). + **/ + private boolean[] negated; + + /** + * The current solution being evaluated in the intermediate stages of the algorithm. 
+ **/ + private int[] x; + /** + * The current values that, when added to the left hand sides of the corresponding constraints, + * cause all constraints to be satisfied at equality during the intermediate stages of the + * algorithm. + **/ + private double[] slack; + /** + * A set of variables which must retain their current settings in x as the + * algorithm continues processing. + **/ + private boolean[] cancelled; + + + /** Default constructor. */ + public BalasHook() { + this(ILPInference.VERBOSITY_NONE); } - negated = new boolean[variables]; - - for (int i = 0; i < variables; ++i) { - double c = objectiveCoefficients.get(i); - if (Math.abs(c) < ZeroOneILPProblem.TOLERANCE) - objectiveCoefficients.set(i, 0); - else { - if (maximize) c = -c; - if (c < 0) { - c = -c; - - for (int j = 0; j < constraints; ++j) { - int vIndex = Av.binarySearch(j, i); - if (vIndex >= 0) { - double coefficient = Ac.get(j, vIndex); - bounds.set(j, bounds.get(j) - coefficient); - Ac.set(j, vIndex, -coefficient); - } - } + /** + * Creates a new ILP solver with the specified verbosity. + * + * @param v Setting for the {@link #verbosity} level. + **/ + public BalasHook(int v) { + this(false, v); + } - negated[i] = true; - } + /** + * Creates a new ILP solver that halts at the first feasible solution found, if the parameter to + * this constructor is true. + * + * @param f Whether or not to stop at the first feasible solution. + **/ + public BalasHook(boolean f) { + this(f, ILPInference.VERBOSITY_NONE); + } - objectiveCoefficients.set(i, c); - } + /** + * Creates a new ILP solver that halts at the first feasible solution found, if the first + * parameter to this constructor is true. + * + * @param f Whether or not to stop at the first feasible solution. + * @param v Setting for the {@link #verbosity} level. 
+ **/ + public BalasHook(boolean f, int v) { + first = f; + verbosity = v; } - if (verbosity == ILPInference.VERBOSITY_HIGH) { - boolean saveMaximize = maximize; - maximize = false; - StringBuffer buffer = new StringBuffer(); - write(buffer); - System.out.print(buffer); - maximize = saveMaximize; + /** + * Creates a new ILP solver with the problem represented in the named file loaded and ready to + * solve. The constraints in the problem are assumed to all be "less than or equal to" + * constraints, and the actual (in)equality symbol is ignored during parsing. + * + * @param name The name of the file containing the textual representation of a 0-1 ILP problem. + **/ + public BalasHook(String name) { + this(name, ILPInference.VERBOSITY_NONE); } - x = new int[variables]; - slack = slack(x); - cancelled = new boolean[variables]; - boolean result = solve(evaluate(x)); - - for (int i = 0; i < variables; ++i) - if (negated[i]) { - x[i] = 1 - x[i]; - objectiveValue -= objectiveCoefficients.get(i); - } - - if (maximize) objectiveValue = -objectiveValue; - - return result; - } - - - /** - * Given a potential solution, this method determines the values for the - * slack violates that will satisfy our less-than constraints at equality. - * - * @param x The current settings of the inference variables. - * @return The resulting values of the slack variables. - **/ - private double[] slack(int[] x) { - final double[] result = new double[bounds.size()]; - - for (int i = 0; i < Ac.size(); ++i) { - double lhs = 0; - for (int j = 0; j < Ac.size(i); ++j) - lhs += x[Av.get(i, j)] * Ac.get(i, j); - result[i] = bounds.get(i) - lhs; - final double rounded = Math.round(result[i]); - if (Math.abs(rounded - result[i]) < ZeroOneILPProblem.TOLERANCE) - result[i] = rounded; + /** + * Creates a new ILP solver with the problem represented in the named file loaded and ready to + * solve. 
The constraints in the problem are assumed to all be "less than or equal to" + * constraints, and the actual (in)equality symbol is ignored during parsing. + * + * @param name The name of the file containing the textual representation of a 0-1 ILP problem. + * @param f Whether or not to stop at the first feasible solution. + **/ + public BalasHook(String name, boolean f) { + this(name, f, ILPInference.VERBOSITY_NONE); } - return result; - } - - - /** - * Implements the meat of the Balas algorithm recursively. - * - * @param z The value of the objective function with the current variable - * settings. - * @return true iff a solution was found successfully. - **/ - public boolean solve(final double z) { - // The slack variables, which will also be used later, tell us whether any - // constraints have been violated. If none have, we know we have found - // the optimal solution under the additional constraints that all - // ineligible variables must take their current settings in x. - final IVector violated = new IVector(); - for (int i = 0; i < slack.length; ++i) - if (slack[i] < 0) violated.add(i); - final int violatedSize = violated.size(); - - if (violatedSize == 0) { - solution = (int[]) x.clone(); - objectiveValue = z; - - if (debug) { - final int[] xx = (int[]) x.clone(); - double f = objectiveValue; - - System.out.print("["); - for (int i = 0; i < xx.length; ++i) { - if (negated[i]) { - xx[i] = 1 - xx[i]; - f -= objectiveCoefficients.get(i); - } - - System.out.print(xx[i]); - if (i + 1 < xx.length) System.out.print(", "); - } + /** + * Creates a new ILP solver with the problem represented in the named file loaded and ready to + * solve. The constraints in the problem are assumed to all be "less than or equal to" + * constraints, and the actual (in)equality symbol is ignored during parsing. + * + * @param name The name of the file containing the textual representation of a 0-1 ILP problem. + * @param v Setting for the {@link #verbosity} level. 
+ **/ + public BalasHook(String name, int v) { + this(name, false, v); + } + + /** + * Creates a new ILP solver with the problem represented in the named file loaded and ready to + * solve. The constraints in the problem are assumed to all be "less than or equal to" + * constraints, and the actual (in)equality symbol is ignored during parsing. + * + * @param name The name of the file containing the textual representation of a 0-1 ILP problem. + * @param f Whether or not to stop at the first feasible solution. + * @param v Setting for the {@link #verbosity} level. + **/ + public BalasHook(String name, boolean f, int v) { + super(name); + first = f; + verbosity = v; + } - if (maximize) f = -f; - System.out.println("]: " + f); - } - return true; + /** Sets the value of {@link #first}. */ + public void setFirst(boolean f) { + first = f; } - final IVector eligible = getEligibleVariables(z, violated); - - // Constraints get closer to satisfaction when variables with negative - // coefficients in those constraints get turned on. If there are any - // constraints in which the eligible variables cannot contribute enough in - // negative coefficients to satisfy the constraint, then we'll need to - // backtrack. lhsNegative keeps track of the total negative coefficient - // contribution possible in each constraint as we try turning eligible - // variables on, then turning them off and making them ineligible after - // backtracking. - final int eligibles = eligible.size(); - if (eligibles == 0) return false; - - final IVector atEquality = new IVector(); - final double[] lhsNegative = - constraintSatisfiability(violated, eligible, atEquality); - if (lhsNegative == null) return false; - - // Now the search begins, setting eligible variables on and making - // recursive calls. 
- final IVector cancelledLocally = new IVector(); - int[] indexes = null; - int bestIndex = 0; - int ineligibles = 0; - boolean result = false; - - for (boolean satisfiable = true; satisfiable; ) { - if (atEquality.size() > 0) { - result |= satisfyAll(atEquality, z, eligible); - satisfiable = false; - } - else { - // If there weren't any constraints satisfied at equality when their - // negative coefficient variables are turned on, then we choose our - // next eligible variable according to the metric proposed by Balas. - if (indexes == null) indexes = sortVariablesByViolations(eligible); - - int bestVariable = eligible.get(indexes[bestIndex]); - while (++bestIndex < eligibles && cancelled[bestVariable]) { - bestVariable = eligible.get(indexes[bestIndex]); - --ineligibles; - } - if (cancelled[bestVariable]) break; - setVariableOn(bestVariable); - cancelled[bestVariable] = true; - cancelledLocally.add(bestVariable); + /** + * This method clears the all constraints and variables out of the ILP solver's problem + * representation, bringing the ILPSolver instance back to the state it was in when + * first constructed. + **/ + public void reset() { + super.reset(); + solution = x = null; + negated = null; + slack = null; + objectiveValue = Double.POSITIVE_INFINITY; + } - final double oldValue = objectiveValue; - result |= solve(z + objectiveCoefficients.get(bestVariable)); - if (first && result) break; - setVariableOff(bestVariable); + /** + * Simply overrides {@link ZeroOneILPProblem#addConstraint(int[],double[],int,double)} so that + * it calls {@link ZeroOneILPProblem#addConstraint(int[],double[],double)} thereby ignoring the + * constraint's type. Overriding this method in this way ensures that types are not stored when + * reading in a textual problem representation, as happens when constructing an instance with + * {@link #BalasHook(String)}. + * + * @param i The indexes of the variables with non-zero coefficients. 
+ * @param a The coefficients of the variables with the given indexes. + * @param t The type of comparison in this constraint. + * @param b The new constraint will enforce equality with this constant. + **/ + protected void addConstraint(int[] i, double[] a, int t, double b) { + addConstraint(i, a, b); + } - final IVector newlyIneligible = new IVector(); - newlyIneligible.add(bestVariable); - if (oldValue != objectiveValue) { - for (int k = bestIndex; k < eligibles; ++k) { - final int j = eligible.get(k); - if (!cancelled[j] - && z + objectiveCoefficients.get(j) - >= objectiveValue - ZeroOneILPProblem.TOLERANCE) { - newlyIneligible.add(j); - cancelled[j] = true; - cancelledLocally.add(j); - } - } + @Override + public int addRealVariable(double c) { + // not implemented + return 0; + } + + @Override + public int addIntegerVariable(double c) { + // not implemented + return 0; + } + + /** + * Adds a new fixed constraint to the problem. The two array arguments must be the same length, + * as their elements correspond to each other. Variables whose coefficients are zero need not be + * mentioned. Variables that are mentioned must have previously been added via + * {@link #addBooleanVariable(double)} or {@link #addDiscreteVariable(double[])}. The resulting + * constraint has the form:
xi * a = b
where + * xi represents the inference variables whose indexes are contained in + * the array i and * represents dot product. + * + * @param i The indexes of the variables with non-zero coefficients. + * @param a The coefficients of the variables with the given indexes. + * @param b The new constraint will enforce equality with this constant. + **/ + public void addEqualityConstraint(int[] i, double[] a, double b) { + addLessThanConstraint(i, a, b); + addGreaterThanConstraint(i, a, b); + } + - ineligibles += newlyIneligible.size() - 1; + /** + * Adds a new lower bounded constraint to the problem. The two array arguments must be the same + * length, as their elements correspond to each other. Variables whose coefficients are zero + * need not be mentioned. Variables that are mentioned must have previously been added via + * {@link #addBooleanVariable(double)} or {@link #addDiscreteVariable(double[])}. The resulting + * constraint has the form:
xi * a >= b
+ * where xi represents the inference variables whose indexes are + * contained in the array i and * represents dot product. + * + * @param I The indexes of the variables with non-zero coefficients. + * @param a The coefficients of the variables with the given indexes. + * @param b The lower bound for the new constraint. + **/ + public void addGreaterThanConstraint(int[] I, double[] a, double b) { + for (int i = 0; i < a.length; ++i) + a[i] = -a[i]; + addLessThanConstraint(I, a, -b); + } + + + /** + * Adds a new upper bounded constraint to the problem. The two array arguments must be the same + * length, as their elements correspond to each other. Variables whose coefficients are zero + * need not be mentioned. Variables that are mentioned must have previously been added via + * {@link #addBooleanVariable(double)} or {@link #addDiscreteVariable(double[])}. The resulting + * constraint has the form:
xi * a <= b
+ * where xi represents the inference variables whose indexes are + * contained in the array i and * represents dot product. + * + * @param i The indexes of the variables with non-zero coefficients. + * @param a The coefficients of the variables with the given indexes. + * @param b The upper bound for the new constraint. + **/ + public void addLessThanConstraint(int[] i, double[] a, double b) { + addConstraint(i, a, b); + } + + + /** + * Solves the ILP problem, saving the solution internally. + * + * @return true iff a solution was found successfully. + **/ + public boolean solve() throws Exception { + int variables = objectiveCoefficients.size(); + int constraints = Ac.size(); + + if (verbosity > ILPInference.VERBOSITY_NONE) { + System.out.println(" variables: " + variables); + System.out.println(" constraints: " + constraints); } - satisfiable = eligibles - bestIndex - ineligibles > 0; + negated = new boolean[variables]; + + for (int i = 0; i < variables; ++i) { + double c = objectiveCoefficients.get(i); + if (Math.abs(c) < TOLERANCE) + objectiveCoefficients.set(i, 0); + else { + if (maximize) + c = -c; + if (c < 0) { + c = -c; + + for (int j = 0; j < constraints; ++j) { + int vIndex = Av.binarySearch(j, i); + if (vIndex >= 0) { + double coefficient = Ac.get(j, vIndex); + bounds.set(j, bounds.get(j) - coefficient); + Ac.set(j, vIndex, -coefficient); + } + } + + negated[i] = true; + } + + objectiveCoefficients.set(i, c); + } + } + + if (verbosity == ILPInference.VERBOSITY_HIGH) { + boolean saveMaximize = maximize; + maximize = false; + StringBuffer buffer = new StringBuffer(); + write(buffer); + System.out.print(buffer); + maximize = saveMaximize; + } - for (int i = 0; i < violatedSize && satisfiable; ++i) { - final int cIndex = violated.get(i); + x = new int[variables]; + slack = slack(x); + cancelled = new boolean[variables]; + boolean result = solve(evaluate(x)); - for (int j = 0; j < newlyIneligible.size(); ++j) { - final int vIndex = - Av.binarySearch(cIndex, 
newlyIneligible.get(j)); - if (vIndex >= 0) { - final double c = Ac.get(cIndex, vIndex); - if (c < 0) lhsNegative[i] -= c; + for (int i = 0; i < variables; ++i) + if (negated[i]) { + x[i] = 1 - x[i]; + objectiveValue -= objectiveCoefficients.get(i); } - } - - satisfiable = - lhsNegative[i] - ZeroOneILPProblem.TOLERANCE <= slack[cIndex]; - if (satisfiable - && Math.abs(slack[cIndex] - lhsNegative[i]) - < ZeroOneILPProblem.TOLERANCE) - atEquality.add(cIndex); + + if (maximize) + objectiveValue = -objectiveValue; + + return result; + } + + + /** + * Given a potential solution, this method determines the values for the slack violates that + * will satisfy our less-than constraints at equality. + * + * @param x The current settings of the inference variables. + * @return The resulting values of the slack variables. + **/ + private double[] slack(int[] x) { + final double[] result = new double[bounds.size()]; + + for (int i = 0; i < Ac.size(); ++i) { + double lhs = 0; + for (int j = 0; j < Ac.size(i); ++j) + lhs += x[Av.get(i, j)] * Ac.get(i, j); + result[i] = bounds.get(i) - lhs; + final double rounded = Math.round(result[i]); + if (Math.abs(rounded - result[i]) < TOLERANCE) + result[i] = rounded; } - } + + return result; } - for (int i = 0; i < cancelledLocally.size(); ++i) - cancelled[cancelledLocally.get(i)] = false; - return result; - } - - - /** - * Determines which variables have a chance both to improve on the - * incumbunt solution and to bring the current x closer to feasibility. - * - * @param z The value of the objective function with the current - * variable settings. - * @param violated The set of violated constraints. - * @return A vector of variables as described above. 
- **/ - private IVector getEligibleVariables(final double z, - final IVector violated) { - final IVector eligible = new IVector(); - final int violatedSize = violated.size(); - - for (int j = 0; j < x.length; ++j) - if (!cancelled[j] - && z + objectiveCoefficients.get(j) - < objectiveValue - ZeroOneILPProblem.TOLERANCE) { - boolean good = false; - for (int i = 0; i < violatedSize && !good; ++i) { - final int cIndex = violated.get(i); - final int vIndex = Av.binarySearch(cIndex, j); - good = vIndex >= 0 && Ac.get(cIndex, vIndex) < 0; + + /** + * Implements the meat of the Balas algorithm recursively. + * + * @param z The value of the objective function with the current variable settings. + * @return true iff a solution was found successfully. + **/ + public boolean solve(final double z) { + // The slack variables, which will also be used later, tell us whether any + // constraints have been violated. If none have, we know we have found + // the optimal solution under the additional constraints that all + // ineligible variables must take their current settings in x. + final IVector violated = new IVector(); + for (int i = 0; i < slack.length; ++i) + if (slack[i] < 0) + violated.add(i); + final int violatedSize = violated.size(); + + if (violatedSize == 0) { + solution = (int[]) x.clone(); + objectiveValue = z; + + if (debug) { + final int[] xx = (int[]) x.clone(); + double f = objectiveValue; + + System.out.print("["); + for (int i = 0; i < xx.length; ++i) { + if (negated[i]) { + xx[i] = 1 - xx[i]; + f -= objectiveCoefficients.get(i); + } + + System.out.print(xx[i]); + if (i + 1 < xx.length) + System.out.print(", "); + } + + if (maximize) + f = -f; + System.out.println("]: " + f); + } + + return true; } - if (good) eligible.add(j); - } - - return eligible; - } - - - /** - * This method attempts to satisfy all specified constraints by - * turning on all eligible variables that have a negative coefficient in - * any of them. - * - *

If there are constraints satisfied at equality when their negative - * coefficient variables are turned on, we know the only chance to satisfy - * them is to turn on all eligible variables with negative coefficients in - * such constraints. This method does just that before making the - * recursive call to {@link #solve(double)}. - * - * @param atEquality A vector of constraints which need to be satisfied. - * @param z The value of the objective function with the current - * variable settings. - * @param eligible A vector of variables which are eligible to be turned - * on. - * @return true iff turning on all eligible variables as - * described above lead to a feasible solution. - **/ - private boolean satisfyAll(final IVector atEquality, double z, - final IVector eligible) { - final IVector F = new IVector(); - final int constraints = atEquality.size(); - boolean result = false; - - for (int i = 0; i < constraints; ++i) { - final int cIndex = atEquality.get(i); - final int constraintSize = Ac.size(cIndex); - for (int k = 0; k < constraintSize; ++k) { - final int j = Av.get(cIndex, k); - if (cancelled[j]) continue; - final double c = Ac.get(cIndex, k); - if (c < 0 && eligible.binarySearch(j) >= 0) { - F.add(j); - cancelled[j] = true; - z += objectiveCoefficients.get(j); + + final IVector eligible = getEligibleVariables(z, violated); + + // Constraints get closer to satisfaction when variables with negative + // coefficients in those constraints get turned on. If there are any + // constraints in which the eligible variables cannot contribute enough in + // negative coefficients to satisfy the constraint, then we'll need to + // backtrack. lhsNegative keeps track of the total negative coefficient + // contribution possible in each constraint as we try turning eligible + // variables on, then turning them off and making them ineligible after + // backtracking. 
+ final int eligibles = eligible.size(); + if (eligibles == 0) + return false; + + final IVector atEquality = new IVector(); + final double[] lhsNegative = constraintSatisfiability(violated, eligible, atEquality); + if (lhsNegative == null) + return false; + + // Now the search begins, setting eligible variables on and making + // recursive calls. + final IVector cancelledLocally = new IVector(); + int[] indexes = null; + int bestIndex = 0; + int ineligibles = 0; + boolean result = false; + + for (boolean satisfiable = true; satisfiable;) { + if (atEquality.size() > 0) { + result |= satisfyAll(atEquality, z, eligible); + satisfiable = false; + } else { + // If there weren't any constraints satisfied at equality when their + // negative coefficient variables are turned on, then we choose our + // next eligible variable according to the metric proposed by Balas. + if (indexes == null) + indexes = sortVariablesByViolations(eligible); + + int bestVariable = eligible.get(indexes[bestIndex]); + while (++bestIndex < eligibles && cancelled[bestVariable]) { + bestVariable = eligible.get(indexes[bestIndex]); + --ineligibles; + } + if (cancelled[bestVariable]) + break; + + setVariableOn(bestVariable); + cancelled[bestVariable] = true; + cancelledLocally.add(bestVariable); + + final double oldValue = objectiveValue; + result |= solve(z + objectiveCoefficients.get(bestVariable)); + if (first && result) + break; + + setVariableOff(bestVariable); + + final IVector newlyIneligible = new IVector(); + newlyIneligible.add(bestVariable); + + if (oldValue != objectiveValue) { + for (int k = bestIndex; k < eligibles; ++k) { + final int j = eligible.get(k); + if (!cancelled[j] + && z + objectiveCoefficients.get(j) >= objectiveValue - TOLERANCE) { + newlyIneligible.add(j); + cancelled[j] = true; + cancelledLocally.add(j); + } + } + + ineligibles += newlyIneligible.size() - 1; + } + + satisfiable = eligibles - bestIndex - ineligibles > 0; + + for (int i = 0; i < violatedSize && 
satisfiable; ++i) { + final int cIndex = violated.get(i); + + for (int j = 0; j < newlyIneligible.size(); ++j) { + final int vIndex = Av.binarySearch(cIndex, newlyIneligible.get(j)); + if (vIndex >= 0) { + final double c = Ac.get(cIndex, vIndex); + if (c < 0) + lhsNegative[i] -= c; + } + } + + satisfiable = lhsNegative[i] - TOLERANCE <= slack[cIndex]; + if (satisfiable && Math.abs(slack[cIndex] - lhsNegative[i]) < TOLERANCE) + atEquality.add(cIndex); + } + } } - } + + for (int i = 0; i < cancelledLocally.size(); ++i) + cancelled[cancelledLocally.get(i)] = false; + return result; } - final int FSize = F.size(); - if (z < objectiveValue - ZeroOneILPProblem.TOLERANCE) { - for (int i = 0; i < FSize; ++i) - setVariableOn(F.get(i)); - result = solve(z); - for (int i = 0; i < FSize; ++i) - setVariableOff(F.get(i)); + /** + * Determines which variables have a chance both to improve on the incumbunt solution and to + * bring the current x closer to feasibility. + * + * @param z The value of the objective function with the current variable settings. + * @param violated The set of violated constraints. + * @return A vector of variables as described above. + **/ + private IVector getEligibleVariables(final double z, final IVector violated) { + final IVector eligible = new IVector(); + final int violatedSize = violated.size(); + + for (int j = 0; j < x.length; ++j) + if (!cancelled[j] && z + objectiveCoefficients.get(j) < objectiveValue - TOLERANCE) { + boolean good = false; + for (int i = 0; i < violatedSize && !good; ++i) { + final int cIndex = violated.get(i); + final int vIndex = Av.binarySearch(cIndex, j); + good = vIndex >= 0 && Ac.get(cIndex, vIndex) < 0; + } + if (good) + eligible.add(j); + } + + return eligible; } - for (int i = 0; i < FSize; ++i) - cancelled[F.get(i)] = false; - - return result; - } - - - /** - * For each violated constraint, this method determines whether it is - * individually satisfiable given the eligible variables remaining. 
If all - * constraints are still satisfiable, the sums of the negative coefficients - * on eligible variables for each constraint is returned. Otherwise, - * null is returned. - * - *

As a side effect, this method also determines which constraints can - * only be satisfied exactly at equality and stores them in the given - * vector. - * - * @param violated The set of violated constraints. - * @param eligible The set of variables still eligible to be turned on. - * @param atEquality A vector into which is stored the set of constraints - * that can only be satisfied at equality. - **/ - private double[] constraintSatisfiability(final IVector violated, - final IVector eligible, - final IVector atEquality) { - final int violatedSize = violated.size(); - final double[] lhsNegative = new double[violatedSize]; - - for (int i = 0; i < violatedSize; ++i) { - final int index = violated.get(i); - - for (int j = 0; j < Ac.size(index); ++j) { - final double c = Ac.get(index, j); - if (c < 0 && eligible.binarySearch(Av.get(index, j)) >= 0) - lhsNegative[i] += c; - } - - if (lhsNegative[i] - ZeroOneILPProblem.TOLERANCE > slack[index]) - return null; - if (Math.abs(slack[index] - lhsNegative[i]) - < ZeroOneILPProblem.TOLERANCE) - atEquality.add(index); + + /** + * This method attempts to satisfy all specified constraints by turning on all eligible + * variables that have a negative coefficient in any of them. + * + *

+ * If there are constraints satisfied at equality when their negative coefficient variables are + * turned on, we know the only chance to satisfy them is to turn on all eligible variables with + * negative coefficients in such constraints. This method does just that before making the + * recursive call to {@link #solve(double)}. + * + * @param atEquality A vector of constraints which need to be satisfied. + * @param z The value of the objective function with the current variable settings. + * @param eligible A vector of variables which are eligible to be turned on. + * @return true iff turning on all eligible variables as described above lead to a + * feasible solution. + **/ + private boolean satisfyAll(final IVector atEquality, double z, final IVector eligible) { + final IVector F = new IVector(); + final int constraints = atEquality.size(); + boolean result = false; + + for (int i = 0; i < constraints; ++i) { + final int cIndex = atEquality.get(i); + final int constraintSize = Ac.size(cIndex); + for (int k = 0; k < constraintSize; ++k) { + final int j = Av.get(cIndex, k); + if (cancelled[j]) + continue; + final double c = Ac.get(cIndex, k); + if (c < 0 && eligible.binarySearch(j) >= 0) { + F.add(j); + cancelled[j] = true; + z += objectiveCoefficients.get(j); + } + } + } + + final int FSize = F.size(); + + if (z < objectiveValue - TOLERANCE) { + for (int i = 0; i < FSize; ++i) + setVariableOn(F.get(i)); + result = solve(z); + for (int i = 0; i < FSize; ++i) + setVariableOff(F.get(i)); + } + + for (int i = 0; i < FSize; ++i) + cancelled[F.get(i)] = false; + + return result; } - return lhsNegative; - } + /** + * For each violated constraint, this method determines whether it is individually satisfiable + * given the eligible variables remaining. If all constraints are still satisfiable, the sums of + * the negative coefficients on eligible variables for each constraint is returned. Otherwise, + * null is returned. + * + *

+ * As a side effect, this method also determines which constraints can only be satisfied exactly + * at equality and stores them in the given vector. + * + * @param violated The set of violated constraints. + * @param eligible The set of variables still eligible to be turned on. + * @param atEquality A vector into which is stored the set of constraints that can only be + * satisfied at equality. + **/ + private double[] constraintSatisfiability(final IVector violated, final IVector eligible, + final IVector atEquality) { + final int violatedSize = violated.size(); + final double[] lhsNegative = new double[violatedSize]; + + for (int i = 0; i < violatedSize; ++i) { + final int index = violated.get(i); + + for (int j = 0; j < Ac.size(index); ++j) { + final double c = Ac.get(index, j); + if (c < 0 && eligible.binarySearch(Av.get(index, j)) >= 0) + lhsNegative[i] += c; + } - /** - * Sets the given variable on and updates the slack variables. - * - * @param j The variable to set on. - **/ - private void setVariableOn(final int j) { - x[j] = 1; + if (lhsNegative[i] - TOLERANCE > slack[index]) + return null; + if (Math.abs(slack[index] - lhsNegative[i]) < TOLERANCE) + atEquality.add(index); + } - for (int i = 0; i < slack.length; ++i) { - final int vIndex = Av.binarySearch(i, j); - if (vIndex >= 0) slack[i] -= Ac.get(i, vIndex); + return lhsNegative; } - } - /** - * Sets the given variable off and updates the slack variables. - * - * @param j The variable to set on. - **/ - private void setVariableOff(final int j) { - x[j] = 0; + /** + * Sets the given variable on and updates the slack variables. + * + * @param j The variable to set on. 
+ **/ + private void setVariableOn(final int j) { + x[j] = 1; - for (int i = 0; i < slack.length; ++i) { - final int vIndex = Av.binarySearch(i, j); - if (vIndex >= 0) slack[i] += Ac.get(i, vIndex); + for (int i = 0; i < slack.length; ++i) { + final int vIndex = Av.binarySearch(i, j); + if (vIndex >= 0) + slack[i] -= Ac.get(i, vIndex); + } } - } - - - /** - * Computes a vector of indexes that, in effect, sorts the given variables - * according to how violated the constraints would be if each were turned - * on independently. Ties are broken by giving precedence to variables - * with smaller objective coefficients. - * - * @param eligible The variables to be sorted. - * @return An array of Integer indexes pointing into the - * eligible vector. - **/ - private int[] sortVariablesByViolations(final IVector eligible) { - final int eligibles = eligible.size(); - final int[] indexes = new int[eligibles]; - final double[] violations = new double[eligibles]; - - for (int k = 0; k < eligibles; ++k) { - indexes[k] = k; - final int j = eligible.get(k); - - for (int i = 0; i < slack.length; ++i) { - final int vIndex = Av.binarySearch(i, j); - final double aij = vIndex < 0 ? 0 : Ac.get(i, vIndex); - violations[k] += Math.max(0, aij - slack[i]); - } + + + /** + * Sets the given variable off and updates the slack variables. + * + * @param j The variable to set on. 
+ **/ + private void setVariableOff(final int j) { + x[j] = 0; + + for (int i = 0; i < slack.length; ++i) { + final int vIndex = Av.binarySearch(i, j); + if (vIndex >= 0) + slack[i] += Ac.get(i, vIndex); + } } - Sort.sort(indexes, - new Sort.IntComparator() { - public int compare(int i1, int i2) { - if (Math.abs(violations[i1] - violations[i2]) - < ZeroOneILPProblem.TOLERANCE) { - double c1 = objectiveCoefficients.get(eligible.get(i1)); - double c2 = objectiveCoefficients.get(eligible.get(i2)); - if (Math.abs(c1 - c2) < ZeroOneILPProblem.TOLERANCE) - return i1 - i2; - if (c1 < c2) return -1; - return 1; + + /** + * Computes a vector of indexes that, in effect, sorts the given variables according to how + * violated the constraints would be if each were turned on independently. Ties are broken by + * giving precedence to variables with smaller objective coefficients. + * + * @param eligible The variables to be sorted. + * @return An array of Integer indexes pointing into the eligible + * vector. + **/ + private int[] sortVariablesByViolations(final IVector eligible) { + final int eligibles = eligible.size(); + final int[] indexes = new int[eligibles]; + final double[] violations = new double[eligibles]; + + for (int k = 0; k < eligibles; ++k) { + indexes[k] = k; + final int j = eligible.get(k); + + for (int i = 0; i < slack.length; ++i) { + final int vIndex = Av.binarySearch(i, j); + final double aij = vIndex < 0 ? 
0 : Ac.get(i, vIndex); + violations[k] += Math.max(0, aij - slack[i]); } + } - if (violations[i1] < violations[i2]) return -1; - return 1; - } + Sort.sort(indexes, new Sort.IntComparator() { + public int compare(int i1, int i2) { + if (Math.abs(violations[i1] - violations[i2]) < TOLERANCE) { + double c1 = objectiveCoefficients.get(eligible.get(i1)); + double c2 = objectiveCoefficients.get(eligible.get(i2)); + if (Math.abs(c1 - c2) < TOLERANCE) + return i1 - i2; + if (c1 < c2) + return -1; + return 1; + } + + if (violations[i1] < violations[i2]) + return -1; + return 1; + } }); - return indexes; - } - - - /** - * Tests whether the problem represented by this ILPSolver - * instance has been solved already. - **/ - public boolean isSolved() { return solution != null; } - - - /** - * When the problem has been solved, use this method to retrieve the value - * of any Boolean inference variable. The result of this method is - * undefined when the problem has not yet been solved. - * - * @param index The index of the variable whose value is requested. - * @return The value of the variable. - **/ - public boolean getBooleanValue(int index) { return solution[index] == 1; } - - - /** - * When the problem has been solved, use this method to retrieve the value - * of the objective function at the solution. The result of this method is - * undefined when the problem has not yet been solved. If the problem had - * no feasible solutions, negative (positive, respectively) infinity will - * be returned if maximizing (minimizing). - * - * @return The value of the objective function at the solution. - **/ - public double objectiveValue() { return objectiveValue; } - - - /** - * Creates a textual representation of the ILP problem in an algebraic - * notation. - * - * @param buffer The created textual representation will be appended here. 
- **/ - public void write(StringBuffer buffer) { - if (maximize) buffer.append("max"); - else buffer.append("min"); - - int variables = objectiveCoefficients.size(); - for (int i = 0; i < variables; ++i) { - double c = objectiveCoefficients.get(i); - buffer.append(" "); - if (c >= 0) buffer.append("+"); - buffer.append(c); - buffer.append(" "); - if (negated[i]) buffer.append("("); - buffer.append("x_"); - buffer.append(i); - if (negated[i]) buffer.append(")"); + return indexes; } - buffer.append("\n"); - - int constraints = Ac.size(); - for (int i = 0; i < constraints; ++i) { - int constraintSize = Ac.size(i); - buffer.append(" "); - - for (int j = 0; j < constraintSize; ++j) { - double c = Ac.get(i, j); - buffer.append(" "); - if (c >= 0) buffer.append("+"); - buffer.append(c); - buffer.append(" x_"); - buffer.append(Av.get(i, j)); - } - - buffer.append(" <= "); - buffer.append(bounds.get(i)); - buffer.append("\n"); + + /** + * Tests whether the problem represented by this ILPSolver instance has been solved + * already. + **/ + public boolean isSolved() { + return solution != null; } - } -} + + /** + * When the problem has been solved, use this method to retrieve the value of any Boolean + * inference variable. The result of this method is undefined when the problem has not yet been + * solved. + * + * @param index The index of the variable whose value is requested. + * @return The value of the variable. + **/ + public boolean getBooleanValue(int index) { + return solution[index] == 1; + } + + @Override + public int getIntegerValue(int index) { + // not implemented yet + return 0; + } + + @Override + public double getRealValue(int index) { + // not implemented yet + return 0; + } + + + /** + * When the problem has been solved, use this method to retrieve the value of the objective + * function at the solution. The result of this method is undefined when the problem has not yet + * been solved. 
If the problem had no feasible solutions, negative (positive, respectively) + * infinity will be returned if maximizing (minimizing). + * + * @return The value of the objective function at the solution. + **/ + public double objectiveValue() { + return objectiveValue; + } + + + /** + * Creates a textual representation of the ILP problem in an algebraic notation. + * + * @param buffer The created textual representation will be appended here. + **/ + public void write(StringBuffer buffer) { + if (maximize) + buffer.append("max"); + else + buffer.append("min"); + + int variables = objectiveCoefficients.size(); + for (int i = 0; i < variables; ++i) { + double c = objectiveCoefficients.get(i); + buffer.append(" "); + if (c >= 0) + buffer.append("+"); + buffer.append(c); + buffer.append(" "); + if (negated[i]) + buffer.append("("); + buffer.append("x_"); + buffer.append(i); + if (negated[i]) + buffer.append(")"); + } + + buffer.append("\n"); + + int constraints = Ac.size(); + for (int i = 0; i < constraints; ++i) { + int constraintSize = Ac.size(i); + buffer.append(" "); + + for (int j = 0; j < constraintSize; ++j) { + double c = Ac.get(i, j); + buffer.append(" "); + if (c >= 0) + buffer.append("+"); + buffer.append(c); + buffer.append(" x_"); + buffer.append(Av.get(i, j)); + } + + buffer.append(" <= "); + buffer.append(bounds.get(i)); + buffer.append("\n"); + } + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/Constraint.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/Constraint.java index e0e7b608..aaa24879 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/Constraint.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/Constraint.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. 
- * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.infer; @@ -14,42 +11,38 @@ /** - * A constraint is an expression that is either satisified or unsatisfied by - * its constituent classifier applications. - * - * @author Nick Rizzolo + * A constraint is an expression that is either satisified or unsatisfied by its constituent + * classifier applications. + * + * @author Nick Rizzolo **/ -public abstract class Constraint -{ - /** Determines whether the constraint is satisfied. */ - abstract public boolean evaluate(); - - - /** - * Replaces all unquantified variables with the unique copy stored as a - * value of the given map; also instantiates all quantified variables and - * stores them in the given map. - * - * @param m The map in which to find unique copies of the variables. - **/ - abstract public void consolidateVariables(AbstractMap m); - - - /** - * Returns the children of this constraint in an array. - * - * @return The children of this constraint in an array. - **/ - abstract public Constraint[] getChildren(); - - - /** - * Calls the appropriate visit(·) method of the given - * Inference for this Constraint, as per the - * visitor pattern. - * - * @param infer The inference visiting this constraint. - **/ - abstract public void runVisit(Inference infer); +public abstract class Constraint { + /** Determines whether the constraint is satisfied. 
*/ + abstract public boolean evaluate(); + + + /** + * Replaces all unquantified variables with the unique copy stored as a value of the given map; + * also instantiates all quantified variables and stores them in the given map. + * + * @param m The map in which to find unique copies of the variables. + **/ + abstract public void consolidateVariables(AbstractMap m); + + + /** + * Returns the children of this constraint in an array. + * + * @return The children of this constraint in an array. + **/ + abstract public Constraint[] getChildren(); + + + /** + * Calls the appropriate visit(·) method of the given Inference + * for this Constraint, as per the visitor pattern. + * + * @param infer The inference visiting this constraint. + **/ + abstract public void runVisit(Inference infer); } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/EqualityArgumentReplacer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/EqualityArgumentReplacer.java index bc7e451b..e3a1b989 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/EqualityArgumentReplacer.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/EqualityArgumentReplacer.java @@ -1,113 +1,97 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.infer; /** - * Anonymous inner classes extending this class are instantiated by the code - * generated by the LBJava compiler when creating - * FirstOrderConstraint representations. The methods of this - * class are used to compute new values for the arguments of a quantified - * FirstOrderEquality. Only certain value returning methods are - * overridden. The others will throw - * UnsupportedOperationExceptions. - * - * @see edu.illinois.cs.cogcomp.lbjava.infer.FirstOrderConstraint - * @see edu.illinois.cs.cogcomp.lbjava.infer.FirstOrderEquality - * @see java.lang.UnsupportedOperationException - * @author Nick Rizzolo + * Anonymous inner classes extending this class are instantiated by the code generated by the LBJava + * compiler when creating FirstOrderConstraint representations. The methods of this + * class are used to compute new values for the arguments of a quantified + * FirstOrderEquality. Only certain value returning methods are overridden. The others + * will throw UnsupportedOperationExceptions. + * + * @see edu.illinois.cs.cogcomp.lbjava.infer.FirstOrderConstraint + * @see edu.illinois.cs.cogcomp.lbjava.infer.FirstOrderEquality + * @see java.lang.UnsupportedOperationException + * @author Nick Rizzolo **/ -abstract public class EqualityArgumentReplacer extends ArgumentReplacer -{ - /** - * This flag is set if the left hand side of the equality is not - * quantified. - **/ - public boolean leftConstant; - /** - * This flag is set if the right hand side of the equality is not - * quantified. 
- **/ - public boolean rightConstant; +abstract public class EqualityArgumentReplacer extends ArgumentReplacer { + /** + * This flag is set if the left hand side of the equality is not quantified. + **/ + public boolean leftConstant; + /** + * This flag is set if the right hand side of the equality is not quantified. + **/ + public boolean rightConstant; - /** - * Initializing constructor. - * - * @param c The context of the corresponding equality, except for - * quantification variables. - **/ - public EqualityArgumentReplacer(Object[] c) { - super(c); - leftConstant = rightConstant = false; - } + /** + * Initializing constructor. + * + * @param c The context of the corresponding equality, except for quantification variables. + **/ + public EqualityArgumentReplacer(Object[] c) { + super(c); + leftConstant = rightConstant = false; + } - /** - * Use this constructor to indicate which of the two arguments of the - * equality is in fact not quantified. - * - * @param c The context of the corresponding equality, except for - * quantification variables. - * @param r Set to false if the unquantified argument is the - * left; set to true if the unquantified argument is - * the right. - **/ - public EqualityArgumentReplacer(Object[] c, boolean r) { - super(c); - leftConstant = !r; - rightConstant = r; - } + /** + * Use this constructor to indicate which of the two arguments of the equality is in fact not + * quantified. + * + * @param c The context of the corresponding equality, except for quantification variables. + * @param r Set to false if the unquantified argument is the left; set to + * true if the unquantified argument is the right. + **/ + public EqualityArgumentReplacer(Object[] c, boolean r) { + super(c); + leftConstant = !r; + rightConstant = r; + } - /** - * Computes the value on the left hand side of the equality. This method - * needs to be overridden if it is to be called, since by default it simply - * throws an UnsupportedOperationException. 
- **/ - public String getLeftValue() { - throw new UnsupportedOperationException( - "LBJava ERROR: getLeftValue() not supported."); - } + /** + * Computes the value on the left hand side of the equality. This method needs to be overridden + * if it is to be called, since by default it simply throws an + * UnsupportedOperationException. + **/ + public String getLeftValue() { + throw new UnsupportedOperationException("LBJava ERROR: getLeftValue() not supported."); + } - /** - * Computes the value on the right hand side of the equality. This method - * needs to be overridden if it is to be called, since by default it simply - * throws an UnsupportedOperationException. - **/ - public String getRightValue() { - throw new UnsupportedOperationException( - "LBJava ERROR: getRightValue() not supported."); - } + /** + * Computes the value on the right hand side of the equality. This method needs to be overridden + * if it is to be called, since by default it simply throws an + * UnsupportedOperationException. + **/ + public String getRightValue() { + throw new UnsupportedOperationException("LBJava ERROR: getRightValue() not supported."); + } - /** - * Computes the object on the left hand side of the equality. This method - * needs to be overridden if it is to be called, since by default it simply - * throws an UnsupportedOperationException. - **/ - public Object getLeftObject() { - throw new UnsupportedOperationException( - "LBJava ERROR: getLeftObject() not supported."); - } + /** + * Computes the object on the left hand side of the equality. This method needs to be overridden + * if it is to be called, since by default it simply throws an + * UnsupportedOperationException. + **/ + public Object getLeftObject() { + throw new UnsupportedOperationException("LBJava ERROR: getLeftObject() not supported."); + } - /** - * Computes the object on the right hand side of the equality. 
This method - * needs to be overridden if it is to be called, since by default it simply - * throws an UnsupportedOperationException. - **/ - public Object getRightObject() { - throw new UnsupportedOperationException( - "LBJava ERROR: getRightObject() not supported."); - } + /** + * Computes the object on the right hand side of the equality. This method needs to be + * overridden if it is to be called, since by default it simply throws an + * UnsupportedOperationException. + **/ + public Object getRightObject() { + throw new UnsupportedOperationException("LBJava ERROR: getRightObject() not supported."); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/ExistentialQuantifier.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/ExistentialQuantifier.java index d8005789..099fac8f 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/ExistentialQuantifier.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/ExistentialQuantifier.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.infer; @@ -16,133 +13,131 @@ /** - * An existential quantifier states that the constraint must hold for at - * least one object from the collection. - * - * @author Nick Rizzolo + * An existential quantifier states that the constraint must hold for at least one object from the + * collection. 
+ * + * @author Nick Rizzolo **/ -public class ExistentialQuantifier extends Quantifier -{ - /** - * Initializing constructor. - * - * @param q The name of the quantification variable. - * @param col The collection of objects to iterate over. - * @param con The constraint being quantified. - **/ - public ExistentialQuantifier(String q, Collection col, - FirstOrderConstraint con) { - super(q, col, con); - } - - /** - * This constructor specifies a variable setter for when this quantifier is - * itself quantified. - * - * @param q The name of the quantification variable. - * @param col The collection of objects to iterate over. - * @param con The constraint being quantified. - * @param qar The variable setter. - **/ - public ExistentialQuantifier(String q, Collection col, - FirstOrderConstraint con, - QuantifierArgumentReplacer qar) { - super(q, col, con, qar); - } - - - /** Determines whether the constraint is satisfied. */ - public boolean evaluate() { - int index = initialize(); - - for (Iterator I = collection.iterator(); I.hasNext(); ) { - enclosingQuantificationSettings.set(index, I.next()); - constraint.setQuantificationVariables(enclosingQuantificationSettings); - - if (constraint.evaluate()) { +public class ExistentialQuantifier extends Quantifier { + /** + * Initializing constructor. + * + * @param q The name of the quantification variable. + * @param col The collection of objects to iterate over. + * @param con The constraint being quantified. + **/ + public ExistentialQuantifier(String q, Collection col, FirstOrderConstraint con) { + super(q, col, con); + } + + /** + * This constructor specifies a variable setter for when this quantifier is itself quantified. + * + * @param q The name of the quantification variable. + * @param col The collection of objects to iterate over. + * @param con The constraint being quantified. + * @param qar The variable setter. 
+ **/ + public ExistentialQuantifier(String q, Collection col, FirstOrderConstraint con, + QuantifierArgumentReplacer qar) { + super(q, col, con, qar); + } + + + /** Determines whether the constraint is satisfied. */ + public boolean evaluate() { + int index = initialize(); + + for (Iterator I = collection.iterator(); I.hasNext();) { + enclosingQuantificationSettings.set(index, I.next()); + constraint.setQuantificationVariables(enclosingQuantificationSettings); + + if (constraint.evaluate()) { + enclosingQuantificationSettings.removeElementAt(index); + return true; + } + } + enclosingQuantificationSettings.removeElementAt(index); - return true; - } + return false; } - enclosingQuantificationSettings.removeElementAt(index); - return false; - } - - - /** - * This method sets the given quantification variables to the given object - * references and evaluates the expressions involving those variables in - * this constraint's children. - * - * @param o The new object references for the enclosing quantification - * variables, in order of nesting. - **/ - public void setQuantificationVariables(Vector o) { - enclosingQuantificationSettings = o; - - if (replacer != null) { - replacer.setQuantificationVariables(o); - collection = replacer.getCollection(); + + /** + * This method sets the given quantification variables to the given object references and + * evaluates the expressions involving those variables in this constraint's children. + * + * @param o The new object references for the enclosing quantification variables, in order of + * nesting. + **/ + public void setQuantificationVariables(Vector o) { + enclosingQuantificationSettings = o; + + if (replacer != null) { + replacer.setQuantificationVariables(o); + collection = replacer.getCollection(); + } } - } - - - /** - * Transforms this first order constraint into a propositional constraint. - * - * @return The propositionalized constraint. 
- **/ - public PropositionalConstraint propositionalize() { - PropositionalConstraint result = null; - - int index = initialize(); - for (Iterator I = collection.iterator(); I.hasNext(); ) { - enclosingQuantificationSettings.set(index, I.next()); - constraint.setQuantificationVariables(enclosingQuantificationSettings); - - if (result == null) result = constraint.propositionalize(); - else - result = - new PropositionalDisjunction(result, constraint.propositionalize()); + + + /** + * Transforms this first order constraint into a propositional constraint. + * + * @return The propositionalized constraint. + **/ + public PropositionalConstraint propositionalize() { + PropositionalConstraint result = null; + + int index = initialize(); + for (Iterator I = collection.iterator(); I.hasNext();) { + enclosingQuantificationSettings.set(index, I.next()); + constraint.setQuantificationVariables(enclosingQuantificationSettings); + + if (result == null) + result = constraint.propositionalize(); + else + result = new PropositionalDisjunction(result, constraint.propositionalize()); + } + + enclosingQuantificationSettings.removeElementAt(index); + if (result == null) + result = new PropositionalConstant(false); + return result; } - enclosingQuantificationSettings.removeElementAt(index); - if (result == null) result = new PropositionalConstant(false); - return result; - } - - - /** - * The hash code of a ExistentialQuantifier is the sum of the - * hash codes of its children plus one. - * - * @return The hash code for this ExistentialQuantifier. - **/ - public int hashCode() { return super.hashCode() + 1; } - - - /** - * Two ExistentialQuantifiers are equivalent when their - * children are equivalent. - * - * @return true iff the argument is an equivalent - * ExistentialQuantifier. 
- **/ - public boolean equals(Object o) { - if (!(o instanceof ExistentialQuantifier)) return false; - ExistentialQuantifier q = (ExistentialQuantifier) o; - return super.equals(q); - } - - - /** - * Calls the appropriate visit(·) method of the given - * Inference for this Constraint, as per the - * visitor pattern. - * - * @param infer The inference visiting this constraint. - **/ - public void runVisit(Inference infer) { infer.visit(this); } -} + /** + * The hash code of a ExistentialQuantifier is the sum of the hash codes of its + * children plus one. + * + * @return The hash code for this ExistentialQuantifier. + **/ + public int hashCode() { + return super.hashCode() + 1; + } + + + /** + * Two ExistentialQuantifiers are equivalent when their children are equivalent. + * + * @return true iff the argument is an equivalent + * ExistentialQuantifier. + **/ + public boolean equals(Object o) { + if (!(o instanceof ExistentialQuantifier)) + return false; + ExistentialQuantifier q = (ExistentialQuantifier) o; + return super.equals(q); + } + + + /** + * Calls the appropriate visit(·) method of the given Inference + * for this Constraint, as per the visitor pattern. + * + * @param infer The inference visiting this constraint. + **/ + public void runVisit(Inference infer) { + infer.visit(this); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderBinaryConstraint.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderBinaryConstraint.java index 7c3dff0a..58b34311 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderBinaryConstraint.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderBinaryConstraint.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. 
- * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.infer; @@ -14,65 +11,61 @@ /** - * Represents a first order constraint involving a binary operator. - * - * @author Nick Rizzolo + * Represents a first order constraint involving a binary operator. + * + * @author Nick Rizzolo **/ -public abstract class FirstOrderBinaryConstraint extends FirstOrderConstraint -{ - /** The constraint on the left of the operator. */ - protected FirstOrderConstraint left; - /** The constraint on the right of the operator. */ - protected FirstOrderConstraint right; +public abstract class FirstOrderBinaryConstraint extends FirstOrderConstraint { + /** The constraint on the left of the operator. */ + protected FirstOrderConstraint left; + /** The constraint on the right of the operator. */ + protected FirstOrderConstraint right; - /** - * Initializing constructor. - * - * @param l The constraint on the left of the operator. - * @param r The constraint on the right of the operator. - **/ - public FirstOrderBinaryConstraint(FirstOrderConstraint l, - FirstOrderConstraint r) { - left = l; - right = r; - } + /** + * Initializing constructor. + * + * @param l The constraint on the left of the operator. + * @param r The constraint on the right of the operator. + **/ + public FirstOrderBinaryConstraint(FirstOrderConstraint l, FirstOrderConstraint r) { + left = l; + right = r; + } - /** - * Replaces all unquantified variables with the unique copy stored as a - * value of the given map; also instantiates all quantified variables and - * stores them in the given map. 
- * - * @param m The map in which to find unique copies of the variables. - **/ - public void consolidateVariables(java.util.AbstractMap m) { - left.consolidateVariables(m); - right.consolidateVariables(m); - } + /** + * Replaces all unquantified variables with the unique copy stored as a value of the given map; + * also instantiates all quantified variables and stores them in the given map. + * + * @param m The map in which to find unique copies of the variables. + **/ + public void consolidateVariables(java.util.AbstractMap m) { + left.consolidateVariables(m); + right.consolidateVariables(m); + } - /** - * This method sets the given quantification variables to the given object - * references and evaluates the expressions involving those variables in - * this constraint's FirstOrderEquality children. - * - * @param o The new object references for the enclosing quantification - * variables, in order of nesting. - **/ - public void setQuantificationVariables(Vector o) { - left.setQuantificationVariables(o); - right.setQuantificationVariables(o); - } + /** + * This method sets the given quantification variables to the given object references and + * evaluates the expressions involving those variables in this constraint's + * FirstOrderEquality children. + * + * @param o The new object references for the enclosing quantification variables, in order of + * nesting. + **/ + public void setQuantificationVariables(Vector o) { + left.setQuantificationVariables(o); + right.setQuantificationVariables(o); + } - /** - * Returns the children of this constraint in an array. - * - * @return The children of this constraint in an array. - **/ - public Constraint[] getChildren() { - return new FirstOrderConstraint[]{ left, right }; - } + /** + * Returns the children of this constraint in an array. + * + * @return The children of this constraint in an array. 
+ **/ + public Constraint[] getChildren() { + return new FirstOrderConstraint[] {left, right}; + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderConjunction.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderConjunction.java index 34da4c60..34df278c 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderConjunction.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderConjunction.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.infer; @@ -14,108 +11,108 @@ /** - * Represents the conjunction of first order constraints. - * - * @author Nick Rizzolo + * Represents the conjunction of first order constraints. + * + * @author Nick Rizzolo **/ -public class FirstOrderConjunction extends FirstOrderNAryConstraint -{ - /** - * If either of the arguments is itself a - * FirstOrderConjunction, its contents are flattened into - * this FirstOrderConjunction. - * - * @param c1 One constraint to disjunct. - * @param c2 Another constraint to disjunct. - **/ - public FirstOrderConjunction(FirstOrderConstraint c1, - FirstOrderConstraint c2) { - add(c1); - add(c2); - } - - - /** - * If the given constraint has the same type as this constraint, its terms - * are merged into this constraint; otherwise, it is added as a new term. - * - * @param c The constraint to add. 
- **/ - public void add(FirstOrderConstraint c) { - if (c instanceof FirstOrderConjunction) { - Iterator I = ((FirstOrderConjunction) c).children.iterator(); - while (I.hasNext()) add((FirstOrderConstraint) I.next()); +public class FirstOrderConjunction extends FirstOrderNAryConstraint { + /** + * If either of the arguments is itself a FirstOrderConjunction, its contents are + * flattened into this FirstOrderConjunction. + * + * @param c1 One constraint to disjunct. + * @param c2 Another constraint to disjunct. + **/ + public FirstOrderConjunction(FirstOrderConstraint c1, FirstOrderConstraint c2) { + add(c1); + add(c2); } - else children.add(c); - } - - - /** Determines whether the constraint is satisfied. */ - public boolean evaluate() { - for (Iterator I = children.iterator(); I.hasNext(); ) - if (!((FirstOrderConstraint) I.next()).evaluate()) return false; - return true; - } - - - /** - * Transforms this first order constraint into a propositional constraint. - * - * @return The propositionalized constraint. - **/ - public PropositionalConstraint propositionalize() { - if (children.size() == 0) return new PropositionalConstant(true); - - FirstOrderConstraint[] c = - (FirstOrderConstraint[]) children.toArray(new FirstOrderConstraint[0]); - if (c.length == 1) return c[0].propositionalize(); - - PropositionalConjunction result = - new PropositionalConjunction(c[0].propositionalize(), - c[1].propositionalize()); - for (int i = 2; i < c.length; ++i) - result = new PropositionalConjunction(result, c[i].propositionalize()); - - return result; - } - - - /** - * The hash code of a FirstOrderConjunction is the sum of - * the hash codes of its children plus one. - * - * @return The hash code for this FirstOrderConjunction. 
- **/ - public int hashCode() { - int result = 1; - for (Iterator I = children.iterator(); I.hasNext(); ) - result += I.next().hashCode(); - return result; - } - - - /** - * Two FirstOrderConjunctions are equivalent when they are - * topologically equivalent, respecting the associativity and commutivity - * of disjunction. - * - * @return true iff the argument is an equivalent - * FirstOrderConjunction. - **/ - public boolean equals(Object o) { - if (!(o instanceof FirstOrderConjunction)) return false; - FirstOrderConjunction d = (FirstOrderConjunction) o; - return children.equals(d.children); - } - - - /** - * Calls the appropriate visit(·) method of the given - * Inference for this Constraint, as per the - * visitor pattern. - * - * @param infer The inference visiting this constraint. - **/ - public void runVisit(Inference infer) { infer.visit(this); } -} + + /** + * If the given constraint has the same type as this constraint, its terms are merged into this + * constraint; otherwise, it is added as a new term. + * + * @param c The constraint to add. + **/ + public void add(FirstOrderConstraint c) { + if (c instanceof FirstOrderConjunction) { + Iterator I = ((FirstOrderConjunction) c).children.iterator(); + while (I.hasNext()) + add((FirstOrderConstraint) I.next()); + } else + children.add(c); + } + + + /** Determines whether the constraint is satisfied. */ + public boolean evaluate() { + for (Iterator I = children.iterator(); I.hasNext();) + if (!((FirstOrderConstraint) I.next()).evaluate()) + return false; + return true; + } + + + /** + * Transforms this first order constraint into a propositional constraint. + * + * @return The propositionalized constraint. 
+ **/ + public PropositionalConstraint propositionalize() { + if (children.size() == 0) + return new PropositionalConstant(true); + + FirstOrderConstraint[] c = + (FirstOrderConstraint[]) children.toArray(new FirstOrderConstraint[0]); + if (c.length == 1) + return c[0].propositionalize(); + + PropositionalConjunction result = + new PropositionalConjunction(c[0].propositionalize(), c[1].propositionalize()); + for (int i = 2; i < c.length; ++i) + result = new PropositionalConjunction(result, c[i].propositionalize()); + + return result; + } + + + /** + * The hash code of a FirstOrderConjunction is the sum of the hash codes of its + * children plus one. + * + * @return The hash code for this FirstOrderConjunction. + **/ + public int hashCode() { + int result = 1; + for (Iterator I = children.iterator(); I.hasNext();) + result += I.next().hashCode(); + return result; + } + + + /** + * Two FirstOrderConjunctions are equivalent when they are topologically + * equivalent, respecting the associativity and commutivity of disjunction. + * + * @return true iff the argument is an equivalent + * FirstOrderConjunction. + **/ + public boolean equals(Object o) { + if (!(o instanceof FirstOrderConjunction)) + return false; + FirstOrderConjunction d = (FirstOrderConjunction) o; + return children.equals(d.children); + } + + + /** + * Calls the appropriate visit(·) method of the given Inference + * for this Constraint, as per the visitor pattern. + * + * @param infer The inference visiting this constraint. 
+ **/ + public void runVisit(Inference infer) { + infer.visit(this); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderConstant.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderConstant.java index 9a61b76b..1bf02948 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderConstant.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderConstant.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.infer; @@ -14,98 +11,103 @@ /** - * A first order constant is either true or false. - * - * @author Nick Rizzolo + * A first order constant is either true or false. + * + * @author Nick Rizzolo **/ -public class FirstOrderConstant extends FirstOrderConstraint -{ - /** The constant value. */ - private boolean constant; - - - /** - * Initializing constructor. - * - * @param v The value of this constant. - **/ - public FirstOrderConstant(boolean v) { constant = v; } - - - /** - * This method sets the given quantification variables to the given object - * references and evaluates the expressions involving those variables in - * this constraint's FirstOrderEquality children. - * - * @param o The new object references for the enclosing quantification - * variables, in order of nesting. 
- **/ - public void setQuantificationVariables(Vector o) { } - - - /** - * Returns the children of this constraint in an array. - * - * @return The children of this constraint in an array. - **/ - public Constraint[] getChildren() { return new FirstOrderConstraint[0]; } - - - /** Determines whether the constraint is satisfied. */ - public boolean evaluate() { return constant; } - - - /** - * Replaces all unquantified variables with the unique copy stored as a - * value of the given map; also instantiates all quantified variables and - * stores them in the given map. - * - * @param m The map in which to find unique copies of the variables. - **/ - public void consolidateVariables(java.util.AbstractMap m) { } - - - /** - * Transforms this first order constraint into a propositional constraint. - * - * @return The propositionalized constraint. - **/ - public PropositionalConstraint propositionalize() { - return new PropositionalConstant(constant); - } - - - /** - * The hash code of a FirstOrderConstant is the hash code of - * the Boolean object formed from the constant. - * - * @return The hash code for this FirstOrderConstant. - **/ - public int hashCode() { return new Boolean(constant).hashCode(); } - - - /** - * Two FirstOrderConstants are equivalent when their constants - * are equal. - * - * @return true iff the argument is a - * FirstOrderConstant set to the same value as this - * constant. - **/ - public boolean equals(Object o) { - if (!(o instanceof FirstOrderConstant)) return false; - FirstOrderConstant c = (FirstOrderConstant) o; - return constant == c.constant; - } - - - /** - * Calls the appropriate visit(·) method of the given - * Inference for this Constraint, as per the - * visitor pattern. - * - * @param infer The inference visiting this constraint. - **/ - public void runVisit(Inference infer) { infer.visit(this); } +public class FirstOrderConstant extends FirstOrderConstraint { + /** The constant value. 
*/ + private boolean constant; + + + /** + * Initializing constructor. + * + * @param v The value of this constant. + **/ + public FirstOrderConstant(boolean v) { + constant = v; + } + + + /** + * This method sets the given quantification variables to the given object references and + * evaluates the expressions involving those variables in this constraint's + * FirstOrderEquality children. + * + * @param o The new object references for the enclosing quantification variables, in order of + * nesting. + **/ + public void setQuantificationVariables(Vector o) {} + + + /** + * Returns the children of this constraint in an array. + * + * @return The children of this constraint in an array. + **/ + public Constraint[] getChildren() { + return new FirstOrderConstraint[0]; + } + + + /** Determines whether the constraint is satisfied. */ + public boolean evaluate() { + return constant; + } + + + /** + * Replaces all unquantified variables with the unique copy stored as a value of the given map; + * also instantiates all quantified variables and stores them in the given map. + * + * @param m The map in which to find unique copies of the variables. + **/ + public void consolidateVariables(java.util.AbstractMap m) {} + + + /** + * Transforms this first order constraint into a propositional constraint. + * + * @return The propositionalized constraint. + **/ + public PropositionalConstraint propositionalize() { + return new PropositionalConstant(constant); + } + + + /** + * The hash code of a FirstOrderConstant is the hash code of the + * Boolean object formed from the constant. + * + * @return The hash code for this FirstOrderConstant. + **/ + public int hashCode() { + return new Boolean(constant).hashCode(); + } + + + /** + * Two FirstOrderConstants are equivalent when their constants are equal. + * + * @return true iff the argument is a FirstOrderConstant set to the + * same value as this constant. 
+ **/ + public boolean equals(Object o) { + if (!(o instanceof FirstOrderConstant)) + return false; + FirstOrderConstant c = (FirstOrderConstant) o; + return constant == c.constant; + } + + + /** + * Calls the appropriate visit(·) method of the given Inference + * for this Constraint, as per the visitor pattern. + * + * @param infer The inference visiting this constraint. + **/ + public void runVisit(Inference infer) { + infer.visit(this); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderConstraint.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderConstraint.java index 8ea32c09..e0d153cd 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderConstraint.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderConstraint.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.infer; @@ -14,73 +11,66 @@ /** - * All classes for representing first order constraints are derived from this - * base class. A first order constraint is: - * - *

    - *
  • The constant true or the constant false. - *
  • - * An equality or inequality between a classifier application and a - * value (which may be specified with an arbitrary java expression) or - * between two classifier applications. Operators: == != - *
  • The negation of a first order constraint: ~ - *
  • The conjunction of two first order constraints: /\ - *
  • The disjunction of two first order constraints: \/ - *
  • - * An implication between two first order constraints: => - *
  • - * A double implication between two first order constraints: - * <=> - *
  • - * An existential quantification: exists identifier in - * identifier, first-order-constraint
    - * The second identifier must refer to a Java Collection. - * The first identifier is a new Java variable of type - * Object that appears in the first order constraint. - *
  • - * A universal quantification: forall identifier in - * identifier, first-order-constraint
    - * The second identifier must refer to a Java Collection. - * The first identifier is a new Java variable of type - * Object that appears in the first order constraint. - *
  • - * An at least counting quantification: atleast - * expression of identifier in identifier, - * first-order-constraint
    - * The expression is arbitrary Java that must evaluate to a - * double. The two identifiers play the same role as in the other - * quatifications. This quatification is satisfied when the number of - * objects in the collection that satisfy first-order-constraint - * is greater than or equal to expression. - *
  • - * An at most counting quatification: atmost - * expression of identifier in identifier, - * first-order-constraint
    - * The expression is arbitrary Java that must evaluate to a - * double. The two identifiers play the same role as in the other - * quatifications. This quatification is satisfied when the number of - * objects in the collection that satisfy first-order-constraint - * is less than or equal to expression. - *
+ * All classes for representing first order constraints are derived from this base class. A first + * order constraint is: + * + *
    + *
  • The constant true or the constant false. + *
  • + * An equality or inequality between a classifier application and a value (which may be specified + * with an arbitrary java expression) or between two classifier applications. Operators: + * == != + *
  • The negation of a first order constraint: ~ + *
  • The conjunction of two first order constraints: /\ + *
  • The disjunction of two first order constraints: \/ + *
  • + * An implication between two first order constraints: => + *
  • + * A double implication between two first order constraints: <=> + *
  • + * An existential quantification: exists identifier in + * identifier, first-order-constraint
    + * The second identifier must refer to a Java Collection. The first identifier is a new + * Java variable of type Object that appears in the first order constraint. + *
  • + * A universal quantification: forall identifier in + * identifier, first-order-constraint
    + * The second identifier must refer to a Java Collection. The first identifier is a new + * Java variable of type Object that appears in the first order constraint. + *
  • + * An at least counting quantification: atleast + * expression of identifier in identifier, + * first-order-constraint
    + * The expression is arbitrary Java that must evaluate to a double. The two identifiers play + * the same role as in the other quatifications. This quatification is satisfied when the number of + * objects in the collection that satisfy first-order-constraint is greater than or equal to + * expression. + *
  • + * An at most counting quatification: atmost + * expression of identifier in identifier, + * first-order-constraint
    + * The expression is arbitrary Java that must evaluate to a double. The two identifiers play + * the same role as in the other quatifications. This quatification is satisfied when the number of + * objects in the collection that satisfy first-order-constraint is less than or equal to + * expression. + *
**/ -public abstract class FirstOrderConstraint extends Constraint -{ - /** - * This method sets the given quantification variables to the given object - * references and evaluates the expressions involving those variables in - * this constraint's FirstOrderEquality children. - * - * @param o The new object references for the enclosing quantification - * variables, in order of nesting. - **/ - abstract public void setQuantificationVariables(Vector o); +public abstract class FirstOrderConstraint extends Constraint { + /** + * This method sets the given quantification variables to the given object references and + * evaluates the expressions involving those variables in this constraint's + * FirstOrderEquality children. + * + * @param o The new object references for the enclosing quantification variables, in order of + * nesting. + **/ + abstract public void setQuantificationVariables(Vector o); - /** - * Transforms this first order constraint into a propositional constraint. - * - * @return The propositionalized constraint. - **/ - abstract public PropositionalConstraint propositionalize(); + /** + * Transforms this first order constraint into a propositional constraint. + * + * @return The propositionalized constraint. + **/ + abstract public PropositionalConstraint propositionalize(); } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderDisjunction.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderDisjunction.java index 5eecec8f..a84f846a 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderDisjunction.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderDisjunction.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. 
- * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.infer; @@ -14,108 +11,108 @@ /** - * Represents the disjunction of first order constraints. - * - * @author Nick Rizzolo + * Represents the disjunction of first order constraints. + * + * @author Nick Rizzolo **/ -public class FirstOrderDisjunction extends FirstOrderNAryConstraint -{ - /** - * If either of the arguments is itself a - * FirstOrderDisjunction, its contents are flattened into - * this FirstOrderDisjunction. - * - * @param c1 One constraint to disjunct. - * @param c2 Another constraint to disjunct. - **/ - public FirstOrderDisjunction(FirstOrderConstraint c1, - FirstOrderConstraint c2) { - add(c1); - add(c2); - } - - - /** - * If the given constraint has the same type as this constraint, its terms - * are merged into this constraint; otherwise, it is added as a new term. - * - * @param c The constraint to add. - **/ - public void add(FirstOrderConstraint c) { - if (c instanceof FirstOrderDisjunction) { - Iterator I = ((FirstOrderDisjunction) c).children.iterator(); - while (I.hasNext()) add((FirstOrderConstraint) I.next()); +public class FirstOrderDisjunction extends FirstOrderNAryConstraint { + /** + * If either of the arguments is itself a FirstOrderDisjunction, its contents are + * flattened into this FirstOrderDisjunction. + * + * @param c1 One constraint to disjunct. + * @param c2 Another constraint to disjunct. 
+ **/ + public FirstOrderDisjunction(FirstOrderConstraint c1, FirstOrderConstraint c2) { + add(c1); + add(c2); } - else children.add(c); - } - - - /** Determines whether the constraint is satisfied. */ - public boolean evaluate() { - for (Iterator I = children.iterator(); I.hasNext(); ) - if (((FirstOrderConstraint) I.next()).evaluate()) return true; - return false; - } - - - /** - * Transforms this first order constraint into a propositional constraint. - * - * @return The propositionalized constraint. - **/ - public PropositionalConstraint propositionalize() { - if (children.size() == 0) return new PropositionalConstant(true); - - FirstOrderConstraint[] c = - (FirstOrderConstraint[]) children.toArray(new FirstOrderConstraint[0]); - if (c.length == 1) return c[0].propositionalize(); - - PropositionalDisjunction result = - new PropositionalDisjunction(c[0].propositionalize(), - c[1].propositionalize()); - for (int i = 2; i < c.length; ++i) - result = new PropositionalDisjunction(result, c[i].propositionalize()); - - return result; - } - - - /** - * The hash code of a FirstOrderDisjunction is the sum of - * the hash codes of its children. - * - * @return The hash code for this FirstOrderDisjunction. - **/ - public int hashCode() { - int result = 0; - for (Iterator I = children.iterator(); I.hasNext(); ) - result += I.next().hashCode(); - return result; - } - - - /** - * Two FirstOrderDisjunctions are equivalent when they are - * topologically equivalent, respecting the associativity and commutivity - * of disjunction. - * - * @return true iff the argument is an equivalent - * FirstOrderDisjunction. - **/ - public boolean equals(Object o) { - if (!(o instanceof FirstOrderDisjunction)) return false; - FirstOrderDisjunction d = (FirstOrderDisjunction) o; - return children.equals(d.children); - } - - - /** - * Calls the appropriate visit(·) method of the given - * Inference for this Constraint, as per the - * visitor pattern. 
- * - * @param infer The inference visiting this constraint. - **/ - public void runVisit(Inference infer) { infer.visit(this); } -} + + /** + * If the given constraint has the same type as this constraint, its terms are merged into this + * constraint; otherwise, it is added as a new term. + * + * @param c The constraint to add. + **/ + public void add(FirstOrderConstraint c) { + if (c instanceof FirstOrderDisjunction) { + Iterator I = ((FirstOrderDisjunction) c).children.iterator(); + while (I.hasNext()) + add((FirstOrderConstraint) I.next()); + } else + children.add(c); + } + + + /** Determines whether the constraint is satisfied. */ + public boolean evaluate() { + for (Iterator I = children.iterator(); I.hasNext();) + if (((FirstOrderConstraint) I.next()).evaluate()) + return true; + return false; + } + + + /** + * Transforms this first order constraint into a propositional constraint. + * + * @return The propositionalized constraint. + **/ + public PropositionalConstraint propositionalize() { + if (children.size() == 0) + return new PropositionalConstant(true); + + FirstOrderConstraint[] c = + (FirstOrderConstraint[]) children.toArray(new FirstOrderConstraint[0]); + if (c.length == 1) + return c[0].propositionalize(); + + PropositionalDisjunction result = + new PropositionalDisjunction(c[0].propositionalize(), c[1].propositionalize()); + for (int i = 2; i < c.length; ++i) + result = new PropositionalDisjunction(result, c[i].propositionalize()); + + return result; + } + + + /** + * The hash code of a FirstOrderDisjunction is the sum of the hash codes of its + * children. + * + * @return The hash code for this FirstOrderDisjunction. + **/ + public int hashCode() { + int result = 0; + for (Iterator I = children.iterator(); I.hasNext();) + result += I.next().hashCode(); + return result; + } + + + /** + * Two FirstOrderDisjunctions are equivalent when they are topologically + * equivalent, respecting the associativity and commutivity of disjunction. 
+ * + * @return true iff the argument is an equivalent + * FirstOrderDisjunction. + **/ + public boolean equals(Object o) { + if (!(o instanceof FirstOrderDisjunction)) + return false; + FirstOrderDisjunction d = (FirstOrderDisjunction) o; + return children.equals(d.children); + } + + + /** + * Calls the appropriate visit(·) method of the given Inference + * for this Constraint, as per the visitor pattern. + * + * @param infer The inference visiting this constraint. + **/ + public void runVisit(Inference infer) { + infer.visit(this); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderDoubleImplication.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderDoubleImplication.java index 98638c80..50351333 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderDoubleImplication.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderDoubleImplication.java @@ -1,82 +1,80 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.infer; /** - * Represents a double implication between two first order constraints. - * - * @author Nick Rizzolo + * Represents a double implication between two first order constraints. + * + * @author Nick Rizzolo **/ -public class FirstOrderDoubleImplication extends FirstOrderBinaryConstraint -{ - /** - * Initializing constructor. 
- * - * @param l The constraint on the left of the operator. - * @param r The constraint on the right of the operator. - **/ - public FirstOrderDoubleImplication(FirstOrderConstraint l, - FirstOrderConstraint r) { - super(l, r); - } +public class FirstOrderDoubleImplication extends FirstOrderBinaryConstraint { + /** + * Initializing constructor. + * + * @param l The constraint on the left of the operator. + * @param r The constraint on the right of the operator. + **/ + public FirstOrderDoubleImplication(FirstOrderConstraint l, FirstOrderConstraint r) { + super(l, r); + } - /** Determines whether the constraint is satisfied. */ - public boolean evaluate() { return left.evaluate() == right.evaluate(); } + /** Determines whether the constraint is satisfied. */ + public boolean evaluate() { + return left.evaluate() == right.evaluate(); + } - /** - * Transforms this first order constraint into a propositional constraint. - * - * @return The propositionalized constraint. - **/ - public PropositionalConstraint propositionalize() { - return new PropositionalDoubleImplication(left.propositionalize(), - right.propositionalize()); - } + /** + * Transforms this first order constraint into a propositional constraint. + * + * @return The propositionalized constraint. + **/ + public PropositionalConstraint propositionalize() { + return new PropositionalDoubleImplication(left.propositionalize(), right.propositionalize()); + } - /** - * The hash code of a FirstOrderDoubleImplication is the sum - * of the hash codes of its children plus three. - * - * @return The hash code for this FirstOrderDoubleImplication. - **/ - public int hashCode() { return left.hashCode() + right.hashCode() + 3; } + /** + * The hash code of a FirstOrderDoubleImplication is the sum of the hash codes of + * its children plus three. + * + * @return The hash code for this FirstOrderDoubleImplication. 
+ **/ + public int hashCode() { + return left.hashCode() + right.hashCode() + 3; + } - /** - * Two FirstOrderDoubleImplications are equivalent when - * they are topologically equivalent, respecting the commutativity of - * double implication. - * - * @return true iff the argument is an equivalent - * FirstOrderDoubleImplication. - **/ - public boolean equals(Object o) { - if (!(o instanceof FirstOrderDoubleImplication)) return false; - FirstOrderDoubleImplication i = (FirstOrderDoubleImplication) o; - return left.equals(i.left) && right.equals(i.right) - || left.equals(i.right) && right.equals(i.left); - } + /** + * Two FirstOrderDoubleImplications are equivalent when they are topologically + * equivalent, respecting the commutativity of double implication. + * + * @return true iff the argument is an equivalent + * FirstOrderDoubleImplication. + **/ + public boolean equals(Object o) { + if (!(o instanceof FirstOrderDoubleImplication)) + return false; + FirstOrderDoubleImplication i = (FirstOrderDoubleImplication) o; + return left.equals(i.left) && right.equals(i.right) || left.equals(i.right) + && right.equals(i.left); + } - /** - * Calls the appropriate visit(·) method of the given - * Inference for this Constraint, as per the - * visitor pattern. - * - * @param infer The inference visiting this constraint. - **/ - public void runVisit(Inference infer) { infer.visit(this); } + /** + * Calls the appropriate visit(·) method of the given Inference + * for this Constraint, as per the visitor pattern. + * + * @param infer The inference visiting this constraint. 
+ **/ + public void runVisit(Inference infer) { + infer.visit(this); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderEquality.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderEquality.java index f6bc907d..1a54b3f8 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderEquality.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderEquality.java @@ -1,67 +1,64 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.infer; /** - * Represents either an equality or an inequality between two values, a - * classifier application and a value, or two classifier applications. - * - * @author Nick Rizzolo + * Represents either an equality or an inequality between two values, a classifier application and a + * value, or two classifier applications. + * + * @author Nick Rizzolo **/ -public abstract class FirstOrderEquality extends FirstOrderConstraint -{ - /** true if equality, false if inequality. */ - protected boolean equality; - /** - * This object provides the implementation of the method that replaces the - * values and variables in an equality given new settings of the - * quantification variables; if this member variable is set to - * null, it means this FirstOrderEquality is not - * nested in a quantification. 
- **/ - protected EqualityArgumentReplacer replacer; - /** - * The map that this constraint's variables have been consolidated into, or - * null if variable consolidation has not been performed. - **/ - protected java.util.AbstractMap variableMap; +public abstract class FirstOrderEquality extends FirstOrderConstraint { + /** true if equality, false if inequality. */ + protected boolean equality; + /** + * This object provides the implementation of the method that replaces the values and variables + * in an equality given new settings of the quantification variables; if this member variable is + * set to null, it means this FirstOrderEquality is not nested in a + * quantification. + **/ + protected EqualityArgumentReplacer replacer; + /** + * The map that this constraint's variables have been consolidated into, or null if + * variable consolidation has not been performed. + **/ + protected java.util.AbstractMap variableMap; - /** - * Initializing constructor. - * - * @param e Indicates whether this is an equality or an inequality. - **/ - public FirstOrderEquality(boolean e) { this(e, null); } + /** + * Initializing constructor. + * + * @param e Indicates whether this is an equality or an inequality. + **/ + public FirstOrderEquality(boolean e) { + this(e, null); + } - /** - * This constructor specifies a variable setter for when this equality is - * quantified. - * - * @param e Indicates whether this is an equality or an inequality. - * @param r An argument replacer. - **/ - public FirstOrderEquality(boolean e, EqualityArgumentReplacer r) { - equality = e; - replacer = r; - variableMap = null; - } + /** + * This constructor specifies a variable setter for when this equality is quantified. + * + * @param e Indicates whether this is an equality or an inequality. + * @param r An argument replacer. 
+ **/ + public FirstOrderEquality(boolean e, EqualityArgumentReplacer r) { + equality = e; + replacer = r; + variableMap = null; + } - /** - * Returns the children of this constraint in an array. - * - * @return The children of this constraint in an array. - **/ - public Constraint[] getChildren() { return new FirstOrderConstraint[0]; } + /** + * Returns the children of this constraint in an array. + * + * @return The children of this constraint in an array. + **/ + public Constraint[] getChildren() { + return new FirstOrderConstraint[0]; + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderEqualityTwoValues.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderEqualityTwoValues.java index 3e44473b..3284b975 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderEqualityTwoValues.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderEqualityTwoValues.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.infer; @@ -14,131 +11,130 @@ /** - * Represents the comparison of two String values. - * - * @author Nick Rizzolo + * Represents the comparison of two String values. + * + * @author Nick Rizzolo **/ -public class FirstOrderEqualityTwoValues extends FirstOrderEquality -{ - /** The value on the left of the equality. 
*/ - protected String left; - /** The value on the right of the equality. */ - protected String right; - - - /** - * Initializing constructor. - * - * @param e Indicates whether this is an equality or an inequality. - * @param l The left value. - * @param r The right value. - **/ - public FirstOrderEqualityTwoValues(boolean e, String l, String r) { - this(e, l, r, null); - } - - /** - * This constructor specifies a variable setter for when this equality is - * quantified. - * - * @param e Indicates whether this is an equality or an inequality. - * @param l The left value. - * @param r The right value. - * @param ear An argument replacer. - **/ - public FirstOrderEqualityTwoValues(boolean e, String l, String r, - EqualityArgumentReplacer ear) { - super(e, ear); - left = l; - right = r; - } - - - /** - * Replaces all unquantified variables with the unique copy stored as a - * value of the given map; also instantiates all quantified variables and - * stores them in the given map. - * - * @param m The map in which to find unique copies of the variables. - **/ - public void consolidateVariables(java.util.AbstractMap m) { } - - - /** - * This method sets the given quantification variables to the given object - * references and evaluates the expressions involving those variables in - * this constraint's FirstOrderEquality children. - * - * @param o The new object references for the enclosing quantification - * variables, in order of nesting. - **/ - public void setQuantificationVariables(Vector o) { - if (replacer == null) { - System.err.println( - "LBJava ERROR: Attempting to set quantification variable in " - + "FirstOrderEqualityTwoValues with no variable setter " - + "implementation provided."); - System.exit(1); +public class FirstOrderEqualityTwoValues extends FirstOrderEquality { + /** The value on the left of the equality. */ + protected String left; + /** The value on the right of the equality. */ + protected String right; + + + /** + * Initializing constructor. 
+ * + * @param e Indicates whether this is an equality or an inequality. + * @param l The left value. + * @param r The right value. + **/ + public FirstOrderEqualityTwoValues(boolean e, String l, String r) { + this(e, l, r, null); } - replacer.setQuantificationVariables(o); - if (!replacer.leftConstant) left = replacer.getLeftValue(); - if (!replacer.rightConstant) right = replacer.getRightValue(); - } - - - /** Determines whether the constraint is satisfied. */ - public boolean evaluate() { return equality == left.equals(right); } - - - /** - * Transforms this first order constraint into a propositional constraint. - * - * @return The propositionalized constraint. - **/ - public PropositionalConstraint propositionalize() { - return new PropositionalConstant(evaluate()); - } - - - /** - * The hash code of a FirstOrderEqualityTwoValues is the sum - * of the hash codes of its children. - * - * @return The hash code for this FirstOrderEqualityTwoValues. - **/ - public int hashCode() { - if (replacer != null) return replacer.hashCode(); - return left.hashCode() + right.hashCode(); - } - - - /** - * Two FirstOrderEqualityTwoValuess are equivalent when their - * children are equivalent in either order. - * - * @return true iff the argument is a - * FirstOrderEqualityTwoValues involving the same - * children. - **/ - public boolean equals(Object o) { - if (!(o instanceof FirstOrderEqualityTwoValues)) return false; - FirstOrderEqualityTwoValues n = (FirstOrderEqualityTwoValues) o; - return replacer == n.replacer - && (replacer != null - || replacer == null - && (left.equals(n.left) && right.equals(n.right) - || left.equals(n.right) && right.equals(n.left))); - } - - - /** - * Calls the appropriate visit(·) method of the given - * Inference for this Constraint, as per the - * visitor pattern. - * - * @param infer The inference visiting this constraint. 
- **/ - public void runVisit(Inference infer) { infer.visit(this); } -} + /** + * This constructor specifies a variable setter for when this equality is quantified. + * + * @param e Indicates whether this is an equality or an inequality. + * @param l The left value. + * @param r The right value. + * @param ear An argument replacer. + **/ + public FirstOrderEqualityTwoValues(boolean e, String l, String r, EqualityArgumentReplacer ear) { + super(e, ear); + left = l; + right = r; + } + + + /** + * Replaces all unquantified variables with the unique copy stored as a value of the given map; + * also instantiates all quantified variables and stores them in the given map. + * + * @param m The map in which to find unique copies of the variables. + **/ + public void consolidateVariables(java.util.AbstractMap m) {} + + + /** + * This method sets the given quantification variables to the given object references and + * evaluates the expressions involving those variables in this constraint's + * FirstOrderEquality children. + * + * @param o The new object references for the enclosing quantification variables, in order of + * nesting. + **/ + public void setQuantificationVariables(Vector o) { + if (replacer == null) { + System.err.println("LBJava ERROR: Attempting to set quantification variable in " + + "FirstOrderEqualityTwoValues with no variable setter " + + "implementation provided."); + System.exit(1); + } + + replacer.setQuantificationVariables(o); + if (!replacer.leftConstant) + left = replacer.getLeftValue(); + if (!replacer.rightConstant) + right = replacer.getRightValue(); + } + + /** Determines whether the constraint is satisfied. */ + public boolean evaluate() { + return equality == left.equals(right); + } + + + /** + * Transforms this first order constraint into a propositional constraint. + * + * @return The propositionalized constraint. 
+ **/ + public PropositionalConstraint propositionalize() { + return new PropositionalConstant(evaluate()); + } + + + /** + * The hash code of a FirstOrderEqualityTwoValues is the sum of the hash codes of + * its children. + * + * @return The hash code for this FirstOrderEqualityTwoValues. + **/ + public int hashCode() { + if (replacer != null) + return replacer.hashCode(); + return left.hashCode() + right.hashCode(); + } + + + /** + * Two FirstOrderEqualityTwoValuess are equivalent when their children are + * equivalent in either order. + * + * @return true iff the argument is a FirstOrderEqualityTwoValues + * involving the same children. + **/ + public boolean equals(Object o) { + if (!(o instanceof FirstOrderEqualityTwoValues)) + return false; + FirstOrderEqualityTwoValues n = (FirstOrderEqualityTwoValues) o; + return replacer == n.replacer + && (replacer != null || replacer == null + && (left.equals(n.left) && right.equals(n.right) || left.equals(n.right) + && right.equals(n.left))); + } + + + /** + * Calls the appropriate visit(·) method of the given Inference + * for this Constraint, as per the visitor pattern. + * + * @param infer The inference visiting this constraint. + **/ + public void runVisit(Inference infer) { + infer.visit(this); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderEqualityWithValue.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderEqualityWithValue.java index 940afbea..292bec71 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderEqualityWithValue.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderEqualityWithValue.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. 
See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.infer; @@ -16,156 +13,153 @@ /** - * Represents the comparison of a classifier application with a value. - * - * @author Nick Rizzolo + * Represents the comparison of a classifier application with a value. + * + * @author Nick Rizzolo **/ -public class FirstOrderEqualityWithValue extends FirstOrderEquality -{ - /** The variable on the left of the equality. */ - protected FirstOrderVariable left; - /** The value on the right of the equality. */ - protected String right; - - - /** - * Initializing constructor. - * - * @param e Indicates whether this is an equality or an inequality. - * @param l The classifier application. - * @param r The value. - **/ - public FirstOrderEqualityWithValue(boolean e, FirstOrderVariable l, - String r) { - this(e, l, r, null); - } - - /** - * This constructor specifies a variable setter for when this equality is - * quantified. - * - * @param e Indicates whether this is an equality or an inequality. - * @param l The classifier application. - * @param r The value. - * @param ear An argument replacer. - **/ - public FirstOrderEqualityWithValue(boolean e, FirstOrderVariable l, - String r, EqualityArgumentReplacer ear) { - super(e, ear); - left = l; - right = r; - } - - - /** - * Replaces all unquantified variables with the unique copy stored as a - * value of the given map; also instantiates all quantified variables and - * stores them in the given map. - * - * @param m The map in which to find unique copies of the variables. 
- **/ - public void consolidateVariables(java.util.AbstractMap m) { - variableMap = m; - if (m.containsKey(left)) left = (FirstOrderVariable) m.get(left); - else m.put(left, left); - } - - - /** - * This method sets the given quantification variables to the given object - * references and evaluates the expressions involving those variables in - * this constraint's FirstOrderEquality children. - * - * @param o The new object references for the enclosing quantification - * variables, in order of nesting. - **/ - public void setQuantificationVariables(Vector o) { - if (replacer == null) { - System.err.println( - "LBJava ERROR: Attempting to set quantification variable in " - + "FirstOrderEqualityWithValue with no variable setter " - + "implementation provided."); - System.exit(1); +public class FirstOrderEqualityWithValue extends FirstOrderEquality { + /** The variable on the left of the equality. */ + protected FirstOrderVariable left; + /** The value on the right of the equality. */ + protected String right; + + + /** + * Initializing constructor. + * + * @param e Indicates whether this is an equality or an inequality. + * @param l The classifier application. + * @param r The value. + **/ + public FirstOrderEqualityWithValue(boolean e, FirstOrderVariable l, String r) { + this(e, l, r, null); } - replacer.setQuantificationVariables(o); - if (!replacer.leftConstant) { - left = - new FirstOrderVariable(left.getClassifier(), - replacer.getLeftObject()); - if (variableMap != null && variableMap.containsKey(left)) - left = (FirstOrderVariable) variableMap.get(left); + /** + * This constructor specifies a variable setter for when this equality is quantified. + * + * @param e Indicates whether this is an equality or an inequality. + * @param l The classifier application. + * @param r The value. + * @param ear An argument replacer. 
+ **/ + public FirstOrderEqualityWithValue(boolean e, FirstOrderVariable l, String r, + EqualityArgumentReplacer ear) { + super(e, ear); + left = l; + right = r; + } + + + /** + * Replaces all unquantified variables with the unique copy stored as a value of the given map; + * also instantiates all quantified variables and stores them in the given map. + * + * @param m The map in which to find unique copies of the variables. + **/ + public void consolidateVariables(java.util.AbstractMap m) { + variableMap = m; + if (m.containsKey(left)) + left = (FirstOrderVariable) m.get(left); + else + m.put(left, left); + } + + + /** + * This method sets the given quantification variables to the given object references and + * evaluates the expressions involving those variables in this constraint's + * FirstOrderEquality children. + * + * @param o The new object references for the enclosing quantification variables, in order of + * nesting. + **/ + public void setQuantificationVariables(Vector o) { + if (replacer == null) { + System.err.println("LBJava ERROR: Attempting to set quantification variable in " + + "FirstOrderEqualityWithValue with no variable setter " + + "implementation provided."); + System.exit(1); + } + + replacer.setQuantificationVariables(o); + if (!replacer.leftConstant) { + left = new FirstOrderVariable(left.getClassifier(), replacer.getLeftObject()); + if (variableMap != null && variableMap.containsKey(left)) + left = (FirstOrderVariable) variableMap.get(left); + } + + if (!replacer.rightConstant) + right = replacer.getRightValue(); + } + + + /** Determines whether the constraint is satisfied. */ + public boolean evaluate() { + return equality == left.getValue().equals(right); } - if (!replacer.rightConstant) right = replacer.getRightValue(); - } - - - /** Determines whether the constraint is satisfied. 
*/ - public boolean evaluate() { - return equality == left.getValue().equals(right); - } - - - /** - * Transforms this first order constraint into a propositional constraint. - * - * @return The propositionalized constraint. - **/ - public PropositionalConstraint propositionalize() { - Score[] leftScores = left.getScores().toArray(); - boolean found = false; - for (int i = 0; i < leftScores.length && !found; ++i) - found = leftScores[i].value.equals(right); - - PropositionalConstraint result = null; - if (!found) result = new PropositionalConstant(false); - else - result = new PropositionalVariable(left.getClassifier(), - left.getExample(), right); - - if (!equality) result = new PropositionalNegation(result); - return result; - } - - - /** - * The hash code of a FirstOrderEqualityWithValue is the sum - * of the hash codes of its children plus 1. - * - * @return The hash code for this FirstOrderEqualityWithValue. - **/ - public int hashCode() { - if (replacer != null) return replacer.hashCode(); - return left.hashCode() + right.hashCode() + 1; - } - - - /** - * Two FirstOrderEqualityWithValues are equivalent when their - * children are equivalent. - * - * @return true iff the argument is a - * FirstOrderEqualityWithValue involving the same - * children. - **/ - public boolean equals(Object o) { - if (!(o instanceof FirstOrderEqualityWithValue)) return false; - FirstOrderEqualityWithValue n = (FirstOrderEqualityWithValue) o; - return replacer == n.replacer - && (replacer != null - || replacer == null - && left.equals(n.left) && right.equals(n.right)); - } - - - /** - * Calls the appropriate visit(·) method of the given - * Inference for this Constraint, as per the - * visitor pattern. - * - * @param infer The inference visiting this constraint. - **/ - public void runVisit(Inference infer) { infer.visit(this); } -} + /** + * Transforms this first order constraint into a propositional constraint. + * + * @return The propositionalized constraint. 
+ **/ + public PropositionalConstraint propositionalize() { + Score[] leftScores = left.getScores().toArray(); + boolean found = false; + for (int i = 0; i < leftScores.length && !found; ++i) + found = leftScores[i].value.equals(right); + + PropositionalConstraint result = null; + if (!found) + result = new PropositionalConstant(false); + else + result = new PropositionalVariable(left.getClassifier(), left.getExample(), right); + + if (!equality) + result = new PropositionalNegation(result); + return result; + } + + + /** + * The hash code of a FirstOrderEqualityWithValue is the sum of the hash codes of + * its children plus 1. + * + * @return The hash code for this FirstOrderEqualityWithValue. + **/ + public int hashCode() { + if (replacer != null) + return replacer.hashCode(); + return left.hashCode() + right.hashCode() + 1; + } + + + /** + * Two FirstOrderEqualityWithValues are equivalent when their children are + * equivalent. + * + * @return true iff the argument is a FirstOrderEqualityWithValue + * involving the same children. + **/ + public boolean equals(Object o) { + if (!(o instanceof FirstOrderEqualityWithValue)) + return false; + FirstOrderEqualityWithValue n = (FirstOrderEqualityWithValue) o; + return replacer == n.replacer + && (replacer != null || replacer == null && left.equals(n.left) + && right.equals(n.right)); + } + + + /** + * Calls the appropriate visit(·) method of the given Inference + * for this Constraint, as per the visitor pattern. + * + * @param infer The inference visiting this constraint. 
+ **/ + public void runVisit(Inference infer) { + infer.visit(this); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderEqualityWithVariable.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderEqualityWithVariable.java index 88d4321b..d53f22d7 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderEqualityWithVariable.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderEqualityWithVariable.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.infer; @@ -16,222 +13,206 @@ /** - * Represents the comparison of two classifier applications. - * - * @author Nick Rizzolo + * Represents the comparison of two classifier applications. + * + * @author Nick Rizzolo **/ -public class FirstOrderEqualityWithVariable extends FirstOrderEquality -{ - /** The variable on the left of the equality. */ - protected FirstOrderVariable left; - /** The classifier application on the right of the equality. */ - protected FirstOrderVariable right; - - - /** - * Initializing constructor. - * - * @param e Indicates whether this is an equality or an inequality. - * @param l The left classifier application. - * @param r The right classifier application. 
- **/ - public FirstOrderEqualityWithVariable(boolean e, FirstOrderVariable l, - FirstOrderVariable r) { - this(e, l, r, null); - } - - /** - * This constructor specifies a variable setter for when this equality is - * quantified. - * - * @param e Indicates whether this is an equality or an inequality. - * @param l The left classifier application. - * @param r The right classifier application. - * @param ear An argument replacer. - **/ - public FirstOrderEqualityWithVariable(boolean e, FirstOrderVariable l, - FirstOrderVariable r, - EqualityArgumentReplacer ear) { - super(e, ear); - left = l; - right = r; - } - - - /** - * Replaces all unquantified variables with the unique copy stored as a - * value of the given map; also instantiates all quantified variables and - * stores them in the given map. - * - * @param m The map in which to find unique copies of the variables. - **/ - public void consolidateVariables(java.util.AbstractMap m) { - variableMap = m; - if (m.containsKey(left)) left = (FirstOrderVariable) m.get(left); - else m.put(left, left); - if (m.containsKey(right)) right = (FirstOrderVariable) m.get(right); - else m.put(right, right); - } - - - /** - * This method sets the given quantification variables to the given object - * references and evaluates the expressions involving those variables in - * this constraint's FirstOrderEquality children. - * - * @param o The new object references for the enclosing quantification - * variables, in order of nesting. - **/ - public void setQuantificationVariables(Vector o) { - if (replacer == null) { - System.err.println( - "LBJava ERROR: Attempting to set quantification variable in " - + "FirstOrderEqualityWithVariable with no variable setter " - + "implementation provided."); - System.exit(1); +public class FirstOrderEqualityWithVariable extends FirstOrderEquality { + /** The variable on the left of the equality. */ + protected FirstOrderVariable left; + /** The classifier application on the right of the equality. 
*/ + protected FirstOrderVariable right; + + + /** + * Initializing constructor. + * + * @param e Indicates whether this is an equality or an inequality. + * @param l The left classifier application. + * @param r The right classifier application. + **/ + public FirstOrderEqualityWithVariable(boolean e, FirstOrderVariable l, FirstOrderVariable r) { + this(e, l, r, null); + } + + /** + * This constructor specifies a variable setter for when this equality is quantified. + * + * @param e Indicates whether this is an equality or an inequality. + * @param l The left classifier application. + * @param r The right classifier application. + * @param ear An argument replacer. + **/ + public FirstOrderEqualityWithVariable(boolean e, FirstOrderVariable l, FirstOrderVariable r, + EqualityArgumentReplacer ear) { + super(e, ear); + left = l; + right = r; } - replacer.setQuantificationVariables(o); - if (!replacer.leftConstant) { - left = - new FirstOrderVariable(left.getClassifier(), - replacer.getLeftObject()); - if (variableMap != null && variableMap.containsKey(left)) - left = (FirstOrderVariable) variableMap.get(left); + /** + * Replaces all unquantified variables with the unique copy stored as a value of the given map; + * also instantiates all quantified variables and stores them in the given map. + * + * @param m The map in which to find unique copies of the variables. 
+ **/ + public void consolidateVariables(java.util.AbstractMap m) { + variableMap = m; + if (m.containsKey(left)) + left = (FirstOrderVariable) m.get(left); + else + m.put(left, left); + if (m.containsKey(right)) + right = (FirstOrderVariable) m.get(right); + else + m.put(right, right); } - if (!replacer.rightConstant) { - right = new FirstOrderVariable(right.getClassifier(), - replacer.getRightObject()); - if (variableMap != null && variableMap.containsKey(right)) - right = (FirstOrderVariable) variableMap.get(right); + + /** + * This method sets the given quantification variables to the given object references and + * evaluates the expressions involving those variables in this constraint's + * FirstOrderEquality children. + * + * @param o The new object references for the enclosing quantification variables, in order of + * nesting. + **/ + public void setQuantificationVariables(Vector o) { + if (replacer == null) { + System.err.println("LBJava ERROR: Attempting to set quantification variable in " + + "FirstOrderEqualityWithVariable with no variable setter " + + "implementation provided."); + System.exit(1); + } + + replacer.setQuantificationVariables(o); + + if (!replacer.leftConstant) { + left = new FirstOrderVariable(left.getClassifier(), replacer.getLeftObject()); + if (variableMap != null && variableMap.containsKey(left)) + left = (FirstOrderVariable) variableMap.get(left); + } + + if (!replacer.rightConstant) { + right = new FirstOrderVariable(right.getClassifier(), replacer.getRightObject()); + if (variableMap != null && variableMap.containsKey(right)) + right = (FirstOrderVariable) variableMap.get(right); + } } - } - - - /** Determines whether the constraint is satisfied. */ - public boolean evaluate() { - return equality == left.getValue().equals(right.getValue()); - } - - - /** - * Transforms this first order constraint into a propositional constraint. - * - * @return The propositionalized constraint. 
- **/ - public PropositionalConstraint propositionalize() { - Score[] leftScores = left.getScores().toArray(); - Score[] rightScores = right.getScores().toArray(); - if (leftScores.length == 0 || rightScores.length == 0) - return new PropositionalConstant(false); - if (leftScores.length == 1 && rightScores.length == 1) - return - new PropositionalConstant(leftScores[0].value - .equals(rightScores[0].value)); - - PropositionalVariable[] leftVariables = - new PropositionalVariable[leftScores.length]; - PropositionalVariable[] rightVariables = - new PropositionalVariable[rightScores.length]; - - int size = 0; - for (int i = 0; i < leftScores.length; ++i) { - boolean found = false; - for (int j = 0; j < rightScores.length && !found; ++j) { - if (!leftScores[i].value.equals(rightScores[j].value)) continue; - found = true; - leftVariables[size] = - new PropositionalVariable(left.getClassifier(), left.getExample(), - leftScores[i].value); - rightVariables[size] = - new PropositionalVariable(right.getClassifier(), right.getExample(), - rightScores[j].value); - ++size; - } + + + /** Determines whether the constraint is satisfied. 
*/ + public boolean evaluate() { + return equality == left.getValue().equals(right.getValue()); } - if (size == 0) return new PropositionalConstant(false); - - if (equality && size == leftScores.length && size == rightScores.length) - --size; - - PropositionalConstraint rightVariable = rightVariables[0]; - if (!equality) rightVariable = new PropositionalNegation(rightVariable); - PropositionalConstraint result = - new PropositionalDisjunction( - new PropositionalNegation(leftVariables[0]), - rightVariable); - if (equality) - result = - new PropositionalConjunction( - result, - new PropositionalDisjunction( - new PropositionalNegation(rightVariable), - leftVariables[0])); - - for (int i = 1; i < size; ++i) { - rightVariable = rightVariables[i]; - if (!equality) rightVariable = new PropositionalNegation(rightVariable); - result = - new PropositionalConjunction( - result, - new PropositionalDisjunction( - new PropositionalNegation(leftVariables[i]), - rightVariable)); - if (equality) - result = - new PropositionalConjunction( - result, - new PropositionalDisjunction( - new PropositionalNegation(rightVariable), - leftVariables[i])); + + /** + * Transforms this first order constraint into a propositional constraint. + * + * @return The propositionalized constraint. 
+ **/ + public PropositionalConstraint propositionalize() { + Score[] leftScores = left.getScores().toArray(); + Score[] rightScores = right.getScores().toArray(); + if (leftScores.length == 0 || rightScores.length == 0) + return new PropositionalConstant(false); + if (leftScores.length == 1 && rightScores.length == 1) + return new PropositionalConstant(leftScores[0].value.equals(rightScores[0].value)); + + PropositionalVariable[] leftVariables = new PropositionalVariable[leftScores.length]; + PropositionalVariable[] rightVariables = new PropositionalVariable[rightScores.length]; + + int size = 0; + for (int i = 0; i < leftScores.length; ++i) { + boolean found = false; + for (int j = 0; j < rightScores.length && !found; ++j) { + if (!leftScores[i].value.equals(rightScores[j].value)) + continue; + found = true; + leftVariables[size] = + new PropositionalVariable(left.getClassifier(), left.getExample(), + leftScores[i].value); + rightVariables[size] = + new PropositionalVariable(right.getClassifier(), right.getExample(), + rightScores[j].value); + ++size; + } + } + + if (size == 0) + return new PropositionalConstant(false); + + if (equality && size == leftScores.length && size == rightScores.length) + --size; + + PropositionalConstraint rightVariable = rightVariables[0]; + if (!equality) + rightVariable = new PropositionalNegation(rightVariable); + PropositionalConstraint result = + new PropositionalDisjunction(new PropositionalNegation(leftVariables[0]), + rightVariable); + if (equality) + result = + new PropositionalConjunction(result, new PropositionalDisjunction( + new PropositionalNegation(rightVariable), leftVariables[0])); + + for (int i = 1; i < size; ++i) { + rightVariable = rightVariables[i]; + if (!equality) + rightVariable = new PropositionalNegation(rightVariable); + result = + new PropositionalConjunction(result, new PropositionalDisjunction( + new PropositionalNegation(leftVariables[i]), rightVariable)); + if (equality) + result = + new 
PropositionalConjunction(result, new PropositionalDisjunction( + new PropositionalNegation(rightVariable), leftVariables[i])); + } + + return result; } - return result; - } - - - /** - * The hash code of a FirstOrderEqualityWithVariable is the - * sum of the hash codes of its children plus 2. - * - * @return The hash code for this - * FirstOrderEqualityWithVariable. - **/ - public int hashCode() { - if (replacer != null) return replacer.hashCode(); - return left.hashCode() + right.hashCode() + 2; - } - - - /** - * Two FirstOrderEqualityWithVariables are equivalent when - * their children are equivalent in either order. - * - * @return true iff the argument is a - * FirstOrderEqualityWithVariable involving the same - * variables. - **/ - public boolean equals(Object o) { - if (!(o instanceof FirstOrderEqualityWithVariable)) return false; - FirstOrderEqualityWithVariable n = (FirstOrderEqualityWithVariable) o; - return replacer == n.replacer - && (replacer != null - || replacer == null - && (left.equals(n.left) && right.equals(n.right) - || left.equals(n.right) && right.equals(n.left))); - } - - - /** - * Calls the appropriate visit(·) method of the given - * Inference for this Constraint, as per the - * visitor pattern. - * - * @param infer The inference visiting this constraint. - **/ - public void runVisit(Inference infer) { infer.visit(this); } -} + /** + * The hash code of a FirstOrderEqualityWithVariable is the sum of the hash codes + * of its children plus 2. + * + * @return The hash code for this FirstOrderEqualityWithVariable. + **/ + public int hashCode() { + if (replacer != null) + return replacer.hashCode(); + return left.hashCode() + right.hashCode() + 2; + } + + + /** + * Two FirstOrderEqualityWithVariables are equivalent when their children are + * equivalent in either order. + * + * @return true iff the argument is a FirstOrderEqualityWithVariable + * involving the same variables. 
+ **/ + public boolean equals(Object o) { + if (!(o instanceof FirstOrderEqualityWithVariable)) + return false; + FirstOrderEqualityWithVariable n = (FirstOrderEqualityWithVariable) o; + return replacer == n.replacer + && (replacer != null || replacer == null + && (left.equals(n.left) && right.equals(n.right) || left.equals(n.right) + && right.equals(n.left))); + } + + + /** + * Calls the appropriate visit(·) method of the given Inference + * for this Constraint, as per the visitor pattern. + * + * @param infer The inference visiting this constraint. + **/ + public void runVisit(Inference infer) { + infer.visit(this); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderImplication.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderImplication.java index 29d75ee7..ca3be411 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderImplication.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderImplication.java @@ -1,80 +1,79 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.infer; /** - * Represents an implication between two first order constraints. - * - * @author Nick Rizzolo + * Represents an implication between two first order constraints. + * + * @author Nick Rizzolo **/ -public class FirstOrderImplication extends FirstOrderBinaryConstraint -{ - /** - * Initializing constructor. 
- * - * @param l The constraint on the left of the operator. - * @param r The constraint on the right of the operator. - **/ - public FirstOrderImplication(FirstOrderConstraint l, FirstOrderConstraint r) - { - super(l, r); - } +public class FirstOrderImplication extends FirstOrderBinaryConstraint { + /** + * Initializing constructor. + * + * @param l The constraint on the left of the operator. + * @param r The constraint on the right of the operator. + **/ + public FirstOrderImplication(FirstOrderConstraint l, FirstOrderConstraint r) { + super(l, r); + } - /** Determines whether the constraint is satisfied. */ - public boolean evaluate() { return !left.evaluate() || right.evaluate(); } + /** Determines whether the constraint is satisfied. */ + public boolean evaluate() { + return !left.evaluate() || right.evaluate(); + } - /** - * Transforms this first order constraint into a propositional constraint. - * - * @return The propositionalized constraint. - **/ - public PropositionalConstraint propositionalize() { - return new PropositionalImplication(left.propositionalize(), - right.propositionalize()); - } + /** + * Transforms this first order constraint into a propositional constraint. + * + * @return The propositionalized constraint. + **/ + public PropositionalConstraint propositionalize() { + return new PropositionalImplication(left.propositionalize(), right.propositionalize()); + } - /** - * The hash code of a FirstOrderImplication is the sum of the - * hash codes of its children plus two. - * - * @return The hash code for this FirstOrderImplication. - **/ - public int hashCode() { return left.hashCode() + right.hashCode() + 2; } + /** + * The hash code of a FirstOrderImplication is the sum of the hash codes of its + * children plus two. + * + * @return The hash code for this FirstOrderImplication. 
+ **/ + public int hashCode() { + return left.hashCode() + right.hashCode() + 2; + } - /** - * Two FirstOrderImplications are equivalent when they are - * topologically equivalent. - * - * @return true iff the argument is an equivalent - * FirstOrderImplication. - **/ - public boolean equals(Object o) { - if (!(o instanceof FirstOrderImplication)) return false; - FirstOrderImplication i = (FirstOrderImplication) o; - return left.equals(i.left) && right.equals(i.right); - } + /** + * Two FirstOrderImplications are equivalent when they are topologically + * equivalent. + * + * @return true iff the argument is an equivalent + * FirstOrderImplication. + **/ + public boolean equals(Object o) { + if (!(o instanceof FirstOrderImplication)) + return false; + FirstOrderImplication i = (FirstOrderImplication) o; + return left.equals(i.left) && right.equals(i.right); + } - /** - * Calls the appropriate visit(·) method of the given - * Inference for this Constraint, as per the - * visitor pattern. - * - * @param infer The inference visiting this constraint. - **/ - public void runVisit(Inference infer) { infer.visit(this); } + /** + * Calls the appropriate visit(·) method of the given Inference + * for this Constraint, as per the visitor pattern. + * + * @param infer The inference visiting this constraint. + **/ + public void runVisit(Inference infer) { + infer.visit(this); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderNAryConstraint.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderNAryConstraint.java index 984d297f..4c64fa97 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderNAryConstraint.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderNAryConstraint.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. 
- * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.infer; @@ -16,85 +13,85 @@ /** - * Represents a first order constraint with an arbitrary number of arguments, - * usually assumed to be greater than or equal to 2. - * - * @author Nick Rizzolo + * Represents a first order constraint with an arbitrary number of arguments, usually assumed to be + * greater than or equal to 2. + * + * @author Nick Rizzolo **/ -public abstract class FirstOrderNAryConstraint extends FirstOrderConstraint -{ - /** The children of the operator. */ - protected HashSet children; - - - /** Default constructor. */ - public FirstOrderNAryConstraint() { children = new HashSet(); } - - - /** - * Replaces all unquantified variables with the unique copy stored as a - * value of the given map; also instantiates all quantified variables and - * stores them in the given map. - * - * @param m The map in which to find unique copies of the variables. - **/ - public void consolidateVariables(java.util.AbstractMap m) { - for (Iterator I = children.iterator(); I.hasNext(); ) - ((FirstOrderConstraint) I.next()).consolidateVariables(m); - } - - - /** - * This method sets the given quantification variables to the given object - * references and evaluates the expressions involving those variables in - * this constraint's FirstOrderEquality children. - * - * @param o The new object references for the enclosing quantification - * variables, in order of nesting. 
- **/ - public void setQuantificationVariables(Vector o) { - for (Iterator I = children.iterator(); I.hasNext(); ) - ((FirstOrderConstraint) I.next()).setQuantificationVariables(o); - } - - - /** - * Returns the children of this constraint in an array. - * - * @return The children of this constraint in an array. - **/ - public Constraint[] getChildren() { - return (PropositionalConstraint[]) - children.toArray(new PropositionalConstraint[children.size()]); - } - - - /** - * Determines whether the given constraint is a term of this constraint. - * - * @param c The given constraint. - * @return true iff the given constraint is contained in this - * constraint. - **/ - public boolean contains(FirstOrderConstraint c) { - return children.contains(c); - } - - - /** - * Returns the number of terms in this constraint. - * - * @return The number of terms in this constraint. - **/ - public int size() { return children.size(); } - - - /** - * If the given constraint has the same type as this constraint, its terms - * are merged into this constraint; otherwise, it is added as a new term. - * - * @param c The constraint to add. - **/ - abstract public void add(FirstOrderConstraint c); +public abstract class FirstOrderNAryConstraint extends FirstOrderConstraint { + /** The children of the operator. */ + protected HashSet children; + + + /** Default constructor. */ + public FirstOrderNAryConstraint() { + children = new HashSet(); + } + + + /** + * Replaces all unquantified variables with the unique copy stored as a value of the given map; + * also instantiates all quantified variables and stores them in the given map. + * + * @param m The map in which to find unique copies of the variables. 
+ **/ + public void consolidateVariables(java.util.AbstractMap m) { + for (Iterator I = children.iterator(); I.hasNext();) + ((FirstOrderConstraint) I.next()).consolidateVariables(m); + } + + + /** + * This method sets the given quantification variables to the given object references and + * evaluates the expressions involving those variables in this constraint's + * FirstOrderEquality children. + * + * @param o The new object references for the enclosing quantification variables, in order of + * nesting. + **/ + public void setQuantificationVariables(Vector o) { + for (Iterator I = children.iterator(); I.hasNext();) + ((FirstOrderConstraint) I.next()).setQuantificationVariables(o); + } + + + /** + * Returns the children of this constraint in an array. + * + * @return The children of this constraint in an array. + **/ + public Constraint[] getChildren() { + return (PropositionalConstraint[]) children.toArray(new PropositionalConstraint[children + .size()]); + } + + + /** + * Determines whether the given constraint is a term of this constraint. + * + * @param c The given constraint. + * @return true iff the given constraint is contained in this constraint. + **/ + public boolean contains(FirstOrderConstraint c) { + return children.contains(c); + } + + + /** + * Returns the number of terms in this constraint. + * + * @return The number of terms in this constraint. + **/ + public int size() { + return children.size(); + } + + + /** + * If the given constraint has the same type as this constraint, its terms are merged into this + * constraint; otherwise, it is added as a new term. + * + * @param c The constraint to add. 
+ **/ + abstract public void add(FirstOrderConstraint c); } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderNegation.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderNegation.java index 8d923bf5..567f2151 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderNegation.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderNegation.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.infer; @@ -14,101 +11,107 @@ /** - * Represents the negation operator applied to a first order constraint. - * - * @author Nick Rizzolo + * Represents the negation operator applied to a first order constraint. + * + * @author Nick Rizzolo **/ -public class FirstOrderNegation extends FirstOrderConstraint -{ - /** The constraint that the negation is applied to. */ - protected FirstOrderConstraint constraint; - - - /** - * Initializing constructor. - * - * @param c The constraint to negate. - **/ - public FirstOrderNegation(FirstOrderConstraint c) { constraint = c; } - - - /** - * Replaces all unquantified variables with the unique copy stored as a - * value of the given map; also instantiates all quantified variables and - * stores them in the given map. - * - * @param m The map in which to find unique copies of the variables. 
- **/ - public void consolidateVariables(java.util.AbstractMap m) { - constraint.consolidateVariables(m); - } - - - /** - * This method sets the given quantification variables to the given object - * references and evaluates the expressions involving those variables in - * this constraint's FirstOrderEquality children. - * - * @param o The new object references for the enclosing quantification - * variables, in order of nesting. - **/ - public void setQuantificationVariables(Vector o) { - constraint.setQuantificationVariables(o); - } - - - /** - * Returns the children of this constraint in an array. - * - * @return The children of this constraint in an array. - **/ - public Constraint[] getChildren() { return new FirstOrderConstraint[0]; } - - - /** Determines whether the constraint is satisfied. */ - public boolean evaluate() { return !constraint.evaluate(); } - - - /** - * Transforms this first order constraint into a propositional constraint. - * - * @return The propositionalized constraint. - **/ - public PropositionalConstraint propositionalize() { - return new PropositionalNegation(constraint.propositionalize()); - } - - - /** - * The hash code of a FirstOrderNegation is the hash code of - * its child constraint plus 1. - * - * @return The hash code for this FirstOrderNegation. - **/ - public int hashCode() { return constraint.hashCode() + 1; } - - - /** - * Two FirstOrderNegations are equivalent when their - * constraints are equivalent. - * - * @return true iff the argument is a - * FirstOrderNegation of the same constraint. - **/ - public boolean equals(Object o) { - if (!(o instanceof FirstOrderNegation)) return false; - FirstOrderNegation n = (FirstOrderNegation) o; - return constraint.equals(n.constraint); - } - - - /** - * Calls the appropriate visit(·) method of the given - * Inference for this Constraint, as per the - * visitor pattern. - * - * @param infer The inference visiting this constraint. 
- **/ - public void runVisit(Inference infer) { infer.visit(this); } +public class FirstOrderNegation extends FirstOrderConstraint { + /** The constraint that the negation is applied to. */ + protected FirstOrderConstraint constraint; + + + /** + * Initializing constructor. + * + * @param c The constraint to negate. + **/ + public FirstOrderNegation(FirstOrderConstraint c) { + constraint = c; + } + + + /** + * Replaces all unquantified variables with the unique copy stored as a value of the given map; + * also instantiates all quantified variables and stores them in the given map. + * + * @param m The map in which to find unique copies of the variables. + **/ + public void consolidateVariables(java.util.AbstractMap m) { + constraint.consolidateVariables(m); + } + + + /** + * This method sets the given quantification variables to the given object references and + * evaluates the expressions involving those variables in this constraint's + * FirstOrderEquality children. + * + * @param o The new object references for the enclosing quantification variables, in order of + * nesting. + **/ + public void setQuantificationVariables(Vector o) { + constraint.setQuantificationVariables(o); + } + + + /** + * Returns the children of this constraint in an array. + * + * @return The children of this constraint in an array. + **/ + public Constraint[] getChildren() { + return new FirstOrderConstraint[0]; + } + + + /** Determines whether the constraint is satisfied. */ + public boolean evaluate() { + return !constraint.evaluate(); + } + + + /** + * Transforms this first order constraint into a propositional constraint. + * + * @return The propositionalized constraint. + **/ + public PropositionalConstraint propositionalize() { + return new PropositionalNegation(constraint.propositionalize()); + } + + + /** + * The hash code of a FirstOrderNegation is the hash code of its child constraint + * plus 1. + * + * @return The hash code for this FirstOrderNegation. 
+ **/ + public int hashCode() { + return constraint.hashCode() + 1; + } + + + /** + * Two FirstOrderNegations are equivalent when their constraints are equivalent. + * + * @return true iff the argument is a FirstOrderNegation of the same + * constraint. + **/ + public boolean equals(Object o) { + if (!(o instanceof FirstOrderNegation)) + return false; + FirstOrderNegation n = (FirstOrderNegation) o; + return constraint.equals(n.constraint); + } + + + /** + * Calls the appropriate visit(·) method of the given Inference + * for this Constraint, as per the visitor pattern. + * + * @param infer The inference visiting this constraint. + **/ + public void runVisit(Inference infer) { + infer.visit(this); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderVariable.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderVariable.java index a087d84c..b69383a4 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderVariable.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/FirstOrderVariable.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.infer; @@ -15,136 +12,142 @@ /** - * Represents a classifier application. An inference algorithm may change - * the value returned by the classifier application when satisfying - * constraints. 
- * - * @author Nick Rizzolo + * Represents a classifier application. An inference algorithm may change the value returned by the + * classifier application when satisfying constraints. + * + * @author Nick Rizzolo **/ -public class FirstOrderVariable implements Cloneable -{ - /** The classifier being applied. */ - private Learner classifier; - /** The classifier is applied to this example object. */ - private Object example; - /** The scores of the possible values this variable might be set to. */ - private ScoreSet scores; - /** The value imposed on the classifier when applied to the example. */ - private String value; - - - /** - * Initializing constructor. - * - * @param c The classifier being applied. - * @param e The classifier is applied to this example object. - **/ - public FirstOrderVariable(Learner c, Object e) { - classifier = c; - example = e; - } - - - /** Retrieves the classifier. */ - public Learner getClassifier() { return classifier; } - - - /** Retrieves the example object. */ - public Object getExample() { return example; } - - - /** Retrieves the value this variable currently takes. */ - public String getValue() { - if (value == null) { - if (scores == null) scores = classifier.scores(example); - value = scores.highScoreValue(); +public class FirstOrderVariable implements Cloneable { + /** The classifier being applied. */ + private Learner classifier; + /** The classifier is applied to this example object. */ + private Object example; + /** The scores of the possible values this variable might be set to. */ + private ScoreSet scores; + /** The value imposed on the classifier when applied to the example. */ + private String value; + + + /** + * Initializing constructor. + * + * @param c The classifier being applied. + * @param e The classifier is applied to this example object. + **/ + public FirstOrderVariable(Learner c, Object e) { + classifier = c; + example = e; } - return value; - } - - - /** - * Sets the value of this variable. 
- * - * @param v The new value of this variable. - **/ - public void setValue(String v) { value = v; } - - - /** - * Sets the example object. - * - * @param e The new example object. - **/ - public void setExample(Object e) { example = e; } - - - /** Retrieves the score of the current value of this variable. */ - public double getScore() { - if (scores == null) scores = classifier.scores(example); - return scores.get(getValue()); - } - - - /** Retrieves all the scores for the values this variable may take. */ - public ScoreSet getScores() { - if (scores == null) scores = classifier.scores(example); - return scores; - } - - - /** Returns a string representation of this variable. */ - public String toString() { - return classifier + "(" + Inference.exampleToString(example) + ") = " - + value; - } - - - /** - * The hash code of a FirstOrderVariable is the hash code of - * the string representation of the classifier plus the system's hash code - * for the example object. - * - * @return The hash code of this FirstOrderVariable. - **/ - public int hashCode() { - return classifier.toString().hashCode() - + System.identityHashCode(example); - } - - - /** - * Two FirstOrderVariables are equivalent when their - * classifiers are equivalent and they store the same example object. - * - * @param o The object to test equivalence with. - * @return true iff this object is equivalent to the argument - * object. - **/ - public boolean equals(Object o) { - if (!(o instanceof FirstOrderVariable)) return false; - FirstOrderVariable v = (FirstOrderVariable) o; - return classifier.equals(v.classifier) && example == v.example; - } - - - /** - * This method returns a shallow clone. - * - * @return A shallow clone. - **/ - public Object clone() { - Object clone = null; - - try { clone = super.clone(); } - catch (Exception e) { - System.err.println("Error cloning FirstOrderVariable:"); - e.printStackTrace(); - System.exit(1); + + /** Retrieves the classifier. 
*/ + public Learner getClassifier() { + return classifier; } - return clone; - } -} + /** Retrieves the example object. */ + public Object getExample() { + return example; + } + + + /** Retrieves the value this variable currently takes. */ + public String getValue() { + if (value == null) { + if (scores == null) + scores = classifier.scores(example); + value = scores.highScoreValue(); + } + + return value; + } + + + /** + * Sets the value of this variable. + * + * @param v The new value of this variable. + **/ + public void setValue(String v) { + value = v; + } + + + /** + * Sets the example object. + * + * @param e The new example object. + **/ + public void setExample(Object e) { + example = e; + } + + + /** Retrieves the score of the current value of this variable. */ + public double getScore() { + if (scores == null) + scores = classifier.scores(example); + return scores.get(getValue()); + } + + + /** Retrieves all the scores for the values this variable may take. */ + public ScoreSet getScores() { + if (scores == null) + scores = classifier.scores(example); + return scores; + } + + + /** Returns a string representation of this variable. */ + public String toString() { + return classifier + "(" + Inference.exampleToString(example) + ") = " + value; + } + + + /** + * The hash code of a FirstOrderVariable is the hash code of the string + * representation of the classifier plus the system's hash code for the example object. + * + * @return The hash code of this FirstOrderVariable. + **/ + public int hashCode() { + return classifier.toString().hashCode() + System.identityHashCode(example); + } + + + /** + * Two FirstOrderVariables are equivalent when their classifiers are equivalent and + * they store the same example object. + * + * @param o The object to test equivalence with. + * @return true iff this object is equivalent to the argument object. 
+ **/ + public boolean equals(Object o) { + if (!(o instanceof FirstOrderVariable)) + return false; + FirstOrderVariable v = (FirstOrderVariable) o; + return classifier.equals(v.classifier) && example == v.example; + } + + + /** + * This method returns a shallow clone. + * + * @return A shallow clone. + **/ + public Object clone() { + Object clone = null; + + try { + clone = super.clone(); + } catch (Exception e) { + System.err.println("Error cloning FirstOrderVariable:"); + e.printStackTrace(); + System.exit(1); + } + + return clone; + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/GurobiHook.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/GurobiHook.java deleted file mode 100644 index e7a695fd..00000000 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/GurobiHook.java +++ /dev/null @@ -1,486 +0,0 @@ -/** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 - * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign - * http://cogcomp.cs.illinois.edu/ - */ -package edu.illinois.cs.cogcomp.lbjava.infer; - -import edu.illinois.cs.cogcomp.lbjava.classify.Score; -import edu.illinois.cs.cogcomp.lbjava.util.DVector; -import edu.illinois.cs.cogcomp.lbjava.util.OVector; -import gurobi.GRB; -import gurobi.GRBConstr; -import gurobi.GRBEnv; -import gurobi.GRBException; -import gurobi.GRBLinExpr; -import gurobi.GRBModel; -import gurobi.GRBSOS; -import gurobi.GRBVar; - -import java.util.Arrays; - - -/** - * This is an interface to the
Gurobi - * Optimizer. Make sure the jar file is on your CLASSPATH - * and that the Gurobi libraries are installed appropriately on your system - * before attempting to compile and use this class. - * - * @author Nick Rizzolo - **/ -public class GurobiHook implements ILPSolver -{ - /** Prints an error message and exits the JVM. */ - protected static void handleException(GRBException e) { - System.out.println( - "Gurobi error " + e.getErrorCode() + ": " + e.getMessage()); - e.printStackTrace(); - System.exit(-1); - } - - - /** The model to be optimized will be associated with this environment. */ - protected GRBEnv environment; - /** The model to be optimized. */ - protected GRBModel model; - /** The model's decision variables. */ - protected OVector variables; - /** Contains all of the Gurobi SOS objects created for the model. */ - protected OVector SOSes; - /** - * Whether or not the GRBModel.update() method needs to be - * called before adding more constraints. - **/ - protected boolean needsUpdate; - /** Whether or not the model has been solved. */ - protected boolean isSolved; - /** - * Verbosity level. {@link ILPInference#VERBOSITY_NONE} produces no - * incidental output. If set to {@link ILPInference#VERBOSITY_LOW}, only - * variable and constraint counts are reported on STDOUT. If - * set to {@link ILPInference#VERBOSITY_HIGH}, a textual representation of - * the entire optimization problem is also generated on - * STDOUT. - **/ - protected int verbosity; - /** - * The coefficients of the variables in the objective function. This is - * redundant memory, and it's only being stored in the event that someone - * wants to call {@link #write(StringBuffer)}. Once we get Gurobi 4.0, we - * can discard of it. - **/ - protected DVector objectiveCoefficients; - - - /** Create a new Gurobi hook with the default environment parameters. 
*/ - public GurobiHook() { this(ILPInference.VERBOSITY_NONE); } - - /** - * Create a new Gurobi hook with the default environment parameters. - * - * @param v Setting for the {@link #verbosity} level. - **/ - public GurobiHook(int v) { - try { - environment = new GRBEnv(); - environment.set(GRB.IntParam.OutputFlag, 0); - } - catch (GRBException e) { handleException(e); } - verbosity = v; - reset(); - } - - /** - * Create a new Gurobi hook with the specified environment. - * - * @param env An environment containing user-specified parameters. - **/ - public GurobiHook(GRBEnv env) { this(env, ILPInference.VERBOSITY_NONE); } - - /** - * Create a new Gurobi hook with the specified environment. - * - * @param env An environment containing user-specified parameters. - * @param v Setting for the {@link #verbosity} level. - **/ - public GurobiHook(GRBEnv env, int v) { - environment = env; - verbosity = v; - reset(); - } - - - /** - * This method clears the all constraints and variables out of the ILP - * solver's problem representation, bringing the ILPSolver - * instance back to the state it was in when first constructed. - **/ - public void reset() { - try { model = new GRBModel(environment); } - catch (GRBException e) { handleException(e); } - variables = new OVector(); - SOSes = new OVector(); - objectiveCoefficients = new DVector(); - needsUpdate = isSolved = false; - } - - - /** - * Sets the direction of the objective function. - * - * @param d true if the objective function is to be - * maximized. - **/ - public void setMaximize(boolean d) { - try { model.set(GRB.IntAttr.ModelSense, d ? -1 : 1); } - catch (GRBException e) { handleException(e); } - } - - - /** - * Adds a new Boolean variable (an integer variable constrained to take - * either the value 0 or the value 1) with the specified coefficient in the - * objective function to the problem. - * - * @param c The objective function coefficient for the new Boolean - * variable. 
- * @return The index of the created variable. - **/ - public int addBooleanVariable(double c) { - int id = variables.size(); - try { variables.add(model.addVar(0, 1, c, GRB.BINARY, "x_" + id)); } - catch (GRBException e) { handleException(e); } - // TODO: delete the line below once we get Gurobi 4.0 - objectiveCoefficients.add(c); - needsUpdate = true; - return id; - } - - - /** - * Adds a general, multi-valued discrete variable, which is implemented as - * a set of Boolean variables, one per value of the discrete variable, with - * exactly one of those variables set true at any given time. - * - * @param c The objective function coefficients for the new Boolean - * variables. - * @return The indexes of the newly created variables. - **/ - public int[] addDiscreteVariable(double[] c) { - int[] result = new int[c.length]; - for (int i = 0; i < c.length; ++i) - result[i] = addBooleanVariable(c[i]); - - double[] w = new double[c.length]; - Arrays.fill(w, 1); - addGreaterThanConstraint(result, w, 1); - try { SOSes.add(model.addSOS(idsToVariables(result), w, GRB.SOS_TYPE1)); } - catch (GRBException e) { handleException(e); } - return result; - } - - - /** - * Adds a general, multi-valued discrete variable, which is implemented as - * a set of Boolean variables, one per value of the discrete variable, with - * exactly one of those variables set true at any given time. - * - * @param c An array of {@link edu.illinois.cs.cogcomp.lbjava.classify.Score}s containing the - * objective function coefficients for the new Boolean variables. - * @return The indexes of the newly created variables. - **/ - public int[] addDiscreteVariable(Score[] c) { - double[] scores = new double[c.length]; - for (int i = 0; i < c.length; ++i) scores[i] = c[i].score; - return addDiscreteVariable(scores); - } - - - /** - * Adds a new constraint to the problem with the specified type. This - * method is called by all the other add*Constraint() methods. 
- * - * @param i The indexes of the variables with non-zero coefficients. - * @param a The coefficients of the variables with the given indexes. - * @param b The new constraint will enforce (in)equality with this - * constant. - * @param t The type of linear inequality constraint to add. - **/ - protected void addConstraint(int[] i, double[] a, double b, char t) { - if (needsUpdate) { - try { model.update(); } - catch (GRBException e) { handleException(e); } - needsUpdate = false; - } - - try { - int constraints = model.get(GRB.IntAttr.NumConstrs); - model.addConstr(makeLinearExpression(i, a), t, b, "c_" + constraints); - } - catch (GRBException e) { handleException(e); } - } - - - /** - * Adds a new fixed constraint to the problem. The two array arguments - * must be the same length, as their elements correspond to each other. - * Variables whose coefficients are zero need not be mentioned. Variables - * that are mentioned must have previously been added via - * {@link #addBooleanVariable(double)} or - * {@link #addDiscreteVariable(double[])}. The resulting constraint has - * the form: - *
xi * a = b
- * where xi represents the inference variables - * whose indexes are contained in the array i and - * * represents dot product. - * - * @param i The indexes of the variables with non-zero coefficients. - * @param a The coefficients of the variables with the given indexes. - * @param b The new constraint will enforce equality with this constant. - **/ - public void addEqualityConstraint(int[] i, double[] a, double b) { - addConstraint(i, a, b, GRB.EQUAL); - } - - - /** - * Adds a new lower bounded constraint to the problem. The two array - * arguments must be the same length, as their elements correspond to each - * other. Variables whose coefficients are zero need not be mentioned. - * Variables that are mentioned must have previously been added via - * {@link #addBooleanVariable(double)} or - * {@link #addDiscreteVariable(double[])}. The resulting constraint has - * the form: - *
xi * a >= b
- * where xi represents the inference variables - * whose indexes are contained in the array i and - * * represents dot product. - * - * @param i The indexes of the variables with non-zero coefficients. - * @param a The coefficients of the variables with the given indexes. - * @param b The lower bound for the new constraint. - **/ - public void addGreaterThanConstraint(int[] i, double[] a, double b) { - addConstraint(i, a, b, GRB.GREATER_EQUAL); - } - - - /** - * Adds a new upper bounded constraint to the problem. The two array - * arguments must be the same length, as their elements correspond to each - * other. Variables whose coefficients are zero need not be mentioned. - * Variables that are mentioned must have previously been added via - * {@link #addBooleanVariable(double)} or - * {@link #addDiscreteVariable(double[])}. The resulting constraint has - * the form: - *
xi * a <= b
- * where xi represents the inference variables - * whose indexes are contained in the array i and - * * represents dot product. - * - * @param i The indexes of the variables with non-zero coefficients. - * @param a The coefficients of the variables with the given indexes. - * @param b The upper bound for the new constraint. - **/ - public void addLessThanConstraint(int[] i, double[] a, double b) { - addConstraint(i, a, b, GRB.LESS_EQUAL); - } - - - /** - * Solves the ILP problem, saving the solution internally. - * - * @return true iff a solution was found successfully. - **/ - public boolean solve() throws Exception { - if (verbosity > ILPInference.VERBOSITY_NONE) { - System.out.println(" variables: " + model.get(GRB.IntAttr.NumVars)); - System.out.println(" constraints: " - + model.get(GRB.IntAttr.NumConstrs)); - } - - if (verbosity == ILPInference.VERBOSITY_HIGH) { - StringBuffer buffer = new StringBuffer(); - write(buffer); - System.out.print(buffer); - } - - model.optimize(); - int status = model.get(GRB.IntAttr.Status); - isSolved = status == GRB.OPTIMAL || status == GRB.SUBOPTIMAL; - return isSolved; - } - - - /** - * Tests whether the problem represented by this ILPSolver - * instance has been solved already. - **/ - public boolean isSolved() { return isSolved; } - - - /** - * When the problem has been solved, use this method to retrieve the value - * of any Boolean inference variable. The result of this method is - * undefined when the problem has not yet been solved. - * - * @param index The index of the variable whose value is requested. - * @return The value of the variable. - **/ - public boolean getBooleanValue(int index) { - if (!isSolved) return false; - try { - double x = ((GRBVar) variables.get(index)).get(GRB.DoubleAttr.X); - return x > 0.5; - } - catch (GRBException e) { handleException(e); } - return false; - } - - - /** - * When the problem has been solved, use this method to retrieve the value - * of the objective function at the solution. 
The result of this method is - * undefined when the problem has not yet been solved. If the problem had - * no feasible solutions, negative (positive, respectively) infinity will - * be returned if maximizing (minimizing). - * - * @return The value of the objective function at the solution. - **/ - public double objectiveValue() { - try { - if (isSolved) return model.get(GRB.DoubleAttr.ObjVal); - int status = model.get(GRB.IntAttr.Status); - if (status == GRB.INFEASIBLE || status == GRB.INF_OR_UNBD - || status == GRB.UNBOUNDED) - return - model.get(GRB.IntAttr.ModelSense) == -1 ? Double.NEGATIVE_INFINITY - : Double.POSITIVE_INFINITY; - } - catch (GRBException e) { handleException(e); } - return 0; - } - - - /** - * Given an array of variable indexes, this method returns the - * corresponding Gurobi variable objects in an array. - * - * @param ids The array of variable indexes. - * @return The corresponding Gurobi variable objects. - **/ - protected GRBVar[] idsToVariables(int[] ids) { - GRBVar[] result = new GRBVar[ids.length]; - for (int i = 0; i < ids.length; i++) - result[i] = (GRBVar) variables.get(ids[i]); - return result; - } - - - /** - * Creates a Gurobi linear expression object representing the dot product - * of the variables with the specified indexes and the specified - * coefficients. - * - * @param ids The indexes of the variables. - * @param c The corresponding coefficients. - * @return A Gurobi linear expression representing the dot product. - **/ - protected GRBLinExpr makeLinearExpression(int[] ids, double[] c) { - try { - GRBLinExpr expr = new GRBLinExpr(); - expr.addTerms(c, idsToVariables(ids)); - return expr; - } - catch (GRBException e) { handleException(e); } - return null; - } - - - /** - * Creates a textual representation of the ILP problem in an algebraic - * notation. - * - * @param buffer The created textual representation will be appended here. 
- **/ - public void write(StringBuffer buffer) { - try { - model.update(); - if (model.get(GRB.IntAttr.ModelSense) == -1) buffer.append("max"); - else buffer.append("min"); - - // Using this bit of code until we get Gurobi 4.0 or higher. - for (int i = 0; i < objectiveCoefficients.size(); ++i) { - double c = objectiveCoefficients.get(i); - buffer.append(" "); - if (c >= 0) buffer.append("+"); - buffer.append(c); - buffer.append(" x_"); - buffer.append(i); - } - - /* This code should work once we have Gurobi 4.0 or higher. Then we - * don't have to redundantly store objectiveCoefficients. - GRBLinExpr objective = (GRBLinExpr) model.getObjective(); - int objectiveSize = objective.size(); - for (int i = 0; i < objectiveSize; ++i) { - double c = objective.getCoeff(i); - buffer.append(" "); - if (c >= 0) buffer.append("+"); - buffer.append(c); - buffer.append(" "); - buffer.append(objective.getVar(i).get(GRB.StringAttr.VarName)); - } - */ - - buffer.append("\n"); - - int SOSesSize = SOSes.size(); - for (int i = 0; i < SOSesSize; ++i) { - GRBSOS sos = (GRBSOS) SOSes.get(i); - int[] type = new int[1]; - int size = model.getSOS(sos, null, null, type); - GRBVar[] sosVariables = new GRBVar[size]; - model.getSOS(sos, sosVariables, new double[size], type); - - buffer.append(" atmost 1 of (x in {"); - for (int j = 0; j < size; ++j) { - buffer.append(sosVariables[j].get(GRB.StringAttr.VarName)); - if (j + 1 < size) buffer.append(", "); - } - buffer.append("}) (x > 0)\n"); - } - - GRBConstr[] constraints = model.getConstrs(); - for (int i = 0; i < constraints.length; ++i) { - GRBLinExpr row = model.getRow(constraints[i]); - int rowSize = row.size(); - buffer.append(" "); - - for (int j = 0; j < rowSize; ++j) { - double c = row.getCoeff(j); - buffer.append(" "); - if (c >= 0) buffer.append("+"); - buffer.append(c); - buffer.append(" "); - buffer.append(row.getVar(j).get(GRB.StringAttr.VarName)); - } - - char type = constraints[i].get(GRB.CharAttr.Sense); - if (type == 
GRB.LESS_EQUAL) buffer.append(" <= "); - else if (type == GRB.GREATER_EQUAL) buffer.append(" >= "); - else buffer.append(" = "); - - buffer.append(constraints[i].get(GRB.DoubleAttr.RHS)); - buffer.append("\n"); - } - } - catch (GRBException e) { handleException(e); } - } -} - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/ILPInference.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/ILPInference.java index 627c6b1d..8304c0aa 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/ILPInference.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/ILPInference.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.infer; @@ -18,668 +15,661 @@ import edu.illinois.cs.cogcomp.lbjava.classify.Score; import edu.illinois.cs.cogcomp.lbjava.classify.ScoreSet; import edu.illinois.cs.cogcomp.lbjava.learn.Learner; +import edu.illinois.cs.cogcomp.infer.ilp.ILPSolver; /** - * This class employs an {@link ILPSolver} to solve a constrained inference - * problem. When constructing an instance of this class in an LBJava source - * file, use one of the constructors that does not specify a head - * object. The generated code will fill in the head object automatically. - * The other constructor parameters are used to specify the ILP algorithm and - * enable textual output of ILP variable descriptions to STDOUT. 
- * Textual output of the ILP problem itself is controlled by the - * {@link ILPSolver}. - * - * @author Nick Rizzolo + * This class employs an {@link ILPSolver} to solve a constrained inference problem. When + * constructing an instance of this class in an LBJava source file, use one of the constructors that + * does not specify a head object. The generated code will fill in the head object + * automatically. The other constructor parameters are used to specify the ILP algorithm and enable + * textual output of ILP variable descriptions to STDOUT. Textual output of the ILP + * problem itself is controlled by the {@link ILPSolver}. + * + * @author Nick Rizzolo **/ -public class ILPInference extends Inference -{ - /** A possible setting for {@link #verbosity}. */ - public static final int VERBOSITY_NONE = 0; - /** A possible setting for {@link #verbosity}. */ - public static final int VERBOSITY_LOW = 1; - /** A possible setting for {@link #verbosity}. */ - public static final int VERBOSITY_HIGH = 2; - - /** Keeps the next ID number for objects of this class. */ - protected static int nextID = 0; - - - /** The identification number for this object, used in debug file names. */ - protected int ID; - - /** The ILP algorithm. */ - protected ILPSolver solver; - /** This flag is set if the constraints turn out to be true in all cases. */ - protected boolean tautology; - /** - * Used during ILP constraint generation. When a propositional constraint - * finishes generating any ILP constraints that may be associated with it, - * it sets this variable to its own index. - **/ - protected int returnIndex; - /** - * Used during ILP constraint generation. This flag is set iff the - * variable corresponding to returnIndex is negated in its - * current context. - **/ - protected boolean returnNegation; - /** - * Used during ILP constraint generation. This map associates each - * variable index with a representation of the expression whose value is - * represented by the variable. 
The keys associated with indexes of - * variables that were originally part of the inference problem are - * PropositionalVariable objects. The keys associated with - * indexes of temporary variables created during constraint translation are - * strings. - **/ - protected HashMap indexMap; - /** - * Used during ILP constraint generation. Constraints are treated - * differently if they are part of another constraint expression than if - * they are a term in the top level conjunction. - **/ - protected boolean topLevel; - /** - * Verbosity level. {@link ILPInference#VERBOSITY_NONE} produces no - * incidental output. If set to {@link ILPInference#VERBOSITY_LOW}, only - * timing information is printed on STDOUT. If set to - * {@link ILPInference#VERBOSITY_HIGH}, information mapping the generated - * ILP variables to the first order variables they were generated from and - * their settings in the ILP problem's solution is printed to - * STDOUT. - **/ - protected int verbosity; - - - /** Don't use this constructor, since it doesn't set an ILP algorithm. */ - public ILPInference() { this(null); } - - /** - * Initializes the ILP algorithm, but not the head object. - * - * @param a The ILP algorithm. - **/ - public ILPInference(ILPSolver a) { this(null, a); } - - /** - * Initializes the ILP algorithm, but not the head object. - * - * @param a The ILP algorithm. - * @param v Sets the value of {@link #verbosity}. - **/ - public ILPInference(ILPSolver a, int v) { this(null, a, v); } - - /** Don't use this constructor, since it doesn't set an ILP algorithm. */ - public ILPInference(Object h) { this(h, null); } - - /** - * Sets the head object and the ILP algorithm. - * - * @param h The head object. - * @param a The ILP algorithm. - **/ - public ILPInference(Object h, ILPSolver a) { this(h, a, VERBOSITY_NONE); } - - /** - * Sets the head object and the ILP algorithm. - * - * @param h The head object. - * @param a The ILP algorithm. 
- * @param v Sets the value of {@link #verbosity}. - **/ - public ILPInference(Object h, ILPSolver a, int v) { - super(h); - solver = a; - verbosity = v; - ID = nextID++; - } - - - /** - * Adds a constraint to the inference. - * - * @param c The constraint to add. - **/ - public void addConstraint(FirstOrderConstraint c) { - solver.reset(); - if (constraint == null) constraint = c; - else - constraint = - new FirstOrderConjunction((FirstOrderConstraint) constraint, c); - } - - - /** - * Uses the provided ILP algorithm to solve the ILP proglem if it hasn't - * already been solved. - **/ - protected void infer() throws Exception { - if (tautology || solver.isSolved()) return; - - solver.setMaximize(true); - constraint.consolidateVariables(variables); - indexMap = new HashMap(); - - if (verbosity > VERBOSITY_NONE) - System.out.println("variables: (" + new Date() + ")"); - - for (Iterator I = variables.values().iterator(); I.hasNext(); ) { - FirstOrderVariable v = (FirstOrderVariable) I.next(); - ScoreSet ss = getNormalizer(v.getClassifier()).normalize(v.getScores()); - Score[] scores = null; - if (ss != null) scores = ss.toArray(); - - if (scores == null || scores.length == 0) { - System.err.println( - "LBJava ERROR: Classifier " + v.getClassifier() - + " did not return any scores. 
ILP inference cannot be " - + "performed."); - System.exit(1); - } - - int[] indexes = solver.addDiscreteVariable(scores); - - for (int j = 0; j < scores.length; ++j) { - indexMap.put( - new PropositionalVariable(v.getClassifier(), v.getExample(), - scores[j].value), - new Integer(indexes[j])); - - if (verbosity >= VERBOSITY_HIGH) { - StringBuffer toPrint = new StringBuffer(); - toPrint.append("x_"); - toPrint.append(indexes[j]); - while (toPrint.length() < 8) toPrint.insert(0, ' '); - toPrint.append(" ("); - toPrint.append(scores[j].score); - toPrint.append("): "); - toPrint.append(v.getClassifier()); - toPrint.append("("); - toPrint.append(Inference.exampleToString(v.getExample())); - toPrint.append(") == "); - toPrint.append(scores[j].value); - System.out.println(toPrint); - } - } +public class ILPInference extends Inference { + /** A possible setting for {@link #verbosity}. */ + public static final int VERBOSITY_NONE = 0; + /** A possible setting for {@link #verbosity}. */ + public static final int VERBOSITY_LOW = 1; + /** A possible setting for {@link #verbosity}. */ + public static final int VERBOSITY_HIGH = 2; + + /** Keeps the next ID number for objects of this class. */ + protected static int nextID = 0; + + + /** The identification number for this object, used in debug file names. */ + protected int ID; + + /** The ILP algorithm. */ + protected ILPSolver solver; + /** This flag is set if the constraints turn out to be true in all cases. */ + protected boolean tautology; + /** + * Used during ILP constraint generation. When a propositional constraint finishes generating + * any ILP constraints that may be associated with it, it sets this variable to its own index. + **/ + protected int returnIndex; + /** + * Used during ILP constraint generation. This flag is set iff the variable corresponding to + * returnIndex is negated in its current context. + **/ + protected boolean returnNegation; + /** + * Used during ILP constraint generation. 
This map associates each variable index with a + * representation of the expression whose value is represented by the variable. The keys + * associated with indexes of variables that were originally part of the inference problem are + * PropositionalVariable objects. The keys associated with indexes of temporary + * variables created during constraint translation are strings. + **/ + protected HashMap indexMap; + /** + * Used during ILP constraint generation. Constraints are treated differently if they are part + * of another constraint expression than if they are a term in the top level conjunction. + **/ + protected boolean topLevel; + /** + * Verbosity level. {@link ILPInference#VERBOSITY_NONE} produces no incidental output. If set to + * {@link ILPInference#VERBOSITY_LOW}, only timing information is printed on STDOUT + * . If set to {@link ILPInference#VERBOSITY_HIGH}, information mapping the generated ILP + * variables to the first order variables they were generated from and their settings in the ILP + * problem's solution is printed to STDOUT. + **/ + protected int verbosity; + + + /** Don't use this constructor, since it doesn't set an ILP algorithm. 
*/ + public ILPInference() { + this(null); } - if (verbosity > VERBOSITY_NONE) - System.out.println("propositionalization: (" + new Date() + ")"); - PropositionalConstraint propositional = - ((FirstOrderConstraint) constraint).propositionalize(); - - if (verbosity > VERBOSITY_NONE) - System.out.println("simplification: (" + new Date() + ")"); - if (propositional instanceof PropositionalConjunction) - propositional = - ((PropositionalConjunction) propositional).simplify(true); - else propositional = propositional.simplify(); - - if (propositional instanceof PropositionalConstant) { - if (propositional.evaluate()) { - tautology = true; - return; - } - else { - System.err.println("ILP ERROR: Unsatisfiable constraints!"); - solver.addEqualityConstraint(new int[]{ 0 }, new double[]{ 1 }, 2); - } + /** + * Initializes the ILP algorithm, but not the head object. + * + * @param a The ILP algorithm. + **/ + public ILPInference(ILPSolver a) { + this(null, a); } - if (verbosity > VERBOSITY_NONE) - System.out.println("translation: (" + new Date() + ")"); - topLevel = true; - propositional.runVisit(this); - - if (verbosity > VERBOSITY_NONE) - System.out.println("solution: (" + new Date() + ")"); - if (!solver.solve()) throw new InferenceNotOptimalException(solver, head); - int variableIndex = 0; - if (verbosity > VERBOSITY_NONE) - System.out.println("variables set true in solution: (" + new Date() - + ")"); - - for (Iterator I = variables.values().iterator(); I.hasNext(); ) { - FirstOrderVariable v = (FirstOrderVariable) I.next(); - Score[] scores = v.getScores().toArray(); - for (int j = 0; j < scores.length; ++j, ++variableIndex) - if (solver.getBooleanValue(variableIndex)) { - v.setValue(scores[j].value); - - if (verbosity >= VERBOSITY_HIGH) { - StringBuffer toPrint = new StringBuffer(); - toPrint.append("x_"); - toPrint.append(variableIndex); - while (toPrint.length() < 8) toPrint.insert(0, ' '); - toPrint.append(": "); - toPrint.append(v); - System.out.println(toPrint); - 
} - } + /** + * Initializes the ILP algorithm, but not the head object. + * + * @param a The ILP algorithm. + * @param v Sets the value of {@link #verbosity}. + **/ + public ILPInference(ILPSolver a, int v) { + this(null, a, v); } - } - - - /** - * Retrieves the value of the specified variable as identified by the - * classifier and the object that produce that variable. - * - * @param c The classifier producing the variable. - * @param o The object from which the variable is produced. - * @return The current value of the requested variable. If the variable - * does not exist in this inference, the result of the - * Learner's discreteValue(Object) method - * applied to the Object is returned. - **/ - public String valueOf(Learner c, Object o) throws Exception { - infer(); - return getVariable(new FirstOrderVariable(c, o)).getValue(); - } - - - /** - * Two Inference objects are equal when they have the same - * run-time type and store the same head object. I.e., the == - * operator must return true when comparing the two head - * objects for this method to return true. - * - * @param o The object to compare to this object. - * @return true iff this object equals the argument object as - * defined above. - **/ - public boolean equals(Object o) { - if (!(o instanceof ILPInference)) return false; - return head == ((ILPInference) o).head; - } - - - /** - * Simply returns the head's hash code. - * - * @see java.lang.Object#hashCode() - **/ - public int hashCode() { return head.hashCode(); } - - - /** - * Creates a new Boolean variable to represent the value of a subexpression - * of some constraint. - * - * @param d A textual description of the subexpression whose value is - * represented by the new variable. - * @return The index of the new variable. 
- **/ - protected int createVariable(String d) { - int result = solver.addBooleanVariable(0); - if (verbosity >= VERBOSITY_HIGH) System.out.println(result + ": " + d); - return result; - } - - - /** - * Derived classes override this method to do some type of processing on - * constraints of the parameter's type. - * - * @param c The constraint to process. - **/ - public void visit(PropositionalDoubleImplication c) { - assert topLevel : "ILP: PropositionalDoubleImplication encountered."; - topLevel = false; - - int[] indexes = new int[2]; - double[] coefficients = new double[2]; - double bound = 0; - - c.left.runVisit(this); - indexes[0] = returnIndex; - if (returnNegation) { - coefficients[0] = -1; - --bound; - } - else coefficients[0] = 1; - c.right.runVisit(this); - indexes[1] = returnIndex; - if (returnNegation) { - coefficients[1] = 1; - ++bound; + /** Don't use this constructor, since it doesn't set an ILP algorithm. */ + public ILPInference(Object h) { + this(h, null); } - else coefficients[1] = -1; - - solver.addEqualityConstraint(indexes, coefficients, bound); - - topLevel = true; - } - - - /** - * Derived classes override this method to do some type of processing on - * constraints of the parameter's type. - * - * @param c The constraint to process. - **/ - public void visit(PropositionalImplication c) { - assert false : "ILP: PropositionalImplication encountered."; - } - - - /** - * Derived classes override this method to do some type of processing on - * constraints of the parameter's type. - * - * @param c The constraint to process. 
- **/ - public void visit(PropositionalConjunction c) { - PropositionalConstraint[] children = - (PropositionalConstraint[]) c.getChildren(); - - int[] indexes = null; - double[] coefficients = null; - double bound; - - if (topLevel) { - PropositionalConstraint[] variables = - new PropositionalConstraint[children.length]; - int size = 0; - for (int i = 0; i < children.length; ++i) { - if (children[i] instanceof PropositionalVariable - || children[i] instanceof PropositionalNegation) - variables[size++] = children[i]; - else children[i].runVisit(this); - } - - if (size > 0) { - indexes = new int[size]; - coefficients = new double[size]; - bound = size; - - for (int i = 0; i < size; ++i) { - variables[i].runVisit(this); - indexes[i] = returnIndex; - if (returnNegation) { - coefficients[i] = -1; - --bound; - } - else coefficients[i] = 1; - } - solver.addEqualityConstraint(indexes, coefficients, bound); - } + /** + * Sets the head object and the ILP algorithm. + * + * @param h The head object. + * @param a The ILP algorithm. + **/ + public ILPInference(Object h, ILPSolver a) { + this(h, a, VERBOSITY_NONE); } - else { - indexes = new int[children.length + 1]; - coefficients = new double[children.length + 1]; - bound = 0; - - for (int i = 0; i < children.length; ++i) { - children[i].runVisit(this); - indexes[i] = returnIndex; - if (returnNegation) { - coefficients[i] = -1; - --bound; - } - else coefficients[i] = 1; - } - - String[] stringIndexes = new String[children.length]; - for (int i = 0; i < children.length; ++i) - stringIndexes[i] = (coefficients[i] < 0 ? "!" 
: "") + indexes[i]; - Arrays.sort(stringIndexes); - String key = stringIndexes[0]; - for (int i = 1; i < stringIndexes.length; ++i) - key += "&" + stringIndexes[i]; - Integer I = (Integer) indexMap.get(key); - - if (I == null) { - I = new Integer(createVariable(key)); - indexMap.put(key, I); - - indexes[children.length] = I.intValue(); - coefficients[children.length] = -children.length; - solver.addGreaterThanConstraint(indexes, coefficients, bound); - - coefficients[children.length] = -1; - solver.addLessThanConstraint(indexes, coefficients, - bound + children.length - 1); - } - - returnIndex = I.intValue(); - returnNegation = false; + + /** + * Sets the head object and the ILP algorithm. + * + * @param h The head object. + * @param a The ILP algorithm. + * @param v Sets the value of {@link #verbosity}. + **/ + public ILPInference(Object h, ILPSolver a, int v) { + super(h); + solver = a; + verbosity = v; + ID = nextID++; } - } - - - /** - * Derived classes override this method to do some type of processing on - * constraints of the parameter's type. - * - * @param c The constraint to process. - **/ - public void visit(PropositionalDisjunction c) { - PropositionalConstraint[] children = - (PropositionalConstraint[]) c.getChildren(); - - int[] indexes = null; - double[] coefficients = null; - double bound = 0; - - if (topLevel) { - int subConstraintIndex = -1; - - for (int i = 0; i < children.length && subConstraintIndex == -1; ++i) { - if (children[i] instanceof PropositionalVariable - || children[i] instanceof PropositionalNegation) - continue; - if (children[i] instanceof PropositionalConjunction) - subConstraintIndex = i; - else if (children[i] instanceof PropositionalAtLeast) - subConstraintIndex = i; - } - - if (subConstraintIndex > -1) { - PropositionalConstraint[] subChildren = - (PropositionalConstraint[]) - children[subConstraintIndex].getChildren(); - int multiplier = - children[subConstraintIndex] instanceof PropositionalConjunction - ? 
subChildren.length - : ((PropositionalAtLeast) children[subConstraintIndex]).getM(); - - indexes = new int[subChildren.length + children.length - 1]; - coefficients = new double[subChildren.length + children.length - 1]; - bound = multiplier; - topLevel = false; - int j = 0; - for (int i = 0; i < children.length; ++i) { - if (i == subConstraintIndex) continue; + /** + * Adds a constraint to the inference. + * + * @param c The constraint to add. + **/ + public void addConstraint(FirstOrderConstraint c) { + solver.reset(); + if (constraint == null) + constraint = c; + else + constraint = new FirstOrderConjunction((FirstOrderConstraint) constraint, c); + } - children[i].runVisit(this); - indexes[j] = returnIndex; - if (returnNegation) { - coefficients[j] = -multiplier; - bound -= multiplier; - } - else coefficients[j] = multiplier; - ++j; + /** + * Uses the provided ILP algorithm to solve the ILP proglem if it hasn't already been solved. + **/ + protected void infer() throws Exception { + if (tautology || solver.isSolved()) + return; + + solver.setMaximize(true); + constraint.consolidateVariables(variables); + indexMap = new HashMap(); + + if (verbosity > VERBOSITY_NONE) + System.out.println("variables: (" + new Date() + ")"); + + for (Iterator I = variables.values().iterator(); I.hasNext();) { + FirstOrderVariable v = (FirstOrderVariable) I.next(); + ScoreSet ss = getNormalizer(v.getClassifier()).normalize(v.getScores()); + Score[] scores = null; + if (ss != null) + scores = ss.toArray(); + + if (scores == null || scores.length == 0) { + System.err.println("LBJava ERROR: Classifier " + v.getClassifier() + + " did not return any scores. 
ILP inference cannot be " + "performed."); + System.exit(1); + } + + // putting scores in a real-valued array + double[] weights = new double[scores.length]; + for (int idx = 0; idx < scores.length; idx++) + weights[idx] = scores[idx].score; + int[] indexes = solver.addDiscreteVariable(weights); + + for (int j = 0; j < scores.length; ++j) { + indexMap.put(new PropositionalVariable(v.getClassifier(), v.getExample(), + scores[j].value), new Integer(indexes[j])); + + if (verbosity >= VERBOSITY_HIGH) { + StringBuffer toPrint = new StringBuffer(); + toPrint.append("x_"); + toPrint.append(indexes[j]); + while (toPrint.length() < 8) + toPrint.insert(0, ' '); + toPrint.append(" ("); + toPrint.append(scores[j].score); + toPrint.append("): "); + toPrint.append(v.getClassifier()); + toPrint.append("("); + toPrint.append(Inference.exampleToString(v.getExample())); + toPrint.append(") == "); + toPrint.append(scores[j].value); + System.out.println(toPrint); + } + } } - for (int i = 0; i < subChildren.length; ++i, ++j) { - subChildren[i].runVisit(this); - indexes[j] = returnIndex; - if (returnNegation) { - coefficients[j] = -1; - --bound; - } - else coefficients[j] = 1; + if (verbosity > VERBOSITY_NONE) + System.out.println("propositionalization: (" + new Date() + ")"); + PropositionalConstraint propositional = + ((FirstOrderConstraint) constraint).propositionalize(); + + if (verbosity > VERBOSITY_NONE) + System.out.println("simplification: (" + new Date() + ")"); + if (propositional instanceof PropositionalConjunction) + propositional = ((PropositionalConjunction) propositional).simplify(true); + else + propositional = propositional.simplify(); + + if (propositional instanceof PropositionalConstant) { + if (propositional.evaluate()) { + tautology = true; + return; + } else { + System.err.println("ILP ERROR: Unsatisfiable constraints!"); + solver.addEqualityConstraint(new int[] {0}, new double[] {1}, 2); + } } + if (verbosity > VERBOSITY_NONE) + System.out.println("translation: 
(" + new Date() + ")"); topLevel = true; + propositional.runVisit(this); + + if (verbosity > VERBOSITY_NONE) + System.out.println("solution: (" + new Date() + ")"); + if (!solver.solve()) + throw new InferenceNotOptimalException(solver, head); + int variableIndex = 0; + if (verbosity > VERBOSITY_NONE) + System.out.println("variables set true in solution: (" + new Date() + ")"); + + for (Iterator I = variables.values().iterator(); I.hasNext();) { + FirstOrderVariable v = (FirstOrderVariable) I.next(); + Score[] scores = v.getScores().toArray(); + for (int j = 0; j < scores.length; ++j, ++variableIndex) + if (solver.getBooleanValue(variableIndex)) { + v.setValue(scores[j].value); + + if (verbosity >= VERBOSITY_HIGH) { + StringBuffer toPrint = new StringBuffer(); + toPrint.append("x_"); + toPrint.append(variableIndex); + while (toPrint.length() < 8) + toPrint.insert(0, ' '); + toPrint.append(": "); + toPrint.append(v); + System.out.println(toPrint); + } + } + } + } + - solver.addGreaterThanConstraint(indexes, coefficients, bound); - return; - } + /** + * Retrieves the value of the specified variable as identified by the classifier and the object + * that produce that variable. + * + * @param c The classifier producing the variable. + * @param o The object from which the variable is produced. + * @return The current value of the requested variable. If the variable does not exist in this + * inference, the result of the Learner's + * discreteValue(Object) method applied to the Object is + * returned. + **/ + public String valueOf(Learner c, Object o) throws Exception { + infer(); + return getVariable(new FirstOrderVariable(c, o)).getValue(); } - if (topLevel) { - indexes = new int[children.length]; - coefficients = new double[children.length]; - bound = 1; + + /** + * Two Inference objects are equal when they have the same run-time type and store + * the same head object. 
I.e., the == operator must return true when + * comparing the two head objects for this method to return true. + * + * @param o The object to compare to this object. + * @return true iff this object equals the argument object as defined above. + **/ + public boolean equals(Object o) { + if (!(o instanceof ILPInference)) + return false; + return head == ((ILPInference) o).head; + } + + + /** + * Simply returns the head's hash code. + * + * @see java.lang.Object#hashCode() + **/ + public int hashCode() { + return head.hashCode(); + } + + + /** + * Creates a new Boolean variable to represent the value of a subexpression of some constraint. + * + * @param d A textual description of the subexpression whose value is represented by the new + * variable. + * @return The index of the new variable. + **/ + protected int createVariable(String d) { + int result = solver.addBooleanVariable(0); + if (verbosity >= VERBOSITY_HIGH) + System.out.println(result + ": " + d); + return result; + } + + + /** + * Derived classes override this method to do some type of processing on constraints of the + * parameter's type. + * + * @param c The constraint to process. 
+ **/ + public void visit(PropositionalDoubleImplication c) { + assert topLevel : "ILP: PropositionalDoubleImplication encountered."; + topLevel = false; + + int[] indexes = new int[2]; + double[] coefficients = new double[2]; + double bound = 0; + + c.left.runVisit(this); + indexes[0] = returnIndex; + if (returnNegation) { + coefficients[0] = -1; + --bound; + } else + coefficients[0] = 1; + + c.right.runVisit(this); + indexes[1] = returnIndex; + if (returnNegation) { + coefficients[1] = 1; + ++bound; + } else + coefficients[1] = -1; + + solver.addEqualityConstraint(indexes, coefficients, bound); + + topLevel = true; } - else { - indexes = new int[children.length + 1]; - coefficients = new double[children.length + 1]; + + + /** + * Derived classes override this method to do some type of processing on constraints of the + * parameter's type. + * + * @param c The constraint to process. + **/ + public void visit(PropositionalImplication c) { + assert false : "ILP: PropositionalImplication encountered."; } - boolean saveTopLevel = topLevel; - topLevel = false; - for (int i = 0; i < children.length; ++i) { - children[i].runVisit(this); - indexes[i] = returnIndex; - if (returnNegation) { - coefficients[i] = -1; - --bound; - } - else coefficients[i] = 1; + + /** + * Derived classes override this method to do some type of processing on constraints of the + * parameter's type. + * + * @param c The constraint to process. 
+ **/ + public void visit(PropositionalConjunction c) { + PropositionalConstraint[] children = (PropositionalConstraint[]) c.getChildren(); + + int[] indexes = null; + double[] coefficients = null; + double bound; + + if (topLevel) { + PropositionalConstraint[] variables = new PropositionalConstraint[children.length]; + int size = 0; + for (int i = 0; i < children.length; ++i) { + if (children[i] instanceof PropositionalVariable + || children[i] instanceof PropositionalNegation) + variables[size++] = children[i]; + else + children[i].runVisit(this); + } + + if (size > 0) { + indexes = new int[size]; + coefficients = new double[size]; + bound = size; + + for (int i = 0; i < size; ++i) { + variables[i].runVisit(this); + indexes[i] = returnIndex; + if (returnNegation) { + coefficients[i] = -1; + --bound; + } else + coefficients[i] = 1; + } + + solver.addEqualityConstraint(indexes, coefficients, bound); + } + } else { + indexes = new int[children.length + 1]; + coefficients = new double[children.length + 1]; + bound = 0; + + for (int i = 0; i < children.length; ++i) { + children[i].runVisit(this); + indexes[i] = returnIndex; + if (returnNegation) { + coefficients[i] = -1; + --bound; + } else + coefficients[i] = 1; + } + + String[] stringIndexes = new String[children.length]; + for (int i = 0; i < children.length; ++i) + stringIndexes[i] = (coefficients[i] < 0 ? "!" 
: "") + indexes[i]; + Arrays.sort(stringIndexes); + String key = stringIndexes[0]; + for (int i = 1; i < stringIndexes.length; ++i) + key += "&" + stringIndexes[i]; + Integer I = (Integer) indexMap.get(key); + + if (I == null) { + I = new Integer(createVariable(key)); + indexMap.put(key, I); + + indexes[children.length] = I.intValue(); + coefficients[children.length] = -children.length; + solver.addGreaterThanConstraint(indexes, coefficients, bound); + + coefficients[children.length] = -1; + solver.addLessThanConstraint(indexes, coefficients, bound + children.length - 1); + } + + returnIndex = I.intValue(); + returnNegation = false; + } } - topLevel = saveTopLevel; - if (topLevel) - solver.addGreaterThanConstraint(indexes, coefficients, bound); - else { - String[] stringIndexes = new String[children.length]; - for (int i = 0; i < children.length; ++i) - stringIndexes[i] = (coefficients[i] < 0 ? "!" : "") + indexes[i]; - Arrays.sort(stringIndexes); - String key = stringIndexes[0]; - for (int i = 1; i < stringIndexes.length; ++i) - key += "|" + stringIndexes[i]; - Integer I = (Integer) indexMap.get(key); - - if (I == null) { - I = new Integer(createVariable(key)); - indexMap.put(key, I); - - indexes[children.length] = I.intValue(); - coefficients[children.length] = -1; - solver.addGreaterThanConstraint(indexes, coefficients, bound); - - coefficients[children.length] = -children.length; - solver.addLessThanConstraint(indexes, coefficients, bound); - } - - returnIndex = I.intValue(); - returnNegation = false; + + /** + * Derived classes override this method to do some type of processing on constraints of the + * parameter's type. + * + * @param c The constraint to process. 
+ **/ + public void visit(PropositionalDisjunction c) { + PropositionalConstraint[] children = (PropositionalConstraint[]) c.getChildren(); + + int[] indexes = null; + double[] coefficients = null; + double bound = 0; + + if (topLevel) { + int subConstraintIndex = -1; + + for (int i = 0; i < children.length && subConstraintIndex == -1; ++i) { + if (children[i] instanceof PropositionalVariable + || children[i] instanceof PropositionalNegation) + continue; + if (children[i] instanceof PropositionalConjunction) + subConstraintIndex = i; + else if (children[i] instanceof PropositionalAtLeast) + subConstraintIndex = i; + } + + if (subConstraintIndex > -1) { + PropositionalConstraint[] subChildren = + (PropositionalConstraint[]) children[subConstraintIndex].getChildren(); + int multiplier = + children[subConstraintIndex] instanceof PropositionalConjunction ? subChildren.length + : ((PropositionalAtLeast) children[subConstraintIndex]).getM(); + + indexes = new int[subChildren.length + children.length - 1]; + coefficients = new double[subChildren.length + children.length - 1]; + bound = multiplier; + + topLevel = false; + + int j = 0; + for (int i = 0; i < children.length; ++i) { + if (i == subConstraintIndex) + continue; + + children[i].runVisit(this); + indexes[j] = returnIndex; + if (returnNegation) { + coefficients[j] = -multiplier; + bound -= multiplier; + } else + coefficients[j] = multiplier; + + ++j; + } + + for (int i = 0; i < subChildren.length; ++i, ++j) { + subChildren[i].runVisit(this); + indexes[j] = returnIndex; + if (returnNegation) { + coefficients[j] = -1; + --bound; + } else + coefficients[j] = 1; + } + + topLevel = true; + + solver.addGreaterThanConstraint(indexes, coefficients, bound); + return; + } + } + + if (topLevel) { + indexes = new int[children.length]; + coefficients = new double[children.length]; + bound = 1; + } else { + indexes = new int[children.length + 1]; + coefficients = new double[children.length + 1]; + } + + boolean saveTopLevel = 
topLevel; + topLevel = false; + for (int i = 0; i < children.length; ++i) { + children[i].runVisit(this); + indexes[i] = returnIndex; + if (returnNegation) { + coefficients[i] = -1; + --bound; + } else + coefficients[i] = 1; + } + + topLevel = saveTopLevel; + if (topLevel) + solver.addGreaterThanConstraint(indexes, coefficients, bound); + else { + String[] stringIndexes = new String[children.length]; + for (int i = 0; i < children.length; ++i) + stringIndexes[i] = (coefficients[i] < 0 ? "!" : "") + indexes[i]; + Arrays.sort(stringIndexes); + String key = stringIndexes[0]; + for (int i = 1; i < stringIndexes.length; ++i) + key += "|" + stringIndexes[i]; + Integer I = (Integer) indexMap.get(key); + + if (I == null) { + I = new Integer(createVariable(key)); + indexMap.put(key, I); + + indexes[children.length] = I.intValue(); + coefficients[children.length] = -1; + solver.addGreaterThanConstraint(indexes, coefficients, bound); + + coefficients[children.length] = -children.length; + solver.addLessThanConstraint(indexes, coefficients, bound); + } + + returnIndex = I.intValue(); + returnNegation = false; + } } - } - - - /** - * Derived classes override this method to do some type of processing on - * constraints of the parameter's type. - * - * @param c The constraint to process. - **/ - public void visit(PropositionalAtLeast c) { - PropositionalConstraint[] children = - (PropositionalConstraint[]) c.getChildren(); - - int[] indexes = null; - double[] coefficients = null; - double bound = 0; - - if (topLevel) { - indexes = new int[children.length]; - coefficients = new double[children.length]; - bound = c.getM(); + + + /** + * Derived classes override this method to do some type of processing on constraints of the + * parameter's type. + * + * @param c The constraint to process. 
+ **/ + public void visit(PropositionalAtLeast c) { + PropositionalConstraint[] children = (PropositionalConstraint[]) c.getChildren(); + + int[] indexes = null; + double[] coefficients = null; + double bound = 0; + + if (topLevel) { + indexes = new int[children.length]; + coefficients = new double[children.length]; + bound = c.getM(); + } else { + indexes = new int[children.length + 1]; + coefficients = new double[children.length + 1]; + } + + boolean saveTopLevel = topLevel; + topLevel = false; + for (int i = 0; i < children.length; ++i) { + children[i].runVisit(this); + indexes[i] = returnIndex; + if (returnNegation) { + coefficients[i] = -1; + --bound; + } else + coefficients[i] = 1; + } + + topLevel = saveTopLevel; + if (topLevel) + solver.addGreaterThanConstraint(indexes, coefficients, bound); + else { + String[] stringIndexes = new String[children.length]; + for (int i = 0; i < children.length; ++i) + stringIndexes[i] = (coefficients[i] < 0 ? "!" : "") + indexes[i]; + Arrays.sort(stringIndexes); + String key = "atl" + c.getM() + "of" + stringIndexes[0]; + for (int i = 1; i < stringIndexes.length; ++i) + key += "&" + stringIndexes[i]; + Integer I = (Integer) indexMap.get(key); + + if (I == null) { + I = new Integer(createVariable(key)); + indexMap.put(key, I); + + indexes[children.length] = I.intValue(); + coefficients[children.length] = -c.getM(); + solver.addGreaterThanConstraint(indexes, coefficients, bound); + + coefficients[children.length] = -children.length; + solver.addLessThanConstraint(indexes, coefficients, bound + c.getM() - 1); + } + + returnIndex = I.intValue(); + returnNegation = false; + } } - else { - indexes = new int[children.length + 1]; - coefficients = new double[children.length + 1]; + + + /** + * Derived classes override this method to do some type of processing on constraints of the + * parameter's type. + * + * @param c The constraint to process. 
+ **/ + public void visit(PropositionalNegation c) { + assert c.constraint instanceof PropositionalVariable : "ILP: Negation of a " + + c.constraint.getClass().getName() + " encountered."; + c.constraint.runVisit(this); + returnNegation = true; } - boolean saveTopLevel = topLevel; - topLevel = false; - for (int i = 0; i < children.length; ++i) { - children[i].runVisit(this); - indexes[i] = returnIndex; - if (returnNegation) { - coefficients[i] = -1; - --bound; - } - else coefficients[i] = 1; + + /** + * Derived classes override this method to do some type of processing on constraints of the + * parameter's type. + * + * @param c The constraint to process. + **/ + public void visit(PropositionalVariable c) { + returnIndex = ((Integer) indexMap.get(c)).intValue(); + returnNegation = false; } - topLevel = saveTopLevel; - if (topLevel) - solver.addGreaterThanConstraint(indexes, coefficients, bound); - else { - String[] stringIndexes = new String[children.length]; - for (int i = 0; i < children.length; ++i) - stringIndexes[i] = (coefficients[i] < 0 ? "!" : "") + indexes[i]; - Arrays.sort(stringIndexes); - String key = "atl" + c.getM() + "of" + stringIndexes[0]; - for (int i = 1; i < stringIndexes.length; ++i) - key += "&" + stringIndexes[i]; - Integer I = (Integer) indexMap.get(key); - - if (I == null) { - I = new Integer(createVariable(key)); - indexMap.put(key, I); - - indexes[children.length] = I.intValue(); - coefficients[children.length] = -c.getM(); - solver.addGreaterThanConstraint(indexes, coefficients, bound); - - coefficients[children.length] = -children.length; - solver.addLessThanConstraint(indexes, coefficients, - bound + c.getM() - 1); - } - - returnIndex = I.intValue(); - returnNegation = false; + + /** + * Derived classes override this method to do some type of processing on constraints of the + * parameter's type. + * + * @param c The constraint to process. + **/ + public void visit(PropositionalConstant c) { + assert false : "ILP: Constant encountered. 
(" + c.evaluate() + ")"; } - } - - - /** - * Derived classes override this method to do some type of processing on - * constraints of the parameter's type. - * - * @param c The constraint to process. - **/ - public void visit(PropositionalNegation c) { - assert c.constraint instanceof PropositionalVariable - : "ILP: Negation of a " + c.constraint.getClass().getName() - + " encountered."; - c.constraint.runVisit(this); - returnNegation = true; - } - - - /** - * Derived classes override this method to do some type of processing on - * constraints of the parameter's type. - * - * @param c The constraint to process. - **/ - public void visit(PropositionalVariable c) { - returnIndex = ((Integer) indexMap.get(c)).intValue(); - returnNegation = false; - } - - - /** - * Derived classes override this method to do some type of processing on - * constraints of the parameter's type. - * - * @param c The constraint to process. - **/ - public void visit(PropositionalConstant c) { - assert false : "ILP: Constant encountered. (" + c.evaluate() + ")"; - } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/ILPSolver.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/ILPSolver.java deleted file mode 100644 index 00176b03..00000000 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/ILPSolver.java +++ /dev/null @@ -1,182 +0,0 @@ -/** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 - * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign - * http://cogcomp.cs.illinois.edu/ - */ -package edu.illinois.cs.cogcomp.lbjava.infer; - -import edu.illinois.cs.cogcomp.lbjava.classify.Score; - - -/** - * Classes that implement this interface contain implementations of - * algorithms that solve Integer Linear Programming problems. 
- * - * @author Nick Rizzolo - **/ -public interface ILPSolver -{ - /** - * Sets the direction of the objective function. - * - * @param d true if the objective function is to be - * maximized. - **/ - public void setMaximize(boolean d); - - - /** - * Adds a new Boolean variable (an integer variable constrained to take - * either the value 0 or the value 1) with the specified coefficient in the - * objective function to the problem. - * - * @param c The objective function coefficient for the new Boolean - * variable. - * @return The indexes of the created variable. - **/ - public int addBooleanVariable(double c); - - - /** - * Adds a general, multi-valued discrete variable, which is implemented as - * a set of Boolean variables, one per value of the discrete variable, with - * exactly one of those variables set true at any given time. - * - * @param c The objective function coefficients for the new Boolean - * variables. - * @return The indexes of the newly created variables. - **/ - public int[] addDiscreteVariable(double[] c); - - - /** - * Adds a general, multi-valued discrete variable, which is implemented as - * a set of Boolean variables, one per value of the discrete variable, with - * exactly one of those variables set true at any given time. - * - * @param c An array of {@link Score}s containing the - * objective function coefficients for the new Boolean variables. - * @return The indexes of the newly created variables. - **/ - public int[] addDiscreteVariable(Score[] c); - - - /** - * Adds a new fixed constraint to the problem. The two array arguments - * must be the same length, as their elements correspond to each other. - * Variables whose coefficients are zero need not be mentioned. Variables - * that are mentioned must have previously been added via - * {@link #addBooleanVariable(double)} or - * {@link #addDiscreteVariable(double[])}. The resulting constraint has - * the form: - *
xi * a = b
- * where xi represents the inference variables - * whose indexes are contained in the array i and - * * represents dot product. - * - * @param i The indexes of the variables with non-zero coefficients. - * @param a The coefficients of the variables with the given indexes. - * @param b The new constraint will enforce equality with this constant. - **/ - public void addEqualityConstraint(int[] i, double[] a, double b); - - - /** - * Adds a new lower bounded constraint to the problem. The two array - * arguments must be the same length, as their elements correspond to each - * other. Variables whose coefficients are zero need not be mentioned. - * Variables that are mentioned must have previously been added via - * {@link #addBooleanVariable(double)} or - * {@link #addDiscreteVariable(double[])}. The resulting constraint has - * the form: - *
xi * a >= b
- * where xi represents the inference variables - * whose indexes are contained in the array i and - * * represents dot product. - * - * @param i The indexes of the variables with non-zero coefficients. - * @param a The coefficients of the variables with the given indexes. - * @param b The lower bound for the new constraint. - **/ - public void addGreaterThanConstraint(int[] i, double[] a, double b); - - - /** - * Adds a new upper bounded constraint to the problem. The two array - * arguments must be the same length, as their elements correspond to each - * other. Variables whose coefficients are zero need not be mentioned. - * Variables that are mentioned must have previously been added via - * {@link #addBooleanVariable(double)} or - * {@link #addDiscreteVariable(double[])}. The resulting constraint has - * the form: - *
xi * a <= b
- * where xi represents the inference variables - * whose indexes are contained in the array i and - * * represents dot product. - * - * @param i The indexes of the variables with non-zero coefficients. - * @param a The coefficients of the variables with the given indexes. - * @param b The upper bound for the new constraint. - **/ - public void addLessThanConstraint(int[] i, double[] a, double b); - - - /** - * Solves the ILP problem, saving the solution internally. This method may - * throw an exception if something doesn't go right. - **/ - public boolean solve() throws Exception; - - - /** - * Tests whether the problem represented by this ILPSolver - * instance has been solved already. - **/ - public boolean isSolved(); - - - /** - * When the problem has been solved, use this method to retrieve the value - * of any Boolean inference variable. The result of this method is - * undefined when the problem has not yet been solved. - * - * @param index The index of the variable whose value is requested. - * @return The value of the variable. - **/ - public boolean getBooleanValue(int index); - - - /** - * When the problem has been solved, use this method to retrieve the value - * of the objective function at the solution. The result of this method is - * undefined when the problem has not yet been solved. If the problem had - * no feasible solutions, negative (positive, respectively) infinity will - * be returned if maximizing (minimizing). - * - * @return The value of the objective function at the solution. - **/ - public double objectiveValue(); - - - /** - * This method clears the all constraints and variables out of the ILP - * solver's problem representation, bringing the ILPSolver - * instance back to the state it was in when first constructed. - **/ - public void reset(); - - - /** - * Creates a textual representation of the ILP problem in an algebraic - * notation. - * - * @param buffer The created textual representation will be appended here. 
- **/ - public void write(StringBuffer buffer); -} - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/Inference.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/Inference.java index 281335cf..48413c9a 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/Inference.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/Inference.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.infer; @@ -18,384 +15,425 @@ /** - * An object of this class keeps track of all the information necessary to - * perform inference. Once that inference has been performed, constrained - * classifiers access the results through this class's interface to determine - * what their constrained predictions are. - * - * @author Nick Rizzolo + * An object of this class keeps track of all the information necessary to perform inference. Once + * that inference has been performed, constrained classifiers access the results through this + * class's interface to determine what their constrained predictions are. + * + * @author Nick Rizzolo **/ -public abstract class Inference -{ - /** - * Produces a string representation of an example object. This method is - * used mainly for debugging messages. - * - * @param example The example object, which may be an array. - * @return A string representation of example. 
- **/ - public static String exampleToString(Object example) { - String result = ""; - - if (example instanceof Object[]) { - Object[] array = (Object[]) example; - result += "["; - if (array.length > 0) result += array[0]; - for (int k = 1; k < array.length; ++k) result += ", " + array[k]; - result += "]"; - } - else result += example; - - return result; - } - - - /** - * The values of this map are the variables we perform inference over; they - * are the actual FirstOrderVariable objects found in this - * inference's constraints. The keys are also objects of type - * FirstOrderVariable, but they are not necessarily the actual - * objects found in the constraints. This map is populated by the first - * evaluation of the constraints. - **/ - protected LinkedHashMap variables; - /** The constraints which must be satisfied by the inference algorithm. */ - protected Constraint constraint; - /** Objects of this class are differentiated by their "head" objects. */ - protected Object head; - - - /** Default constructor. */ - public Inference() { this(null); } - - /** - * Initializes the head object. - * - * @param h The head object. - **/ - public Inference(Object h) { - head = h; - variables = new LinkedHashMap(); - } - - - /** Retrieves the head object. */ - public Object getHead() { return head; } - - - /** - * Derived classes implement this method to perform the inference, setting - * the values of the variables such that they maximize the objective - * function while satisfying the constraints. When implementing this - * method in a derived class Foo, it may be assumed that the - * constraint member field has already been filled in - * appropriately, since the LBJava compiler will generate a class extending - * Foo whose constructor does so. - **/ - abstract protected void infer() throws Exception; - - - /** - * Retrieves the value of the specified variable as identified by the - * classifier and the object that produce that variable. 
- * - * @param c The classifier producing the variable. - * @param o The object from which the variable is produced. - * @return The current value of the requested variable. - **/ - abstract public String valueOf(Learner c, Object o) throws Exception; - - - /** - * Returns the normalization function associated with the given classifier - * in this inference. Derived classes that implement an inference - * algorithm for use in an LBJava source file are required to call this method - * to normalize the scores produced by classifiers before making use of - * those scores. By default, this method returns the - * IdentityNormalizer. - * - * @param c The classifier. - * @return The normalization function associated with the classifier. - **/ - public Normalizer getNormalizer(Learner c) { - return new IdentityNormalizer(); - } - - - /** - * Returns the fully qualified name of the type of the head object for this - * inference. By default, this method returns - * "java.lang.Object". It should be overridden by derived - * classes. - **/ - public String getHeadType() { return "java.lang.Object"; } - - - /** - * Returns the fully qualified names of the types of objects for which head - * finder methods have been defined. This method must be overridden by - * derived classes, since by default it returns a 0-length array and every - * Inference is required to have at least one head finder. - * Inference classes written by the compiler automatically - * override this method appropriately. - **/ - public String[] getHeadFinderTypes() { return new String[0]; } - - - /** - * Determines if the constraints are satisfied by the current variable - * assignments. - **/ - public boolean satisfied() { return constraint.evaluate(); } - - - /** - * Retrieves the requested variable, creating it first if it doesn't yet - * exist. - * - * @param v A variable containing the same classifier, object, and - * prediction value as the desired variable. 
- * @return The Boolean variable corresponding to the event - * classifier(object) == prediction. - **/ - public PropositionalVariable getVariable(PropositionalVariable v) { - PropositionalVariable variable = (PropositionalVariable) variables.get(v); - - if (variable == null) { - variable = (PropositionalVariable) v.clone(); - variables.put(variable, variable); - } - - return variable; - } - - - /** - * Retrieves the requested variable, creating it first if it doesn't yet - * exist. - * - * @param v A variable containing the same classifier and object as the - * desired variable. - * @return The variable corresponding to the application of the classifier - * on the object. - **/ - public FirstOrderVariable getVariable(FirstOrderVariable v) { - FirstOrderVariable variable = (FirstOrderVariable) variables.get(v); - - if (variable == null) { - variable = (FirstOrderVariable) v.clone(); - variables.put(variable, variable); - } - - return variable; - } - - - /** - * The default method for visiting a constraint simply visits that - * constraint's children. - **/ - public void visitAll(Constraint c) { - Constraint[] children = c.getChildren(); - for (int i = 0; i < children.length; ++i) children[i].runVisit(this); - } - - - /** - * Derived classes override this method to do some type of processing on - * constraints of the parameter's type. - * - * @param c The constraint to process. - **/ - public void visit(FirstOrderDoubleImplication c) { visitAll(c); } - - - /** - * Derived classes override this method to do some type of processing on - * constraints of the parameter's type. - * - * @param c The constraint to process. - **/ - public void visit(FirstOrderImplication c) { visitAll(c); } - - - /** - * Derived classes override this method to do some type of processing on - * constraints of the parameter's type. - * - * @param c The constraint to process. 
- **/ - public void visit(FirstOrderConjunction c) { visitAll(c); } - - - /** - * Derived classes override this method to do some type of processing on - * constraints of the parameter's type. - * - * @param c The constraint to process. - **/ - public void visit(FirstOrderDisjunction c) { visitAll(c); } - - - /** - * Derived classes override this method to do some type of processing on - * constraints of the parameter's type. - * - * @param c The constraint to process. - **/ - public void visit(FirstOrderEqualityTwoValues c) { visitAll(c); } - - - /** - * Derived classes override this method to do some type of processing on - * constraints of the parameter's type. - * - * @param c The constraint to process. - **/ - public void visit(FirstOrderEqualityWithValue c) { visitAll(c); } - - - /** - * Derived classes override this method to do some type of processing on - * constraints of the parameter's type. - * - * @param c The constraint to process. - **/ - public void visit(FirstOrderEqualityWithVariable c) { visitAll(c); } - - - /** - * Derived classes override this method to do some type of processing on - * constraints of the parameter's type. - * - * @param c The constraint to process. - **/ - public void visit(FirstOrderNegation c) { visitAll(c); } - - - /** - * Derived classes override this method to do some type of processing on - * constraints of the parameter's type. - * - * @param c The constraint to process. - **/ - public void visit(FirstOrderConstant c) { visitAll(c); } - - - /** - * Derived classes override this method to do some type of processing on - * constraints of the parameter's type. - * - * @param c The constraint to process. - **/ - public void visit(UniversalQuantifier c) { visitAll(c); } - - - /** - * Derived classes override this method to do some type of processing on - * constraints of the parameter's type. - * - * @param c The constraint to process. 
- **/ - public void visit(ExistentialQuantifier c) { visitAll(c); } - - - /** - * Derived classes override this method to do some type of processing on - * constraints of the parameter's type. - * - * @param c The constraint to process. - **/ - public void visit(AtLeastQuantifier c) { visitAll(c); } - - - /** - * Derived classes override this method to do some type of processing on - * constraints of the parameter's type. - * - * @param c The constraint to process. - **/ - public void visit(AtMostQuantifier c) { visitAll(c); } - - - /** - * Derived classes override this method to do some type of processing on - * constraints of the parameter's type. - * - * @param c The constraint to process. - **/ - public void visit(QuantifiedConstraintInvocation c) { visitAll(c); } - - - /** - * Derived classes override this method to do some type of processing on - * constraints of the parameter's type. - * - * @param c The constraint to process. - **/ - public void visit(PropositionalDoubleImplication c) { visitAll(c); } - - - /** - * Derived classes override this method to do some type of processing on - * constraints of the parameter's type. - * - * @param c The constraint to process. - **/ - public void visit(PropositionalImplication c) { visitAll(c); } - - - /** - * Derived classes override this method to do some type of processing on - * constraints of the parameter's type. - * - * @param c The constraint to process. - **/ - public void visit(PropositionalConjunction c) { visitAll(c); } - - - /** - * Derived classes override this method to do some type of processing on - * constraints of the parameter's type. - * - * @param c The constraint to process. - **/ - public void visit(PropositionalDisjunction c) { visitAll(c); } - - - /** - * Derived classes override this method to do some type of processing on - * constraints of the parameter's type. - * - * @param c The constraint to process. 
- **/ - public void visit(PropositionalAtLeast c) { visitAll(c); } - - - /** - * Derived classes override this method to do some type of processing on - * constraints of the parameter's type. - * - * @param c The constraint to process. - **/ - public void visit(PropositionalConstant c) { visitAll(c); } - - - /** - * Derived classes override this method to do some type of processing on - * constraints of the parameter's type. - * - * @param c The constraint to process. - **/ - public void visit(PropositionalNegation c) { visitAll(c); } - - - /** - * Derived classes override this method to do some type of processing on - * constraints of the parameter's type. - * - * @param c The constraint to process. - **/ - public void visit(PropositionalVariable c) { visitAll(c); } -} +public abstract class Inference { + /** + * Produces a string representation of an example object. This method is used mainly for + * debugging messages. + * + * @param example The example object, which may be an array. + * @return A string representation of example. + **/ + public static String exampleToString(Object example) { + String result = ""; + + if (example instanceof Object[]) { + Object[] array = (Object[]) example; + result += "["; + if (array.length > 0) + result += array[0]; + for (int k = 1; k < array.length; ++k) + result += ", " + array[k]; + result += "]"; + } else + result += example; + + return result; + } + + + /** + * The values of this map are the variables we perform inference over; they are the actual + * FirstOrderVariable objects found in this inference's constraints. The keys are + * also objects of type FirstOrderVariable, but they are not necessarily the actual + * objects found in the constraints. This map is populated by the first evaluation of the + * constraints. + **/ + protected LinkedHashMap variables; + /** The constraints which must be satisfied by the inference algorithm. 
*/ + protected Constraint constraint; + /** Objects of this class are differentiated by their "head" objects. */ + protected Object head; + + + /** Default constructor. */ + public Inference() { + this(null); + } + + /** + * Initializes the head object. + * + * @param h The head object. + **/ + public Inference(Object h) { + head = h; + variables = new LinkedHashMap(); + } + + + /** Retrieves the head object. */ + public Object getHead() { + return head; + } + + + /** + * Derived classes implement this method to perform the inference, setting the values of the + * variables such that they maximize the objective function while satisfying the constraints. + * When implementing this method in a derived class Foo, it may be assumed that the + * constraint member field has already been filled in appropriately, since the + * LBJava compiler will generate a class extending Foo whose constructor does so. + **/ + abstract protected void infer() throws Exception; + + + /** + * Retrieves the value of the specified variable as identified by the classifier and the object + * that produce that variable. + * + * @param c The classifier producing the variable. + * @param o The object from which the variable is produced. + * @return The current value of the requested variable. + **/ + abstract public String valueOf(Learner c, Object o) throws Exception; + + + /** + * Returns the normalization function associated with the given classifier in this inference. + * Derived classes that implement an inference algorithm for use in an LBJava source file are + * required to call this method to normalize the scores produced by classifiers before making + * use of those scores. By default, this method returns the IdentityNormalizer. + * + * @param c The classifier. + * @return The normalization function associated with the classifier. 
+ **/ + public Normalizer getNormalizer(Learner c) { + return new IdentityNormalizer(); + } + + + /** + * Returns the fully qualified name of the type of the head object for this inference. By + * default, this method returns "java.lang.Object". It should be overridden by + * derived classes. + **/ + public String getHeadType() { + return "java.lang.Object"; + } + + + /** + * Returns the fully qualified names of the types of objects for which head finder methods have + * been defined. This method must be overridden by derived classes, since by default it returns + * a 0-length array and every Inference is required to have at least one head + * finder. Inference classes written by the compiler automatically override this + * method appropriately. + **/ + public String[] getHeadFinderTypes() { + return new String[0]; + } + + + /** + * Determines if the constraints are satisfied by the current variable assignments. + **/ + public boolean satisfied() { + return constraint.evaluate(); + } + + + /** + * Retrieves the requested variable, creating it first if it doesn't yet exist. + * + * @param v A variable containing the same classifier, object, and prediction value as the + * desired variable. + * @return The Boolean variable corresponding to the event classifier(object) == + * prediction. + **/ + public PropositionalVariable getVariable(PropositionalVariable v) { + PropositionalVariable variable = (PropositionalVariable) variables.get(v); + + if (variable == null) { + variable = (PropositionalVariable) v.clone(); + variables.put(variable, variable); + } + + return variable; + } + + + /** + * Retrieves the requested variable, creating it first if it doesn't yet exist. + * + * @param v A variable containing the same classifier and object as the desired variable. + * @return The variable corresponding to the application of the classifier on the object. 
+ **/ + public FirstOrderVariable getVariable(FirstOrderVariable v) { + FirstOrderVariable variable = (FirstOrderVariable) variables.get(v); + + if (variable == null) { + variable = (FirstOrderVariable) v.clone(); + variables.put(variable, variable); + } + + return variable; + } + + + /** + * The default method for visiting a constraint simply visits that constraint's children. + **/ + public void visitAll(Constraint c) { + Constraint[] children = c.getChildren(); + for (int i = 0; i < children.length; ++i) + children[i].runVisit(this); + } + + + /** + * Derived classes override this method to do some type of processing on constraints of the + * parameter's type. + * + * @param c The constraint to process. + **/ + public void visit(FirstOrderDoubleImplication c) { + visitAll(c); + } + + + /** + * Derived classes override this method to do some type of processing on constraints of the + * parameter's type. + * + * @param c The constraint to process. + **/ + public void visit(FirstOrderImplication c) { + visitAll(c); + } + + + /** + * Derived classes override this method to do some type of processing on constraints of the + * parameter's type. + * + * @param c The constraint to process. + **/ + public void visit(FirstOrderConjunction c) { + visitAll(c); + } + + + /** + * Derived classes override this method to do some type of processing on constraints of the + * parameter's type. + * + * @param c The constraint to process. + **/ + public void visit(FirstOrderDisjunction c) { + visitAll(c); + } + + + /** + * Derived classes override this method to do some type of processing on constraints of the + * parameter's type. + * + * @param c The constraint to process. + **/ + public void visit(FirstOrderEqualityTwoValues c) { + visitAll(c); + } + + + /** + * Derived classes override this method to do some type of processing on constraints of the + * parameter's type. + * + * @param c The constraint to process. 
+ **/ + public void visit(FirstOrderEqualityWithValue c) { + visitAll(c); + } + + + /** + * Derived classes override this method to do some type of processing on constraints of the + * parameter's type. + * + * @param c The constraint to process. + **/ + public void visit(FirstOrderEqualityWithVariable c) { + visitAll(c); + } + + + /** + * Derived classes override this method to do some type of processing on constraints of the + * parameter's type. + * + * @param c The constraint to process. + **/ + public void visit(FirstOrderNegation c) { + visitAll(c); + } + + + /** + * Derived classes override this method to do some type of processing on constraints of the + * parameter's type. + * + * @param c The constraint to process. + **/ + public void visit(FirstOrderConstant c) { + visitAll(c); + } + + + /** + * Derived classes override this method to do some type of processing on constraints of the + * parameter's type. + * + * @param c The constraint to process. + **/ + public void visit(UniversalQuantifier c) { + visitAll(c); + } + + + /** + * Derived classes override this method to do some type of processing on constraints of the + * parameter's type. + * + * @param c The constraint to process. + **/ + public void visit(ExistentialQuantifier c) { + visitAll(c); + } + + + /** + * Derived classes override this method to do some type of processing on constraints of the + * parameter's type. + * + * @param c The constraint to process. + **/ + public void visit(AtLeastQuantifier c) { + visitAll(c); + } + + + /** + * Derived classes override this method to do some type of processing on constraints of the + * parameter's type. + * + * @param c The constraint to process. + **/ + public void visit(AtMostQuantifier c) { + visitAll(c); + } + + /** + * Derived classes override this method to do some type of processing on constraints of the + * parameter's type. + * + * @param c The constraint to process. 
+ **/ + public void visit(QuantifiedConstraintInvocation c) { + visitAll(c); + } + + + /** + * Derived classes override this method to do some type of processing on constraints of the + * parameter's type. + * + * @param c The constraint to process. + **/ + public void visit(PropositionalDoubleImplication c) { + visitAll(c); + } + + + /** + * Derived classes override this method to do some type of processing on constraints of the + * parameter's type. + * + * @param c The constraint to process. + **/ + public void visit(PropositionalImplication c) { + visitAll(c); + } + + + /** + * Derived classes override this method to do some type of processing on constraints of the + * parameter's type. + * + * @param c The constraint to process. + **/ + public void visit(PropositionalConjunction c) { + visitAll(c); + } + + + /** + * Derived classes override this method to do some type of processing on constraints of the + * parameter's type. + * + * @param c The constraint to process. + **/ + public void visit(PropositionalDisjunction c) { + visitAll(c); + } + + + /** + * Derived classes override this method to do some type of processing on constraints of the + * parameter's type. + * + * @param c The constraint to process. + **/ + public void visit(PropositionalAtLeast c) { + visitAll(c); + } + + + /** + * Derived classes override this method to do some type of processing on constraints of the + * parameter's type. + * + * @param c The constraint to process. + **/ + public void visit(PropositionalConstant c) { + visitAll(c); + } + + + /** + * Derived classes override this method to do some type of processing on constraints of the + * parameter's type. + * + * @param c The constraint to process. + **/ + public void visit(PropositionalNegation c) { + visitAll(c); + } + + + /** + * Derived classes override this method to do some type of processing on constraints of the + * parameter's type. + * + * @param c The constraint to process. 
+ **/ + public void visit(PropositionalVariable c) { + visitAll(c); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/InferenceManager.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/InferenceManager.java index 1537c720..700c54e2 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/InferenceManager.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/InferenceManager.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.infer; @@ -14,121 +11,96 @@ /** - * The inference manager is a cache of Inference objects - * accessed via their names and head objects. However, only one - * Inference object is stored per Inference class. - * For example, if the LBJava source file contains the following two - * inferences: - * - *
- * inference Foo head MyClass { ... }
- * inference Bar head MyClass { ... } - *
- * - * then this InferenceManager will store a maximum of one - * Foo object and one Bar object. - * - * @author Nick Rizzolo + * The inference manager is a cache of Inference objects accessed via their names and + * head objects. However, only one Inference object is stored per + * Inference class. For example, if the LBJava source file contains the following two + * inferences: + * + *
inference Foo head MyClass { ... }
+ * inference Bar head MyClass { ... }
+ * + * then this InferenceManager will store a maximum of one Foo object and + * one Bar object. + * + * @author Nick Rizzolo **/ -public class InferenceManager -{ - /** - * The cache of Inference objects, indexed by - * Keys. - **/ - private static final LinkedHashMap cache = new LinkedHashMap(); - - - /** - * Adds the given Inference object to the cache, indexed its - * fully qualified name. - * - * @param i The inference object. - **/ - public static void put(Inference i) { - cache.put(i.getClass().getName(), i); - } - - /** - * Adds the given Inference object to the cache, indexed by an - * arbitrary name (NB: Don't use unless you know what you're doing). - * - * @param name The (arbitrary) name of the inference object. - * @param i The inference object. - **/ - public static void put(String name, Inference i) { - cache.put(name, i); - } - - /** - * Retrieves the Inference object whose fully qualified name - * and head object are specified. - * - * @param n The fully qualified name of the inference. - * @param h The head object of the inference. - * @return The Inference object corresponding to the given - * parameters, or null if no Inference is - * associated with them. - **/ - public static Inference get(String n, Object h) { - Inference result = (Inference) cache.get(n); - if (result != null && result.getHead() == h) return result; - return null; - } - - - /** - * Removes the inference object with the given name. - * - * @param n The name of the unwanted inference. - **/ - public static void remove(String n) { cache.remove(n); } +public class InferenceManager { + /** + * The cache of Inference objects, indexed by Keys. + **/ + private static final LinkedHashMap cache = new LinkedHashMap(); - /** - * Objects of this class are used as the keys of the cache - * map. 
They are distinguished from each other by comparing the contents - * of the Strings storing their names and by their head - * objects, which must actually be exactly the same object for the two keys - * to be equivalent. - * - * @author Nick Rizzolo - ** / - private static class Key - { - /** The name of the inference. * / - public String name; - /** The inference's head object. * / - public Object head; + /** + * Adds the given Inference object to the cache, indexed its fully qualified name. + * + * @param i The inference object. + **/ + public static void put(Inference i) { + cache.put(i.getClass().getName(), i); + } + /** + * Adds the given Inference object to the cache, indexed by an arbitrary name (NB: + * Don't use unless you know what you're doing). + * + * @param name The (arbitrary) name of the inference object. + * @param i The inference object. + **/ + public static void put(String name, Inference i) { + cache.put(name, i); + } /** - * Initializing constructor. - * - * @param n The name of the inference. - * @param h The inference's head object. - ** / - public Key(String n, Object h) { - name = n; - head = h; + * Retrieves the Inference object whose fully qualified name and head object are + * specified. + * + * @param n The fully qualified name of the inference. + * @param h The head object of the inference. + * @return The Inference object corresponding to the given parameters, or + * null if no Inference is associated with them. + **/ + public static Inference get(String n, Object h) { + Inference result = (Inference) cache.get(n); + if (result != null && result.getHead() == h) + return result; + return null; } /** - * The hash code of a Key is the hash code of its name plus - * the system's hash code for the head object. - ** / - public int hashCode() { - return name.hashCode() + System.identityHashCode(head); + * Removes the inference object with the given name. + * + * @param n The name of the unwanted inference. 
+ **/ + public static void remove(String n) { + cache.remove(n); } - /** Two Keys are equivalent as described above. * / - public boolean equals(Object o) { - Key k = (Key) o; - return name.equals(k.name) && head == k.head; - } - } - */ + /** + * Objects of this class are used as the keys of the cache map. They are + * distinguished from each other by comparing the contents of the Strings storing + * their names and by their head objects, which must actually be exactly the same object for the + * two keys to be equivalent. + * + * @author Nick Rizzolo / private static class Key { /** The name of the inference. * / public + * String name; /** The inference's head object. * / public Object head; + * + * + * /** Initializing constructor. + * + * @param n The name of the inference. + * @param h The inference's head object. / public Key(String n, Object h) { name = n; head = h; + * } + * + * + * /** The hash code of a Key is the hash code of its name plus the system's + * hash code for the head object. / public int hashCode() { return name.hashCode() + + * System.identityHashCode(head); } + * + * + * /** Two Keys are equivalent as described above. * / public boolean + * equals(Object o) { Key k = (Key) o; return name.equals(k.name) && head == k.head; } } + */ } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/InferenceNotOptimalException.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/InferenceNotOptimalException.java index 9debc687..d083d861 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/InferenceNotOptimalException.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/InferenceNotOptimalException.java @@ -1,53 +1,51 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. 
- * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.infer; +import edu.illinois.cs.cogcomp.infer.ilp.ILPSolver; + /** - * Exceptions of this type are thrown by the {@link ILPInference} class when - * the selected {@link ILPSolver} did not successfully find the optimal - * solution to the inference problem. Instances of this class contain a - * reference to the {@link ILPSolver} instance so that the user can, for - * instance, call the {@link ILPSolver#write(java.lang.StringBuffer)} method. - * - * @author Nick Rizzolo + * Exceptions of this type are thrown by the {@link ILPInference} class when the selected + * {@link ILPSolver} did not successfully find the optimal solution to the inference problem. + * Instances of this class contain a reference to the {@link ILPSolver} instance so that the user + * can, for instance, call the {@link ILPSolver#write(java.lang.StringBuffer)} method. + * + * @author Nick Rizzolo **/ -public class InferenceNotOptimalException extends Exception -{ - /** The ILP algorithm and problem representation that failed. */ - private ILPSolver solver; - /** The head object of the inference problem. */ - private Object head; - - - /** - * Initializing constructor. - * - * @param solver The ILP algorithm and problem representation that failed. - * @param head The head object of the inference problem. 
- **/ - public InferenceNotOptimalException(ILPSolver solver, Object head) { - super( - "Failed to solve inference problem with the following head object: " - + head); - this.solver = solver; - this.head = head; - } - - - /** Retrieves the ILP problem instance, {@link #solver}. */ - public ILPSolver getSolver() { return solver; } - - - /** Retrieves the head object, {@link #head}. */ - public Object getHead() { return head; } +public class InferenceNotOptimalException extends Exception { + /** The ILP algorithm and problem representation that failed. */ + private ILPSolver solver; + /** The head object of the inference problem. */ + private Object head; + + + /** + * Initializing constructor. + * + * @param solver The ILP algorithm and problem representation that failed. + * @param head The head object of the inference problem. + **/ + public InferenceNotOptimalException(ILPSolver solver, Object head) { + super("Failed to solve inference problem with the following head object: " + head); + this.solver = solver; + this.head = head; + } + + + /** Retrieves the ILP problem instance, {@link #solver}. */ + public ILPSolver getSolver() { + return solver; + } + + + /** Retrieves the head object, {@link #head}. */ + public Object getHead() { + return head; + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/InvocationArgumentReplacer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/InvocationArgumentReplacer.java index b3f96d00..4282a584 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/InvocationArgumentReplacer.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/InvocationArgumentReplacer.java @@ -1,40 +1,36 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. 
See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.infer; /** - * Anonymous inner classes extending this class are instantiated by the code - * generated by the LBJava compiler when creating - * QuantifiedConstraintInvocation representations. Its lone - * method computes the value of the constraint invocation's parameter. - * - * @author Nick Rizzolo + * Anonymous inner classes extending this class are instantiated by the code generated by the LBJava + * compiler when creating QuantifiedConstraintInvocation representations. Its lone + * method computes the value of the constraint invocation's parameter. + * + * @author Nick Rizzolo **/ -abstract public class InvocationArgumentReplacer extends ArgumentReplacer -{ - /** - * Initializing constructor. - * - * @param c The context of the corresponding quantified constraint - * invocation, except for quantification variables. - **/ - public InvocationArgumentReplacer(Object[] c) { super(c); } +abstract public class InvocationArgumentReplacer extends ArgumentReplacer { + /** + * Initializing constructor. + * + * @param c The context of the corresponding quantified constraint invocation, except for + * quantification variables. + **/ + public InvocationArgumentReplacer(Object[] c) { + super(c); + } - /** - * Computes the value of the constraint invocation's parameter. - * - * @return The value of the constraint invocation's parameter. - **/ - abstract public Object compute(); + /** + * Computes the value of the constraint invocation's parameter. + * + * @return The value of the constraint invocation's parameter. 
+ **/ + abstract public Object compute(); } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/OJalgoHook.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/OJalgoHook.java deleted file mode 100644 index 59a961b2..00000000 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/OJalgoHook.java +++ /dev/null @@ -1,313 +0,0 @@ -/** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 - * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign - * http://cogcomp.cs.illinois.edu/ - */ -package edu.illinois.cs.cogcomp.lbjava.infer; - -import edu.illinois.cs.cogcomp.lbjava.classify.Score; -import org.ojalgo.optimisation.Expression; -import org.ojalgo.optimisation.ExpressionsBasedModel; -import org.ojalgo.optimisation.Optimisation; -import org.ojalgo.optimisation.Variable; - -import java.util.Arrays; - -/** - * A hook for the oj! Algorithms (ojAlgo), an Open Source Java code to do mathematics, linear algebra and optimisation. - * For further information, see http://ojalgo.org/ - */ -public class OJalgoHook implements ILPSolver { - - // initially there are no variables in the model. - private int numvars = 0; - - // initial number of constraints - private int numConstraints = 0; - - private ExpressionsBasedModel model = new ExpressionsBasedModel(); - - private String nameOfObjectiveExpression = "objective"; - private Expression objectiveFunction = model.getObjectiveExpression(); - - // Internal flag for keeping optimization state - private boolean minimize = true; - - // internal variable for result of optimization - private Optimisation.Result result; - - private boolean log = false; - - /** - * Set bounds of variable in the specified position. 
- * - * @param colId position of the variable - * @param lower domain lower bound - * @param upper domain upper bound - */ - public void setBounds(int colId, double lower, double upper) { - if(upper == Double.POSITIVE_INFINITY) - model.getVariable(colId).upper(null); - else - model.getVariable(colId).upper(upper); - - if(lower == Double.NEGATIVE_INFINITY) - model.getVariable(colId).lower(null); - else - model.getVariable(colId).lower(lower); - } - - /** - * Set lower bound to unbounded (infinite) - * - * @param colId position of the variable - */ - public void setUnboundUpperBound(int colId) { - model.getVariable(colId).upper(null); - } - - public void setUpperBound(int colId, double u) { - model.getVariable(colId).upper(u); - } - - /** - * Set upper bound to unbounded (infinite) - * - * @param colId position of the variable - */ - public void setUnboundLowerBound(int colId) { - model.getVariable(colId).lower(null); - } - - public void setLowerBound(int colId, double l) { - model.getVariable(colId).lower(l); - } - - /** - * Set the column/variable as an integer variable - * - * @param colId position of the variable - */ - public void setInteger(int colId) { - model.getVariable(colId).integer(true); - } - - /** - * Set the column / variable as an binary integer variable - * - * @param colId position of the variable - */ - public void setBinary(int colId) { - model.getVariable(colId).binary(); - } - - /** - * Set the column/variable as a float variable - * - * @param colId position of the variable - */ - public void setFloat(int colId) { - model.getVariable(colId).integer(false); - } - - public void setMaximize(boolean d) { - if(log) - System.out.println("OJalgoHook: setMaximize("+d + ")"); - if(d) { - model.setMaximisation(); - minimize = false; - } - else { - model.setMinimisation(); - minimize = true; - } - } - - public int addBooleanVariable(double c) { - if(log) - System.out.println("OJalgoHook: addBooleanVariable(c=" + c + ")"); - - numvars ++; - Variable var = 
Variable.makeBinary(Integer.toString(numvars)).weight(c); - model.addVariable(var); - return numvars-1; - } - - /** - * Adds a general, multi-valued discrete variable, which is implemented as - * a set of Boolean variables, one per value of the discrete variable, with - * exactly one of those variables set true at any given time. - * */ - public int[] addDiscreteVariable(double[] c) { - if(log) { - System.out.print("OJalgoHook: addDiscreteVariable("); - for (double w : c) - System.out.print(w + ", "); - System.out.println(")"); - } - - int[] varIndices = new int[c.length]; - int ind = 0; - while (ind < c.length) { - varIndices[ind] = addBooleanVariable(c[ind]); - ind++; - } - - if(log) { - System.out.print("output indices = "); - for(int idx: varIndices) - System.out.print(idx + ", "); - System.out.println(); - } - - // make sure only one of them is true - double[] ones = new double[varIndices.length]; - Arrays.fill(ones, 1); - if(log) { - System.out.println("ones size = " + ones.length); - System.out.println("varindices size = " + varIndices.length); - } - addEqualityConstraint(varIndices, ones, 1); - - return varIndices; - } - - public int[] addDiscreteVariable(Score[] c) { - double[] weights = new double[c.length]; - for(int idx = 0; idx < c.length; idx++) - weights[idx] = c[idx].score; - return addDiscreteVariable(weights); - } - - public void addEqualityConstraint(int[] i, double[] a, double b) { - if(log) { - System.out.print("OJalgoHook: addEqualityConstraint("); - for (int idx = 0; idx < i.length; idx++) - System.out.print("(i=" + i[idx] + ", a=" + a[idx] + ") "); - System.out.println("b= " + b + ")"); - } - - numConstraints++; - Expression constraint = model.addExpression("EqualityConstraint: " + Integer.toString(numConstraints)); - constraint.level(b); - for(int ind = 0; ind < i.length; ind++) { - constraint.setLinearFactor(i[ind], a[ind]); - // in jdk8: - //constraint.set(i[ind], a[ind]) - } - } - - public void addGreaterThanConstraint(int[] i, double[] 
a, double b) { - if(log) { - System.out.print("OJalgoHook: addGreaterThanConstraint("); - for (int idx = 0; idx < i.length; idx++) - System.out.print("(i=" + i[idx] + ", a=" + a[idx] + ") "); - System.out.println("b= " + b + ")"); - } - - numConstraints++; - Expression constraint = model.addExpression("GreaterThanConstraint: " + Integer.toString(numConstraints)); - constraint.lower(b); - for(int ind = 0; ind < i.length; ind++) { - constraint.setLinearFactor(i[ind], a[ind]); - // in jdk8: - //constraint.set(i[ind], a[ind]); - } - } - - public void addLessThanConstraint(int[] i, double[] a, double b) { - if(log) { - System.out.print("OJalgoHook: addLessThanConstraint("); - for (int idx = 0; idx < i.length; idx++) - System.out.print("(i=" + i[idx] + ", a=" + a[idx] + ") "); - System.out.println("b= " + b + ")"); - } - - numConstraints++; - Expression constraint = model.addExpression("LessThanConstraint: " + Integer.toString(numConstraints)); - constraint.upper(b); - for(int ind = 0; ind < i.length; ind++) { - constraint.setLinearFactor(i[ind], a[ind]); - // in jdk8: - // constraint.set(i[ind], a[ind]); - } - } - - // Note: oJalgo does not support pre-solving! - public boolean solve() throws Exception { - if(log) - System.out.println("OJalgoHook: solve() "); - - if(minimize) - result = model.minimise(); - else - result = model.maximise(); - - if(log) { - if( result.getState() == Optimisation.State.OPTIMAL ) - System.out.println("Good news!: the optimization solution is optimal! "); - if( result.getState() == Optimisation.State.DISTINCT ) - System.out.println("Good news!: the optimization solution is unique! "); - if( result.getState() == Optimisation.State.INFEASIBLE ) - System.out.println("Warning: the optimization is infeasible! "); - if( result.getState() == Optimisation.State.UNBOUNDED ) - System.out.println("Warning: the optimization is unbounded! 
"); - if( result.getState() == Optimisation.State.APPROXIMATE ) - System.out.println("Warning: the optimization is approximate! "); - } - return result.getState().isSuccess(); - } - - public boolean isSolved() { - if(log) - System.out.println("OJalgoHook: isSolved() "); - return result != null && result.getState().isSuccess(); - } - - public boolean getBooleanValue(int index) { - if(log) - System.out.println("OJalgoHook: getBooleanValue(" + index + ") "); - if( result.get(index).intValue() != 1 && result.get(index).intValue() != 0 ) - System.out.println("Warning! The value of the binary variable is not 0/1! "); - return (result.get(index).intValue() == 1); - } - - public double objectiveValue() { - if(log) - System.out.println("OJalgoHook: objectiveValue()"); - return result.getValue(); - } - - public void reset() { - if(log) - System.out.println("OJalgoHook: reset()"); - // no implementation - } - - public void write(StringBuffer buffer) { - if(log) - System.out.println("OJalgoHook: write()"); - // no implementation - } - - /** - * Set a time limit for solver optimization. After the limit - * is reached the solver stops running. - * - * @param limit the time limit - */ - public void setTimeout(int limit) { - assert (0 <= limit); - model.options.time_abort = limit; - } - - public void printModelInfo() { - System.out.println(model.toString()); - System.out.println("objective: " + result.getValue()); - } -} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/ParameterizedConstraint.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/ParameterizedConstraint.java index c72b6c99..8aff5785 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/ParameterizedConstraint.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/ParameterizedConstraint.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. 
See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.infer; @@ -18,96 +15,92 @@ /** - * This class represents an LBJava constraint as it appears in a source file. - * When given its input object, an object of this class generates objects of - * type FirstOrderConstraint. - * ParameterizedConstraints are also Boolean - * Classifiers. - * - *

ParameterizedConstraint depends on extending classes to - * override the discreteValue(Object) method so that it returns - * "true" or "false" as appropriate. When this is - * done, there is no need to override the classify(Object) - * method. - * - * @author Nick Rizzolo + * This class represents an LBJava constraint as it appears in a source file. When given its input + * object, an object of this class generates objects of type FirstOrderConstraint. + * ParameterizedConstraints are also Boolean Classifiers. + * + *

+ * ParameterizedConstraint depends on extending classes to override the + * discreteValue(Object) method so that it returns "true" or + * "false" as appropriate. When this is done, there is no need to override the + * classify(Object) method. + * + * @author Nick Rizzolo **/ -public abstract class ParameterizedConstraint extends Classifier -{ - /** Default constructor. */ - public ParameterizedConstraint() { } +public abstract class ParameterizedConstraint extends Classifier { + /** Default constructor. */ + public ParameterizedConstraint() {} - /** - * Initializes the name. - * - * @param n The name of this constraint. - **/ - public ParameterizedConstraint(String n) { super(n); } + /** + * Initializes the name. + * + * @param n The name of this constraint. + **/ + public ParameterizedConstraint(String n) { + super(n); + } - /** - * This method makes one or more decisions about a single object, returning - * those decisions as Features in a vector. - * - * @param o The object to make decisions about. - * @return A vector of Features about the input object. - **/ - public FeatureVector classify(Object o) { - return new FeatureVector(featureValue(o)); - } + /** + * This method makes one or more decisions about a single object, returning those decisions as + * Features in a vector. + * + * @param o The object to make decisions about. + * @return A vector of Features about the input object. + **/ + public FeatureVector classify(Object o) { + return new FeatureVector(featureValue(o)); + } - /** - * Returns the classification of the given example object as a single - * feature instead of a {@link FeatureVector}. - * - * @param o The object to classify. - * @return The classification of o as a feature. 
- **/ - public Feature featureValue(Object o) { - short index = shortValue(o); - return - new DiscretePrimitiveStringFeature( - containingPackage, name, "", DiscreteFeature.BooleanValues[index], - index, (short) 2); - } + /** + * Returns the classification of the given example object as a single feature instead of a + * {@link FeatureVector}. + * + * @param o The object to classify. + * @return The classification of o as a feature. + **/ + public Feature featureValue(Object o) { + short index = shortValue(o); + return new DiscretePrimitiveStringFeature(containingPackage, name, "", + DiscreteFeature.BooleanValues[index], index, (short) 2); + } - /** - * Returns the prediction of this classifier as a short that - * acts as a pointer into {@link DiscreteFeature#BooleanValues}. - * - * @param o The object to classify. - * @return The classification of o as a short. - **/ - public short shortValue(Object o) { - String value = discreteValue(o); - return (short) (value.equals("true") ? 1 : 0); - } + /** + * Returns the prediction of this classifier as a short that acts as a pointer into + * {@link DiscreteFeature#BooleanValues}. + * + * @param o The object to classify. + * @return The classification of o as a short. + **/ + public short shortValue(Object o) { + String value = discreteValue(o); + return (short) (value.equals("true") ? 1 : 0); + } - /** - * Returns the array of allowable values that a feature returned by this - * classifier may take. If the array has length 0, it means either that - * the feature has discrete type and allowable values were not specified or - * that the feature has real or mixed type. The default return value of - * this method is a 0 length array.

- * - * This method should be overridden by derived classes. - * - * @return The allowable values that a feature returned by this classifier - * may take. - **/ - public String[] allowableValues() { return DiscreteFeature.BooleanValues; } + /** + * Returns the array of allowable values that a feature returned by this classifier may take. If + * the array has length 0, it means either that the feature has discrete type and allowable + * values were not specified or that the feature has real or mixed type. The default return + * value of this method is a 0 length array.
+ *
+ * + * This method should be overridden by derived classes. + * + * @return The allowable values that a feature returned by this classifier may take. + **/ + public String[] allowableValues() { + return DiscreteFeature.BooleanValues; + } - /** - * This method builds a first order constraint based on the given input - * object. - * - * @param o The object to build a constraint with respect to. - * @return A first order constraint. - **/ - public abstract FirstOrderConstraint makeConstraint(Object o); + /** + * This method builds a first order constraint based on the given input object. + * + * @param o The object to build a constraint with respect to. + * @return A first order constraint. + **/ + public abstract FirstOrderConstraint makeConstraint(Object o); } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/PropositionalAtLeast.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/PropositionalAtLeast.java index 64ddf054..ae2245ce 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/PropositionalAtLeast.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/PropositionalAtLeast.java @@ -1,488 +1,475 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.infer; /** - * Represents the constraint that at least m of the children - * constraints must be true. 
- * - * @author Nick Rizzolo + * Represents the constraint that at least m of the children constraints must be true. + * + * @author Nick Rizzolo **/ -public class PropositionalAtLeast extends PropositionalNAryConstraint -{ - /** The children are stored in an array in this class. */ - protected PropositionalConstraint[] children; - /** The number of child constraints that must be true. */ - protected int m; - - - /** Default constructor. */ - private PropositionalAtLeast() { } - - /** - * Initializing constructor. - * - * @param c A collection of children constraints. - * @param m The number of children that must be true. - **/ - public PropositionalAtLeast(PropositionalConstraint[] c, int m) { - this.m = m; - children = c; - super.children = null; - } - - - /** - * Returns the children of this constraint in an array. - * - * @return The children of this constraint in an array. - **/ - public Constraint[] getChildren() { - return (PropositionalConstraint[]) children.clone(); - } - - - /** Returns the value of m. */ - public int getM() { return m; } - - - /** - * Determines whether the given constraint is a term of this constraint. - * - * @param c The given constraint. - * @return true iff the given constraint is contained in this - * constraint. - **/ - public boolean contains(PropositionalConstraint c) { - for (int i = 0; i < children.length; ++i) - if (c.equals(children[i])) return true; - return false; - } - - - /** - * Returns the number of terms in this constraint. - * - * @return The number of terms in this constraint. - **/ - public int size() { return children.length; } - - - /** Determines whether the constraint is satisfied. */ - public boolean evaluate() { - int trueChildren = 0; - for (int i = 0; i < children.length && trueChildren < m; ++i) - if (children[i].evaluate()) ++trueChildren; - return trueChildren == m; - } - - - /** - * Replaces the children array with a new array containing all - * the same elements except the element with the given index. 
- * - * @param r The index of the child to remove. - **/ - public void remove(int r) { - PropositionalConstraint[] temp = - new PropositionalConstraint[children.length - 1]; - - for (int i = 0, j = 0; i < children.length; ++i) { - if (i == r) continue; - temp[j++] = children[i]; +public class PropositionalAtLeast extends PropositionalNAryConstraint { + /** The children are stored in an array in this class. */ + protected PropositionalConstraint[] children; + /** The number of child constraints that must be true. */ + protected int m; + + + /** Default constructor. */ + private PropositionalAtLeast() {} + + /** + * Initializing constructor. + * + * @param c A collection of children constraints. + * @param m The number of children that must be true. + **/ + public PropositionalAtLeast(PropositionalConstraint[] c, int m) { + this.m = m; + children = c; + super.children = null; } - children = temp; - } - - - /** - * Produces a new, logically simplified version of this constraint, - * preserving variable consolidation. - * - * @see Constraint#consolidateVariables(java.util.AbstractMap) - * @return A logically simplified version of this constraint. - **/ - public PropositionalConstraint simplify() { - if (m <= 0) return PropositionalConstant.True; - if (m > children.length) return PropositionalConstant.False; - - PropositionalAtLeast result = new PropositionalAtLeast(); - result.m = m; - result.children = new PropositionalConstraint[children.length]; - for (int i = 0; i < children.length; ++i) - result.children[i] = children[i].simplify(); - - for (int i = result.children.length - 1; i >= 0; --i) { - if (result.children[i] == PropositionalConstant.True) { - result.remove(i); - --result.m; - } - else if (result.children[i] == PropositionalConstant.False) - result.remove(i); + + /** + * Returns the children of this constraint in an array. + * + * @return The children of this constraint in an array. 
+ **/ + public Constraint[] getChildren() { + return (PropositionalConstraint[]) children.clone(); + } + + + /** Returns the value of m. */ + public int getM() { + return m; } - /* - HashSet positive = new HashSet(); - HashSet negative = new HashSet(); - for (int i = 0; i < result.children.length; ++i) { - if (result.children[i] instanceof PropositionalNegation) - negative.add(result.children[i].getChildren()[0]); - else positive.add(result.children[i]); + + /** + * Determines whether the given constraint is a term of this constraint. + * + * @param c The given constraint. + * @return true iff the given constraint is contained in this constraint. + **/ + public boolean contains(PropositionalConstraint c) { + for (int i = 0; i < children.length; ++i) + if (c.equals(children[i])) + return true; + return false; + } + + + /** + * Returns the number of terms in this constraint. + * + * @return The number of terms in this constraint. + **/ + public int size() { + return children.length; } - for (Iterator I = positive.iterator(); I.hasNext(); ) { - PropositionalConstraint p = (PropositionalConstraint) I.next(); - if (negative.contains(p)) { - LinkedList positiveIndexes = new LinkedList(); - LinkedList negativeIndexes = new LinkedList(); - for (int i = 0; i < result.children.length; ++i) { - if (result.children[i].equals(p)) - positiveIndexes.add(new Integer(i)); - else if (result.children[i].equals(new PropositionalNegation(p))) - negativeIndexes.add(new Integer(i)); + + /** Determines whether the constraint is satisfied. */ + public boolean evaluate() { + int trueChildren = 0; + for (int i = 0; i < children.length && trueChildren < m; ++i) + if (children[i].evaluate()) + ++trueChildren; + return trueChildren == m; + } + + + /** + * Replaces the children array with a new array containing all the same elements + * except the element with the given index. + * + * @param r The index of the child to remove. 
+ **/ + public void remove(int r) { + PropositionalConstraint[] temp = new PropositionalConstraint[children.length - 1]; + + for (int i = 0, j = 0; i < children.length; ++i) { + if (i == r) + continue; + temp[j++] = children[i]; } - int toRemove = positiveIndexes.size(); - if (negativeIndexes.size() < toRemove) - toRemove = negativeIndexes.size(); + children = temp; + } + + + /** + * Produces a new, logically simplified version of this constraint, preserving variable + * consolidation. + * + * @see Constraint#consolidateVariables(java.util.AbstractMap) + * @return A logically simplified version of this constraint. + **/ + public PropositionalConstraint simplify() { + if (m <= 0) + return PropositionalConstant.True; + if (m > children.length) + return PropositionalConstant.False; + + PropositionalAtLeast result = new PropositionalAtLeast(); + result.m = m; + result.children = new PropositionalConstraint[children.length]; + for (int i = 0; i < children.length; ++i) + result.children[i] = children[i].simplify(); + + for (int i = result.children.length - 1; i >= 0; --i) { + if (result.children[i] == PropositionalConstant.True) { + result.remove(i); + --result.m; + } else if (result.children[i] == PropositionalConstant.False) + result.remove(i); + } - Integer[] removedIndexes = new Integer[toRemove * 2]; - for (int i = 0; i < toRemove; ++i) { - removedIndexes[2 * i] = (Integer) positiveIndexes.removeLast(); - removedIndexes[2 * i + 1] = (Integer) negativeIndexes.removeLast(); + /* + * HashSet positive = new HashSet(); HashSet negative = new HashSet(); for (int i = 0; i < + * result.children.length; ++i) { if (result.children[i] instanceof PropositionalNegation) + * negative.add(result.children[i].getChildren()[0]); else positive.add(result.children[i]); + * } + * + * for (Iterator I = positive.iterator(); I.hasNext(); ) { PropositionalConstraint p = + * (PropositionalConstraint) I.next(); if (negative.contains(p)) { LinkedList + * positiveIndexes = new LinkedList(); 
LinkedList negativeIndexes = new LinkedList(); for + * (int i = 0; i < result.children.length; ++i) { if (result.children[i].equals(p)) + * positiveIndexes.add(new Integer(i)); else if (result.children[i].equals(new + * PropositionalNegation(p))) negativeIndexes.add(new Integer(i)); } + * + * int toRemove = positiveIndexes.size(); if (negativeIndexes.size() < toRemove) toRemove = + * negativeIndexes.size(); + * + * Integer[] removedIndexes = new Integer[toRemove * 2]; for (int i = 0; i < toRemove; ++i) + * { removedIndexes[2 * i] = (Integer) positiveIndexes.removeLast(); removedIndexes[2 * i + + * 1] = (Integer) negativeIndexes.removeLast(); } + * + * Arrays.sort(removedIndexes); for (int i = removedIndexes.length - 1; i >= 0; --i) + * result.remove(removedIndexes[i].intValue()); result.m -= toRemove; } } + */ + + if (result.m <= 0) + return PropositionalConstant.True; + if (result.m > result.children.length) + return PropositionalConstant.False; + if (result.children.length == 1) + return result.children[0]; + if (result.m == 1) { + PropositionalDisjunction disjunction = + new PropositionalDisjunction(result.children[0], result.children[1]); + for (int i = 2; i < result.children.length; ++i) + disjunction = new PropositionalDisjunction(disjunction, result.children[i]); + return disjunction.simplify(); } - Arrays.sort(removedIndexes); - for (int i = removedIndexes.length - 1; i >= 0; --i) - result.remove(removedIndexes[i].intValue()); - result.m -= toRemove; - } + return result; } - */ - - if (result.m <= 0) return PropositionalConstant.True; - if (result.m > result.children.length) return PropositionalConstant.False; - if (result.children.length == 1) return result.children[0]; - if (result.m == 1) { - PropositionalDisjunction disjunction = - new PropositionalDisjunction(result.children[0], result.children[1]); - for (int i = 2; i < result.children.length; ++i) - disjunction = - new PropositionalDisjunction(disjunction, result.children[i]); - return 
disjunction.simplify(); + + + /** + * The negation of an at-least(m) is the at-least(n-m+1) of the negated children. + * + * @return A simplified constraint representing the negation of this constraint. + **/ + public PropositionalConstraint negate() { + PropositionalAtLeast result = new PropositionalAtLeast(); + result.children = new PropositionalConstraint[children.length]; + for (int i = 0; i < children.length; ++i) + result.children[i] = children[i].negate(); + result.m = children.length - m + 1; + return result; } - return result; - } - - - /** - * The negation of an at-least(m) is the at-least(n-m+1) of the negated - * children. - * - * @return A simplified constraint representing the negation of this - * constraint. - **/ - public PropositionalConstraint negate() { - PropositionalAtLeast result = new PropositionalAtLeast(); - result.children = new PropositionalConstraint[children.length]; - for (int i = 0; i < children.length; ++i) - result.children[i] = children[i].negate(); - result.m = children.length - m + 1; - return result; - } - - - /** - * Produces a new, logically simplified version of this constraint in - * conjunctive normal form (CNF). - * - * @return The conjunctive normal form of this constraint. - **/ - public PropositionalConstraint CNF() { return DNF().CNF(); } - - - /** - * Produces a new, logically simplified version of this constraint in - * disjunctive normal form (DNF). - * - * @return The disjunctive normal form of this constraint. - **/ - public PropositionalConstraint DNF() { - PropositionalConstraint result = null; - - if (m == 1) { - result = new PropositionalDisjunction(children[0], children[1]); - for (int i = 2; i < m; ++i) - result = new PropositionalDisjunction(result, children[i]); + + /** + * Produces a new, logically simplified version of this constraint in conjunctive normal form + * (CNF). + * + * @return The conjunctive normal form of this constraint. 
+ **/ + public PropositionalConstraint CNF() { + return DNF().CNF(); } - else { - result = new PropositionalConjunction(children[0], children[1]); - for (int i = 2; i < m; ++i) - result = new PropositionalConjunction(result, children[i]); - - int[] indexes = new int[m]; - for (int i = 0; i < m; ++i) indexes[i] = i; - - while (nextChoice(indexes, children.length - 1)) { - PropositionalConjunction term = - new PropositionalConjunction(children[indexes[0]], - children[indexes[1]]); - for (int i = 2; i < m; ++i) - term = new PropositionalConjunction(term, children[indexes[i]]); - result = new PropositionalDisjunction(result, term); - } + + + /** + * Produces a new, logically simplified version of this constraint in disjunctive normal form + * (DNF). + * + * @return The disjunctive normal form of this constraint. + **/ + public PropositionalConstraint DNF() { + PropositionalConstraint result = null; + + if (m == 1) { + result = new PropositionalDisjunction(children[0], children[1]); + for (int i = 2; i < m; ++i) + result = new PropositionalDisjunction(result, children[i]); + } else { + result = new PropositionalConjunction(children[0], children[1]); + for (int i = 2; i < m; ++i) + result = new PropositionalConjunction(result, children[i]); + + int[] indexes = new int[m]; + for (int i = 0; i < m; ++i) + indexes[i] = i; + + while (nextChoice(indexes, children.length - 1)) { + PropositionalConjunction term = + new PropositionalConjunction(children[indexes[0]], children[indexes[1]]); + for (int i = 2; i < m; ++i) + term = new PropositionalConjunction(term, children[indexes[i]]); + result = new PropositionalDisjunction(result, term); + } + } + + return result; } - return result; - } - - - /** - * Given a particular choice of k of the first n non-negative integers, - * this method computes the next logical choice of k integers, modifying - * the input array to contain that choice. The parameter I - * contains the current choice, and it must be sorted in ascending order. 
- * The parameter max contains the largest allowable value for - * any integer in I. Therefore, n = max + 1, and - * k = I.length. It is also assumed that the "first" choice - * is the integers 0 through k - 1 inclusive and the "last" choice is - * max - k + 1 through max inclusive. - * - * @param I The current choice of k out of the first n non-negative - * integers, sorted in decreasing order. - * @param max The largest value allowed to appear in I (n - - * 1). - * @return true iff the input did not represent the last - * choice. - **/ - protected static boolean nextChoice(int[] I, int max) { - int i = 1; - while (i < I.length && I[i] - I[i - 1] == 1) ++i; - if (i == I.length && I[i - 1] == max) return false; - ++I[i - 1]; - for (int j = 0; j < i - 1; ++j) I[j] = j; - return true; - } - - - /** - * Compares topology to determine if this constraint is more general than - * the given constraint; note: this method is not required to be correct - * when it answers false. - * - * @param c The given constraint. - * @return true if a topological analysis determined that this - * constraint is more general than the given constraint. - **/ - public boolean moreGeneralThan(PropositionalConstraint c) { - return c.moreSpecificThan(this); - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given implication; note: this method is not required to be - * correct when it answers false. - * - * @param c The given implication. - * @return true if a topological analysis determined that this - * constraint is more general than the given implication. - **/ - public boolean moreSpecificThan(PropositionalImplication c) { - return false; - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given double implication; note: this method is not required to be - * correct when it answers false. - * - * @param c The given double implication. 
- * @return true if a topological analysis determined that this - * constraint is more general than the given double implication. - **/ - public boolean moreSpecificThan(PropositionalDoubleImplication c) { - return false; - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given conjunction; note: this method is not required to be - * correct when it answers false. - * - * @param c The given conjunction. - * @return true if a topological analysis determined that this - * constraint is more general than the given conjunction. - **/ - public boolean moreSpecificThan(PropositionalConjunction c) { - return false; - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given disjunction; note: this method is not required to be - * correct when it answers false. - * - * @param c The given disjunction. - * @return true if a topological analysis determined that this - * constraint is more general than the given disjunction. - **/ - public boolean moreSpecificThan(PropositionalDisjunction c) { - return false; - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given at-least; note: this method is not required to be correct - * when it answers false. - * - * @param c The given at-least. - * @return true if a topological analysis determined that this - * constraint is more specific than the given disjunction. - **/ - public boolean moreSpecificThan(PropositionalAtLeast c) { - if (c.children.length != children.length) return false; - for (int i = 0; i < children.length; ++i) - if (!children[i].equals(c.children[i])) return false; - return m >= c.m; - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given negation; note: this method is not required to be correct - * when it answers false. - * - * @param c The given negation. 
- * @return true if a topological analysis determined that this - * constraint is more general than the given negation. - **/ - public boolean moreSpecificThan(PropositionalNegation c) { - return false; - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given variable; note: this method is not required to be correct - * when it answers false. - * - * @param c The given variable. - * @return true if a topological analysis determined that this - * constraint is more general than the given variable. - **/ - public boolean moreSpecificThan(PropositionalVariable c) { - return false; - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given constant; note: this method is not required to be correct - * when it answers false. - * - * @param c The given constant. - * @return true if a topological analysis determined that this - * constraint is more general than the given constant. - **/ - public boolean moreSpecificThan(PropositionalConstant c) { - return c.evaluate(); - } - - - /** - * The hash code of a PropositionalAtLeast is the sum of - * the hash codes of its children plus two. - * - * @return The hash code for this PropositionalConjunction. - **/ - public int hashCode() { - int result = 2; - for (int i = 0; i < children.length; ++i) - result += children[i].hashCode(); - return result; - } - - - /** - * Two PropositionalAtLeasts are equivalent when they are - * topologically equivalent; this implementation currently does not respect - * the associativity and commutativity of at-least. - * - * @return true iff the argument is an equivalent - * PropositionalAtLeast. 
- **/ - public boolean equals(Object o) { - if (!(o instanceof PropositionalAtLeast)) return false; - PropositionalAtLeast a = (PropositionalAtLeast) o; - if (children.length != a.children.length) return false; - for (int i = 0; i < children.length; ++i) - if (!children[i].equals(a.children[i])) return false; - return true; - } - - - /** - * Calls the appropriate visit(·) method of the given - * Inference for this Constraint, as per the - * visitor pattern. - * - * @param infer The inference visiting this constraint. - **/ - public void runVisit(Inference infer) { infer.visit(this); } - - - /** - * Creates a string respresentation of this constraint using the string - * representations of the objects involved. - * - * @param buffer The output of this method will be appended to this buffer. - **/ - public void write(StringBuffer buffer) { - buffer.append("(atleast " + m + " of "); - - children[0].write(buffer); - for (int i = 1; i < children.length; ++i) { - buffer.append(", "); - children[i].write(buffer); + + /** + * Given a particular choice of k of the first n non-negative integers, this method computes the + * next logical choice of k integers, modifying the input array to contain that choice. The + * parameter I contains the current choice, and it must be sorted in ascending + * order. The parameter max contains the largest allowable value for any integer in + * I. Therefore, n = max + 1, and k = I.length. It is + * also assumed that the "first" choice is the integers 0 through k - 1 inclusive and the "last" + * choice is max - k + 1 through max inclusive. + * + * @param I The current choice of k out of the first n non-negative integers, sorted in + * decreasing order. + * @param max The largest value allowed to appear in I (n - 1). + * @return true iff the input did not represent the last choice. 
+ **/ + protected static boolean nextChoice(int[] I, int max) { + int i = 1; + while (i < I.length && I[i] - I[i - 1] == 1) + ++i; + if (i == I.length && I[i - 1] == max) + return false; + ++I[i - 1]; + for (int j = 0; j < i - 1; ++j) + I[j] = j; + return true; } - buffer.append(")"); - } + /** + * Compares topology to determine if this constraint is more general than the given constraint; + * note: this method is not required to be correct when it answers false. + * + * @param c The given constraint. + * @return true if a topological analysis determined that this constraint is more + * general than the given constraint. + **/ + public boolean moreGeneralThan(PropositionalConstraint c) { + return c.moreSpecificThan(this); + } - /** - * This method returns a shallow clone. - * - * @return A shallow clone. - **/ - public Object clone() { - PropositionalAtLeast clone = null; - try { clone = (PropositionalAtLeast) super.clone(); } - catch (Exception e) { - System.err.println("Error cloning PropositionalAtLeast:"); - e.printStackTrace(); - System.exit(1); + /** + * Compares topology to determine if this constraint is more specific than the given + * implication; note: this method is not required to be correct when it answers + * false. + * + * @param c The given implication. + * @return true if a topological analysis determined that this constraint is more + * general than the given implication. + **/ + public boolean moreSpecificThan(PropositionalImplication c) { + return false; + } + + + /** + * Compares topology to determine if this constraint is more specific than the given double + * implication; note: this method is not required to be correct when it answers + * false. + * + * @param c The given double implication. + * @return true if a topological analysis determined that this constraint is more + * general than the given double implication. 
+ **/ + public boolean moreSpecificThan(PropositionalDoubleImplication c) { + return false; + } + + + /** + * Compares topology to determine if this constraint is more specific than the given + * conjunction; note: this method is not required to be correct when it answers + * false. + * + * @param c The given conjunction. + * @return true if a topological analysis determined that this constraint is more + * general than the given conjunction. + **/ + public boolean moreSpecificThan(PropositionalConjunction c) { + return false; } - clone.children = (PropositionalConstraint[]) clone.children.clone(); - return clone; - } -} + /** + * Compares topology to determine if this constraint is more specific than the given + * disjunction; note: this method is not required to be correct when it answers + * false. + * + * @param c The given disjunction. + * @return true if a topological analysis determined that this constraint is more + * general than the given disjunction. + **/ + public boolean moreSpecificThan(PropositionalDisjunction c) { + return false; + } + + + /** + * Compares topology to determine if this constraint is more specific than the given at-least; + * note: this method is not required to be correct when it answers false. + * + * @param c The given at-least. + * @return true if a topological analysis determined that this constraint is more + * specific than the given disjunction. + **/ + public boolean moreSpecificThan(PropositionalAtLeast c) { + if (c.children.length != children.length) + return false; + for (int i = 0; i < children.length; ++i) + if (!children[i].equals(c.children[i])) + return false; + return m >= c.m; + } + + + /** + * Compares topology to determine if this constraint is more specific than the given negation; + * note: this method is not required to be correct when it answers false. + * + * @param c The given negation. + * @return true if a topological analysis determined that this constraint is more + * general than the given negation. 
+ **/ + public boolean moreSpecificThan(PropositionalNegation c) { + return false; + } + + + /** + * Compares topology to determine if this constraint is more specific than the given variable; + * note: this method is not required to be correct when it answers false. + * + * @param c The given variable. + * @return true if a topological analysis determined that this constraint is more + * general than the given variable. + **/ + public boolean moreSpecificThan(PropositionalVariable c) { + return false; + } + + + /** + * Compares topology to determine if this constraint is more specific than the given constant; + * note: this method is not required to be correct when it answers false. + * + * @param c The given constant. + * @return true if a topological analysis determined that this constraint is more + * general than the given constant. + **/ + public boolean moreSpecificThan(PropositionalConstant c) { + return c.evaluate(); + } + + + /** + * The hash code of a PropositionalAtLeast is the sum of the hash codes of its + * children plus two. + * + * @return The hash code for this PropositionalConjunction. + **/ + public int hashCode() { + int result = 2; + for (int i = 0; i < children.length; ++i) + result += children[i].hashCode(); + return result; + } + + + /** + * Two PropositionalAtLeasts are equivalent when they are topologically equivalent; + * this implementation currently does not respect the associativity and commutativity of + * at-least. + * + * @return true iff the argument is an equivalent PropositionalAtLeast + * . 
+ **/ + public boolean equals(Object o) { + if (!(o instanceof PropositionalAtLeast)) + return false; + PropositionalAtLeast a = (PropositionalAtLeast) o; + if (children.length != a.children.length) + return false; + for (int i = 0; i < children.length; ++i) + if (!children[i].equals(a.children[i])) + return false; + return true; + } + + + /** + * Calls the appropriate visit(·) method of the given Inference + * for this Constraint, as per the visitor pattern. + * + * @param infer The inference visiting this constraint. + **/ + public void runVisit(Inference infer) { + infer.visit(this); + } + + + /** + * Creates a string respresentation of this constraint using the string representations of the + * objects involved. + * + * @param buffer The output of this method will be appended to this buffer. + **/ + public void write(StringBuffer buffer) { + buffer.append("(atleast " + m + " of "); + + children[0].write(buffer); + for (int i = 1; i < children.length; ++i) { + buffer.append(", "); + children[i].write(buffer); + } + + buffer.append(")"); + } + + + /** + * This method returns a shallow clone. + * + * @return A shallow clone. 
+ **/ + public Object clone() { + PropositionalAtLeast clone = null; + + try { + clone = (PropositionalAtLeast) super.clone(); + } catch (Exception e) { + System.err.println("Error cloning PropositionalAtLeast:"); + e.printStackTrace(); + System.exit(1); + } + + clone.children = (PropositionalConstraint[]) clone.children.clone(); + return clone; + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/PropositionalBinaryConstraint.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/PropositionalBinaryConstraint.java index d284800f..1af2b32f 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/PropositionalBinaryConstraint.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/PropositionalBinaryConstraint.java @@ -1,72 +1,68 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.infer; /** - * Represents a propositional constraint involving a binary operator. - * - * @author Nick Rizzolo + * Represents a propositional constraint involving a binary operator. + * + * @author Nick Rizzolo **/ -public abstract class PropositionalBinaryConstraint - extends PropositionalConstraint -{ - /** The constraint on the left of the operator. */ - protected PropositionalConstraint left; - /** The constraint on the right of the operator. 
*/ - protected PropositionalConstraint right; +public abstract class PropositionalBinaryConstraint extends PropositionalConstraint { + /** The constraint on the left of the operator. */ + protected PropositionalConstraint left; + /** The constraint on the right of the operator. */ + protected PropositionalConstraint right; - /** - * Initializing constructor. - * - * @param l The constraint on the left of the operator. - * @param r The constraint on the right of the operator. - **/ - public PropositionalBinaryConstraint(PropositionalConstraint l, - PropositionalConstraint r) { - left = l; - right = r; - } + /** + * Initializing constructor. + * + * @param l The constraint on the left of the operator. + * @param r The constraint on the right of the operator. + **/ + public PropositionalBinaryConstraint(PropositionalConstraint l, PropositionalConstraint r) { + left = l; + right = r; + } - /** - * Replaces all unquantified variables with the unique copy stored as a - * value of the given map; also instantiates all quantified variables and - * stores them in the given map. - * - * @param m The map in which to find unique copies of the variables. - **/ - public void consolidateVariables(java.util.AbstractMap m) { - if (left instanceof PropositionalVariable) { - if (m.containsKey(left)) left = (PropositionalVariable) m.get(left); - else m.put(left, left); - } - else left.consolidateVariables(m); + /** + * Replaces all unquantified variables with the unique copy stored as a value of the given map; + * also instantiates all quantified variables and stores them in the given map. + * + * @param m The map in which to find unique copies of the variables. 
+ **/ + public void consolidateVariables(java.util.AbstractMap m) { + if (left instanceof PropositionalVariable) { + if (m.containsKey(left)) + left = (PropositionalVariable) m.get(left); + else + m.put(left, left); + } else + left.consolidateVariables(m); - if (right instanceof PropositionalVariable) { - if (m.containsKey(right)) right = (PropositionalVariable) m.get(right); - else m.put(right, right); + if (right instanceof PropositionalVariable) { + if (m.containsKey(right)) + right = (PropositionalVariable) m.get(right); + else + m.put(right, right); + } else + right.consolidateVariables(m); } - else right.consolidateVariables(m); - } - /** - * Returns the children of this constraint in an array. - * - * @return The children of this constraint in an array. - **/ - public Constraint[] getChildren() { - return new PropositionalConstraint[]{ left, right }; - } + /** + * Returns the children of this constraint in an array. + * + * @return The children of this constraint in an array. + **/ + public Constraint[] getChildren() { + return new PropositionalConstraint[] {left, right}; + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/PropositionalConjunction.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/PropositionalConjunction.java index 9d0159f0..5871e9a1 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/PropositionalConjunction.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/PropositionalConjunction.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.infer; @@ -17,595 +14,578 @@ /** - * Represents the conjunction of two propositional constraints. - * - * @author Nick Rizzolo + * Represents the conjunction of two propositional constraints. + * + * @author Nick Rizzolo **/ -public class PropositionalConjunction extends PropositionalNAryConstraint -{ - /** Default constructor. */ - private PropositionalConjunction() { } - - /** - * If either of the arguments is itself a - * PropositionalConjunction, its contents are flattened into - * this PropositionalConjunction. - * - * @param c1 One constraint to disjunct. - * @param c2 Another constraint to disjunct. - **/ - public PropositionalConjunction(PropositionalConstraint c1, - PropositionalConstraint c2) { - add(c1); - add(c2); - } - - - /** Determines whether the constraint is satisfied. */ - public boolean evaluate() { - for (Iterator I = children.iterator(); I.hasNext(); ) - if (!((PropositionalConstraint) I.next()).evaluate()) return false; - return true; - } - - - /** - * Produces a new, logically simplified version of this constraint, - * preserving variable consolidation. - * - * @see Constraint#consolidateVariables(java.util.AbstractMap) - * @return A logically simplified version of this constraint. - **/ - public PropositionalConstraint simplify() { return simplify(false); } - - - /** - * Same as simplify(), except this method gives the caller the - * ability to optionally leave double implications that are immediate - * children of this conjunction in tact. - * - * @param d true iff double implications that are immediate - * children of this conjunction are to be left in tact. - * @return A logically simplified version of this constraint. 
- **/ - public PropositionalConstraint simplify(boolean d) { - PropositionalConjunction result = new PropositionalConjunction(); - - if (d) { - for (Iterator I = children.iterator(); I.hasNext(); ) { - PropositionalConstraint c = (PropositionalConstraint) I.next(); - - if (c instanceof PropositionalDoubleImplication) { - PropositionalDoubleImplication di = - (PropositionalDoubleImplication) c; - di.left = di.left.simplify(); - di.right = di.right.simplify(); - - if (di.left.equals(di.right)) c = PropositionalConstant.True; - else if (di.left.equals(PropositionalConstant.False)) - c = di.right.negate().simplify(); - else if (di.left.equals(PropositionalConstant.True)) c = di.right; - else if (di.right.equals(PropositionalConstant.False)) - c = di.left.negate().simplify(); - else if (di.right.equals(PropositionalConstant.True)) c = di.left; - /* - else if (di.right instanceof PropositionalNegation - && di.left.equals(di.right.getChildren()[0]) - || di.left instanceof PropositionalNegation - && di.right.equals(di.left.getChildren()[0])) - c = PropositionalConstant.False; - */ +public class PropositionalConjunction extends PropositionalNAryConstraint { + /** Default constructor. */ + private PropositionalConjunction() {} + + /** + * If either of the arguments is itself a PropositionalConjunction, its contents + * are flattened into this PropositionalConjunction. + * + * @param c1 One constraint to disjunct. + * @param c2 Another constraint to disjunct. + **/ + public PropositionalConjunction(PropositionalConstraint c1, PropositionalConstraint c2) { + add(c1); + add(c2); + } + + + /** Determines whether the constraint is satisfied. */ + public boolean evaluate() { + for (Iterator I = children.iterator(); I.hasNext();) + if (!((PropositionalConstraint) I.next()).evaluate()) + return false; + return true; + } + + + /** + * Produces a new, logically simplified version of this constraint, preserving variable + * consolidation. 
+ * + * @see Constraint#consolidateVariables(java.util.AbstractMap) + * @return A logically simplified version of this constraint. + **/ + public PropositionalConstraint simplify() { + return simplify(false); + } + + + /** + * Same as simplify(), except this method gives the caller the ability to + * optionally leave double implications that are immediate children of this conjunction in tact. + * + * @param d true iff double implications that are immediate children of this + * conjunction are to be left in tact. + * @return A logically simplified version of this constraint. + **/ + public PropositionalConstraint simplify(boolean d) { + PropositionalConjunction result = new PropositionalConjunction(); + + if (d) { + for (Iterator I = children.iterator(); I.hasNext();) { + PropositionalConstraint c = (PropositionalConstraint) I.next(); + + if (c instanceof PropositionalDoubleImplication) { + PropositionalDoubleImplication di = (PropositionalDoubleImplication) c; + di.left = di.left.simplify(); + di.right = di.right.simplify(); + + if (di.left.equals(di.right)) + c = PropositionalConstant.True; + else if (di.left.equals(PropositionalConstant.False)) + c = di.right.negate().simplify(); + else if (di.left.equals(PropositionalConstant.True)) + c = di.right; + else if (di.right.equals(PropositionalConstant.False)) + c = di.left.negate().simplify(); + else if (di.right.equals(PropositionalConstant.True)) + c = di.left; + /* + * else if (di.right instanceof PropositionalNegation && + * di.left.equals(di.right.getChildren()[0]) || di.left instanceof + * PropositionalNegation && di.right.equals(di.left.getChildren()[0])) c = + * PropositionalConstant.False; + */ + } else + c = c.simplify(); + + result.add(c); + } + } else { + for (Iterator I = children.iterator(); I.hasNext();) + result.add(((PropositionalConstraint) I.next()).simplify()); } - else c = c.simplify(); - result.add(c); - } + if (result.children.contains(PropositionalConstant.False)) + return 
PropositionalConstant.False; + + result.children.remove(PropositionalConstant.True); + + if (result.children.size() == 1) + return (PropositionalConstraint) result.children.iterator().next(); + + /* + * HashSet positive = new HashSet(); HashSet negative = new HashSet(); for (Iterator I = + * result.children.iterator(); I.hasNext(); ) { Object next = I.next(); if (next instanceof + * PropositionalNegation) negative.add(((PropositionalConstraint) next).getChildren()[0]); + * else positive.add(next); } + * + * for (Iterator I = positive.iterator(); I.hasNext(); ) if (negative.contains(I.next())) + * return PropositionalConstant.False; + * + * PropositionalConstraint[] terms = (PropositionalConstraint[]) getChildren(); HashSet + * toRemove = new HashSet(); for (int i = 0; i < terms.length - 1; ++i) for (int j = i + 1; + * j < terms.length; ++j) { if (terms[i].moreGeneralThan(terms[j])) toRemove.add(new + * Integer(i)); if (terms[j].moreGeneralThan(terms[i])) toRemove.add(new Integer(j)); } + * + * for (Iterator I = toRemove.iterator(); I.hasNext(); ) + * result.children.remove(terms[((Integer) I.next()).intValue()]); + */ + + if (result.children.size() == 0) + return PropositionalConstant.True; + + return result; + } + + + /** + * Uses DeMorgan's law to compute the negation of this constraint by distributing that negation + * to each child. + * + * @return A simplified constraint representing the negation of this constraint. 
+ **/ + public PropositionalConstraint negate() { + if (children.size() == 1) + return ((PropositionalConstraint) children.iterator().next()).negate(); + + PropositionalConstraint[] array = + (PropositionalConstraint[]) children.toArray(new PropositionalConstraint[children + .size()]); + for (int i = 0; i < array.length; ++i) + array[i] = array[i].negate(); + + PropositionalDisjunction result = new PropositionalDisjunction(array[0], array[1]); + for (int i = 2; i < array.length; ++i) + result.add(array[i]); + return result; + } + + + /** + * Produces a new, logically simplified version of this constraint in conjunctive normal form + * (CNF). + * + * @return The conjunctive normal form of this constraint. + **/ + public PropositionalConstraint CNF() { + PropositionalConjunction result = new PropositionalConjunction(); + for (Iterator I = children.iterator(); I.hasNext();) + result.add(((PropositionalConstraint) I.next()).CNF()); + return result.simplify(); + } + + + /** + * Produces a new, logically simplified version of this constraint in disjunctive normal form + * (DNF). + * + * @return The disjunctive normal form of this constraint. 
+ **/ + public PropositionalConstraint DNF() { + PropositionalConstraint c = factor(); + if (!(c instanceof PropositionalConjunction)) + return c.DNF(); + + PropositionalConjunction simplified = (PropositionalConjunction) c; + + PropositionalConjunction childrenDNF = new PropositionalConjunction(); + for (Iterator I = simplified.children.iterator(); I.hasNext();) + childrenDNF.add(((PropositionalConstraint) I.next()).DNF()); + if (childrenDNF.children.size() == 1) + return (PropositionalConstraint) childrenDNF.getChildren()[0]; + + PropositionalConstraint[][] children = + new PropositionalConstraint[childrenDNF.children.size()][]; + int i = 0; + boolean foundDisjunction = false; + for (Iterator I = childrenDNF.children.iterator(); I.hasNext(); ++i) { + PropositionalConstraint parent = (PropositionalConstraint) I.next(); + if (parent instanceof PropositionalDisjunction) { + children[i] = (PropositionalConstraint[]) parent.getChildren(); + foundDisjunction = true; + } else { + children[i] = new PropositionalConstraint[1]; + children[i][0] = parent; + } + } + + if (!foundDisjunction) + return childrenDNF; + + int[] indexes = new int[children.length]; + PropositionalConstraint result = + new PropositionalConjunction(children[0][0], children[1][0]); + for (i = 2; i < children.length; ++i) + result = new PropositionalConjunction(result, children[i][0]); + + while (PropositionalDisjunction.increment(children, indexes)) { + PropositionalConstraint combination = + new PropositionalConjunction(children[0][indexes[0]], children[1][indexes[1]]); + for (i = 2; i < children.length; ++i) + combination = new PropositionalConjunction(combination, children[i][indexes[i]]); + result = new PropositionalDisjunction(result, combination); + } + + return result; + } + + + /** + * Compares topology to determine if this constraint is more general than the given constraint; + * note: this method is not required to be correct when it answers false. + * + * @param c The given constraint. 
+ * @return true if a topological analysis determined that this constraint is more + * general than the given constraint. + **/ + public boolean moreGeneralThan(PropositionalConstraint c) { + return c.moreSpecificThan(this); + } + + + /** + * Compares topology to determine if this constraint is more specific than the given + * implication; note: this method is not required to be correct when it answers + * false. + * + * @param c The given implication. + * @return true if a topological analysis determined that this constraint is more + * specific than the given implication. + **/ + public boolean moreSpecificThan(PropositionalImplication c) { + return size() > 1 && contains(c); + } + + + /** + * Compares topology to determine if this constraint is more specific than the given double + * implication; note: this method is not required to be correct when it answers + * false. + * + * @param c The given double implication. + * @return true if a topological analysis determined that this constraint is more + * specific than the given double implication. + **/ + public boolean moreSpecificThan(PropositionalDoubleImplication c) { + return size() > 1 && contains(c); } - else { - for (Iterator I = children.iterator(); I.hasNext(); ) - result.add(((PropositionalConstraint) I.next()).simplify()); + + + /** + * Compares topology to determine if this constraint is more specific than the given + * conjunction; note: this method is not required to be correct when it answers + * false. + * + * @param c The given conjunction. + * @return true if a topological analysis determined that this constraint is more + * specific than the given conjunction. 
+ **/ + public boolean moreSpecificThan(PropositionalConjunction c) { + return size() > c.size() && containsAll(c); } - if (result.children.contains(PropositionalConstant.False)) - return PropositionalConstant.False; - result.children.remove(PropositionalConstant.True); + /** + * Compares topology to determine if this constraint is more specific than the given + * disjunction; note: this method is not required to be correct when it answers + * false. + * + * @param c The given disjunction. + * @return true if a topological analysis determined that this constraint is more + * specific than the given disjunction. + **/ + public boolean moreSpecificThan(PropositionalDisjunction c) { + return size() > 1 && contains(c); + } - if (result.children.size() == 1) - return (PropositionalConstraint) result.children.iterator().next(); - /* - HashSet positive = new HashSet(); - HashSet negative = new HashSet(); - for (Iterator I = result.children.iterator(); I.hasNext(); ) { - Object next = I.next(); - if (next instanceof PropositionalNegation) - negative.add(((PropositionalConstraint) next).getChildren()[0]); - else positive.add(next); + /** + * Compares topology to determine if this constraint is more specific than the given at-least; + * note: this method is not required to be correct when it answers false. + * + * @param c The given at-least. + * @return true if a topological analysis determined that this constraint is more + * specific than the given disjunction. 
+ **/ + public boolean moreSpecificThan(PropositionalAtLeast c) { + return false; } - for (Iterator I = positive.iterator(); I.hasNext(); ) - if (negative.contains(I.next())) return PropositionalConstant.False; - - PropositionalConstraint[] terms = - (PropositionalConstraint[]) getChildren(); - HashSet toRemove = new HashSet(); - for (int i = 0; i < terms.length - 1; ++i) - for (int j = i + 1; j < terms.length; ++j) { - if (terms[i].moreGeneralThan(terms[j])) toRemove.add(new Integer(i)); - if (terms[j].moreGeneralThan(terms[i])) toRemove.add(new Integer(j)); - } - - for (Iterator I = toRemove.iterator(); I.hasNext(); ) - result.children.remove(terms[((Integer) I.next()).intValue()]); - */ - - if (result.children.size() == 0) return PropositionalConstant.True; - - return result; - } - - - /** - * Uses DeMorgan's law to compute the negation of this constraint by - * distributing that negation to each child. - * - * @return A simplified constraint representing the negation of this - * constraint. - **/ - public PropositionalConstraint negate() { - if (children.size() == 1) - return ((PropositionalConstraint) children.iterator().next()).negate(); - - PropositionalConstraint[] array = - (PropositionalConstraint[]) - children.toArray(new PropositionalConstraint[children.size()]); - for (int i = 0; i < array.length; ++i) array[i] = array[i].negate(); - - PropositionalDisjunction result = - new PropositionalDisjunction(array[0], array[1]); - for (int i = 2; i < array.length; ++i) result.add(array[i]); - return result; - } - - - /** - * Produces a new, logically simplified version of this constraint in - * conjunctive normal form (CNF). - * - * @return The conjunctive normal form of this constraint. 
- **/ - public PropositionalConstraint CNF() { - PropositionalConjunction result = new PropositionalConjunction(); - for (Iterator I = children.iterator(); I.hasNext(); ) - result.add(((PropositionalConstraint) I.next()).CNF()); - return result.simplify(); - } - - - /** - * Produces a new, logically simplified version of this constraint in - * disjunctive normal form (DNF). - * - * @return The disjunctive normal form of this constraint. - **/ - public PropositionalConstraint DNF() { - PropositionalConstraint c = factor(); - if (!(c instanceof PropositionalConjunction)) return c.DNF(); - - PropositionalConjunction simplified = (PropositionalConjunction) c; - - PropositionalConjunction childrenDNF = new PropositionalConjunction(); - for (Iterator I = simplified.children.iterator(); I.hasNext(); ) - childrenDNF.add(((PropositionalConstraint) I.next()).DNF()); - if (childrenDNF.children.size() == 1) - return (PropositionalConstraint) childrenDNF.getChildren()[0]; - - PropositionalConstraint[][] children = - new PropositionalConstraint[childrenDNF.children.size()][]; - int i = 0; - boolean foundDisjunction = false; - for (Iterator I = childrenDNF.children.iterator(); I.hasNext(); ++i) { - PropositionalConstraint parent = (PropositionalConstraint) I.next(); - if (parent instanceof PropositionalDisjunction) { - children[i] = (PropositionalConstraint[]) parent.getChildren(); - foundDisjunction = true; - } - else { - children[i] = new PropositionalConstraint[1]; - children[i][0] = parent; - } + + /** + * Compares topology to determine if this constraint is more specific than the given negation; + * note: this method is not required to be correct when it answers false. + * + * @param c The given negation. + * @return true if a topological analysis determined that this constraint is more + * specific than the given negation. 
+ **/ + public boolean moreSpecificThan(PropositionalNegation c) { + return size() > 1 && contains(c); } - if (!foundDisjunction) return childrenDNF; - - int[] indexes = new int[children.length]; - PropositionalConstraint result = - new PropositionalConjunction(children[0][0], children[1][0]); - for (i = 2; i < children.length; ++i) - result = new PropositionalConjunction(result, children[i][0]); - - while (PropositionalDisjunction.increment(children, indexes)) { - PropositionalConstraint combination = - new PropositionalConjunction(children[0][indexes[0]], - children[1][indexes[1]]); - for (i = 2; i < children.length; ++i) - combination = - new PropositionalConjunction(combination, children[i][indexes[i]]); - result = new PropositionalDisjunction(result, combination); + + /** + * Compares topology to determine if this constraint is more specific than the given variable; + * note: this method is not required to be correct when it answers false. + * + * @param c The given variable. + * @return true if a topological analysis determined that this constraint is more + * specific than the given variable. + **/ + public boolean moreSpecificThan(PropositionalVariable c) { + return size() > 1 && contains(c); } - return result; - } - - - /** - * Compares topology to determine if this constraint is more general than - * the given constraint; note: this method is not required to be correct - * when it answers false. - * - * @param c The given constraint. - * @return true if a topological analysis determined that this - * constraint is more general than the given constraint. - **/ - public boolean moreGeneralThan(PropositionalConstraint c) { - return c.moreSpecificThan(this); - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given implication; note: this method is not required to be - * correct when it answers false. - * - * @param c The given implication. 
- * @return true if a topological analysis determined that this - * constraint is more specific than the given implication. - **/ - public boolean moreSpecificThan(PropositionalImplication c) { - return size() > 1 && contains(c); - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given double implication; note: this method is not required to be - * correct when it answers false. - * - * @param c The given double implication. - * @return true if a topological analysis determined that this - * constraint is more specific than the given double implication. - **/ - public boolean moreSpecificThan(PropositionalDoubleImplication c) { - return size() > 1 && contains(c); - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given conjunction; note: this method is not required to be - * correct when it answers false. - * - * @param c The given conjunction. - * @return true if a topological analysis determined that this - * constraint is more specific than the given conjunction. - **/ - public boolean moreSpecificThan(PropositionalConjunction c) { - return size() > c.size() && containsAll(c); - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given disjunction; note: this method is not required to be - * correct when it answers false. - * - * @param c The given disjunction. - * @return true if a topological analysis determined that this - * constraint is more specific than the given disjunction. - **/ - public boolean moreSpecificThan(PropositionalDisjunction c) { - return size() > 1 && contains(c); - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given at-least; note: this method is not required to be correct - * when it answers false. - * - * @param c The given at-least. 
- * @return true if a topological analysis determined that this - * constraint is more specific than the given disjunction. - **/ - public boolean moreSpecificThan(PropositionalAtLeast c) { - return false; - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given negation; note: this method is not required to be correct - * when it answers false. - * - * @param c The given negation. - * @return true if a topological analysis determined that this - * constraint is more specific than the given negation. - **/ - public boolean moreSpecificThan(PropositionalNegation c) { - return size() > 1 && contains(c); - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given variable; note: this method is not required to be correct - * when it answers false. - * - * @param c The given variable. - * @return true if a topological analysis determined that this - * constraint is more specific than the given variable. - **/ - public boolean moreSpecificThan(PropositionalVariable c) { - return size() > 1 && contains(c); - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given constant; note: this method is not required to be correct - * when it answers false. - * - * @param c The given constant. - * @return true if a topological analysis determined that this - * constraint is more specific than the given constant. - **/ - public boolean moreSpecificThan(PropositionalConstant c) { - return c.evaluate(); - } - - - /** - * If the given constraint has the same type as this constraint, its terms - * are merged into this constraint; otherwise, it is added as a new term. - * - * @param c The constraint to add. 
- **/ - public void add(PropositionalConstraint c) { - if (c instanceof PropositionalConjunction) { - PropositionalConstraint[] terms = - (PropositionalConstraint[]) c.getChildren(); - for (int i = 0; i < terms.length; ++i) add(terms[i]); + + /** + * Compares topology to determine if this constraint is more specific than the given constant; + * note: this method is not required to be correct when it answers false. + * + * @param c The given constant. + * @return true if a topological analysis determined that this constraint is more + * specific than the given constant. + **/ + public boolean moreSpecificThan(PropositionalConstant c) { + return c.evaluate(); } - else children.add(c); - } - - - /** - * Factoring a conjunction is the opposite of distributing a disjunction - * over a conjunction. - * - * @return A constraint that represents a factoring of this conjunction. - **/ - public PropositionalConstraint factor() { - PropositionalConstraint c = simplify(); - if (!(c instanceof PropositionalConjunction)) return c; - PropositionalConjunction simplified = (PropositionalConjunction) c; - - PropositionalConstraint[] best = new PropositionalConstraint[0]; - while (best != null) { - int bestDisjunction = -1; - int bestOther = -1; - best = null; - - PropositionalConstraint[] children = - (PropositionalConstraint[]) simplified.getChildren(); - Arrays.sort(children, - new Comparator() { - public int compare(Object o1, Object o2) { - if (o1 instanceof PropositionalDisjunction) { - if (o2 instanceof PropositionalDisjunction) return 0; - return -1; - } - - if (o2 instanceof PropositionalDisjunction) return 1; - return 0; + + + /** + * If the given constraint has the same type as this constraint, its terms are merged into this + * constraint; otherwise, it is added as a new term. + * + * @param c The constraint to add. 
+ **/ + public void add(PropositionalConstraint c) { + if (c instanceof PropositionalConjunction) { + PropositionalConstraint[] terms = (PropositionalConstraint[]) c.getChildren(); + for (int i = 0; i < terms.length; ++i) + add(terms[i]); + } else + children.add(c); + } + + + /** + * Factoring a conjunction is the opposite of distributing a disjunction over a conjunction. + * + * @return A constraint that represents a factoring of this conjunction. + **/ + public PropositionalConstraint factor() { + PropositionalConstraint c = simplify(); + if (!(c instanceof PropositionalConjunction)) + return c; + PropositionalConjunction simplified = (PropositionalConjunction) c; + + PropositionalConstraint[] best = new PropositionalConstraint[0]; + while (best != null) { + int bestDisjunction = -1; + int bestOther = -1; + best = null; + + PropositionalConstraint[] children = + (PropositionalConstraint[]) simplified.getChildren(); + Arrays.sort(children, new Comparator() { + public int compare(Object o1, Object o2) { + if (o1 instanceof PropositionalDisjunction) { + if (o2 instanceof PropositionalDisjunction) + return 0; + return -1; + } + + if (o2 instanceof PropositionalDisjunction) + return 1; + return 0; + } + }); + + for (int i = 0; i < children.length - 1 + && children[i] instanceof PropositionalDisjunction; ++i) + for (int j = i + 1; j < children.length; ++j) { + PropositionalConstraint[] current = + ((PropositionalDisjunction) children[i]).intersect(children[j]); + if (current != null && (best == null || current.length > best.length)) { + best = current; + bestDisjunction = i; + bestOther = j; + } + } + + if (best != null) { + PropositionalConstraint toAdd = null; + if (best.length == 1) + toAdd = best[0]; + else { + toAdd = new PropositionalDisjunction(best[0], best[1]); + for (int i = 2; i < best.length; ++i) + toAdd = new PropositionalDisjunction(toAdd, best[i]); + } + + if (children[bestOther] instanceof PropositionalDisjunction) { + PropositionalConstraint disjunct1 
= + ((PropositionalDisjunction) children[bestDisjunction]).subtract(best); + PropositionalConstraint disjunct2 = + ((PropositionalDisjunction) children[bestOther]).subtract(best); + + toAdd = + new PropositionalDisjunction(toAdd, new PropositionalConjunction( + disjunct1, disjunct2)).simplify(); + } + + simplified.children.remove(children[bestDisjunction]); + simplified.children.remove(children[bestOther]); + simplified.add(toAdd); } - }); - - for (int i = 0; - i < children.length - 1 - && children[i] instanceof PropositionalDisjunction; - ++i) - for (int j = i + 1; j < children.length; ++j) { - PropositionalConstraint[] current = - ((PropositionalDisjunction) children[i]).intersect(children[j]); - if (current != null - && (best == null || current.length > best.length)) { - best = current; - bestDisjunction = i; - bestOther = j; - } } - if (best != null) { - PropositionalConstraint toAdd = null; - if (best.length == 1) toAdd = best[0]; - else { - toAdd = new PropositionalDisjunction(best[0], best[1]); - for (int i = 2; i < best.length; ++i) - toAdd = new PropositionalDisjunction(toAdd, best[i]); + if (simplified.children.size() == 1) + return (PropositionalConstraint) simplified.getChildren()[0]; + return simplified; + } + + + /** + * The intersection of two conjunctions is the set of all terms that are common to both + * conjunctions; the intersection of a conjunction and some other constraint c is + * c if c is contained in the conjunction and the empty set otherwise. + * + * @param c The constraint to intersect with. + * @return The set of common terms in array form or null if there are none. 
+ **/ + public PropositionalConstraint[] intersect(PropositionalConstraint c) { + if (!(c instanceof PropositionalConjunction)) { + if (children.contains(c)) + return new PropositionalConstraint[] {c}; + return null; } - if (children[bestOther] instanceof PropositionalDisjunction) { - PropositionalConstraint disjunct1 = - ((PropositionalDisjunction) children[bestDisjunction]) - .subtract(best); - PropositionalConstraint disjunct2 = - ((PropositionalDisjunction) children[bestOther]).subtract(best); - - toAdd = - new PropositionalDisjunction( - toAdd, - new PropositionalConjunction(disjunct1, disjunct2)) - .simplify(); + PropositionalConjunction conjunction = (PropositionalConjunction) c; + LinkedList result = new LinkedList(); + for (Iterator I = children.iterator(); I.hasNext();) { + Object next = I.next(); + if (conjunction.children.contains(next)) + result.add(next); } - simplified.children.remove(children[bestDisjunction]); - simplified.children.remove(children[bestOther]); - simplified.add(toAdd); - } + if (result.size() == 0) + return null; + return (PropositionalConstraint[]) result + .toArray(new PropositionalConstraint[result.size()]); } - if (simplified.children.size() == 1) - return (PropositionalConstraint) simplified.getChildren()[0]; - return simplified; - } - - - /** - * The intersection of two conjunctions is the set of all terms that are - * common to both conjunctions; the intersection of a conjunction and some - * other constraint c is c if c is contained in the - * conjunction and the empty set otherwise. - * - * @param c The constraint to intersect with. - * @return The set of common terms in array form or null if - * there are none. 
- **/ - public PropositionalConstraint[] intersect(PropositionalConstraint c) { - if (!(c instanceof PropositionalConjunction)) { - if (children.contains(c)) return new PropositionalConstraint[]{ c }; - return null; + + /** + * Subtraction from a conjunction simply removes all of the specified terms from it; this method + * returns a new constraint representing the subtraction. + * + * @param terms The terms to remove. + * @return A new representation of this n-ary constraint with the specified terms removed. + **/ + public PropositionalConstraint subtract(PropositionalConstraint[] terms) { + PropositionalConjunction clone = (PropositionalConjunction) clone(); + for (int i = 0; i < terms.length; ++i) + clone.children.remove(terms[i]); + if (clone.children.size() == 0) + return new PropositionalConstant(true); + if (clone.children.size() == 1) + return (PropositionalConstraint) clone.getChildren()[0]; + return clone; } - PropositionalConjunction conjunction = (PropositionalConjunction) c; - LinkedList result = new LinkedList(); - for (Iterator I = children.iterator(); I.hasNext(); ) { - Object next = I.next(); - if (conjunction.children.contains(next)) result.add(next); + + /** + * Distributes the given disjunction over this conjunction. + * + * @return A simplified constraint representing the distribution of the given disjunction over + * this conjunction. 
+ **/ + public PropositionalConstraint distribute(PropositionalDisjunction d) { + PropositionalConstraint[] array = + (PropositionalConstraint[]) children.toArray(new PropositionalConstraint[children + .size()]); + for (int i = 0; i < array.length; ++i) { + PropositionalDisjunction clone = (PropositionalDisjunction) d.clone(); + clone.add(array[i]); + array[i] = clone; + } + + if (array.length == 1) + return array[0].simplify(); + + PropositionalConjunction result = new PropositionalConjunction(array[0], array[1]); + for (int i = 2; i < array.length; ++i) + result.add(array[i]); + return result.simplify(); } - if (result.size() == 0) return null; - return (PropositionalConstraint[]) - result.toArray(new PropositionalConstraint[result.size()]); - } - - - /** - * Subtraction from a conjunction simply removes all of the specified - * terms from it; this method returns a new constraint representing the - * subtraction. - * - * @param terms The terms to remove. - * @return A new representation of this n-ary constraint with the specified - * terms removed. - **/ - public PropositionalConstraint subtract(PropositionalConstraint[] terms) { - PropositionalConjunction clone = (PropositionalConjunction) clone(); - for (int i = 0; i < terms.length; ++i) clone.children.remove(terms[i]); - if (clone.children.size() == 0) return new PropositionalConstant(true); - if (clone.children.size() == 1) - return (PropositionalConstraint) clone.getChildren()[0]; - return clone; - } - - - /** - * Distributes the given disjunction over this conjunction. - * - * @return A simplified constraint representing the distribution of the - * given disjunction over this conjunction. 
- **/ - public PropositionalConstraint distribute(PropositionalDisjunction d) { - PropositionalConstraint[] array = - (PropositionalConstraint[]) - children.toArray(new PropositionalConstraint[children.size()]); - for (int i = 0; i < array.length; ++i) { - PropositionalDisjunction clone = (PropositionalDisjunction) d.clone(); - clone.add(array[i]); - array[i] = clone; + + /** + * Determines whether this conjunction contains all of the terms that the given conjunction + * contains. + * + * @param c The given conjunction. + * @return true iff this conjunction contains all of the terms that the given + * conjunction contains. + **/ + public boolean containsAll(PropositionalConjunction c) { + return children.containsAll(c.children); } - if (array.length == 1) return array[0].simplify(); - - PropositionalConjunction result = - new PropositionalConjunction(array[0], array[1]); - for (int i = 2; i < array.length; ++i) result.add(array[i]); - return result.simplify(); - } - - - /** - * Determines whether this conjunction contains all of the terms that the - * given conjunction contains. - * - * @param c The given conjunction. - * @return true iff this conjunction contains all of the terms - * that the given conjunction contains. - **/ - public boolean containsAll(PropositionalConjunction c) { - return children.containsAll(c.children); - } - - - /** - * The hash code of a PropositionalConjunction is the sum of - * the hash codes of its children plus one. - * - * @return The hash code for this PropositionalConjunction. - **/ - public int hashCode() { - int result = 1; - for (Iterator I = children.iterator(); I.hasNext(); ) - result += I.next().hashCode(); - return result; - } - - - /** - * Two PropositionalConjunctions are equivalent when they are - * topologically equivalent, respecting the associativity and commutivity - * of conjunction. - * - * @return true iff the argument is an equivalent - * PropositionalConjunction. 
- **/ - public boolean equals(Object o) { - if (!(o instanceof PropositionalConjunction)) return false; - PropositionalConjunction c = (PropositionalConjunction) o; - return children.equals(c.children); - } - - - /** - * Calls the appropriate visit(·) method of the given - * Inference for this Constraint, as per the - * visitor pattern. - * - * @param infer The inference visiting this constraint. - **/ - public void runVisit(Inference infer) { infer.visit(this); } - - - /** - * Creates a string respresentation of this constraint using the string - * representations of the objects involved. - * - * @param buffer The output of this method will be appended to this buffer. - **/ - public void write(StringBuffer buffer) { - buffer.append("("); - - PropositionalConstraint[] children = - (PropositionalConstraint[]) getChildren(); - children[0].write(buffer); - for (int i = 1; i < children.length; ++i) { - buffer.append(" /\\ "); - children[i].write(buffer); + + /** + * The hash code of a PropositionalConjunction is the sum of the hash codes of its + * children plus one. + * + * @return The hash code for this PropositionalConjunction. + **/ + public int hashCode() { + int result = 1; + for (Iterator I = children.iterator(); I.hasNext();) + result += I.next().hashCode(); + return result; } - buffer.append(")"); - } -} + /** + * Two PropositionalConjunctions are equivalent when they are topologically + * equivalent, respecting the associativity and commutivity of conjunction. + * + * @return true iff the argument is an equivalent + * PropositionalConjunction. + **/ + public boolean equals(Object o) { + if (!(o instanceof PropositionalConjunction)) + return false; + PropositionalConjunction c = (PropositionalConjunction) o; + return children.equals(c.children); + } + + + /** + * Calls the appropriate visit(·) method of the given Inference + * for this Constraint, as per the visitor pattern. + * + * @param infer The inference visiting this constraint. 
+ **/ + public void runVisit(Inference infer) { + infer.visit(this); + } + + + /** + * Creates a string respresentation of this constraint using the string representations of the + * objects involved. + * + * @param buffer The output of this method will be appended to this buffer. + **/ + public void write(StringBuffer buffer) { + buffer.append("("); + + PropositionalConstraint[] children = (PropositionalConstraint[]) getChildren(); + children[0].write(buffer); + for (int i = 1; i < children.length; ++i) { + buffer.append(" /\\ "); + children[i].write(buffer); + } + + buffer.append(")"); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/PropositionalConstant.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/PropositionalConstant.java index 5067c47c..a879ef7c 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/PropositionalConstant.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/PropositionalConstant.java @@ -1,273 +1,273 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.infer; /** - * A propositional constant is either true or - * false. - * - * @author Nick Rizzolo + * A propositional constant is either true or false. 
+ * + * @author Nick Rizzolo **/ -public class PropositionalConstant extends PropositionalConstraint -{ - /** true */ - public static final PropositionalConstant True = - new PropositionalConstant(true); - /** false */ - public static final PropositionalConstant False = - new PropositionalConstant(false); - - - /** The constant value. */ - protected boolean constant; - - - /** - * Initializing constructor. - * - * @param v The value of this constant. - **/ - public PropositionalConstant(boolean v) { constant = v; } - - - /** - * Replaces all unquantified variables with the unique copy stored as a - * value of the given map; also instantiates all quantified variables and - * stores them in the given map. - * - * @param m The map in which to find unique copies of the variables. - **/ - public void consolidateVariables(java.util.AbstractMap m) { } - - - /** Determines whether the constraint is satisfied. */ - public boolean evaluate() { return constant; } - - - /** - * Produces a new, logically simplified version of this constraint, - * preserving variable consolidation. - * - * @see Constraint#consolidateVariables(java.util.AbstractMap) - * @return A logically simplified version of this constraint. - **/ - public PropositionalConstraint simplify() { - return constant ? True : False; - } - - - /** - * Produces a new propositional constraint equivalent to this constraint - * and that contains no negated constraints other than variables. - * - * @return A constraint representing the negation of this constraint. - **/ - public PropositionalConstraint negate() { return constant ? False : True; } - - - /** - * Produces a new, logically simplified version of this constraint in - * conjunctive normal form (CNF). - * - * @return The conjunctive normal form of this constraint. - **/ - public PropositionalConstraint CNF() { return simplify(); } - - - /** - * Produces a new, logically simplified version of this constraint in - * disjunctive normal form (DNF). 
- * - * @return The disjunctive normal form of this constraint. - **/ - public PropositionalConstraint DNF() { return simplify(); } - - - /** - * Returns the children of this constraint in an array. - * - * @return The children of this constraint in an array. - **/ - public Constraint[] getChildren() { return new PropositionalConstraint[0]; } - - - /** - * Compares topology to determine if this constraint is more general than - * the given constraint; note: this method is not required to be correct - * when it answers false. - * - * @param c The given constraint. - * @return true if a topological analysis determined that this - * constraint is more general than the given constraint. - **/ - public boolean moreGeneralThan(PropositionalConstraint c) { - return c.moreSpecificThan(this); - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given implication; note: this method is not required to be - * correct when it answers false. - * - * @param c The given implication. - * @return true if a topological analysis determined that this - * constraint is more specific than the given implication. - **/ - public boolean moreSpecificThan(PropositionalImplication c) { - return !constant; - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given double implication; note: this method is not required to be - * correct when it answers false. - * - * @param c The given double implication. - * @return true if a topological analysis determined that this - * constraint is more specific than the given double implication. - **/ - public boolean moreSpecificThan(PropositionalDoubleImplication c) { - return !constant; - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given conjunction; note: this method is not required to be - * correct when it answers false. - * - * @param c The given conjunction. 
- * @return true if a topological analysis determined that this - * constraint is more specific than the given conjunction. - **/ - public boolean moreSpecificThan(PropositionalConjunction c) { - return !constant; - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given disjunction; note: this method is not required to be - * correct when it answers false. - * - * @param c The given disjunction. - * @return true if a topological analysis determined that this - * constraint is more specific than the given disjunction. - **/ - public boolean moreSpecificThan(PropositionalDisjunction c) { - return !constant; - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given at-least; note: this method is not required to be correct - * when it answers false. - * - * @param c The given at-least. - * @return true if a topological analysis determined that this - * constraint is more specific than the given disjunction. - **/ - public boolean moreSpecificThan(PropositionalAtLeast c) { - return !constant; - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given negation; note: this method is not required to be correct - * when it answers false. - * - * @param c The given negation. - * @return true if a topological analysis determined that this - * constraint is more specific than the given negation. - **/ - public boolean moreSpecificThan(PropositionalNegation c) { - return !constant; - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given variable; note: this method is not required to be correct - * when it answers false. - * - * @param c The given variable. - * @return true if a topological analysis determined that this - * constraint is more specific than the given variable. 
- **/ - public boolean moreSpecificThan(PropositionalVariable c) { - return !constant; - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given constant; note: this method is not required to be correct - * when it answers false. - * - * @param c The given constant. - * @return true if a topological analysis determined that this - * constraint is more specific than the given constant. - **/ - public boolean moreSpecificThan(PropositionalConstant c) { - return c.evaluate() && !constant; - } - - - /** - * The hash code of a PropositionalConstant is the hash code - * of the Boolean object formed from the constant. - * - * @return The hash code for this PropositionalConstant. - **/ - public int hashCode() { return new Boolean(constant).hashCode(); } - - - /** - * Two PropositionalConstants are equivalent when their - * constants are equal. - * - * @return true iff the argument is a - * PropositionalConstant set to the same value as this - * constant. - **/ - public boolean equals(Object o) { - if (!(o instanceof PropositionalConstant)) return false; - PropositionalConstant c = (PropositionalConstant) o; - return constant == c.constant; - } - - - /** - * Calls the appropriate visit(·) method of the given - * Inference for this Constraint, as per the - * visitor pattern. - * - * @param infer The inference visiting this constraint. - **/ - public void runVisit(Inference infer) { infer.visit(this); } - - - /** - * Creates a string respresentation of this constraint using the string - * representations of the objects involved. - * - * @param buffer The output of this method will be appended to this buffer. 
- **/ - public void write(StringBuffer buffer) { - buffer.append(constant); - } +public class PropositionalConstant extends PropositionalConstraint { + /** true */ + public static final PropositionalConstant True = new PropositionalConstant(true); + /** false */ + public static final PropositionalConstant False = new PropositionalConstant(false); + + + /** The constant value. */ + protected boolean constant; + + + /** + * Initializing constructor. + * + * @param v The value of this constant. + **/ + public PropositionalConstant(boolean v) { + constant = v; + } + + + /** + * Replaces all unquantified variables with the unique copy stored as a value of the given map; + * also instantiates all quantified variables and stores them in the given map. + * + * @param m The map in which to find unique copies of the variables. + **/ + public void consolidateVariables(java.util.AbstractMap m) {} + + + /** Determines whether the constraint is satisfied. */ + public boolean evaluate() { + return constant; + } + + + /** + * Produces a new, logically simplified version of this constraint, preserving variable + * consolidation. + * + * @see Constraint#consolidateVariables(java.util.AbstractMap) + * @return A logically simplified version of this constraint. + **/ + public PropositionalConstraint simplify() { + return constant ? True : False; + } + + + /** + * Produces a new propositional constraint equivalent to this constraint and that contains no + * negated constraints other than variables. + * + * @return A constraint representing the negation of this constraint. + **/ + public PropositionalConstraint negate() { + return constant ? False : True; + } + + + /** + * Produces a new, logically simplified version of this constraint in conjunctive normal form + * (CNF). + * + * @return The conjunctive normal form of this constraint. 
+ **/ + public PropositionalConstraint CNF() { + return simplify(); + } + + + /** + * Produces a new, logically simplified version of this constraint in disjunctive normal form + * (DNF). + * + * @return The disjunctive normal form of this constraint. + **/ + public PropositionalConstraint DNF() { + return simplify(); + } + + + /** + * Returns the children of this constraint in an array. + * + * @return The children of this constraint in an array. + **/ + public Constraint[] getChildren() { + return new PropositionalConstraint[0]; + } + + + /** + * Compares topology to determine if this constraint is more general than the given constraint; + * note: this method is not required to be correct when it answers false. + * + * @param c The given constraint. + * @return true if a topological analysis determined that this constraint is more + * general than the given constraint. + **/ + public boolean moreGeneralThan(PropositionalConstraint c) { + return c.moreSpecificThan(this); + } + + + /** + * Compares topology to determine if this constraint is more specific than the given + * implication; note: this method is not required to be correct when it answers + * false. + * + * @param c The given implication. + * @return true if a topological analysis determined that this constraint is more + * specific than the given implication. + **/ + public boolean moreSpecificThan(PropositionalImplication c) { + return !constant; + } + + + /** + * Compares topology to determine if this constraint is more specific than the given double + * implication; note: this method is not required to be correct when it answers + * false. + * + * @param c The given double implication. + * @return true if a topological analysis determined that this constraint is more + * specific than the given double implication. 
+ **/ + public boolean moreSpecificThan(PropositionalDoubleImplication c) { + return !constant; + } + + + /** + * Compares topology to determine if this constraint is more specific than the given + * conjunction; note: this method is not required to be correct when it answers + * false. + * + * @param c The given conjunction. + * @return true if a topological analysis determined that this constraint is more + * specific than the given conjunction. + **/ + public boolean moreSpecificThan(PropositionalConjunction c) { + return !constant; + } + + + /** + * Compares topology to determine if this constraint is more specific than the given + * disjunction; note: this method is not required to be correct when it answers + * false. + * + * @param c The given disjunction. + * @return true if a topological analysis determined that this constraint is more + * specific than the given disjunction. + **/ + public boolean moreSpecificThan(PropositionalDisjunction c) { + return !constant; + } + + + /** + * Compares topology to determine if this constraint is more specific than the given at-least; + * note: this method is not required to be correct when it answers false. + * + * @param c The given at-least. + * @return true if a topological analysis determined that this constraint is more + * specific than the given disjunction. + **/ + public boolean moreSpecificThan(PropositionalAtLeast c) { + return !constant; + } + + + /** + * Compares topology to determine if this constraint is more specific than the given negation; + * note: this method is not required to be correct when it answers false. + * + * @param c The given negation. + * @return true if a topological analysis determined that this constraint is more + * specific than the given negation. 
+ **/ + public boolean moreSpecificThan(PropositionalNegation c) { + return !constant; + } + + + /** + * Compares topology to determine if this constraint is more specific than the given variable; + * note: this method is not required to be correct when it answers false. + * + * @param c The given variable. + * @return true if a topological analysis determined that this constraint is more + * specific than the given variable. + **/ + public boolean moreSpecificThan(PropositionalVariable c) { + return !constant; + } + + + /** + * Compares topology to determine if this constraint is more specific than the given constant; + * note: this method is not required to be correct when it answers false. + * + * @param c The given constant. + * @return true if a topological analysis determined that this constraint is more + * specific than the given constant. + **/ + public boolean moreSpecificThan(PropositionalConstant c) { + return c.evaluate() && !constant; + } + + + /** + * The hash code of a PropositionalConstant is the hash code of the + * Boolean object formed from the constant. + * + * @return The hash code for this PropositionalConstant. + **/ + public int hashCode() { + return new Boolean(constant).hashCode(); + } + + + /** + * Two PropositionalConstants are equivalent when their constants are equal. + * + * @return true iff the argument is a PropositionalConstant set to the + * same value as this constant. + **/ + public boolean equals(Object o) { + if (!(o instanceof PropositionalConstant)) + return false; + PropositionalConstant c = (PropositionalConstant) o; + return constant == c.constant; + } + + + /** + * Calls the appropriate visit(·) method of the given Inference + * for this Constraint, as per the visitor pattern. + * + * @param infer The inference visiting this constraint. 
+ **/ + public void runVisit(Inference infer) { + infer.visit(this); + } + + + /** + * Creates a string respresentation of this constraint using the string representations of the + * objects involved. + * + * @param buffer The output of this method will be appended to this buffer. + **/ + public void write(StringBuffer buffer) { + buffer.append(constant); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/PropositionalConstraint.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/PropositionalConstraint.java index 7f41d0bc..3d2c4980 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/PropositionalConstraint.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/PropositionalConstraint.java @@ -1,214 +1,204 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.infer; /** - * All classes for representing propositional constraints are derived from - * this base class. A propositional constraint is: - * - *

    - *
  • The constant true or the constant false. - *
  • A variable name, which is an assertion of that variable's truth. - *
  • The negation of a propositional constraint. - *
  • The conjunction of two propositional constraints. - *
  • The disjunction of two propositional constraints. - *
  • An implication between two propositional constraints. - *
  • A double implication between two propositional constraints. - *
+ * All classes for representing propositional constraints are derived from this base class. A + * propositional constraint is: + * + *
    + *
  • The constant true or the constant false. + *
  • A variable name, which is an assertion of that variable's truth. + *
  • The negation of a propositional constraint. + *
  • The conjunction of two propositional constraints. + *
  • The disjunction of two propositional constraints. + *
  • An implication between two propositional constraints. + *
  • A double implication between two propositional constraints. + *
**/ -public abstract class PropositionalConstraint extends Constraint - implements Cloneable -{ - /** - * Produces a new, logically simplified version of this constraint, - * preserving variable consolidation. - * - * @see Constraint#consolidateVariables(java.util.AbstractMap) - * @return A logically simplified version of this constraint. - **/ - abstract public PropositionalConstraint simplify(); - - - /** - * Produces a new propositional constraint equivalent to this constraint - * and that contains no negated constraints other than variables. - * - * @return A constraint representing the negation of this constraint. - **/ - abstract public PropositionalConstraint negate(); - - - /** - * Produces a new, logically simplified version of this constraint in - * conjunctive normal form (CNF). - * - * @return The conjunctive normal form of this constraint. - **/ - abstract public PropositionalConstraint CNF(); - - - /** - * Produces a new, logically simplified version of this constraint in - * disjunctive normal form (DNF). - * - * @return The disjunctive normal form of this constraint. - **/ - abstract public PropositionalConstraint DNF(); - - - /** - * Compares topology to determine if this constraint is more general than - * the given constraint; note: this method is not required to be correct - * when it answers false. - * - * @param c The given constraint. - * @return true if a topological analysis determined that this - * constraint is more general than the given constraint. - **/ - abstract public boolean moreGeneralThan(PropositionalConstraint c); - - - /** - * Compares topology to determine if this constraint is more specific than - * the given implication; note: this method is not required to be - * correct when it answers false. - * - * @param c The given implication. - * @return true if a topological analysis determined that this - * constraint is more specific than the given implication. 
- **/ - abstract public boolean moreSpecificThan(PropositionalImplication c); - - - /** - * Compares topology to determine if this constraint is more specific than - * the given double implication; note: this method is not required to be - * correct when it answers false. - * - * @param c The given double implication. - * @return true if a topological analysis determined that this - * constraint is more specific than the given double implication. - **/ - abstract public boolean moreSpecificThan(PropositionalDoubleImplication c); - - - /** - * Compares topology to determine if this constraint is more specific than - * the given conjunction; note: this method is not required to be - * correct when it answers false. - * - * @param c The given conjunction. - * @return true if a topological analysis determined that this - * constraint is more specific than the given conjunction. - **/ - abstract public boolean moreSpecificThan(PropositionalConjunction c); - - - /** - * Compares topology to determine if this constraint is more specific than - * the given disjunction; note: this method is not required to be - * correct when it answers false. - * - * @param c The given disjunction. - * @return true if a topological analysis determined that this - * constraint is more specific than the given disjunction. - **/ - abstract public boolean moreSpecificThan(PropositionalDisjunction c); - - - /** - * Compares topology to determine if this constraint is more specific than - * the given at-least; note: this method is not required to be correct - * when it answers false. - * - * @param c The given at-least. - * @return true if a topological analysis determined that this - * constraint is more specific than the given disjunction. - **/ - abstract public boolean moreSpecificThan(PropositionalAtLeast c); - - - /** - * Compares topology to determine if this constraint is more specific than - * the given negation; note: this method is not required to be correct - * when it answers false. 
- * - * @param c The given negation. - * @return true if a topological analysis determined that this - * constraint is more specific than the given negation. - **/ - abstract public boolean moreSpecificThan(PropositionalNegation c); - - - /** - * Compares topology to determine if this constraint is more specific than - * the given variable; note: this method is not required to be correct - * when it answers false. - * - * @param c The given variable. - * @return true if a topological analysis determined that this - * constraint is more specific than the given variable. - **/ - abstract public boolean moreSpecificThan(PropositionalVariable c); - - - /** - * Compares topology to determine if this constraint is more specific than - * the given constant; note: this method is not required to be correct - * when it answers false. - * - * @param c The given constant. - * @return true if a topological analysis determined that this - * constraint is more specific than the given constant. - **/ - abstract public boolean moreSpecificThan(PropositionalConstant c); - - - /** - * Creates a string respresentation of this constraint using the string - * representations of the objects involved. This method employs the - * write(StringBuffer) method to compute its output. - **/ - public String toString() { - StringBuffer result = new StringBuffer(); - write(result); - return result.toString(); - } - - - /** - * Creates a string respresentation of this constraint using the string - * representations of the objects involved. - * - * @param buffer The output of this method will be appended to this buffer. - **/ - abstract public void write(StringBuffer buffer); - - - /** - * This method returns a shallow clone. - * - * @return A shallow clone. 
- **/ - public Object clone() { - Object clone = null; - try { clone = super.clone(); } - catch (Exception e) { - System.err.println("Error cloning " + getClass().getName() + ":"); - e.printStackTrace(); - System.exit(1); +public abstract class PropositionalConstraint extends Constraint implements Cloneable { + /** + * Produces a new, logically simplified version of this constraint, preserving variable + * consolidation. + * + * @see Constraint#consolidateVariables(java.util.AbstractMap) + * @return A logically simplified version of this constraint. + **/ + abstract public PropositionalConstraint simplify(); + + + /** + * Produces a new propositional constraint equivalent to this constraint and that contains no + * negated constraints other than variables. + * + * @return A constraint representing the negation of this constraint. + **/ + abstract public PropositionalConstraint negate(); + + + /** + * Produces a new, logically simplified version of this constraint in conjunctive normal form + * (CNF). + * + * @return The conjunctive normal form of this constraint. + **/ + abstract public PropositionalConstraint CNF(); + + + /** + * Produces a new, logically simplified version of this constraint in disjunctive normal form + * (DNF). + * + * @return The disjunctive normal form of this constraint. + **/ + abstract public PropositionalConstraint DNF(); + + + /** + * Compares topology to determine if this constraint is more general than the given constraint; + * note: this method is not required to be correct when it answers false. + * + * @param c The given constraint. + * @return true if a topological analysis determined that this constraint is more + * general than the given constraint. + **/ + abstract public boolean moreGeneralThan(PropositionalConstraint c); + + + /** + * Compares topology to determine if this constraint is more specific than the given + * implication; note: this method is not required to be correct when it answers + * false. 
+ * + * @param c The given implication. + * @return true if a topological analysis determined that this constraint is more + * specific than the given implication. + **/ + abstract public boolean moreSpecificThan(PropositionalImplication c); + + + /** + * Compares topology to determine if this constraint is more specific than the given double + * implication; note: this method is not required to be correct when it answers + * false. + * + * @param c The given double implication. + * @return true if a topological analysis determined that this constraint is more + * specific than the given double implication. + **/ + abstract public boolean moreSpecificThan(PropositionalDoubleImplication c); + + + /** + * Compares topology to determine if this constraint is more specific than the given + * conjunction; note: this method is not required to be correct when it answers + * false. + * + * @param c The given conjunction. + * @return true if a topological analysis determined that this constraint is more + * specific than the given conjunction. + **/ + abstract public boolean moreSpecificThan(PropositionalConjunction c); + + + /** + * Compares topology to determine if this constraint is more specific than the given + * disjunction; note: this method is not required to be correct when it answers + * false. + * + * @param c The given disjunction. + * @return true if a topological analysis determined that this constraint is more + * specific than the given disjunction. + **/ + abstract public boolean moreSpecificThan(PropositionalDisjunction c); + + + /** + * Compares topology to determine if this constraint is more specific than the given at-least; + * note: this method is not required to be correct when it answers false. + * + * @param c The given at-least. + * @return true if a topological analysis determined that this constraint is more + * specific than the given disjunction. 
+ **/ + abstract public boolean moreSpecificThan(PropositionalAtLeast c); + + + /** + * Compares topology to determine if this constraint is more specific than the given negation; + * note: this method is not required to be correct when it answers false. + * + * @param c The given negation. + * @return true if a topological analysis determined that this constraint is more + * specific than the given negation. + **/ + abstract public boolean moreSpecificThan(PropositionalNegation c); + + + /** + * Compares topology to determine if this constraint is more specific than the given variable; + * note: this method is not required to be correct when it answers false. + * + * @param c The given variable. + * @return true if a topological analysis determined that this constraint is more + * specific than the given variable. + **/ + abstract public boolean moreSpecificThan(PropositionalVariable c); + + + /** + * Compares topology to determine if this constraint is more specific than the given constant; + * note: this method is not required to be correct when it answers false. + * + * @param c The given constant. + * @return true if a topological analysis determined that this constraint is more + * specific than the given constant. + **/ + abstract public boolean moreSpecificThan(PropositionalConstant c); + + + /** + * Creates a string respresentation of this constraint using the string representations of the + * objects involved. This method employs the write(StringBuffer) method to compute + * its output. + **/ + public String toString() { + StringBuffer result = new StringBuffer(); + write(result); + return result.toString(); } - return clone; - } -} + /** + * Creates a string respresentation of this constraint using the string representations of the + * objects involved. + * + * @param buffer The output of this method will be appended to this buffer. + **/ + abstract public void write(StringBuffer buffer); + + + /** + * This method returns a shallow clone. 
+ * + * @return A shallow clone. + **/ + public Object clone() { + Object clone = null; + try { + clone = super.clone(); + } catch (Exception e) { + System.err.println("Error cloning " + getClass().getName() + ":"); + e.printStackTrace(); + System.exit(1); + } + + return clone; + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/PropositionalDisjunction.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/PropositionalDisjunction.java index 3c92de21..29204e3b 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/PropositionalDisjunction.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/PropositionalDisjunction.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.infer; @@ -17,570 +14,553 @@ /** - * Represents the disjunction of two propositional constraints. - * - * @author Nick Rizzolo + * Represents the disjunction of two propositional constraints. + * + * @author Nick Rizzolo **/ -public class PropositionalDisjunction extends PropositionalNAryConstraint -{ - /** Default constructor. */ - private PropositionalDisjunction() { } - - /** - * If either of the arguments is itself a - * PropositionalDisjunction, its contents are flattened into - * this PropositionalDisjunction. - * - * @param c1 One constraint to disjunct. - * @param c2 Another constraint to disjunct. 
- **/ - public PropositionalDisjunction(PropositionalConstraint c1, - PropositionalConstraint c2) { - add(c1); - add(c2); - } - - - /** Determines whether the constraint is satisfied. */ - public boolean evaluate() { - for (Iterator I = children.iterator(); I.hasNext(); ) - if (((PropositionalConstraint) I.next()).evaluate()) return true; - return false; - } - - - /** - * Produces a new, logically simplified version of this constraint, - * preserving variable consolidation. - * - * @see Constraint#consolidateVariables(java.util.AbstractMap) - * @return A logically simplified version of this constraint. - **/ - public PropositionalConstraint simplify() { - PropositionalDisjunction result = new PropositionalDisjunction(); - for (Iterator I = children.iterator(); I.hasNext(); ) - result.add(((PropositionalConstraint) I.next()).simplify()); - - if (result.children.contains(PropositionalConstant.True)) - return PropositionalConstant.True; - - result.children.remove(PropositionalConstant.False); - - if (result.children.size() == 1) - return (PropositionalConstraint) result.children.iterator().next(); - - /* - HashSet positive = new HashSet(); - HashSet negative = new HashSet(); - for (Iterator I = result.children.iterator(); I.hasNext(); ) { - Object next = I.next(); - if (next instanceof PropositionalNegation) - negative.add(((PropositionalConstraint) next).getChildren()[0]); - else positive.add(next); +public class PropositionalDisjunction extends PropositionalNAryConstraint { + /** Default constructor. */ + private PropositionalDisjunction() {} + + /** + * If either of the arguments is itself a PropositionalDisjunction, its contents + * are flattened into this PropositionalDisjunction. + * + * @param c1 One constraint to disjunct. + * @param c2 Another constraint to disjunct. 
+ **/ + public PropositionalDisjunction(PropositionalConstraint c1, PropositionalConstraint c2) { + add(c1); + add(c2); } - for (Iterator I = positive.iterator(); I.hasNext(); ) - if (negative.contains(I.next())) return PropositionalConstant.True; - - PropositionalConstraint[] terms = - (PropositionalConstraint[]) getChildren(); - HashSet toRemove = new HashSet(); - for (int i = 0; i < terms.length - 1; ++i) - for (int j = i + 1; j < terms.length; ++j) { - if (terms[i].moreGeneralThan(terms[j])) toRemove.add(new Integer(j)); - if (terms[j].moreGeneralThan(terms[i])) toRemove.add(new Integer(i)); - } - - for (Iterator I = toRemove.iterator(); I.hasNext(); ) - result.children.remove(terms[((Integer) I.next()).intValue()]); - */ - - if (result.children.size() == 0) return PropositionalConstant.False; - - return result; - } - - - /** - * Uses DeMorgan's law to compute the negation of this constraint by - * distributing that negation to each child. - * - * @return A simplified constraint representing the negation of this - * constraint. - **/ - public PropositionalConstraint negate() { - if (children.size() == 1) - return ((PropositionalConstraint) children.iterator().next()).negate(); - - PropositionalConstraint[] array = - (PropositionalConstraint[]) - children.toArray(new PropositionalConstraint[children.size()]); - for (int i = 0; i < array.length; ++i) array[i] = array[i].negate(); - - PropositionalConjunction result = - new PropositionalConjunction(array[0], array[1]); - for (int i = 2; i < array.length; ++i) result.add(array[i]); - return result; - } - - - /** - * Produces a new, logically simplified version of this constraint in - * conjunctive normal form (CNF). - * - * @return The conjunctive normal form of this constraint. 
- **/ - public PropositionalConstraint CNF() { - PropositionalConstraint c = factor(); - if (!(c instanceof PropositionalDisjunction)) return c.CNF(); - - PropositionalDisjunction simplified = (PropositionalDisjunction) c; - - PropositionalDisjunction childrenCNF = new PropositionalDisjunction(); - for (Iterator I = simplified.children.iterator(); I.hasNext(); ) - childrenCNF.add(((PropositionalConstraint) I.next()).CNF()); - if (childrenCNF.children.size() == 1) - return (PropositionalConstraint) childrenCNF.getChildren()[0]; - - PropositionalConstraint[][] children = - new PropositionalConstraint[childrenCNF.children.size()][]; - int i = 0; - boolean foundConjunction = false; - for (Iterator I = childrenCNF.children.iterator(); I.hasNext(); ++i) { - PropositionalConstraint parent = (PropositionalConstraint) I.next(); - if (parent instanceof PropositionalConjunction) { - children[i] = (PropositionalConstraint[]) parent.getChildren(); - foundConjunction = true; - } - else { - children[i] = new PropositionalConstraint[1]; - children[i][0] = parent; - } + + /** Determines whether the constraint is satisfied. 
*/ + public boolean evaluate() { + for (Iterator I = children.iterator(); I.hasNext();) + if (((PropositionalConstraint) I.next()).evaluate()) + return true; + return false; } - if (!foundConjunction) return childrenCNF; - - int[] indexes = new int[children.length]; - PropositionalConstraint result = - new PropositionalDisjunction(children[0][0], children[1][0]); - for (i = 2; i < children.length; ++i) - result = new PropositionalDisjunction(result, children[i][0]); - - while (increment(children, indexes)) { - PropositionalConstraint combination = - new PropositionalDisjunction(children[0][indexes[0]], - children[1][indexes[1]]); - for (i = 2; i < children.length; ++i) - combination = - new PropositionalDisjunction(combination, children[i][indexes[i]]); - result = new PropositionalConjunction(result, combination); + + /** + * Produces a new, logically simplified version of this constraint, preserving variable + * consolidation. + * + * @see Constraint#consolidateVariables(java.util.AbstractMap) + * @return A logically simplified version of this constraint. 
+ **/ + public PropositionalConstraint simplify() { + PropositionalDisjunction result = new PropositionalDisjunction(); + for (Iterator I = children.iterator(); I.hasNext();) + result.add(((PropositionalConstraint) I.next()).simplify()); + + if (result.children.contains(PropositionalConstant.True)) + return PropositionalConstant.True; + + result.children.remove(PropositionalConstant.False); + + if (result.children.size() == 1) + return (PropositionalConstraint) result.children.iterator().next(); + + /* + * HashSet positive = new HashSet(); HashSet negative = new HashSet(); for (Iterator I = + * result.children.iterator(); I.hasNext(); ) { Object next = I.next(); if (next instanceof + * PropositionalNegation) negative.add(((PropositionalConstraint) next).getChildren()[0]); + * else positive.add(next); } + * + * for (Iterator I = positive.iterator(); I.hasNext(); ) if (negative.contains(I.next())) + * return PropositionalConstant.True; + * + * PropositionalConstraint[] terms = (PropositionalConstraint[]) getChildren(); HashSet + * toRemove = new HashSet(); for (int i = 0; i < terms.length - 1; ++i) for (int j = i + 1; + * j < terms.length; ++j) { if (terms[i].moreGeneralThan(terms[j])) toRemove.add(new + * Integer(j)); if (terms[j].moreGeneralThan(terms[i])) toRemove.add(new Integer(i)); } + * + * for (Iterator I = toRemove.iterator(); I.hasNext(); ) + * result.children.remove(terms[((Integer) I.next()).intValue()]); + */ + + if (result.children.size() == 0) + return PropositionalConstant.False; + + return result; } - return result; - } - - - /** - * Utility method for iterating through all combinations of constraint - * children. - * - * @param c Each element of this array is an array of children, exactly - * one child of which appears in each combination. - * @param I The indexes of the children in the current combination. - * @return true iff I contains valid indexes for - * a new combination; false iff if there are no more - * combinations. 
- **/ - public static boolean increment(PropositionalConstraint[][] c, int[] I) { - int i = 0; - while (i < c.length && ++I[i] == c[i].length) ++i; - if (i == c.length) return false; - for (--i; i >= 0; --i) I[i] = 0; - return true; - } - - - /** - * Produces a new, logically simplified version of this constraint in - * disjunctive normal form (DNF). - * - * @return The disjunctive normal form of this constraint. - **/ - public PropositionalConstraint DNF() { - PropositionalDisjunction result = new PropositionalDisjunction(); - for (Iterator I = children.iterator(); I.hasNext(); ) - result.add(((PropositionalConstraint) I.next()).DNF()); - return result.simplify(); - } - - - /** - * Compares topology to determine if this constraint is more general than - * the given constraint; note: this method is not required to be correct - * when it answers false. - * - * @param c The given constraint. - * @return true if a topological analysis determined that this - * constraint is more general than the given constraint. - **/ - public boolean moreGeneralThan(PropositionalConstraint c) { - return c.moreSpecificThan(this); - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given implication; note: this method is not required to be - * correct when it answers false. - * - * @param c The given implication. - * @return true if a topological analysis determined that this - * constraint is more specific than the given implication. - **/ - public boolean moreSpecificThan(PropositionalImplication c) { - return false; - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given double implication; note: this method is not required to be - * correct when it answers false. - * - * @param c The given double implication. - * @return true if a topological analysis determined that this - * constraint is more specific than the given double implication. 
- **/ - public boolean moreSpecificThan(PropositionalDoubleImplication c) { - return false; - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given conjunction; note: this method is not required to be - * correct when it answers false. - * - * @param c The given conjunction. - * @return true if a topological analysis determined that this - * constraint is more specific than the given conjunction. - **/ - public boolean moreSpecificThan(PropositionalConjunction c) { - return false; - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given disjunction; note: this method is not required to be - * correct when it answers false. - * - * @param c The given disjunction. - * @return true if a topological analysis determined that this - * constraint is more specific than the given disjunction. - **/ - public boolean moreSpecificThan(PropositionalDisjunction c) { - return c.size() > size() && c.containsAll(this); - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given at-least; note: this method is not required to be correct - * when it answers false. - * - * @param c The given at-least. - * @return true if a topological analysis determined that this - * constraint is more specific than the given disjunction. - **/ - public boolean moreSpecificThan(PropositionalAtLeast c) { - return false; - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given negation; note: this method is not required to be correct - * when it answers false. - * - * @param c The given negation. - * @return true if a topological analysis determined that this - * constraint is more specific than the given negation. 
- **/ - public boolean moreSpecificThan(PropositionalNegation c) { - return false; - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given variable; note: this method is not required to be correct - * when it answers false. - * - * @param c The given variable. - * @return true if a topological analysis determined that this - * constraint is more specific than the given variable. - **/ - public boolean moreSpecificThan(PropositionalVariable c) { - return false; - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given constant; note: this method is not required to be correct - * when it answers false. - * - * @param c The given constant. - * @return true if a topological analysis determined that this - * constraint is more specific than the given constant. - **/ - public boolean moreSpecificThan(PropositionalConstant c) { - return c.evaluate(); - } - - - /** - * If the given constraint has the same type as this constraint, its terms - * are merged into this constraint; otherwise, it is added as a new term. - * - * @param c The constraint to add. - **/ - public void add(PropositionalConstraint c) { - if (c instanceof PropositionalDisjunction) { - PropositionalConstraint[] terms = - (PropositionalConstraint[]) c.getChildren(); - for (int i = 0; i < terms.length; ++i) add(terms[i]); + + /** + * Uses DeMorgan's law to compute the negation of this constraint by distributing that negation + * to each child. + * + * @return A simplified constraint representing the negation of this constraint. 
+ **/ + public PropositionalConstraint negate() { + if (children.size() == 1) + return ((PropositionalConstraint) children.iterator().next()).negate(); + + PropositionalConstraint[] array = + (PropositionalConstraint[]) children.toArray(new PropositionalConstraint[children + .size()]); + for (int i = 0; i < array.length; ++i) + array[i] = array[i].negate(); + + PropositionalConjunction result = new PropositionalConjunction(array[0], array[1]); + for (int i = 2; i < array.length; ++i) + result.add(array[i]); + return result; } - else children.add(c); - } - - - /** - * Factoring a disjunction is the opposite of distributing a conjunction - * over a disjunction. - * - * @return A constraint that represents a factoring of this disjunction. - **/ - public PropositionalConstraint factor() { - PropositionalConstraint c = simplify(); - if (!(c instanceof PropositionalDisjunction)) return c; - PropositionalDisjunction simplified = (PropositionalDisjunction) c; - - PropositionalConstraint[] best = new PropositionalConstraint[0]; - while (best != null) { - int bestConjunction = -1; - int bestOther = -1; - best = null; - - PropositionalConstraint[] children = - (PropositionalConstraint[]) simplified.getChildren(); - Arrays.sort(children, - new Comparator() { - public int compare(Object o1, Object o2) { - if (o1 instanceof PropositionalConjunction) { - if (o2 instanceof PropositionalConjunction) return 0; - return -1; - } - - if (o2 instanceof PropositionalConjunction) return 1; - return 0; + + + /** + * Produces a new, logically simplified version of this constraint in conjunctive normal form + * (CNF). + * + * @return The conjunctive normal form of this constraint. 
+ **/ + public PropositionalConstraint CNF() { + PropositionalConstraint c = factor(); + if (!(c instanceof PropositionalDisjunction)) + return c.CNF(); + + PropositionalDisjunction simplified = (PropositionalDisjunction) c; + + PropositionalDisjunction childrenCNF = new PropositionalDisjunction(); + for (Iterator I = simplified.children.iterator(); I.hasNext();) + childrenCNF.add(((PropositionalConstraint) I.next()).CNF()); + if (childrenCNF.children.size() == 1) + return (PropositionalConstraint) childrenCNF.getChildren()[0]; + + PropositionalConstraint[][] children = + new PropositionalConstraint[childrenCNF.children.size()][]; + int i = 0; + boolean foundConjunction = false; + for (Iterator I = childrenCNF.children.iterator(); I.hasNext(); ++i) { + PropositionalConstraint parent = (PropositionalConstraint) I.next(); + if (parent instanceof PropositionalConjunction) { + children[i] = (PropositionalConstraint[]) parent.getChildren(); + foundConjunction = true; + } else { + children[i] = new PropositionalConstraint[1]; + children[i][0] = parent; } - }); - - for (int i = 0; - i < children.length - 1 - && children[i] instanceof PropositionalConjunction; - ++i) - for (int j = i + 1; j < children.length; ++j) { - PropositionalConstraint[] current = - ((PropositionalConjunction) children[i]).intersect(children[j]); - if (current != null - && (best == null || current.length > best.length)) { - best = current; - bestConjunction = i; - bestOther = j; - } } - if (best != null) { - PropositionalConstraint toAdd = null; - if (best.length == 1) toAdd = best[0]; - else { - toAdd = new PropositionalConjunction(best[0], best[1]); - for (int i = 2; i < best.length; ++i) - toAdd = new PropositionalConjunction(toAdd, best[i]); + if (!foundConjunction) + return childrenCNF; + + int[] indexes = new int[children.length]; + PropositionalConstraint result = + new PropositionalDisjunction(children[0][0], children[1][0]); + for (i = 2; i < children.length; ++i) + result = new 
PropositionalDisjunction(result, children[i][0]); + + while (increment(children, indexes)) { + PropositionalConstraint combination = + new PropositionalDisjunction(children[0][indexes[0]], children[1][indexes[1]]); + for (i = 2; i < children.length; ++i) + combination = new PropositionalDisjunction(combination, children[i][indexes[i]]); + result = new PropositionalConjunction(result, combination); } - if (children[bestOther] instanceof PropositionalConjunction) { - PropositionalConstraint conjunct1 = - ((PropositionalConjunction) children[bestConjunction]) - .subtract(best); - PropositionalConstraint conjunct2 = - ((PropositionalConjunction) children[bestOther]).subtract(best); - - toAdd = - new PropositionalConjunction( - toAdd, - new PropositionalDisjunction(conjunct1, conjunct2)) - .simplify(); + return result; + } + + + /** + * Utility method for iterating through all combinations of constraint children. + * + * @param c Each element of this array is an array of children, exactly one child of which + * appears in each combination. + * @param I The indexes of the children in the current combination. + * @return true iff I contains valid indexes for a new combination; + * false iff if there are no more combinations. + **/ + public static boolean increment(PropositionalConstraint[][] c, int[] I) { + int i = 0; + while (i < c.length && ++I[i] == c[i].length) + ++i; + if (i == c.length) + return false; + for (--i; i >= 0; --i) + I[i] = 0; + return true; + } + + + /** + * Produces a new, logically simplified version of this constraint in disjunctive normal form + * (DNF). + * + * @return The disjunctive normal form of this constraint. 
+ **/ + public PropositionalConstraint DNF() { + PropositionalDisjunction result = new PropositionalDisjunction(); + for (Iterator I = children.iterator(); I.hasNext();) + result.add(((PropositionalConstraint) I.next()).DNF()); + return result.simplify(); + } + + + /** + * Compares topology to determine if this constraint is more general than the given constraint; + * note: this method is not required to be correct when it answers false. + * + * @param c The given constraint. + * @return true if a topological analysis determined that this constraint is more + * general than the given constraint. + **/ + public boolean moreGeneralThan(PropositionalConstraint c) { + return c.moreSpecificThan(this); + } + + + /** + * Compares topology to determine if this constraint is more specific than the given + * implication; note: this method is not required to be correct when it answers + * false. + * + * @param c The given implication. + * @return true if a topological analysis determined that this constraint is more + * specific than the given implication. + **/ + public boolean moreSpecificThan(PropositionalImplication c) { + return false; + } + + + /** + * Compares topology to determine if this constraint is more specific than the given double + * implication; note: this method is not required to be correct when it answers + * false. + * + * @param c The given double implication. + * @return true if a topological analysis determined that this constraint is more + * specific than the given double implication. + **/ + public boolean moreSpecificThan(PropositionalDoubleImplication c) { + return false; + } + + + /** + * Compares topology to determine if this constraint is more specific than the given + * conjunction; note: this method is not required to be correct when it answers + * false. + * + * @param c The given conjunction. + * @return true if a topological analysis determined that this constraint is more + * specific than the given conjunction. 
+ **/ + public boolean moreSpecificThan(PropositionalConjunction c) { + return false; + } + + + /** + * Compares topology to determine if this constraint is more specific than the given + * disjunction; note: this method is not required to be correct when it answers + * false. + * + * @param c The given disjunction. + * @return true if a topological analysis determined that this constraint is more + * specific than the given disjunction. + **/ + public boolean moreSpecificThan(PropositionalDisjunction c) { + return c.size() > size() && c.containsAll(this); + } + + + /** + * Compares topology to determine if this constraint is more specific than the given at-least; + * note: this method is not required to be correct when it answers false. + * + * @param c The given at-least. + * @return true if a topological analysis determined that this constraint is more + * specific than the given disjunction. + **/ + public boolean moreSpecificThan(PropositionalAtLeast c) { + return false; + } + + + /** + * Compares topology to determine if this constraint is more specific than the given negation; + * note: this method is not required to be correct when it answers false. + * + * @param c The given negation. + * @return true if a topological analysis determined that this constraint is more + * specific than the given negation. + **/ + public boolean moreSpecificThan(PropositionalNegation c) { + return false; + } + + + /** + * Compares topology to determine if this constraint is more specific than the given variable; + * note: this method is not required to be correct when it answers false. + * + * @param c The given variable. + * @return true if a topological analysis determined that this constraint is more + * specific than the given variable. 
+ **/ + public boolean moreSpecificThan(PropositionalVariable c) { + return false; + } + + + /** + * Compares topology to determine if this constraint is more specific than the given constant; + * note: this method is not required to be correct when it answers false. + * + * @param c The given constant. + * @return true if a topological analysis determined that this constraint is more + * specific than the given constant. + **/ + public boolean moreSpecificThan(PropositionalConstant c) { + return c.evaluate(); + } + + + /** + * If the given constraint has the same type as this constraint, its terms are merged into this + * constraint; otherwise, it is added as a new term. + * + * @param c The constraint to add. + **/ + public void add(PropositionalConstraint c) { + if (c instanceof PropositionalDisjunction) { + PropositionalConstraint[] terms = (PropositionalConstraint[]) c.getChildren(); + for (int i = 0; i < terms.length; ++i) + add(terms[i]); + } else + children.add(c); + } + + + /** + * Factoring a disjunction is the opposite of distributing a conjunction over a disjunction. + * + * @return A constraint that represents a factoring of this disjunction. 
+ **/ + public PropositionalConstraint factor() { + PropositionalConstraint c = simplify(); + if (!(c instanceof PropositionalDisjunction)) + return c; + PropositionalDisjunction simplified = (PropositionalDisjunction) c; + + PropositionalConstraint[] best = new PropositionalConstraint[0]; + while (best != null) { + int bestConjunction = -1; + int bestOther = -1; + best = null; + + PropositionalConstraint[] children = + (PropositionalConstraint[]) simplified.getChildren(); + Arrays.sort(children, new Comparator() { + public int compare(Object o1, Object o2) { + if (o1 instanceof PropositionalConjunction) { + if (o2 instanceof PropositionalConjunction) + return 0; + return -1; + } + + if (o2 instanceof PropositionalConjunction) + return 1; + return 0; + } + }); + + for (int i = 0; i < children.length - 1 + && children[i] instanceof PropositionalConjunction; ++i) + for (int j = i + 1; j < children.length; ++j) { + PropositionalConstraint[] current = + ((PropositionalConjunction) children[i]).intersect(children[j]); + if (current != null && (best == null || current.length > best.length)) { + best = current; + bestConjunction = i; + bestOther = j; + } + } + + if (best != null) { + PropositionalConstraint toAdd = null; + if (best.length == 1) + toAdd = best[0]; + else { + toAdd = new PropositionalConjunction(best[0], best[1]); + for (int i = 2; i < best.length; ++i) + toAdd = new PropositionalConjunction(toAdd, best[i]); + } + + if (children[bestOther] instanceof PropositionalConjunction) { + PropositionalConstraint conjunct1 = + ((PropositionalConjunction) children[bestConjunction]).subtract(best); + PropositionalConstraint conjunct2 = + ((PropositionalConjunction) children[bestOther]).subtract(best); + + toAdd = + new PropositionalConjunction(toAdd, new PropositionalDisjunction( + conjunct1, conjunct2)).simplify(); + } + + simplified.children.remove(children[bestConjunction]); + simplified.children.remove(children[bestOther]); + simplified.add(toAdd); + } } - 
simplified.children.remove(children[bestConjunction]); - simplified.children.remove(children[bestOther]); - simplified.add(toAdd); - } + if (simplified.children.size() == 1) + return (PropositionalConstraint) simplified.getChildren()[0]; + return simplified; } - if (simplified.children.size() == 1) - return (PropositionalConstraint) simplified.getChildren()[0]; - return simplified; - } - - - /** - * The intersection of two disjunctions is the set of all terms that are - * common to both disjunctions; the intersection of a disjunction and some - * other constraint c is c if c is contained in the - * disjunction and the empty set otherwise. - * - * @param c The constraint to intersect with. - * @return The set of common terms in array form or null if - * there are none. - **/ - public PropositionalConstraint[] intersect(PropositionalConstraint c) { - if (!(c instanceof PropositionalDisjunction)) { - if (children.contains(c)) return new PropositionalConstraint[]{ c }; - return null; + + /** + * The intersection of two disjunctions is the set of all terms that are common to both + * disjunctions; the intersection of a disjunction and some other constraint c is + * c if c is contained in the disjunction and the empty set otherwise. + * + * @param c The constraint to intersect with. + * @return The set of common terms in array form or null if there are none. 
+ **/ + public PropositionalConstraint[] intersect(PropositionalConstraint c) { + if (!(c instanceof PropositionalDisjunction)) { + if (children.contains(c)) + return new PropositionalConstraint[] {c}; + return null; + } + + PropositionalDisjunction disjunction = (PropositionalDisjunction) c; + LinkedList result = new LinkedList(); + for (Iterator I = children.iterator(); I.hasNext();) { + Object next = I.next(); + if (disjunction.children.contains(next)) + result.add(next); + } + + if (result.size() == 0) + return null; + return (PropositionalConstraint[]) result + .toArray(new PropositionalConstraint[result.size()]); } - PropositionalDisjunction disjunction = (PropositionalDisjunction) c; - LinkedList result = new LinkedList(); - for (Iterator I = children.iterator(); I.hasNext(); ) { - Object next = I.next(); - if (disjunction.children.contains(next)) result.add(next); + + /** + * Subtraction from a disjunction simply removes all of the specified terms from it; this method + * returns a new constraint representing the subtraction. + * + * @param terms The terms to remove. + * @return A new representation of this n-ary constraint with the specified terms removed. + **/ + public PropositionalConstraint subtract(PropositionalConstraint[] terms) { + PropositionalDisjunction clone = (PropositionalDisjunction) clone(); + for (int i = 0; i < terms.length; ++i) + clone.children.remove(terms[i]); + if (clone.children.size() == 0) + return new PropositionalConstant(false); + if (clone.children.size() == 1) + return (PropositionalConstraint) clone.getChildren()[0]; + return clone; } - if (result.size() == 0) return null; - return (PropositionalConstraint[]) - result.toArray(new PropositionalConstraint[result.size()]); - } - - - /** - * Subtraction from a disjunction simply removes all of the specified - * terms from it; this method returns a new constraint representing the - * subtraction. - * - * @param terms The terms to remove. 
- * @return A new representation of this n-ary constraint with the specified - * terms removed. - **/ - public PropositionalConstraint subtract(PropositionalConstraint[] terms) { - PropositionalDisjunction clone = (PropositionalDisjunction) clone(); - for (int i = 0; i < terms.length; ++i) clone.children.remove(terms[i]); - if (clone.children.size() == 0) return new PropositionalConstant(false); - if (clone.children.size() == 1) - return (PropositionalConstraint) clone.getChildren()[0]; - return clone; - } - - - /** - * Distributes the given conjunction over this disjunction. - * - * @return A simplified constraint representing the distribution of the - * given conjunction over this disjunction. - **/ - public PropositionalConstraint distribute(PropositionalConjunction c) { - PropositionalConstraint[] array = - (PropositionalConstraint[]) - children.toArray(new PropositionalConstraint[children.size()]); - for (int i = 0; i < array.length; ++i) { - PropositionalConjunction clone = (PropositionalConjunction) c.clone(); - clone.add(array[i]); - array[i] = clone; + + /** + * Distributes the given conjunction over this disjunction. + * + * @return A simplified constraint representing the distribution of the given conjunction over + * this disjunction. 
+ **/ + public PropositionalConstraint distribute(PropositionalConjunction c) { + PropositionalConstraint[] array = + (PropositionalConstraint[]) children.toArray(new PropositionalConstraint[children + .size()]); + for (int i = 0; i < array.length; ++i) { + PropositionalConjunction clone = (PropositionalConjunction) c.clone(); + clone.add(array[i]); + array[i] = clone; + } + + if (array.length == 1) + return array[0].simplify(); + + PropositionalDisjunction result = new PropositionalDisjunction(array[0], array[1]); + for (int i = 2; i < array.length; ++i) + result.add(array[i]); + return result.simplify(); } - if (array.length == 1) return array[0].simplify(); - - PropositionalDisjunction result = - new PropositionalDisjunction(array[0], array[1]); - for (int i = 2; i < array.length; ++i) result.add(array[i]); - return result.simplify(); - } - - - /** - * Determines whether this disjunction contains all of the terms that the - * given disjunction contains. - * - * @param d The given disjunction. - * @return true iff this disjunction contains all of the terms - * that the given disjunction contains. - **/ - public boolean containsAll(PropositionalDisjunction d) { - return children.containsAll(d.children); - } - - - /** - * The hash code of a PropositionalDisjunction is the sum of - * the hash codes of its children. - * - * @return The hash code for this PropositionalDisjunction. - **/ - public int hashCode() { - int result = 0; - for (Iterator I = children.iterator(); I.hasNext(); ) - result += I.next().hashCode(); - return result; - } - - - /** - * Two PropositionalDisjunctions are equivalent when they are - * topologically equivalent, respecting the associativity and commutivity - * of disjunction. - * - * @return true iff the argument is an equivalent - * PropositionalDisjunction. 
- **/ - public boolean equals(Object o) { - if (!(o instanceof PropositionalDisjunction)) return false; - PropositionalDisjunction d = (PropositionalDisjunction) o; - return children.equals(d.children); - } - - - /** - * Calls the appropriate visit(·) method of the given - * Inference for this Constraint, as per the - * visitor pattern. - * - * @param infer The inference visiting this constraint. - **/ - public void runVisit(Inference infer) { infer.visit(this); } - - - /** - * Creates a string respresentation of this constraint using the string - * representations of the objects involved. - * - * @param buffer The output of this method will be appended to this buffer. - **/ - public void write(StringBuffer buffer) { - buffer.append("("); - - PropositionalConstraint[] children = - (PropositionalConstraint[]) getChildren(); - children[0].write(buffer); - for (int i = 1; i < children.length; ++i) { - buffer.append(" \\/ "); - children[i].write(buffer); + + /** + * Determines whether this disjunction contains all of the terms that the given disjunction + * contains. + * + * @param d The given disjunction. + * @return true iff this disjunction contains all of the terms that the given + * disjunction contains. + **/ + public boolean containsAll(PropositionalDisjunction d) { + return children.containsAll(d.children); } - buffer.append(")"); - } -} + /** + * The hash code of a PropositionalDisjunction is the sum of the hash codes of its + * children. + * + * @return The hash code for this PropositionalDisjunction. + **/ + public int hashCode() { + int result = 0; + for (Iterator I = children.iterator(); I.hasNext();) + result += I.next().hashCode(); + return result; + } + + + /** + * Two PropositionalDisjunctions are equivalent when they are topologically + * equivalent, respecting the associativity and commutivity of disjunction. + * + * @return true iff the argument is an equivalent + * PropositionalDisjunction. 
+ **/ + public boolean equals(Object o) { + if (!(o instanceof PropositionalDisjunction)) + return false; + PropositionalDisjunction d = (PropositionalDisjunction) o; + return children.equals(d.children); + } + + + /** + * Calls the appropriate visit(·) method of the given Inference + * for this Constraint, as per the visitor pattern. + * + * @param infer The inference visiting this constraint. + **/ + public void runVisit(Inference infer) { + infer.visit(this); + } + + + /** + * Creates a string respresentation of this constraint using the string representations of the + * objects involved. + * + * @param buffer The output of this method will be appended to this buffer. + **/ + public void write(StringBuffer buffer) { + buffer.append("("); + + PropositionalConstraint[] children = (PropositionalConstraint[]) getChildren(); + children[0].write(buffer); + for (int i = 1; i < children.length; ++i) { + buffer.append(" \\/ "); + children[i].write(buffer); + } + + buffer.append(")"); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/PropositionalDoubleImplication.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/PropositionalDoubleImplication.java index ede49340..a39cf3cd 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/PropositionalDoubleImplication.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/PropositionalDoubleImplication.java @@ -1,293 +1,268 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.infer; /** - * Represents a double implication between two propositional constraints. - * - * @author Nick Rizzolo + * Represents a double implication between two propositional constraints. + * + * @author Nick Rizzolo **/ -public class PropositionalDoubleImplication - extends PropositionalBinaryConstraint -{ - /** - * Initializing constructor. - * - * @param l The constraint on the left of the operator. - * @param r The constraint on the right of the operator. - **/ - public PropositionalDoubleImplication(PropositionalConstraint l, - PropositionalConstraint r) { - super(l, r); - } - - - /** Determines whether the constraint is satisfied. */ - public boolean evaluate() { return left.evaluate() == right.evaluate(); } - - - /** - * Produces a new, logically simplified version of this constraint, - * preserving variable consolidation. - * - * @see Constraint#consolidateVariables(java.util.AbstractMap) - * @return A logically simplified version of this constraint. - **/ - public PropositionalConstraint simplify() { - return - new PropositionalConjunction( - new PropositionalDisjunction(left.negate(), right), - new PropositionalDisjunction(right.negate(), left)).simplify(); - } - - - /** - * Produces a new propositional constraint equivalent to this constraint - * and that contains no negated constraints other than variables. - * - * @return A constraint representing the negation of this constraint. 
- **/ - public PropositionalConstraint negate() { - return - new PropositionalConjunction( - new PropositionalDisjunction(left.negate(), right.negate()), - new PropositionalDisjunction(left, right)); - } - - - - /** - * Produces a new, logically simplified version of this constraint in - * conjunctive normal form (CNF). - * - * @return The conjunctive normal form of this constraint. - **/ - public PropositionalConstraint CNF() { - return - new PropositionalConjunction( - new PropositionalDisjunction( - new PropositionalNegation(left), - right), - new PropositionalDisjunction( - new PropositionalNegation(right), - left)) - .CNF(); - } - - - /** - * Produces a new, logically simplified version of this constraint in - * disjunctive normal form (DNF). - * - * @return The disjunctive normal form of this constraint. - **/ - public PropositionalConstraint DNF() { - return - new PropositionalDisjunction( - new PropositionalConjunction(left, right), - new PropositionalConjunction( - new PropositionalNegation(left), - new PropositionalNegation(right))) - .DNF(); - } - - - /** - * Compares topology to determine if this constraint is more general than - * the given constraint; note: this method is not required to be correct - * when it answers false. - * - * @param c The given constraint. - * @return true if a topological analysis determined that this - * constraint is more general than the given constraint. - **/ - public boolean moreGeneralThan(PropositionalConstraint c) { - return c.moreSpecificThan(this); - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given implication; note: this method is not required to be - * correct when it answers false. - * - * @param c The given implication. - * @return true if a topological analysis determined that this - * constraint is more specific than the given implication. 
- **/ - public boolean moreSpecificThan(PropositionalImplication c) { - PropositionalConstraint[] children = - (PropositionalConstraint[]) c.getChildren(); - return left.equals(children[0]) && right.equals(children[1]) - || left.equals(children[1]) && right.equals(children[0]); - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given double implication; note: this method is not required to be - * correct when it answers false. - * - * @param c The given double implication. - * @return true if a topological analysis determined that this - * constraint is more specific than the given double implication. - **/ - public boolean moreSpecificThan(PropositionalDoubleImplication c) { - return false; - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given conjunction; note: this method is not required to be - * correct when it answers false. - * - * @param c The given conjunction. - * @return true if a topological analysis determined that this - * constraint is more specific than the given conjunction. - **/ - public boolean moreSpecificThan(PropositionalConjunction c) { - return false; - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given disjunction; note: this method is not required to be - * correct when it answers false. - * - * @param c The given disjunction. - * @return true if a topological analysis determined that this - * constraint is more specific than the given disjunction. 
- **/ - public boolean moreSpecificThan(PropositionalDisjunction c) { - PropositionalConstraint[] children = - (PropositionalConstraint[]) c.getChildren(); - return children.length == 2 - && (new PropositionalNegation(left).equals(children[0]) - && right.equals(children[1]) - || new PropositionalNegation(left).equals(children[1]) - && right.equals(children[0]) - || left.equals(children[0]) - && new PropositionalNegation(right).equals(children[1]) - || left.equals(children[1]) - && new PropositionalNegation(right).equals(children[0])); - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given at-least; note: this method is not required to be correct - * when it answers false. - * - * @param c The given at-least. - * @return true if a topological analysis determined that this - * constraint is more specific than the given disjunction. - **/ - public boolean moreSpecificThan(PropositionalAtLeast c) { - return false; - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given negation; note: this method is not required to be correct - * when it answers false. - * - * @param c The given negation. - * @return true if a topological analysis determined that this - * constraint is more specific than the given negation. - **/ - public boolean moreSpecificThan(PropositionalNegation c) { - return false; - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given variable; note: this method is not required to be correct - * when it answers false. - * - * @param c The given variable. - * @return true if a topological analysis determined that this - * constraint is more specific than the given variable. 
- **/ - public boolean moreSpecificThan(PropositionalVariable c) { - return false; - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given constant; note: this method is not required to be correct - * when it answers false. - * - * @param c The given constant. - * @return true if a topological analysis determined that this - * constraint is more specific than the given constant. - **/ - public boolean moreSpecificThan(PropositionalConstant c) { - return c.evaluate(); - } - - - /** - * The hash code of a PropositionalDoubleImplication is the - * sum of the hash codes of its children plus three. - * - * @return The hash code for this - * PropositionalDoubleImplication. - **/ - public int hashCode() { return left.hashCode() + right.hashCode() + 3; } - - - /** - * Two PropositionalDoubleImplications are equivalent when - * they are topologically equivalent, respecting the commutativity of - * double implication. - * - * @return true iff the argument is an equivalent - * PropositionalDoubleImplication. - **/ - public boolean equals(Object o) { - if (!(o instanceof PropositionalDoubleImplication)) return false; - PropositionalDoubleImplication i = (PropositionalDoubleImplication) o; - return left.equals(i.left) && right.equals(i.right) - || left.equals(i.right) && right.equals(i.left); - } - - - /** - * Calls the appropriate visit(·) method of the given - * Inference for this Constraint, as per the - * visitor pattern. - * - * @param infer The inference visiting this constraint. - **/ - public void runVisit(Inference infer) { infer.visit(this); } - - - /** - * Creates a string respresentation of this constraint using the string - * representations of the objects involved. - * - * @param buffer The output of this method will be appended to this buffer. 
- **/ - public void write(StringBuffer buffer) { - buffer.append("("); - left.write(buffer); - buffer.append(" <=> "); - right.write(buffer); - buffer.append(")"); - } +public class PropositionalDoubleImplication extends PropositionalBinaryConstraint { + /** + * Initializing constructor. + * + * @param l The constraint on the left of the operator. + * @param r The constraint on the right of the operator. + **/ + public PropositionalDoubleImplication(PropositionalConstraint l, PropositionalConstraint r) { + super(l, r); + } + + + /** Determines whether the constraint is satisfied. */ + public boolean evaluate() { + return left.evaluate() == right.evaluate(); + } + + + /** + * Produces a new, logically simplified version of this constraint, preserving variable + * consolidation. + * + * @see Constraint#consolidateVariables(java.util.AbstractMap) + * @return A logically simplified version of this constraint. + **/ + public PropositionalConstraint simplify() { + return new PropositionalConjunction(new PropositionalDisjunction(left.negate(), right), + new PropositionalDisjunction(right.negate(), left)).simplify(); + } + + + /** + * Produces a new propositional constraint equivalent to this constraint and that contains no + * negated constraints other than variables. + * + * @return A constraint representing the negation of this constraint. + **/ + public PropositionalConstraint negate() { + return new PropositionalConjunction(new PropositionalDisjunction(left.negate(), + right.negate()), new PropositionalDisjunction(left, right)); + } + + + + /** + * Produces a new, logically simplified version of this constraint in conjunctive normal form + * (CNF). + * + * @return The conjunctive normal form of this constraint. 
+ **/ + public PropositionalConstraint CNF() { + return new PropositionalConjunction(new PropositionalDisjunction(new PropositionalNegation( + left), right), new PropositionalDisjunction(new PropositionalNegation(right), left)) + .CNF(); + } + + + /** + * Produces a new, logically simplified version of this constraint in disjunctive normal form + * (DNF). + * + * @return The disjunctive normal form of this constraint. + **/ + public PropositionalConstraint DNF() { + return new PropositionalDisjunction(new PropositionalConjunction(left, right), + new PropositionalConjunction(new PropositionalNegation(left), + new PropositionalNegation(right))).DNF(); + } + + + /** + * Compares topology to determine if this constraint is more general than the given constraint; + * note: this method is not required to be correct when it answers false. + * + * @param c The given constraint. + * @return true if a topological analysis determined that this constraint is more + * general than the given constraint. + **/ + public boolean moreGeneralThan(PropositionalConstraint c) { + return c.moreSpecificThan(this); + } + + + /** + * Compares topology to determine if this constraint is more specific than the given + * implication; note: this method is not required to be correct when it answers + * false. + * + * @param c The given implication. + * @return true if a topological analysis determined that this constraint is more + * specific than the given implication. + **/ + public boolean moreSpecificThan(PropositionalImplication c) { + PropositionalConstraint[] children = (PropositionalConstraint[]) c.getChildren(); + return left.equals(children[0]) && right.equals(children[1]) || left.equals(children[1]) + && right.equals(children[0]); + } + + + /** + * Compares topology to determine if this constraint is more specific than the given double + * implication; note: this method is not required to be correct when it answers + * false. + * + * @param c The given double implication. 
+ * @return true if a topological analysis determined that this constraint is more + * specific than the given double implication. + **/ + public boolean moreSpecificThan(PropositionalDoubleImplication c) { + return false; + } + + + /** + * Compares topology to determine if this constraint is more specific than the given + * conjunction; note: this method is not required to be correct when it answers + * false. + * + * @param c The given conjunction. + * @return true if a topological analysis determined that this constraint is more + * specific than the given conjunction. + **/ + public boolean moreSpecificThan(PropositionalConjunction c) { + return false; + } + + + /** + * Compares topology to determine if this constraint is more specific than the given + * disjunction; note: this method is not required to be correct when it answers + * false. + * + * @param c The given disjunction. + * @return true if a topological analysis determined that this constraint is more + * specific than the given disjunction. + **/ + public boolean moreSpecificThan(PropositionalDisjunction c) { + PropositionalConstraint[] children = (PropositionalConstraint[]) c.getChildren(); + return children.length == 2 + && (new PropositionalNegation(left).equals(children[0]) + && right.equals(children[1]) + || new PropositionalNegation(left).equals(children[1]) + && right.equals(children[0]) || left.equals(children[0]) + && new PropositionalNegation(right).equals(children[1]) || left + .equals(children[1]) + && new PropositionalNegation(right).equals(children[0])); + } + + + /** + * Compares topology to determine if this constraint is more specific than the given at-least; + * note: this method is not required to be correct when it answers false. + * + * @param c The given at-least. + * @return true if a topological analysis determined that this constraint is more + * specific than the given disjunction. 
+ **/ + public boolean moreSpecificThan(PropositionalAtLeast c) { + return false; + } + + + /** + * Compares topology to determine if this constraint is more specific than the given negation; + * note: this method is not required to be correct when it answers false. + * + * @param c The given negation. + * @return true if a topological analysis determined that this constraint is more + * specific than the given negation. + **/ + public boolean moreSpecificThan(PropositionalNegation c) { + return false; + } + + + /** + * Compares topology to determine if this constraint is more specific than the given variable; + * note: this method is not required to be correct when it answers false. + * + * @param c The given variable. + * @return true if a topological analysis determined that this constraint is more + * specific than the given variable. + **/ + public boolean moreSpecificThan(PropositionalVariable c) { + return false; + } + + + /** + * Compares topology to determine if this constraint is more specific than the given constant; + * note: this method is not required to be correct when it answers false. + * + * @param c The given constant. + * @return true if a topological analysis determined that this constraint is more + * specific than the given constant. + **/ + public boolean moreSpecificThan(PropositionalConstant c) { + return c.evaluate(); + } + + + /** + * The hash code of a PropositionalDoubleImplication is the sum of the hash codes + * of its children plus three. + * + * @return The hash code for this PropositionalDoubleImplication. + **/ + public int hashCode() { + return left.hashCode() + right.hashCode() + 3; + } + + + /** + * Two PropositionalDoubleImplications are equivalent when they are topologically + * equivalent, respecting the commutativity of double implication. + * + * @return true iff the argument is an equivalent + * PropositionalDoubleImplication. 
+ **/ + public boolean equals(Object o) { + if (!(o instanceof PropositionalDoubleImplication)) + return false; + PropositionalDoubleImplication i = (PropositionalDoubleImplication) o; + return left.equals(i.left) && right.equals(i.right) || left.equals(i.right) + && right.equals(i.left); + } + + + /** + * Calls the appropriate visit(·) method of the given Inference + * for this Constraint, as per the visitor pattern. + * + * @param infer The inference visiting this constraint. + **/ + public void runVisit(Inference infer) { + infer.visit(this); + } + + + /** + * Creates a string respresentation of this constraint using the string representations of the + * objects involved. + * + * @param buffer The output of this method will be appended to this buffer. + **/ + public void write(StringBuffer buffer) { + buffer.append("("); + left.write(buffer); + buffer.append(" <=> "); + right.write(buffer); + buffer.append(")"); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/PropositionalImplication.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/PropositionalImplication.java index 6bbc5305..230e8906 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/PropositionalImplication.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/PropositionalImplication.java @@ -1,256 +1,250 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.infer; /** - * Represents an implication between two propositional constraints. - * - * @author Nick Rizzolo + * Represents an implication between two propositional constraints. + * + * @author Nick Rizzolo **/ -public class PropositionalImplication extends PropositionalBinaryConstraint -{ - /** - * Initializing constructor. - * - * @param l The constraint on the left of the operator. - * @param r The constraint on the right of the operator. - **/ - public PropositionalImplication(PropositionalConstraint l, - PropositionalConstraint r) { - super(l, r); - } - - - /** Determines whether the constraint is satisfied. */ - public boolean evaluate() { return !left.evaluate() || right.evaluate(); } - - - /** - * Produces a new, logically simplified version of this constraint, - * preserving variable consolidation. - * - * @see Constraint#consolidateVariables(java.util.AbstractMap) - * @return A logically simplified version of this constraint. - **/ - public PropositionalConstraint simplify() { - return new PropositionalDisjunction(left.negate(), right).simplify(); - } - - - /** - * Produces a new propositional constraint equivalent to this constraint - * and that contains no negated constraints other than variables. - * - * @return A constraint representing the negation of this constraint. - **/ - public PropositionalConstraint negate() { - return new PropositionalConjunction(left, right.negate()); - } - - - /** - * Produces a new, logically simplified version of this constraint in - * conjunctive normal form (CNF). - * - * @return The conjunctive normal form of this constraint. 
- **/ - public PropositionalConstraint CNF() { - return - new PropositionalDisjunction(new PropositionalNegation(left), right) - .CNF(); - } - - - /** - * Produces a new, logically simplified version of this constraint in - * disjunctive normal form (DNF). - * - * @return The disjunctive normal form of this constraint. - **/ - public PropositionalConstraint DNF() { - return - new PropositionalDisjunction(new PropositionalNegation(left), right) - .DNF(); - } - - - /** - * Compares topology to determine if this constraint is more general than - * the given constraint; note: this method is not required to be correct - * when it answers false. - * - * @param c The given constraint. - * @return true if a topological analysis determined that this - * constraint is more general than the given constraint. - **/ - public boolean moreGeneralThan(PropositionalConstraint c) { - return c.moreSpecificThan(this); - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given implication; note: this method is not required to be - * correct when it answers false. - * - * @param c The given implication. - * @return true if a topological analysis determined that this - * constraint is more specific than the given implication. - **/ - public boolean moreSpecificThan(PropositionalImplication c) { - return false; - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given double implication; note: this method is not required to be - * correct when it answers false. - * - * @param c The given double implication. - * @return true if a topological analysis determined that this - * constraint is more specific than the given double implication. 
- **/ - public boolean moreSpecificThan(PropositionalDoubleImplication c) { - return false; - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given conjunction; note: this method is not required to be - * correct when it answers false. - * - * @param c The given conjunction. - * @return true if a topological analysis determined that this - * constraint is more specific than the given conjunction. - **/ - public boolean moreSpecificThan(PropositionalConjunction c) { - return false; - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given disjunction; note: this method is not required to be - * correct when it answers false. - * - * @param c The given disjunction. - * @return true if a topological analysis determined that this - * constraint is more specific than the given disjunction. - **/ - public boolean moreSpecificThan(PropositionalDisjunction c) { - return c.contains(new PropositionalNegation(left)) && c.contains(right) - || c.contains(this); - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given at-least; note: this method is not required to be correct - * when it answers false. - * - * @param c The given at-least. - * @return true if a topological analysis determined that this - * constraint is more specific than the given disjunction. - **/ - public boolean moreSpecificThan(PropositionalAtLeast c) { - return false; - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given negation; note: this method is not required to be correct - * when it answers false. - * - * @param c The given negation. - * @return true if a topological analysis determined that this - * constraint is more specific than the given negation. 
- **/ - public boolean moreSpecificThan(PropositionalNegation c) { return false; } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given variable; note: this method is not required to be correct - * when it answers false. - * - * @param c The given variable. - * @return true if a topological analysis determined that this - * constraint is more specific than the given variable. - **/ - public boolean moreSpecificThan(PropositionalVariable c) { return false; } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given constant; note: this method is not required to be correct - * when it answers false. - * - * @param c The given constant. - * @return true if a topological analysis determined that this - * constraint is more specific than the given constant. - **/ - public boolean moreSpecificThan(PropositionalConstant c) { - return c.evaluate(); - } - - - /** - * The hash code of a PropositionalImplication is the sum of - * the hash codes of its children plus two. - * - * @return The hash code for this PropositionalImplication. - **/ - public int hashCode() { return left.hashCode() + right.hashCode() + 2; } - - - /** - * Two PropositionalImplications are equivalent when they are - * topologically equivalent. - * - * @return true iff the argument is an equivalent - * PropositionalImplication. - **/ - public boolean equals(Object o) { - if (!(o instanceof PropositionalImplication)) return false; - PropositionalImplication i = (PropositionalImplication) o; - return left.equals(i.left) && right.equals(i.right); - } - - - /** - * Calls the appropriate visit(·) method of the given - * Inference for this Constraint, as per the - * visitor pattern. - * - * @param infer The inference visiting this constraint. 
- **/ - public void runVisit(Inference infer) { infer.visit(this); } - - - /** - * Creates a string respresentation of this constraint using the string - * representations of the objects involved. - * - * @param buffer The output of this method will be appended to this buffer. - **/ - public void write(StringBuffer buffer) { - buffer.append("("); - left.write(buffer); - buffer.append(" => "); - right.write(buffer); - buffer.append(")"); - } +public class PropositionalImplication extends PropositionalBinaryConstraint { + /** + * Initializing constructor. + * + * @param l The constraint on the left of the operator. + * @param r The constraint on the right of the operator. + **/ + public PropositionalImplication(PropositionalConstraint l, PropositionalConstraint r) { + super(l, r); + } + + + /** Determines whether the constraint is satisfied. */ + public boolean evaluate() { + return !left.evaluate() || right.evaluate(); + } + + + /** + * Produces a new, logically simplified version of this constraint, preserving variable + * consolidation. + * + * @see Constraint#consolidateVariables(java.util.AbstractMap) + * @return A logically simplified version of this constraint. + **/ + public PropositionalConstraint simplify() { + return new PropositionalDisjunction(left.negate(), right).simplify(); + } + + + /** + * Produces a new propositional constraint equivalent to this constraint and that contains no + * negated constraints other than variables. + * + * @return A constraint representing the negation of this constraint. + **/ + public PropositionalConstraint negate() { + return new PropositionalConjunction(left, right.negate()); + } + + + /** + * Produces a new, logically simplified version of this constraint in conjunctive normal form + * (CNF). + * + * @return The conjunctive normal form of this constraint. 
+ **/ + public PropositionalConstraint CNF() { + return new PropositionalDisjunction(new PropositionalNegation(left), right).CNF(); + } + + + /** + * Produces a new, logically simplified version of this constraint in disjunctive normal form + * (DNF). + * + * @return The disjunctive normal form of this constraint. + **/ + public PropositionalConstraint DNF() { + return new PropositionalDisjunction(new PropositionalNegation(left), right).DNF(); + } + + + /** + * Compares topology to determine if this constraint is more general than the given constraint; + * note: this method is not required to be correct when it answers false. + * + * @param c The given constraint. + * @return true if a topological analysis determined that this constraint is more + * general than the given constraint. + **/ + public boolean moreGeneralThan(PropositionalConstraint c) { + return c.moreSpecificThan(this); + } + + + /** + * Compares topology to determine if this constraint is more specific than the given + * implication; note: this method is not required to be correct when it answers + * false. + * + * @param c The given implication. + * @return true if a topological analysis determined that this constraint is more + * specific than the given implication. + **/ + public boolean moreSpecificThan(PropositionalImplication c) { + return false; + } + + + /** + * Compares topology to determine if this constraint is more specific than the given double + * implication; note: this method is not required to be correct when it answers + * false. + * + * @param c The given double implication. + * @return true if a topological analysis determined that this constraint is more + * specific than the given double implication. 
+ **/ + public boolean moreSpecificThan(PropositionalDoubleImplication c) { + return false; + } + + + /** + * Compares topology to determine if this constraint is more specific than the given + * conjunction; note: this method is not required to be correct when it answers + * false. + * + * @param c The given conjunction. + * @return true if a topological analysis determined that this constraint is more + * specific than the given conjunction. + **/ + public boolean moreSpecificThan(PropositionalConjunction c) { + return false; + } + + + /** + * Compares topology to determine if this constraint is more specific than the given + * disjunction; note: this method is not required to be correct when it answers + * false. + * + * @param c The given disjunction. + * @return true if a topological analysis determined that this constraint is more + * specific than the given disjunction. + **/ + public boolean moreSpecificThan(PropositionalDisjunction c) { + return c.contains(new PropositionalNegation(left)) && c.contains(right) || c.contains(this); + } + + + /** + * Compares topology to determine if this constraint is more specific than the given at-least; + * note: this method is not required to be correct when it answers false. + * + * @param c The given at-least. + * @return true if a topological analysis determined that this constraint is more + * specific than the given disjunction. + **/ + public boolean moreSpecificThan(PropositionalAtLeast c) { + return false; + } + + + /** + * Compares topology to determine if this constraint is more specific than the given negation; + * note: this method is not required to be correct when it answers false. + * + * @param c The given negation. + * @return true if a topological analysis determined that this constraint is more + * specific than the given negation. 
+ **/ + public boolean moreSpecificThan(PropositionalNegation c) { + return false; + } + + + /** + * Compares topology to determine if this constraint is more specific than the given variable; + * note: this method is not required to be correct when it answers false. + * + * @param c The given variable. + * @return true if a topological analysis determined that this constraint is more + * specific than the given variable. + **/ + public boolean moreSpecificThan(PropositionalVariable c) { + return false; + } + + + /** + * Compares topology to determine if this constraint is more specific than the given constant; + * note: this method is not required to be correct when it answers false. + * + * @param c The given constant. + * @return true if a topological analysis determined that this constraint is more + * specific than the given constant. + **/ + public boolean moreSpecificThan(PropositionalConstant c) { + return c.evaluate(); + } + + + /** + * The hash code of a PropositionalImplication is the sum of the hash codes of its + * children plus two. + * + * @return The hash code for this PropositionalImplication. + **/ + public int hashCode() { + return left.hashCode() + right.hashCode() + 2; + } + + + /** + * Two PropositionalImplications are equivalent when they are topologically + * equivalent. + * + * @return true iff the argument is an equivalent + * PropositionalImplication. + **/ + public boolean equals(Object o) { + if (!(o instanceof PropositionalImplication)) + return false; + PropositionalImplication i = (PropositionalImplication) o; + return left.equals(i.left) && right.equals(i.right); + } + + + /** + * Calls the appropriate visit(·) method of the given Inference + * for this Constraint, as per the visitor pattern. + * + * @param infer The inference visiting this constraint. 
+ **/ + public void runVisit(Inference infer) { + infer.visit(this); + } + + + /** + * Creates a string respresentation of this constraint using the string representations of the + * objects involved. + * + * @param buffer The output of this method will be appended to this buffer. + **/ + public void write(StringBuffer buffer) { + buffer.append("("); + left.write(buffer); + buffer.append(" => "); + right.write(buffer); + buffer.append(")"); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/PropositionalNAryConstraint.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/PropositionalNAryConstraint.java index e9754863..9d886ce8 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/PropositionalNAryConstraint.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/PropositionalNAryConstraint.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.infer; @@ -17,97 +14,99 @@ /** - * Represents a propositional constraint with an arbitrary number of - * arguments, usually assumed to be greater than or equal to 2. - * - * @author Nick Rizzolo + * Represents a propositional constraint with an arbitrary number of arguments, usually assumed to + * be greater than or equal to 2. 
+ * + * @author Nick Rizzolo **/ -public abstract class PropositionalNAryConstraint - extends PropositionalConstraint -{ - /** The children of the operator. */ - protected HashSet children; - - - /** Default constructor. */ - public PropositionalNAryConstraint() { children = new HashSet(); } - - - /** - * Replaces all unquantified variables with the unique copy stored as a - * value of the given map; also instantiates all quantified variables and - * stores them in the given map. - * - * @param m The map in which to find unique copies of the variables. - **/ - public void consolidateVariables(AbstractMap m) { - LinkedList toRemove = new LinkedList(); - - for (Iterator I = children.iterator(); I.hasNext(); ) { - Object next = I.next(); - if (next instanceof PropositionalVariable) toRemove.add(next); - else ((PropositionalConstraint) next).consolidateVariables(m); +public abstract class PropositionalNAryConstraint extends PropositionalConstraint { + /** The children of the operator. */ + protected HashSet children; + + + /** Default constructor. */ + public PropositionalNAryConstraint() { + children = new HashSet(); } - for (Iterator I = toRemove.iterator(); I.hasNext(); ) { - PropositionalVariable v = (PropositionalVariable) I.next(); - if (m.containsKey(v)) { - children.remove(v); - children.add(m.get(v)); - } - else m.put(v, v); + + /** + * Replaces all unquantified variables with the unique copy stored as a value of the given map; + * also instantiates all quantified variables and stores them in the given map. + * + * @param m The map in which to find unique copies of the variables. 
+ **/ + public void consolidateVariables(AbstractMap m) { + LinkedList toRemove = new LinkedList(); + + for (Iterator I = children.iterator(); I.hasNext();) { + Object next = I.next(); + if (next instanceof PropositionalVariable) + toRemove.add(next); + else + ((PropositionalConstraint) next).consolidateVariables(m); + } + + for (Iterator I = toRemove.iterator(); I.hasNext();) { + PropositionalVariable v = (PropositionalVariable) I.next(); + if (m.containsKey(v)) { + children.remove(v); + children.add(m.get(v)); + } else + m.put(v, v); + } } - } - - - /** - * Returns the children of this constraint in an array. - * - * @return The children of this constraint in an array. - **/ - public Constraint[] getChildren() { - return (PropositionalConstraint[]) - children.toArray(new PropositionalConstraint[children.size()]); - } - - - /** - * Determines whether the given constraint is a term of this constraint. - * - * @param c The given constraint. - * @return true iff the given constraint is contained in this - * constraint. - **/ - public boolean contains(PropositionalConstraint c) { - return children.contains(c); - } - - - /** - * Returns the number of terms in this constraint. - * - * @return The number of terms in this constraint. - **/ - public int size() { return children.size(); } - - - /** - * This method returns a shallow clone. - * - * @return A shallow clone. - **/ - public Object clone() { - PropositionalNAryConstraint clone = null; - - try { clone = (PropositionalNAryConstraint) super.clone(); } - catch (Exception e) { - System.err.println("Error cloning " + getClass().getName() + ":"); - e.printStackTrace(); - System.exit(1); + + + /** + * Returns the children of this constraint in an array. + * + * @return The children of this constraint in an array. 
+ **/ + public Constraint[] getChildren() { + return (PropositionalConstraint[]) children.toArray(new PropositionalConstraint[children + .size()]); } - clone.children = (HashSet) clone.children.clone(); - return clone; - } -} + /** + * Determines whether the given constraint is a term of this constraint. + * + * @param c The given constraint. + * @return true iff the given constraint is contained in this constraint. + **/ + public boolean contains(PropositionalConstraint c) { + return children.contains(c); + } + + + /** + * Returns the number of terms in this constraint. + * + * @return The number of terms in this constraint. + **/ + public int size() { + return children.size(); + } + + + /** + * This method returns a shallow clone. + * + * @return A shallow clone. + **/ + public Object clone() { + PropositionalNAryConstraint clone = null; + + try { + clone = (PropositionalNAryConstraint) super.clone(); + } catch (Exception e) { + System.err.println("Error cloning " + getClass().getName() + ":"); + e.printStackTrace(); + System.exit(1); + } + + clone.children = (HashSet) clone.children.clone(); + return clone; + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/PropositionalNegation.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/PropositionalNegation.java index 33d634da..026b1862 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/PropositionalNegation.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/PropositionalNegation.java @@ -1,284 +1,285 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.infer; /** - * Represents the negation operator applied to a propositional constraint. - * - * @author Nick Rizzolo + * Represents the negation operator applied to a propositional constraint. + * + * @author Nick Rizzolo **/ -public class PropositionalNegation extends PropositionalConstraint -{ - /** The constraint that the negation is applied to. */ - protected PropositionalConstraint constraint; - - - /** - * Initializing constructor. - * - * @param c The constraint to negate. - **/ - public PropositionalNegation(PropositionalConstraint c) { constraint = c; } - - - /** - * Replaces all unquantified variables with the unique copy stored as a - * value of the given map; also instantiates all quantified variables and - * stores them in the given map. - * - * @param m The map in which to find unique copies of the variables. - **/ - public void consolidateVariables(java.util.AbstractMap m) { - if (constraint instanceof PropositionalVariable) { - if (m.containsKey(constraint)) - constraint = (PropositionalVariable) m.get(constraint); - else m.put(constraint, constraint); +public class PropositionalNegation extends PropositionalConstraint { + /** The constraint that the negation is applied to. */ + protected PropositionalConstraint constraint; + + + /** + * Initializing constructor. + * + * @param c The constraint to negate. + **/ + public PropositionalNegation(PropositionalConstraint c) { + constraint = c; + } + + + /** + * Replaces all unquantified variables with the unique copy stored as a value of the given map; + * also instantiates all quantified variables and stores them in the given map. + * + * @param m The map in which to find unique copies of the variables. 
+ **/ + public void consolidateVariables(java.util.AbstractMap m) { + if (constraint instanceof PropositionalVariable) { + if (m.containsKey(constraint)) + constraint = (PropositionalVariable) m.get(constraint); + else + m.put(constraint, constraint); + } else + constraint.consolidateVariables(m); + } + + + /** Determines whether the constraint is satisfied. */ + public boolean evaluate() { + return !constraint.evaluate(); + } + + + /** + * Produces a new, logically simplified version of this constraint, preserving variable + * consolidation. + * + * @see Constraint#consolidateVariables(java.util.AbstractMap) + * @return A logically simplified version of this constraint. + **/ + public PropositionalConstraint simplify() { + if (constraint instanceof PropositionalVariable) + return this; + return constraint.negate().simplify(); + } + + + /** + * Produces a new propositional constraint equivalent to this constraint and that contains no + * negated constraints other than variables. + * + * @return A constraint representing the negation of this constraint. + **/ + public PropositionalConstraint negate() { + return constraint; + } + + + /** + * Produces a new, logically simplified version of this constraint in conjunctive normal form + * (CNF). + * + * @return The conjunctive normal form of this constraint. + **/ + public PropositionalConstraint CNF() { + PropositionalConstraint simplified = simplify(); + if (simplified instanceof PropositionalNegation) + return simplified; + return simplified.CNF(); + } + + + /** + * Produces a new, logically simplified version of this constraint in disjunctive normal form + * (DNF). + * + * @return The disjunctive normal form of this constraint. + **/ + public PropositionalConstraint DNF() { + PropositionalConstraint simplified = simplify(); + if (simplified instanceof PropositionalNegation) + return simplified; + return simplified.DNF(); + } + + + /** + * Returns the children of this constraint in an array. 
+ * + * @return The children of this constraint in an array. + **/ + public Constraint[] getChildren() { + return new PropositionalConstraint[] {constraint}; + } + + + /** + * Compares topology to determine if this constraint is more general than the given constraint; + * note: this method is not required to be correct when it answers false. + * + * @param c The given constraint. + * @return true if a topological analysis determined that this constraint is more + * general than the given constraint. + **/ + public boolean moreGeneralThan(PropositionalConstraint c) { + return c.moreSpecificThan(this); + } + + + /** + * Compares topology to determine if this constraint is more specific than the given + * implication; note: this method is not required to be correct when it answers + * false. + * + * @param c The given implication. + * @return true if a topological analysis determined that this constraint is more + * specific than the given implication. + **/ + public boolean moreSpecificThan(PropositionalImplication c) { + PropositionalConstraint[] children = (PropositionalConstraint[]) c.getChildren(); + return constraint.equals(children[0]) || equals(children[1]); + } + + + /** + * Compares topology to determine if this constraint is more specific than the given double + * implication; note: this method is not required to be correct when it answers + * false. + * + * @param c The given double implication. + * @return true if a topological analysis determined that this constraint is more + * specific than the given double implication. + **/ + public boolean moreSpecificThan(PropositionalDoubleImplication c) { + return false; + } + + + /** + * Compares topology to determine if this constraint is more specific than the given + * conjunction; note: this method is not required to be correct when it answers + * false. + * + * @param c The given conjunction. 
+ * @return true if a topological analysis determined that this constraint is more + * specific than the given conjunction. + **/ + public boolean moreSpecificThan(PropositionalConjunction c) { + return false; + } + + + /** + * Compares topology to determine if this constraint is more specific than the given + * disjunction; note: this method is not required to be correct when it answers + * false. + * + * @param c The given disjunction. + * @return true if a topological analysis determined that this constraint is more + * specific than the given disjunction. + **/ + public boolean moreSpecificThan(PropositionalDisjunction c) { + return c.size() > 1 && c.contains(this); + } + + + /** + * Compares topology to determine if this constraint is more specific than the given at-least; + * note: this method is not required to be correct when it answers false. + * + * @param c The given at-least. + * @return true if a topological analysis determined that this constraint is more + * specific than the given disjunction. + **/ + public boolean moreSpecificThan(PropositionalAtLeast c) { + return false; + } + + + /** + * Compares topology to determine if this constraint is more specific than the given negation; + * note: this method is not required to be correct when it answers false. + * + * @param c The given negation. + * @return true if a topological analysis determined that this constraint is more + * specific than the given negation. + **/ + public boolean moreSpecificThan(PropositionalNegation c) { + return false; + } + + + /** + * Compares topology to determine if this constraint is more specific than the given variable; + * note: this method is not required to be correct when it answers false. + * + * @param c The given variable. + * @return true if a topological analysis determined that this constraint is more + * specific than the given variable. 
+ **/ + public boolean moreSpecificThan(PropositionalVariable c) { + return false; } - else constraint.consolidateVariables(m); - } - - - /** Determines whether the constraint is satisfied. */ - public boolean evaluate() { return !constraint.evaluate(); } - - - /** - * Produces a new, logically simplified version of this constraint, - * preserving variable consolidation. - * - * @see Constraint#consolidateVariables(java.util.AbstractMap) - * @return A logically simplified version of this constraint. - **/ - public PropositionalConstraint simplify() { - if (constraint instanceof PropositionalVariable) return this; - return constraint.negate().simplify(); - } - - - /** - * Produces a new propositional constraint equivalent to this constraint - * and that contains no negated constraints other than variables. - * - * @return A constraint representing the negation of this constraint. - **/ - public PropositionalConstraint negate() { return constraint; } - - - /** - * Produces a new, logically simplified version of this constraint in - * conjunctive normal form (CNF). - * - * @return The conjunctive normal form of this constraint. - **/ - public PropositionalConstraint CNF() { - PropositionalConstraint simplified = simplify(); - if (simplified instanceof PropositionalNegation) return simplified; - return simplified.CNF(); - } - - - /** - * Produces a new, logically simplified version of this constraint in - * disjunctive normal form (DNF). - * - * @return The disjunctive normal form of this constraint. - **/ - public PropositionalConstraint DNF() { - PropositionalConstraint simplified = simplify(); - if (simplified instanceof PropositionalNegation) return simplified; - return simplified.DNF(); - } - - - /** - * Returns the children of this constraint in an array. - * - * @return The children of this constraint in an array. 
- **/ - public Constraint[] getChildren() { - return new PropositionalConstraint[]{ constraint }; - } - - - /** - * Compares topology to determine if this constraint is more general than - * the given constraint; note: this method is not required to be correct - * when it answers false. - * - * @param c The given constraint. - * @return true if a topological analysis determined that this - * constraint is more general than the given constraint. - **/ - public boolean moreGeneralThan(PropositionalConstraint c) { - return c.moreSpecificThan(this); - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given implication; note: this method is not required to be - * correct when it answers false. - * - * @param c The given implication. - * @return true if a topological analysis determined that this - * constraint is more specific than the given implication. - **/ - public boolean moreSpecificThan(PropositionalImplication c) { - PropositionalConstraint[] children = - (PropositionalConstraint[]) c.getChildren(); - return constraint.equals(children[0]) || equals(children[1]); - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given double implication; note: this method is not required to be - * correct when it answers false. - * - * @param c The given double implication. - * @return true if a topological analysis determined that this - * constraint is more specific than the given double implication. - **/ - public boolean moreSpecificThan(PropositionalDoubleImplication c) { - return false; - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given conjunction; note: this method is not required to be - * correct when it answers false. - * - * @param c The given conjunction. - * @return true if a topological analysis determined that this - * constraint is more specific than the given conjunction. 
- **/ - public boolean moreSpecificThan(PropositionalConjunction c) { - return false; - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given disjunction; note: this method is not required to be - * correct when it answers false. - * - * @param c The given disjunction. - * @return true if a topological analysis determined that this - * constraint is more specific than the given disjunction. - **/ - public boolean moreSpecificThan(PropositionalDisjunction c) { - return c.size() > 1 && c.contains(this); - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given at-least; note: this method is not required to be correct - * when it answers false. - * - * @param c The given at-least. - * @return true if a topological analysis determined that this - * constraint is more specific than the given disjunction. - **/ - public boolean moreSpecificThan(PropositionalAtLeast c) { - return false; - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given negation; note: this method is not required to be correct - * when it answers false. - * - * @param c The given negation. - * @return true if a topological analysis determined that this - * constraint is more specific than the given negation. - **/ - public boolean moreSpecificThan(PropositionalNegation c) { - return false; - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given variable; note: this method is not required to be correct - * when it answers false. - * - * @param c The given variable. - * @return true if a topological analysis determined that this - * constraint is more specific than the given variable. 
- **/ - public boolean moreSpecificThan(PropositionalVariable c) { - return false; - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given constant; note: this method is not required to be correct - * when it answers false. - * - * @param c The given constant. - * @return true if a topological analysis determined that this - * constraint is more specific than the given constant. - **/ - public boolean moreSpecificThan(PropositionalConstant c) { - return c.evaluate(); - } - - - /** - * The hash code of a PropositionalNegation is the hash code - * of its child constraint plus 1. - * - * @return The hash code for this PropositionalNegation. - **/ - public int hashCode() { return constraint.hashCode() + 1; } - - - /** - * Two PropositionalNegations are equivalent when their - * constraints are equivalent. - * - * @return true iff the argument is a - * PropositionalNegation of the same constraint. - **/ - public boolean equals(Object o) { - if (!(o instanceof PropositionalNegation)) return false; - PropositionalNegation n = (PropositionalNegation) o; - return constraint.equals(n.constraint); - } - - - /** - * Calls the appropriate visit(·) method of the given - * Inference for this Constraint, as per the - * visitor pattern. - * - * @param infer The inference visiting this constraint. - **/ - public void runVisit(Inference infer) { infer.visit(this); } - - - /** - * Creates a string respresentation of this constraint using the string - * representations of the objects involved. - * - * @param buffer The output of this method will be appended to this buffer. - **/ - public void write(StringBuffer buffer) { - buffer.append("!"); - constraint.write(buffer); - } -} + + /** + * Compares topology to determine if this constraint is more specific than the given constant; + * note: this method is not required to be correct when it answers false. + * + * @param c The given constant. 
+ * @return true if a topological analysis determined that this constraint is more + * specific than the given constant. + **/ + public boolean moreSpecificThan(PropositionalConstant c) { + return c.evaluate(); + } + + + /** + * The hash code of a PropositionalNegation is the hash code of its child + * constraint plus 1. + * + * @return The hash code for this PropositionalNegation. + **/ + public int hashCode() { + return constraint.hashCode() + 1; + } + + + /** + * Two PropositionalNegations are equivalent when their constraints are equivalent. + * + * @return true iff the argument is a PropositionalNegation of the + * same constraint. + **/ + public boolean equals(Object o) { + if (!(o instanceof PropositionalNegation)) + return false; + PropositionalNegation n = (PropositionalNegation) o; + return constraint.equals(n.constraint); + } + + + /** + * Calls the appropriate visit(·) method of the given Inference + * for this Constraint, as per the visitor pattern. + * + * @param infer The inference visiting this constraint. + **/ + public void runVisit(Inference infer) { + infer.visit(this); + } + + + /** + * Creates a string respresentation of this constraint using the string representations of the + * objects involved. + * + * @param buffer The output of this method will be appended to this buffer. + **/ + public void write(StringBuffer buffer) { + buffer.append("!"); + constraint.write(buffer); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/PropositionalVariable.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/PropositionalVariable.java index 45b49ecb..8baebd99 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/PropositionalVariable.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/PropositionalVariable.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. 
See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.infer; @@ -14,298 +11,303 @@ /** - * Every propositional variable is Boolean and represents one possible - * prediction from a classifier application. If the variable is - * true, then the classifier application did result in the - * specified prediction value. - * - * @author Nick Rizzolo + * Every propositional variable is Boolean and represents one possible prediction from a classifier + * application. If the variable is true, then the classifier application did result in + * the specified prediction value. + * + * @author Nick Rizzolo **/ -public class PropositionalVariable extends PropositionalConstraint -{ - /** The classifier being applied. */ - protected Learner classifier; - /** The classifier is applied to this example object. */ - protected Object example; - /** - * The prediction that the classifier must produce for this variable to be - * true. - **/ - protected String prediction; - /** The value imposed on this variable. */ - public boolean value; - - - /** - * Initializing constructor; the value member variable is set - * to false. - * - * @param c The classifier being applied. - * @param e The classifier is applied to this example object. - * @param p The prediction associated with this variable. 
- **/ - public PropositionalVariable(Learner c, Object e, String p) { - classifier = c; - example = e; - prediction = p; - value = false; - } - - - /** - * Replaces all unquantified variables with the unique copy stored as a - * value of the given map; also instantiates all quantified variables and - * stores them in the given map. - * - * @param m The map in which to find unique copies of the variables. - **/ - public void consolidateVariables(java.util.AbstractMap m) { } - - - /** Retrieves the classifier. */ - public Learner getClassifier() { return classifier; } - - - /** Retrieves the example object. */ - public Object getExample() { return example; } - - - /** Retrieves the prediction. */ - public String getPrediction() { return prediction; } - - - /** Determines whether the constraint is satisfied. */ - public boolean evaluate() { return value; } - - - /** - * Produces a new, logically simplified version of this constraint, - * preserving variable consolidation. - * - * @see Constraint#consolidateVariables(java.util.AbstractMap) - * @return A logically simplified version of this constraint. - **/ - public PropositionalConstraint simplify() { return this; } - - - /** - * Produces a new propositional constraint equivalent to this constraint - * and that contains no negated constraints other than variables. - * - * @return A constraint representing the negation of this constraint. - **/ - public PropositionalConstraint negate() { - return new PropositionalNegation(this); - } - - - /** - * Produces a new, logically simplified version of this constraint in - * conjunctive normal form (CNF). - * - * @return The conjunctive normal form of this constraint. - **/ - public PropositionalConstraint CNF() { return this; } - - - /** - * Produces a new, logically simplified version of this constraint in - * disjunctive normal form (DNF). - * - * @return The disjunctive normal form of this constraint. 
- **/ - public PropositionalConstraint DNF() { return this; } - - - /** - * Returns the children of this constraint in an array. - * - * @return The children of this constraint in an array. - **/ - public Constraint[] getChildren() { return new PropositionalConstraint[0]; } - - - /** - * Compares topology to determine if this constraint is more general than - * the given constraint; note: this method is not required to be correct - * when it answers false. - * - * @param c The given constraint. - * @return true if a topological analysis determined that this - * constraint is more general than the given constraint. - **/ - public boolean moreGeneralThan(PropositionalConstraint c) { - return c.moreSpecificThan(this); - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given implication; note: this method is not required to be - * correct when it answers false. - * - * @param c The given implication. - * @return true if a topological analysis determined that this - * constraint is more specific than the given implication. - **/ - public boolean moreSpecificThan(PropositionalImplication c) { - PropositionalConstraint[] children = - (PropositionalConstraint[]) c.getChildren(); - return new PropositionalNegation(this).equals(children[0]) - || equals(children[1]); - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given double implication; note: this method is not required to be - * correct when it answers false. - * - * @param c The given double implication. - * @return true if a topological analysis determined that this - * constraint is more specific than the given double implication. - **/ - public boolean moreSpecificThan(PropositionalDoubleImplication c) { - return false; - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given conjunction; note: this method is not required to be - * correct when it answers false. 
- * - * @param c The given conjunction. - * @return true if a topological analysis determined that this - * constraint is more specific than the given conjunction. - **/ - public boolean moreSpecificThan(PropositionalConjunction c) { - return false; - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given disjunction; note: this method is not required to be - * correct when it answers false. - * - * @param c The given disjunction. - * @return true if a topological analysis determined that this - * constraint is more specific than the given disjunction. - **/ - public boolean moreSpecificThan(PropositionalDisjunction c) { - return c.size() > 1 && c.contains(this); - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given at-least; note: this method is not required to be correct - * when it answers false. - * - * @param c The given at-least. - * @return true if a topological analysis determined that this - * constraint is more specific than the given disjunction. - **/ - public boolean moreSpecificThan(PropositionalAtLeast c) { - return false; - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given negation; note: this method is not required to be correct - * when it answers false. - * - * @param c The given negation. - * @return true if a topological analysis determined that this - * constraint is more specific than the given negation. - **/ - public boolean moreSpecificThan(PropositionalNegation c) { - return false; - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given variable; note: this method is not required to be correct - * when it answers false. - * - * @param c The given variable. - * @return true if a topological analysis determined that this - * constraint is more specific than the given variable. 
- **/ - public boolean moreSpecificThan(PropositionalVariable c) { - return false; - } - - - /** - * Compares topology to determine if this constraint is more specific than - * the given constant; note: this method is not required to be correct - * when it answers false. - * - * @param c The given constant. - * @return true if a topological analysis determined that this - * constraint is more specific than the given constant. - **/ - public boolean moreSpecificThan(PropositionalConstant c) { - return c.evaluate(); - } - - - /** - * The hash code of a PropositionalVariable is the hash code - * of the string representation of the classifier plus the system's hash - * code for the example object plus the hash code of the prediction. - * - * @return The hash code of this PropositionalVariable. - **/ - public int hashCode() { - return classifier.toString().hashCode() + System.identityHashCode(example) - + prediction.hashCode(); - } - - - /** - * Two PropositionalVariables are equivalent when the string - * representations of their classifiers are equivalent, they store the - * same example object, and their values are equivalent. - * - * @param o The object to test equivalence with. - * @return true iff this object is equivalent to the argument - * object. - **/ - public boolean equals(Object o) { - if (!(o instanceof PropositionalVariable)) return false; - PropositionalVariable v = (PropositionalVariable) o; - return classifier.equals(v.classifier) && example == v.example - && prediction.equals(v.prediction); - } - - - /** - * Calls the appropriate visit(·) method of the given - * Inference for this Constraint, as per the - * visitor pattern. - * - * @param infer The inference visiting this constraint. - **/ - public void runVisit(Inference infer) { infer.visit(this); } - - - /** - * Creates a string respresentation of this constraint using the string - * representations of the objects involved. 
- * - * @param buffer The output of this method will be appended to this buffer. - **/ - public void write(StringBuffer buffer) { - buffer.append(classifier); - buffer.append("("); - buffer.append(example); - buffer.append(") :: "); - buffer.append(prediction); - } +public class PropositionalVariable extends PropositionalConstraint { + /** The classifier being applied. */ + protected Learner classifier; + /** The classifier is applied to this example object. */ + protected Object example; + /** + * The prediction that the classifier must produce for this variable to be true. + **/ + protected String prediction; + /** The value imposed on this variable. */ + public boolean value; + + + /** + * Initializing constructor; the value member variable is set to false + * . + * + * @param c The classifier being applied. + * @param e The classifier is applied to this example object. + * @param p The prediction associated with this variable. + **/ + public PropositionalVariable(Learner c, Object e, String p) { + classifier = c; + example = e; + prediction = p; + value = false; + } + + + /** + * Replaces all unquantified variables with the unique copy stored as a value of the given map; + * also instantiates all quantified variables and stores them in the given map. + * + * @param m The map in which to find unique copies of the variables. + **/ + public void consolidateVariables(java.util.AbstractMap m) {} + + + /** Retrieves the classifier. */ + public Learner getClassifier() { + return classifier; + } + + + /** Retrieves the example object. */ + public Object getExample() { + return example; + } + + + /** Retrieves the prediction. */ + public String getPrediction() { + return prediction; + } + + + /** Determines whether the constraint is satisfied. */ + public boolean evaluate() { + return value; + } + + + /** + * Produces a new, logically simplified version of this constraint, preserving variable + * consolidation. 
+ * + * @see Constraint#consolidateVariables(java.util.AbstractMap) + * @return A logically simplified version of this constraint. + **/ + public PropositionalConstraint simplify() { + return this; + } + + + /** + * Produces a new propositional constraint equivalent to this constraint and that contains no + * negated constraints other than variables. + * + * @return A constraint representing the negation of this constraint. + **/ + public PropositionalConstraint negate() { + return new PropositionalNegation(this); + } + + + /** + * Produces a new, logically simplified version of this constraint in conjunctive normal form + * (CNF). + * + * @return The conjunctive normal form of this constraint. + **/ + public PropositionalConstraint CNF() { + return this; + } + + + /** + * Produces a new, logically simplified version of this constraint in disjunctive normal form + * (DNF). + * + * @return The disjunctive normal form of this constraint. + **/ + public PropositionalConstraint DNF() { + return this; + } + + + /** + * Returns the children of this constraint in an array. + * + * @return The children of this constraint in an array. + **/ + public Constraint[] getChildren() { + return new PropositionalConstraint[0]; + } + + + /** + * Compares topology to determine if this constraint is more general than the given constraint; + * note: this method is not required to be correct when it answers false. + * + * @param c The given constraint. + * @return true if a topological analysis determined that this constraint is more + * general than the given constraint. + **/ + public boolean moreGeneralThan(PropositionalConstraint c) { + return c.moreSpecificThan(this); + } + + + /** + * Compares topology to determine if this constraint is more specific than the given + * implication; note: this method is not required to be correct when it answers + * false. + * + * @param c The given implication. 
+ * @return true if a topological analysis determined that this constraint is more + * specific than the given implication. + **/ + public boolean moreSpecificThan(PropositionalImplication c) { + PropositionalConstraint[] children = (PropositionalConstraint[]) c.getChildren(); + return new PropositionalNegation(this).equals(children[0]) || equals(children[1]); + } + + + /** + * Compares topology to determine if this constraint is more specific than the given double + * implication; note: this method is not required to be correct when it answers + * false. + * + * @param c The given double implication. + * @return true if a topological analysis determined that this constraint is more + * specific than the given double implication. + **/ + public boolean moreSpecificThan(PropositionalDoubleImplication c) { + return false; + } + + + /** + * Compares topology to determine if this constraint is more specific than the given + * conjunction; note: this method is not required to be correct when it answers + * false. + * + * @param c The given conjunction. + * @return true if a topological analysis determined that this constraint is more + * specific than the given conjunction. + **/ + public boolean moreSpecificThan(PropositionalConjunction c) { + return false; + } + + + /** + * Compares topology to determine if this constraint is more specific than the given + * disjunction; note: this method is not required to be correct when it answers + * false. + * + * @param c The given disjunction. + * @return true if a topological analysis determined that this constraint is more + * specific than the given disjunction. + **/ + public boolean moreSpecificThan(PropositionalDisjunction c) { + return c.size() > 1 && c.contains(this); + } + + + /** + * Compares topology to determine if this constraint is more specific than the given at-least; + * note: this method is not required to be correct when it answers false. + * + * @param c The given at-least. 
+ * @return true if a topological analysis determined that this constraint is more + * specific than the given disjunction. + **/ + public boolean moreSpecificThan(PropositionalAtLeast c) { + return false; + } + + + /** + * Compares topology to determine if this constraint is more specific than the given negation; + * note: this method is not required to be correct when it answers false. + * + * @param c The given negation. + * @return true if a topological analysis determined that this constraint is more + * specific than the given negation. + **/ + public boolean moreSpecificThan(PropositionalNegation c) { + return false; + } + + + /** + * Compares topology to determine if this constraint is more specific than the given variable; + * note: this method is not required to be correct when it answers false. + * + * @param c The given variable. + * @return true if a topological analysis determined that this constraint is more + * specific than the given variable. + **/ + public boolean moreSpecificThan(PropositionalVariable c) { + return false; + } + + + /** + * Compares topology to determine if this constraint is more specific than the given constant; + * note: this method is not required to be correct when it answers false. + * + * @param c The given constant. + * @return true if a topological analysis determined that this constraint is more + * specific than the given constant. + **/ + public boolean moreSpecificThan(PropositionalConstant c) { + return c.evaluate(); + } + + + /** + * The hash code of a PropositionalVariable is the hash code of the string + * representation of the classifier plus the system's hash code for the example object plus the + * hash code of the prediction. + * + * @return The hash code of this PropositionalVariable. 
+ **/ + public int hashCode() { + return classifier.toString().hashCode() + System.identityHashCode(example) + + prediction.hashCode(); + } + + + /** + * Two PropositionalVariables are equivalent when the string representations of + * their classifiers are equivalent, they store the same example object, and their values are + * equivalent. + * + * @param o The object to test equivalence with. + * @return true iff this object is equivalent to the argument object. + **/ + public boolean equals(Object o) { + if (!(o instanceof PropositionalVariable)) + return false; + PropositionalVariable v = (PropositionalVariable) o; + return classifier.equals(v.classifier) && example == v.example + && prediction.equals(v.prediction); + } + + + /** + * Calls the appropriate visit(·) method of the given Inference + * for this Constraint, as per the visitor pattern. + * + * @param infer The inference visiting this constraint. + **/ + public void runVisit(Inference infer) { + infer.visit(this); + } + + + /** + * Creates a string respresentation of this constraint using the string representations of the + * objects involved. + * + * @param buffer The output of this method will be appended to this buffer. + **/ + public void write(StringBuffer buffer) { + buffer.append(classifier); + buffer.append("("); + buffer.append(example); + buffer.append(") :: "); + buffer.append(prediction); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/QuantifiedConstraintInvocation.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/QuantifiedConstraintInvocation.java index b123fada..5fe48ec0 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/QuantifiedConstraintInvocation.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/QuantifiedConstraintInvocation.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. 
See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.infer; @@ -15,132 +12,130 @@ /** - * Represents the invocation of a parameterized constraint nested inside at - * least one quantification expression, where the parameter is a function of - * the quantification variables. - * - * @author Nick Rizzolo + * Represents the invocation of a parameterized constraint nested inside at least one quantification + * expression, where the parameter is a function of the quantification variables. + * + * @author Nick Rizzolo **/ -public class QuantifiedConstraintInvocation extends FirstOrderConstraint -{ - /** The parameterized constraint that has been invoked. */ - protected ParameterizedConstraint parameterized; - /** The implementation of the function that computes the parameter. */ - protected InvocationArgumentReplacer replacer; - /** The latest result of invoking parameterized. */ - protected FirstOrderConstraint constraint; - - - /** - * Initializing constructor. - * - * @param p The invoked constraint. - * @param iar The parameter function implementation. - **/ - public QuantifiedConstraintInvocation(ParameterizedConstraint p, - InvocationArgumentReplacer iar) { - parameterized = p; - replacer = iar; - } - - - /** - * If this method is called without first calling - * setQuantificationVariables(Vector), false will - * be returned. 
- **/ - public boolean evaluate() { - return constraint != null && constraint.evaluate(); - } - - - /** - * Replaces all unquantified variables with the unique copy stored as a - * value of the given map; also instantiates all quantified variables and - * stores them in the given map. - * - * @param m The map in which to find unique copies of the variables. - **/ - public void consolidateVariables(AbstractMap m) { - if (constraint != null) constraint.consolidateVariables(m); - } - - - /** - * This method sets the given quantification variables to the given object - * references and evaluates the expressions involving those variables in - * this constraint's FirstOrderEquality children. - * - * @param o The new object references for the enclosing quantification - * variables, in order of nesting. - **/ - public void setQuantificationVariables(Vector o) { - if (replacer == null) { - System.err.println( - "LBJava ERROR: Attempting to set quantification variable with no " - + "variable setter implementation provided."); - System.exit(1); +public class QuantifiedConstraintInvocation extends FirstOrderConstraint { + /** The parameterized constraint that has been invoked. */ + protected ParameterizedConstraint parameterized; + /** The implementation of the function that computes the parameter. */ + protected InvocationArgumentReplacer replacer; + /** The latest result of invoking parameterized. */ + protected FirstOrderConstraint constraint; + + + /** + * Initializing constructor. + * + * @param p The invoked constraint. + * @param iar The parameter function implementation. + **/ + public QuantifiedConstraintInvocation(ParameterizedConstraint p, InvocationArgumentReplacer iar) { + parameterized = p; + replacer = iar; } - replacer.setQuantificationVariables(o); - constraint = parameterized.makeConstraint(replacer.compute()); - } - - - /** - * Returns the children of this constraint in an array. - * - * @return The children of this constraint in an array. 
- **/ - public Constraint[] getChildren() { return new FirstOrderConstraint[0]; } - - - /** - * If this method is called without first calling - * setQuantificationVariables(Vector), the constant - * representing false will be returned. - * - * @return The propositionalized constraint. - **/ - public PropositionalConstraint propositionalize() { - if (constraint == null) return new PropositionalConstant(false); - return constraint.propositionalize(); - } - - - /** - * The hash code of a QuantifiedConstraintInvocation is the - * sum of the hash codes of its children. - * - * @return The hash code for this - * QuantifiedConstraintInvocation. - **/ - public int hashCode() { - return parameterized.hashCode() + replacer.hashCode(); - } - - - /** - * Two QuantifiedConstraintInvocations are equivalent when - * their children are equivalent. - * - * @return true iff the argument is an equivalent - * QuantifiedConstraintInvocation. - **/ - public boolean equals(Object o) { - if (!(o instanceof QuantifiedConstraintInvocation)) return false; - QuantifiedConstraintInvocation q = (QuantifiedConstraintInvocation) o; - return parameterized.equals(q.parameterized) && replacer == q.replacer; - } - - - /** - * Calls the appropriate visit(·) method of the given - * Inference for this Constraint, as per the - * visitor pattern. - * - * @param infer The inference visiting this constraint. - **/ - public void runVisit(Inference infer) { infer.visit(this); } -} + /** + * If this method is called without first calling + * setQuantificationVariables(Vector), false will be returned. + **/ + public boolean evaluate() { + return constraint != null && constraint.evaluate(); + } + + + /** + * Replaces all unquantified variables with the unique copy stored as a value of the given map; + * also instantiates all quantified variables and stores them in the given map. + * + * @param m The map in which to find unique copies of the variables. 
+ **/ + public void consolidateVariables(AbstractMap m) { + if (constraint != null) + constraint.consolidateVariables(m); + } + + + /** + * This method sets the given quantification variables to the given object references and + * evaluates the expressions involving those variables in this constraint's + * FirstOrderEquality children. + * + * @param o The new object references for the enclosing quantification variables, in order of + * nesting. + **/ + public void setQuantificationVariables(Vector o) { + if (replacer == null) { + System.err.println("LBJava ERROR: Attempting to set quantification variable with no " + + "variable setter implementation provided."); + System.exit(1); + } + + replacer.setQuantificationVariables(o); + constraint = parameterized.makeConstraint(replacer.compute()); + } + + + /** + * Returns the children of this constraint in an array. + * + * @return The children of this constraint in an array. + **/ + public Constraint[] getChildren() { + return new FirstOrderConstraint[0]; + } + + + /** + * If this method is called without first calling + * setQuantificationVariables(Vector), the constant representing false + * will be returned. + * + * @return The propositionalized constraint. + **/ + public PropositionalConstraint propositionalize() { + if (constraint == null) + return new PropositionalConstant(false); + return constraint.propositionalize(); + } + + + /** + * The hash code of a QuantifiedConstraintInvocation is the sum of the hash codes + * of its children. + * + * @return The hash code for this QuantifiedConstraintInvocation. + **/ + public int hashCode() { + return parameterized.hashCode() + replacer.hashCode(); + } + + + /** + * Two QuantifiedConstraintInvocations are equivalent when their children are + * equivalent. + * + * @return true iff the argument is an equivalent + * QuantifiedConstraintInvocation. 
+ **/ + public boolean equals(Object o) { + if (!(o instanceof QuantifiedConstraintInvocation)) + return false; + QuantifiedConstraintInvocation q = (QuantifiedConstraintInvocation) o; + return parameterized.equals(q.parameterized) && replacer == q.replacer; + } + + + /** + * Calls the appropriate visit(·) method of the given Inference + * for this Constraint, as per the visitor pattern. + * + * @param infer The inference visiting this constraint. + **/ + public void runVisit(Inference infer) { + infer.visit(this); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/Quantifier.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/Quantifier.java index 1e0510b2..d230dd9b 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/Quantifier.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/Quantifier.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.infer; @@ -17,133 +14,126 @@ /** - * A quantifier is a first order constraint parameterized by an object taken - * from a Java Collection of objects. - * - * @author Nick Rizzolo + * A quantifier is a first order constraint parameterized by an object taken from a Java + * Collection of objects. + * + * @author Nick Rizzolo **/ -public abstract class Quantifier extends FirstOrderConstraint -{ - /** The name of the quantification variable. 
*/ - protected String quantificationVariable; - /** The collection of objects to iterate over. */ - protected Collection collection; - /** The constraint being quantified. */ - protected FirstOrderConstraint constraint; - /** - * A list of the objects stored in the quantification variables of - * enclosing quantifiers. - **/ - protected Vector enclosingQuantificationSettings; - /** - * The implementation of the functions that compute any parameters this - * quantifier may have. - **/ - protected QuantifierArgumentReplacer replacer; - - - /** - * Initializing constructor. - * - * @param q The name of the quantification variable. - * @param col The collection of objects to iterate over. - * @param con The constraint being quantified. - **/ - public Quantifier(String q, Collection col, FirstOrderConstraint con) { - this(q, col, con, null); - } - - /** - * This constructor specifies a variable setter for when this quantifier is - * itself quantified. - * - * @param q The name of the quantification variable. - * @param col The collection of objects to iterate over. - * @param con The constraint being quantified. - * @param qar The variable setter. - **/ - public Quantifier(String q, Collection col, FirstOrderConstraint con, - QuantifierArgumentReplacer qar) { - quantificationVariable = q; - collection = col; - constraint = con; - replacer = qar; - } - - - /** - * Returns the children of this constraint in an array. - * - * @return The children of this constraint in an array. - **/ - public Constraint[] getChildren() { - return new FirstOrderConstraint[]{ constraint }; - } - - - /** - * Makes sure that the enclosingQuantificationSettings vector - * exists, then adds a place holder for this quantifier's quantification - * variable setting. - * - * @return The index of this quantifier's quantification variable. 
- **/ - protected int initialize() { - if (enclosingQuantificationSettings == null) - enclosingQuantificationSettings = new Vector(); - enclosingQuantificationSettings.add(null); - return enclosingQuantificationSettings.size() - 1; - } - - - /** - * Sets the variable map object stored in this object to the given - * argument; also instantiates all quantified variables and stores them in - * the map. - * - * @param m The map in which to find unique copies of the variables. - **/ - public void consolidateVariables(AbstractMap m) { - int index = initialize(); - - for (Iterator I = collection.iterator(); I.hasNext(); ) { - enclosingQuantificationSettings.set(index, I.next()); - constraint.setQuantificationVariables(enclosingQuantificationSettings); - constraint.consolidateVariables(m); +public abstract class Quantifier extends FirstOrderConstraint { + /** The name of the quantification variable. */ + protected String quantificationVariable; + /** The collection of objects to iterate over. */ + protected Collection collection; + /** The constraint being quantified. */ + protected FirstOrderConstraint constraint; + /** + * A list of the objects stored in the quantification variables of enclosing quantifiers. + **/ + protected Vector enclosingQuantificationSettings; + /** + * The implementation of the functions that compute any parameters this quantifier may have. + **/ + protected QuantifierArgumentReplacer replacer; + + + /** + * Initializing constructor. + * + * @param q The name of the quantification variable. + * @param col The collection of objects to iterate over. + * @param con The constraint being quantified. + **/ + public Quantifier(String q, Collection col, FirstOrderConstraint con) { + this(q, col, con, null); + } + + /** + * This constructor specifies a variable setter for when this quantifier is itself quantified. + * + * @param q The name of the quantification variable. + * @param col The collection of objects to iterate over. 
+ * @param con The constraint being quantified. + * @param qar The variable setter. + **/ + public Quantifier(String q, Collection col, FirstOrderConstraint con, + QuantifierArgumentReplacer qar) { + quantificationVariable = q; + collection = col; + constraint = con; + replacer = qar; + } + + + /** + * Returns the children of this constraint in an array. + * + * @return The children of this constraint in an array. + **/ + public Constraint[] getChildren() { + return new FirstOrderConstraint[] {constraint}; + } + + + /** + * Makes sure that the enclosingQuantificationSettings vector exists, then adds a + * place holder for this quantifier's quantification variable setting. + * + * @return The index of this quantifier's quantification variable. + **/ + protected int initialize() { + if (enclosingQuantificationSettings == null) + enclosingQuantificationSettings = new Vector(); + enclosingQuantificationSettings.add(null); + return enclosingQuantificationSettings.size() - 1; } - enclosingQuantificationSettings.removeElementAt(index); - } - - - /** - * The hash code of a Quantifier is the sum of the hash codes - * of its children plus three. - * - * @return The hash code for this Quantifier. - **/ - public int hashCode() { - int result = constraint.hashCode(); - if (replacer != null) result += replacer.hashCode(); - else result += collection.hashCode(); - return result; - } - - - /** - * Two Quantifiers are equivalent when their children are - * equivalent. - * - * @return true iff the argument is an equivalent - * Quantifier. - **/ - public boolean equals(Object o) { - if (!(o instanceof Quantifier)) return false; - Quantifier q = (Quantifier) o; - return replacer == q.replacer - && (replacer != null - || replacer == null && collection.equals(q.collection)) - && constraint.equals(q.constraint); - } -} + /** + * Sets the variable map object stored in this object to the given argument; also instantiates + * all quantified variables and stores them in the map. 
+ * + * @param m The map in which to find unique copies of the variables. + **/ + public void consolidateVariables(AbstractMap m) { + int index = initialize(); + + for (Iterator I = collection.iterator(); I.hasNext();) { + enclosingQuantificationSettings.set(index, I.next()); + constraint.setQuantificationVariables(enclosingQuantificationSettings); + constraint.consolidateVariables(m); + } + + enclosingQuantificationSettings.removeElementAt(index); + } + + + /** + * The hash code of a Quantifier is the sum of the hash codes of its children plus + * three. + * + * @return The hash code for this Quantifier. + **/ + public int hashCode() { + int result = constraint.hashCode(); + if (replacer != null) + result += replacer.hashCode(); + else + result += collection.hashCode(); + return result; + } + + + /** + * Two Quantifiers are equivalent when their children are equivalent. + * + * @return true iff the argument is an equivalent Quantifier. + **/ + public boolean equals(Object o) { + if (!(o instanceof Quantifier)) + return false; + Quantifier q = (Quantifier) o; + return replacer == q.replacer + && (replacer != null || replacer == null && collection.equals(q.collection)) + && constraint.equals(q.constraint); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/QuantifierArgumentReplacer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/QuantifierArgumentReplacer.java index 46c1c289..8639d950 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/QuantifierArgumentReplacer.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/QuantifierArgumentReplacer.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. 
See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.infer; @@ -14,80 +11,68 @@ /** - * Anonymous inner classes extending this class are instantiated by the code - * generated by the LBJava compiler when creating - * FirstOrderConstraint representations. The methods of this - * class are used to compute new values for the arguments of a quantified - * Quantifier. Only certain value returning methods are - * overridden. The others will throw - * UnsupportedOperationExceptions. - * - * @see edu.illinois.cs.cogcomp.lbjava.infer.FirstOrderConstraint - * @see edu.illinois.cs.cogcomp.lbjava.infer.Quantifier - * @see java.lang.UnsupportedOperationException - * @author Nick Rizzolo + * Anonymous inner classes extending this class are instantiated by the code generated by the LBJava + * compiler when creating FirstOrderConstraint representations. The methods of this + * class are used to compute new values for the arguments of a quantified Quantifier. + * Only certain value returning methods are overridden. The others will throw + * UnsupportedOperationExceptions. + * + * @see edu.illinois.cs.cogcomp.lbjava.infer.FirstOrderConstraint + * @see edu.illinois.cs.cogcomp.lbjava.infer.Quantifier + * @see java.lang.UnsupportedOperationException + * @author Nick Rizzolo **/ -abstract public class QuantifierArgumentReplacer extends ArgumentReplacer -{ - /** - * This flag is set if the collection of the quantifier is not quantified. - **/ - public boolean collectionConstant; - /** - * This flag is set if the bound parameter of an - * AtLeastQuantifier or an AtMostQuantifier is - * not quantified. 
- **/ - public boolean boundConstant; +abstract public class QuantifierArgumentReplacer extends ArgumentReplacer { + /** + * This flag is set if the collection of the quantifier is not quantified. + **/ + public boolean collectionConstant; + /** + * This flag is set if the bound parameter of an AtLeastQuantifier or an + * AtMostQuantifier is not quantified. + **/ + public boolean boundConstant; - /** - * Initializing constructor. - * - * @param c The context of the corresponding equality, except for - * quantification variables. - **/ - public QuantifierArgumentReplacer(Object[] c) { - super(c); - collectionConstant = boundConstant = false; - } + /** + * Initializing constructor. + * + * @param c The context of the corresponding equality, except for quantification variables. + **/ + public QuantifierArgumentReplacer(Object[] c) { + super(c); + collectionConstant = boundConstant = false; + } - /** - * Use this constructor to indicate which of the two arguments of the - * equality is in fact not quantified. - * - * @param c The context of the corresponding equality, except for - * quantification variables. - * @param b Set to false if the unquantified argument is the - * collection; set to true if the unquantified - * argument is the bound. - **/ - public QuantifierArgumentReplacer(Object[] c, boolean b) { - super(c); - collectionConstant = !b; - boundConstant = b; - } + /** + * Use this constructor to indicate which of the two arguments of the equality is in fact not + * quantified. + * + * @param c The context of the corresponding equality, except for quantification variables. + * @param b Set to false if the unquantified argument is the collection; set to + * true if the unquantified argument is the bound. + **/ + public QuantifierArgumentReplacer(Object[] c, boolean b) { + super(c); + collectionConstant = !b; + boundConstant = b; + } - /** - * Computes the new collection. 
This method needs to be overridden if it - * is to be called, since by default it simply throws an - * UnsupportedOperationException. - **/ - public Collection getCollection() { - throw new UnsupportedOperationException( - "LBJ ERROR: getCollection() not supported."); - } + /** + * Computes the new collection. This method needs to be overridden if it is to be called, since + * by default it simply throws an UnsupportedOperationException. + **/ + public Collection getCollection() { + throw new UnsupportedOperationException("LBJ ERROR: getCollection() not supported."); + } - /** - * Computes the new value of the bound. This method needs to be overridden - * if it is to be called, since by default it simply throws an - * UnsupportedOperationException. - **/ - public int getBound() { - throw new UnsupportedOperationException( - "LBJ ERROR: getBound() not supported."); - } + /** + * Computes the new value of the bound. This method needs to be overridden if it is to be + * called, since by default it simply throws an UnsupportedOperationException. + **/ + public int getBound() { + throw new UnsupportedOperationException("LBJ ERROR: getBound() not supported."); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/UniversalQuantifier.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/UniversalQuantifier.java index ff32f9fc..57b43e2c 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/UniversalQuantifier.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/UniversalQuantifier.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.infer; @@ -16,133 +13,129 @@ /** - * A universal quantifier states that the constraint must hold for all - * objects from the collection. - * - * @author Nick Rizzolo + * A universal quantifier states that the constraint must hold for all objects from the collection. + * + * @author Nick Rizzolo **/ -public class UniversalQuantifier extends Quantifier -{ - /** - * Initializing constructor. - * - * @param q The name of the quantification variable. - * @param col The collection of objects to iterate over. - * @param con The constraint being quantified. - **/ - public UniversalQuantifier(String q, Collection col, - FirstOrderConstraint con) { - super(q, col, con); - } - - /** - * This constructor specifies a variable setter for when this quantifier is - * itself quantified. - * - * @param q The name of the quantification variable. - * @param col The collection of objects to iterate over. - * @param con The constraint being quantified. - * @param qar The variable setter. - **/ - public UniversalQuantifier(String q, Collection col, - FirstOrderConstraint con, - QuantifierArgumentReplacer qar) { - super(q, col, con, qar); - } - - - /** Determines whether the constraint is satisfied. */ - public boolean evaluate() { - int index = initialize(); - - for (Iterator I = collection.iterator(); I.hasNext(); ) { - enclosingQuantificationSettings.set(index, I.next()); - constraint.setQuantificationVariables(enclosingQuantificationSettings); - - if (!constraint.evaluate()) { +public class UniversalQuantifier extends Quantifier { + /** + * Initializing constructor. + * + * @param q The name of the quantification variable. + * @param col The collection of objects to iterate over. 
+ * @param con The constraint being quantified. + **/ + public UniversalQuantifier(String q, Collection col, FirstOrderConstraint con) { + super(q, col, con); + } + + /** + * This constructor specifies a variable setter for when this quantifier is itself quantified. + * + * @param q The name of the quantification variable. + * @param col The collection of objects to iterate over. + * @param con The constraint being quantified. + * @param qar The variable setter. + **/ + public UniversalQuantifier(String q, Collection col, FirstOrderConstraint con, + QuantifierArgumentReplacer qar) { + super(q, col, con, qar); + } + + + /** Determines whether the constraint is satisfied. */ + public boolean evaluate() { + int index = initialize(); + + for (Iterator I = collection.iterator(); I.hasNext();) { + enclosingQuantificationSettings.set(index, I.next()); + constraint.setQuantificationVariables(enclosingQuantificationSettings); + + if (!constraint.evaluate()) { + enclosingQuantificationSettings.removeElementAt(index); + return false; + } + } + enclosingQuantificationSettings.removeElementAt(index); - return false; - } + return true; } - enclosingQuantificationSettings.removeElementAt(index); - return true; - } - - - /** - * This method sets the given quantification variables to the given object - * references and evaluates the expressions involving those variables in - * this constraint's children. - * - * @param o The new object references for the enclosing quantification - * variables, in order of nesting. - **/ - public void setQuantificationVariables(Vector o) { - enclosingQuantificationSettings = o; - - if (replacer != null) { - replacer.setQuantificationVariables(o); - collection = replacer.getCollection(); + + /** + * This method sets the given quantification variables to the given object references and + * evaluates the expressions involving those variables in this constraint's children. 
+ * + * @param o The new object references for the enclosing quantification variables, in order of + * nesting. + **/ + public void setQuantificationVariables(Vector o) { + enclosingQuantificationSettings = o; + + if (replacer != null) { + replacer.setQuantificationVariables(o); + collection = replacer.getCollection(); + } } - } - - - /** - * Transforms this first order constraint into a propositional constraint. - * - * @return The propositionalized constraint. - **/ - public PropositionalConstraint propositionalize() { - PropositionalConstraint result = null; - - int index = initialize(); - for (Iterator I = collection.iterator(); I.hasNext(); ) { - enclosingQuantificationSettings.set(index, I.next()); - constraint.setQuantificationVariables(enclosingQuantificationSettings); - - if (result == null) result = constraint.propositionalize(); - else - result = - new PropositionalConjunction(result, constraint.propositionalize()); + + + /** + * Transforms this first order constraint into a propositional constraint. + * + * @return The propositionalized constraint. + **/ + public PropositionalConstraint propositionalize() { + PropositionalConstraint result = null; + + int index = initialize(); + for (Iterator I = collection.iterator(); I.hasNext();) { + enclosingQuantificationSettings.set(index, I.next()); + constraint.setQuantificationVariables(enclosingQuantificationSettings); + + if (result == null) + result = constraint.propositionalize(); + else + result = new PropositionalConjunction(result, constraint.propositionalize()); + } + + enclosingQuantificationSettings.removeElementAt(index); + if (result == null) + result = new PropositionalConstant(true); + return result; } - enclosingQuantificationSettings.removeElementAt(index); - if (result == null) result = new PropositionalConstant(true); - return result; - } - - - /** - * The hash code of a UniversalQuantifier is the sum of the - * hash codes of its children. 
- * - * @return The hash code for this UniversalQuantifier. - **/ - public int hashCode() { return super.hashCode(); } - - - /** - * Two UniversalQuantifiers are equivalent when their children - * are equivalent. - * - * @return true iff the argument is an equivalent - * UniversalQuantifier. - **/ - public boolean equals(Object o) { - if (!(o instanceof UniversalQuantifier)) return false; - UniversalQuantifier q = (UniversalQuantifier) o; - return super.equals(q); - } - - - /** - * Calls the appropriate visit(·) method of the given - * Inference for this Constraint, as per the - * visitor pattern. - * - * @param infer The inference visiting this constraint. - **/ - public void runVisit(Inference infer) { infer.visit(this); } -} + /** + * The hash code of a UniversalQuantifier is the sum of the hash codes of its + * children. + * + * @return The hash code for this UniversalQuantifier. + **/ + public int hashCode() { + return super.hashCode(); + } + + + /** + * Two UniversalQuantifiers are equivalent when their children are equivalent. + * + * @return true iff the argument is an equivalent UniversalQuantifier. + **/ + public boolean equals(Object o) { + if (!(o instanceof UniversalQuantifier)) + return false; + UniversalQuantifier q = (UniversalQuantifier) o; + return super.equals(q); + } + + + /** + * Calls the appropriate visit(·) method of the given Inference + * for this Constraint, as per the visitor pattern. + * + * @param infer The inference visiting this constraint. 
+ **/ + public void runVisit(Inference infer) { + infer.visit(this); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/ZeroOneILPProblem.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/ZeroOneILPProblem.java index 4a7ba1d3..77fdb766 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/ZeroOneILPProblem.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/infer/ZeroOneILPProblem.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.infer; @@ -14,492 +11,495 @@ import edu.illinois.cs.cogcomp.lbjava.classify.Score; import edu.illinois.cs.cogcomp.lbjava.parse.LineByLine; -import edu.illinois.cs.cogcomp.lbjava.util.DVector; -import edu.illinois.cs.cogcomp.lbjava.util.DVector2D; -import edu.illinois.cs.cogcomp.lbjava.util.IVector; -import edu.illinois.cs.cogcomp.lbjava.util.IVector2D; -import edu.illinois.cs.cogcomp.lbjava.util.Sort; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.DVector; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.DVector2D; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.IVector; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.IVector2D; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.Sort; /** - * Can be used to represent an ILP problem, assuming all variables are 0-1. 
- * - * @author Nick Rizzolo + * Can be used to represent an ILP problem, assuming all variables are 0-1. + * + * @author Nick Rizzolo **/ -public class ZeroOneILPProblem -{ - /** Represents the constraint type "equality". */ - public static final int EQUALITY = 0; - /** Represents the constraint type "less than or equal to". */ - public static final int LESS_THAN = 1; - /** Represents the constraint type "greater than or equal to". */ - public static final int GREATER_THAN = 2; - /** Maps from the three constraint types to their operator symbols. */ - public static final String[] boundTypeSymbols = { "=", "<=", ">=" }; - - /** Used to mitigate floating point error in (in)equality comparisons. */ - public static final double TOLERANCE = 1e-10; - - - /** - * Remembers whether the objective function should be maximized or - * minimzed. - **/ - protected boolean maximize; - /** - * Represents the coefficients of all inference variables in the objective - * function. - **/ - protected DVector objectiveCoefficients; - /** - * Half of a sparse matrix representation of the constraints; this half - * contains the variable indexes corresponding to the coefficients in - * {@link #Ac}. - **/ - protected IVector2D Av; - /** - * Half of a sparse matrix representation of the constraints; this half - * contains the coefficients on the variables whose indexes appear in - * {@link #Av}. - **/ - protected DVector2D Ac; - /** Contains the types of the constraints. */ - protected IVector boundTypes; - /** The vector of constraint bounds. */ - protected DVector bounds; - - - /** Default constructor. */ - public ZeroOneILPProblem() { reset(); } - - - /** - * Reads a textual representation of a 0-1 ILP problem from the specified - * file. - * - * @param name The name of the file from which to read the ILP problem's - * representation. 
- **/ - public ZeroOneILPProblem(String name) { - reset(); - LineByLine parser = - new LineByLine(name) { - public Object next() { - String line = readLine(); - while (line != null && line.matches("\\s*")) line = readLine(); - return line; +public class ZeroOneILPProblem { + /** Represents the constraint type "equality". */ + public static final int EQUALITY = 0; + /** Represents the constraint type "less than or equal to". */ + public static final int LESS_THAN = 1; + /** Represents the constraint type "greater than or equal to". */ + public static final int GREATER_THAN = 2; + /** Maps from the three constraint types to their operator symbols. */ + public static final String[] boundTypeSymbols = {"=", "<=", ">="}; + + /** Used to mitigate floating point error in (in)equality comparisons. */ + public static final double TOLERANCE = 1e-10; + + + /** + * Remembers whether the objective function should be maximized or minimzed. + **/ + protected boolean maximize; + /** + * Represents the coefficients of all inference variables in the objective function. + **/ + protected DVector objectiveCoefficients; + /** + * Half of a sparse matrix representation of the constraints; this half contains the variable + * indexes corresponding to the coefficients in {@link #Ac}. + **/ + protected IVector2D Av; + /** + * Half of a sparse matrix representation of the constraints; this half contains the + * coefficients on the variables whose indexes appear in {@link #Av}. + **/ + protected DVector2D Ac; + /** Contains the types of the constraints. */ + protected IVector boundTypes; + /** The vector of constraint bounds. */ + protected DVector bounds; + + + /** Default constructor. */ + public ZeroOneILPProblem() { + reset(); + } + + + /** + * Reads a textual representation of a 0-1 ILP problem from the specified file. + * + * @param name The name of the file from which to read the ILP problem's representation. 
+ **/ + public ZeroOneILPProblem(String name) { + reset(); + LineByLine parser = new LineByLine(name) { + public Object next() { + String line = readLine(); + while (line != null && line.matches("\\s*")) + line = readLine(); + return line; + } + }; + + String line = (String) parser.next(); + String[] a = line.split(" "); + maximize = a[0].startsWith("max"); + for (int i = 1; i < a.length; i += 2) { + double c = Double.parseDouble(a[i]); + int v = Integer.parseInt(a[i + 1].substring(2)); + objectiveCoefficients.set(v, c); + } + + line = (String) parser.next(); + if (line.indexOf("subject") != -1) + line = (String) parser.next(); + + for (; line != null; line = (String) parser.next()) { + a = line.substring(2).split(" "); + int[] variables = new int[a.length / 2 - 1]; + double[] coefficients = new double[variables.length]; + + for (int j = 0; j < a.length - 2; j += 2) { + coefficients[j / 2] = Double.parseDouble(a[j]); + variables[j / 2] = Integer.parseInt(a[j + 1].substring(2)); + } + + int type = EQUALITY; + if (a[a.length - 2].charAt(0) == '>') + type = GREATER_THAN; + else if (a[a.length - 2].charAt(0) == '<') + type = LESS_THAN; + double bound = Double.parseDouble(a[a.length - 1]); + + addConstraint(variables, coefficients, type, bound); } - }; - - String line = (String) parser.next(); - String[] a = line.split(" "); - maximize = a[0].startsWith("max"); - for (int i = 1; i < a.length; i += 2) { - double c = Double.parseDouble(a[i]); - int v = Integer.parseInt(a[i+1].substring(2)); - objectiveCoefficients.set(v, c); } - line = (String) parser.next(); - if (line.indexOf("subject") != -1) line = (String) parser.next(); - for (; line != null; line = (String) parser.next()) { - a = line.substring(2).split(" "); - int[] variables = new int[a.length / 2 - 1]; - double[] coefficients = new double[variables.length]; + /** + * This method clears the all constraints and variables out of the problem representation, + * bringing it back to the state it was in when first 
constructed. + **/ + public void reset() { + maximize = false; + objectiveCoefficients = new DVector(); + Av = new IVector2D(); + Ac = new DVector2D(); + boundTypes = new IVector(); + bounds = new DVector(); + } + + + /** + * Sets the direction of the objective function. + * + * @param d true if the objective function is to be maximized. + **/ + public void setMaximize(boolean d) { + maximize = d; + } + + /** + * Returns true iff the objective function is to be maximized. + **/ + public boolean getMaximize() { + return maximize; + } + + + /** Returns the number of constraints in the ILP problem. */ + public int rows() { + return bounds.size(); + } + + /** Returns the number of variables in the ILP problem. */ + public int columns() { + return objectiveCoefficients.size(); + } + + + /** + * Sets the specified coefficient in the objective function. + * + * @param j The index of the variable whose coefficient will be set. + * @param cj The new value of the coefficient. + **/ + public void setObjectiveCoefficient(int j, double cj) { + objectiveCoefficients.set(j, cj); + } + + /** Returns the specified objective coefficient. */ + public double getObjectiveCoefficient(int j) { + return objectiveCoefficients.get(j); + } + - for (int j = 0; j < a.length - 2; j += 2) { - coefficients[j / 2] = Double.parseDouble(a[j]); - variables[j / 2] = Integer.parseInt(a[j+1].substring(2)); - } + /** + * Sets the specified coefficient in the constraint matrix. + * + * @param i The index of the constraint. + * @param j The index of the variable. + * @param aij The new value of the coefficient. 
+ **/ + public void setConstraintCoefficient(int i, int j, double aij) { + int index = Av.binarySearch(i, j); - int type = EQUALITY; - if (a[a.length - 2].charAt(0) == '>') type = GREATER_THAN; - else if (a[a.length - 2].charAt(0) == '<') type = LESS_THAN; - double bound = Double.parseDouble(a[a.length - 1]); + if (index < 0) { + index = -index - 1; - addConstraint(variables, coefficients, type, bound); + for (int k = Av.size(i) - 1; k >= index; --k) { + Av.set(i, k + 1, Av.get(i, k)); + Ac.set(i, k + 1, Ac.get(i, k)); + } + + Av.set(i, index, j); + } + + Ac.set(i, index, aij); } - } - - - /** - * This method clears the all constraints and variables out of the problem - * representation, bringing it back to the state it was in when first - * constructed. - **/ - public void reset() { - maximize = false; - objectiveCoefficients = new DVector(); - Av = new IVector2D(); - Ac = new DVector2D(); - boundTypes = new IVector(); - bounds = new DVector(); - } - - - /** - * Sets the direction of the objective function. - * - * @param d true if the objective function is to be - * maximized. - **/ - public void setMaximize(boolean d) { maximize = d; } - /** - * Returns true iff the objective function is to be maximized. - **/ - public boolean getMaximize() { return maximize; } - - - /** Returns the number of constraints in the ILP problem. */ - public int rows() { return bounds.size(); } - /** Returns the number of variables in the ILP problem. */ - public int columns() { return objectiveCoefficients.size(); } - - - /** - * Sets the specified coefficient in the objective function. - * - * @param j The index of the variable whose coefficient will be set. - * @param cj The new value of the coefficient. - **/ - public void setObjectiveCoefficient(int j, double cj) { - objectiveCoefficients.set(j, cj); - } - /** Returns the specified objective coefficient. 
*/ - public double getObjectiveCoefficient(int j) { - return objectiveCoefficients.get(j); - } - - - /** - * Sets the specified coefficient in the constraint matrix. - * - * @param i The index of the constraint. - * @param j The index of the variable. - * @param aij The new value of the coefficient. - **/ - public void setConstraintCoefficient(int i, int j, double aij) { - int index = Av.binarySearch(i, j); - - if (index < 0) { - index = -index - 1; - - for (int k = Av.size(i) - 1; k >= index; --k) { - Av.set(i, k + 1, Av.get(i, k)); - Ac.set(i, k + 1, Ac.get(i, k)); - } - - Av.set(i, index, j); + + /** Returns the specified constraint coefficient. */ + public double getConstraintCoefficient(int i, int j) { + int index = Av.binarySearch(i, j); + if (index < 0) + return 0; + return Ac.get(i, index); } - Ac.set(i, index, aij); - } - - /** Returns the specified constraint coefficient. */ - public double getConstraintCoefficient(int i, int j) { - int index = Av.binarySearch(i, j); - if (index < 0) return 0; - return Ac.get(i, index); - } - - - /** - * Sets the bound type for the specified constraint. - * - * @param i The constraint whose bound type will be set. - * @param t The new type for the constraint's bound. - **/ - public void setBoundType(int i, int t) { boundTypes.set(i, t); } - /** Returns the type of the specified constraint's bound. */ - public int getBoundType(int i) { return boundTypes.get(i); } - - - /** - * Sets the bound on the specified constraint. - * - * @param i The constraint whose bound will be set. - * @param bi The new value for the bound. - **/ - public void setConstraintBound(int i, double bi) { bounds.set(i, bi); } - /** Returns the bound of the specified constraint. */ - public double getConstraintBound(int i) { return bounds.get(i); } - - - /** - * Determines whether all constraints are satisfied by the given solution. - * - * @param x The settings of the variables. - * @return true iff all constraints are satisfied. 
- **/ - public boolean constraintsSatisfied(int[] x) { - int constraints = Av.size(); - - for (int i = 0; i < constraints; ++i) { - double a = 0; - int variables = Av.size(i); - for (int j = 0; j < variables; ++j) - a += Ac.get(i, j) * x[Av.get(i, j)]; - if (boundTypes.get(i) == EQUALITY && a != bounds.get(i) - || boundTypes.get(i) == LESS_THAN && a > bounds.get(i) - || boundTypes.get(i) == GREATER_THAN && a < bounds.get(i)) - return false; + + /** + * Sets the bound type for the specified constraint. + * + * @param i The constraint whose bound type will be set. + * @param t The new type for the constraint's bound. + **/ + public void setBoundType(int i, int t) { + boundTypes.set(i, t); } - return true; - } - - - /** - * Adds a new Boolean variable (an integer variable constrained to take - * either the value 0 or the value 1) with the specified coefficient in the - * objective function to the problem. - * - * @param c The objective function coefficient for the new Boolean - * variable. - * @return The index of the created variable. - **/ - public int addBooleanVariable(double c) { - objectiveCoefficients.add(c); - return objectiveCoefficients.size() - 1; - } - - - /** - * Adds a general, multi-valued discrete variable, which is implemented as - * a set of Boolean variables, one per value of the discrete variable, with - * exactly one of those variables set true at any given time. - * - * @param c The objective function coefficients for the new Boolean - * variables. - * @return The indexes of the newly created variables. - **/ - public int[] addDiscreteVariable(double[] c) { - int s = objectiveCoefficients.size(); - int[] result = new int[c.length]; - - for (int i = 0; i < c.length; ++i) { - objectiveCoefficients.add(c[i]); - result[i] = s + i; + /** Returns the type of the specified constraint's bound. 
*/ + public int getBoundType(int i) { + return boundTypes.get(i); } - double[] a = new double[c.length]; - Arrays.fill(a, 1); - addEqualityConstraint(result, a, 1); - return result; - } - - - /** - * Adds a general, multi-valued discrete variable, which is implemented as - * a set of Boolean variables, one per value of the discrete variable, with - * exactly one of those variables set true at any given time. - * - * @param c An array of {@link Score}s containing the - * objective function coefficients for the new Boolean variables. - * @return The indexes of the newly created variables. - **/ - public int[] addDiscreteVariable(Score[] c) { - double[] d = new double[c.length]; - for (int i = 0; i < c.length; ++i) d[i] = c[i].score; - return addDiscreteVariable(d); - } - - - /** - * Adds a typeless constraint to the problem. No need to waste space - * storing the types of constraints if they are implied or assumed. - * Otherwise, this method does the same thing as - * {@link #addConstraint(int[],double[],int,double)}. - * - * @param I The indexes of the variables with non-zero coefficients. - * @param a The coefficients of the variables with the given indexes. - * @param b The new constraint will enforce equality with this constant. - **/ - protected void addConstraint(final int[] I, double[] a, double b) { - int[] indexes = new int[I.length]; - for (int i = 0; i < I.length; ++i) indexes[i] = i; - Sort.sort(indexes, - new Sort.IntComparator() { - public int compare(int i1, int i2) { return I[i1] - I[i2]; } - }); - - int s = Av.size(); - - for (int i = 0; i < I.length; ++i) { - Av.set(s, i, I[indexes[i]]); - double c = a[indexes[i]]; - double rounded = Math.round(c); - if (Math.abs(c - rounded) < TOLERANCE) c = rounded; - Ac.set(s, i, c); + + /** + * Sets the bound on the specified constraint. + * + * @param i The constraint whose bound will be set. + * @param bi The new value for the bound. 
+ **/ + public void setConstraintBound(int i, double bi) { + bounds.set(i, bi); } - bounds.add(b); - } - - - /** - * Adds a new constraint of the specified type to the problem. The two - * array arguments must be the same length, as their elements correspond to - * each other. Variables whose coefficients are zero need not be - * mentioned. Variables that are mentioned must have previously been added - * via {@link #addBooleanVariable(double)} or - * {@link #addDiscreteVariable(double[])}. The resulting constraint has - * the form: - *
xi * a ?= b
- * where xi represents the inference variables - * whose indexes are contained in the array i, - * * represents dot product, and ?= stands for the type of the - * constraint. - * - *

This method is called by the other constraint adding methods in this - * class. It sorts the variables and their coefficients so that the - * presence of a given variable can be determined with - * {@link edu.illinois.cs.cogcomp.lbjava.util.IVector2D#binarySearch(int,int)}. - * - * @param i The indexes of the variables with non-zero coefficients. - * @param a The coefficients of the variables with the given indexes. - * @param t The type of comparison in this constraint. - * @param b The new constraint will enforce equality with this constant. - **/ - protected void addConstraint(int[] i, double[] a, int t, double b) { - addConstraint(i, a, b); - boundTypes.add(t); - } - - - /** - * Adds a new fixed constraint to the problem. The two array arguments - * must be the same length, as their elements correspond to each other. - * Variables whose coefficients are zero need not be mentioned. Variables - * that are mentioned must have previously been added via - * {@link #addBooleanVariable(double)} or - * {@link #addDiscreteVariable(double[])}. The resulting constraint has - * the form: - *

xi * a = b
- * where xi represents the inference variables - * whose indexes are contained in the array i and - * * represents dot product. - * - * @param i The indexes of the variables with non-zero coefficients. - * @param a The coefficients of the variables with the given indexes. - * @param b The new constraint will enforce equality with this constant. - **/ - public void addEqualityConstraint(int[] i, double[] a, double b) { - addConstraint(i, a, EQUALITY, b); - } - - - /** - * Adds a new lower bounded constraint to the problem. The two array - * arguments must be the same length, as their elements correspond to each - * other. Variables whose coefficients are zero need not be mentioned. - * Variables that are mentioned must have previously been added via - * {@link #addBooleanVariable(double)} or - * {@link #addDiscreteVariable(double[])}. The resulting constraint has - * the form: - *
xi * a >= b
- * where xi represents the inference variables - * whose indexes are contained in the array i and - * * represents dot product. - * - * @param i The indexes of the variables with non-zero coefficients. - * @param a The coefficients of the variables with the given indexes. - * @param b The lower bound for the new constraint. - **/ - public void addGreaterThanConstraint(int[] i, double[] a, double b) { - addConstraint(i, a, GREATER_THAN, b); - } - - - /** - * Adds a new upper bounded constraint to the problem. The two array - * arguments must be the same length, as their elements correspond to each - * other. Variables whose coefficients are zero need not be mentioned. - * Variables that are mentioned must have previously been added via - * {@link #addBooleanVariable(double)} or - * {@link #addDiscreteVariable(double[])}. The resulting constraint has - * the form: - *
xi * a <= b
- * where xi represents the inference variables - * whose indexes are contained in the array i and - * * represents dot product. - * - * @param i The indexes of the variables with non-zero coefficients. - * @param a The coefficients of the variables with the given indexes. - * @param b The upper bound for the new constraint. - **/ - public void addLessThanConstraint(int[] i, double[] a, double b) { - addConstraint(i, a, LESS_THAN, b); - } - - - /** - * This method evaluates the objective function on a potential (not - * necessarily feasible) solution. - * - * @param x The current settings of the inference variables. - * @return The value of the objective function with these variable - * settings. - **/ - public double evaluate(int[] x) { - double result = 0; - for (int i = 0; i < x.length; ++i) - result += x[i] * objectiveCoefficients.get(i); - return result; - } - - - /** - * Creates a textual representation of the ILP problem in an algebraic - * notation. - * - * @param buffer The created textual representation will be appended here. - **/ - public void write(StringBuffer buffer) { - if (maximize) buffer.append("max"); - else buffer.append("min"); - - int variables = objectiveCoefficients.size(); - for (int i = 0; i < variables; ++i) { - double c = objectiveCoefficients.get(i); - buffer.append(" "); - if (c >= 0) buffer.append("+"); - buffer.append(c); - buffer.append(" x_"); - buffer.append(i); + /** Returns the bound of the specified constraint. 
*/ + public double getConstraintBound(int i) { + return bounds.get(i); } - buffer.append("\n"); - - int constraints = Ac.size(); - for (int i = 0; i < constraints; ++i) { - int constraintSize = Ac.size(i); - buffer.append(" "); - - for (int j = 0; j < constraintSize; ++j) { - double c = Ac.get(i, j); - buffer.append(" "); - if (c >= 0) buffer.append("+"); - buffer.append(c); - buffer.append(" x_"); - buffer.append(Av.get(i, j)); - } - - buffer.append(" "); - buffer.append(boundTypeSymbols[boundTypes.get(i)]); - buffer.append(" "); - buffer.append(bounds.get(i)); - buffer.append("\n"); + + /** + * Determines whether all constraints are satisfied by the given solution. + * + * @param x The settings of the variables. + * @return true iff all constraints are satisfied. + **/ + public boolean constraintsSatisfied(int[] x) { + int constraints = Av.size(); + + for (int i = 0; i < constraints; ++i) { + double a = 0; + int variables = Av.size(i); + for (int j = 0; j < variables; ++j) + a += Ac.get(i, j) * x[Av.get(i, j)]; + if (boundTypes.get(i) == EQUALITY && a != bounds.get(i) + || boundTypes.get(i) == LESS_THAN && a > bounds.get(i) + || boundTypes.get(i) == GREATER_THAN && a < bounds.get(i)) + return false; + } + + return true; } - } - /** Returns the representation created by {@link #write(StringBuffer)}. */ - public String toString() { - StringBuffer buffer = new StringBuffer(); - write(buffer); - return buffer.toString(); - } -} + /** + * Adds a new Boolean variable (an integer variable constrained to take either the value 0 or + * the value 1) with the specified coefficient in the objective function to the problem. + * + * @param c The objective function coefficient for the new Boolean variable. + * @return The index of the created variable. 
+ **/ + public int addBooleanVariable(double c) { + objectiveCoefficients.add(c); + return objectiveCoefficients.size() - 1; + } + + /** + * Adds a general, multi-valued discrete variable, which is implemented as a set of Boolean + * variables, one per value of the discrete variable, with exactly one of those variables set + * true at any given time. + * + * @param c The objective function coefficients for the new Boolean variables. + * @return The indexes of the newly created variables. + **/ + public int[] addDiscreteVariable(double[] c) { + int s = objectiveCoefficients.size(); + int[] result = new int[c.length]; + + for (int i = 0; i < c.length; ++i) { + objectiveCoefficients.add(c[i]); + result[i] = s + i; + } + + double[] a = new double[c.length]; + Arrays.fill(a, 1); + addEqualityConstraint(result, a, 1); + return result; + } + + + /** + * Adds a general, multi-valued discrete variable, which is implemented as a set of Boolean + * variables, one per value of the discrete variable, with exactly one of those variables set + * true at any given time. + * + * @param c An array of {@link Score}s containing the objective function coefficients for the + * new Boolean variables. + * @return The indexes of the newly created variables. + **/ + public int[] addDiscreteVariable(Score[] c) { + double[] d = new double[c.length]; + for (int i = 0; i < c.length; ++i) + d[i] = c[i].score; + return addDiscreteVariable(d); + } + + + /** + * Adds a typeless constraint to the problem. No need to waste space storing the types of + * constraints if they are implied or assumed. Otherwise, this method does the same thing as + * {@link #addConstraint(int[],double[],int,double)}. + * + * @param I The indexes of the variables with non-zero coefficients. + * @param a The coefficients of the variables with the given indexes. + * @param b The new constraint will enforce equality with this constant. 
+ **/ + protected void addConstraint(final int[] I, double[] a, double b) { + int[] indexes = new int[I.length]; + for (int i = 0; i < I.length; ++i) + indexes[i] = i; + Sort.sort(indexes, new Sort.IntComparator() { + public int compare(int i1, int i2) { + return I[i1] - I[i2]; + } + }); + + int s = Av.size(); + + for (int i = 0; i < I.length; ++i) { + Av.set(s, i, I[indexes[i]]); + double c = a[indexes[i]]; + double rounded = Math.round(c); + if (Math.abs(c - rounded) < TOLERANCE) + c = rounded; + Ac.set(s, i, c); + } + + bounds.add(b); + } + + + /** + * Adds a new constraint of the specified type to the problem. The two array arguments must be + * the same length, as their elements correspond to each other. Variables whose coefficients are + * zero need not be mentioned. Variables that are mentioned must have previously been added via + * {@link #addBooleanVariable(double)} or {@link #addDiscreteVariable(double[])}. The resulting + * constraint has the form:
xi * a ?= b
where + * xi represents the inference variables whose indexes are contained in + * the array i, * represents dot product, and ?= stands for the type + * of the constraint. + * + *

+ * This method is called by the other constraint adding methods in this class. It sorts the + * variables and their coefficients so that the presence of a given variable can be determined + * with {@link IVector2D#binarySearch(int,int)}. + * + * @param i The indexes of the variables with non-zero coefficients. + * @param a The coefficients of the variables with the given indexes. + * @param t The type of comparison in this constraint. + * @param b The new constraint will enforce equality with this constant. + **/ + protected void addConstraint(int[] i, double[] a, int t, double b) { + addConstraint(i, a, b); + boundTypes.add(t); + } + + + /** + * Adds a new fixed constraint to the problem. The two array arguments must be the same length, + * as their elements correspond to each other. Variables whose coefficients are zero need not be + * mentioned. Variables that are mentioned must have previously been added via + * {@link #addBooleanVariable(double)} or {@link #addDiscreteVariable(double[])}. The resulting + * constraint has the form:

xi * a = b
where + * xi represents the inference variables whose indexes are contained in + * the array i and * represents dot product. + * + * @param i The indexes of the variables with non-zero coefficients. + * @param a The coefficients of the variables with the given indexes. + * @param b The new constraint will enforce equality with this constant. + **/ + public void addEqualityConstraint(int[] i, double[] a, double b) { + addConstraint(i, a, EQUALITY, b); + } + + + /** + * Adds a new lower bounded constraint to the problem. The two array arguments must be the same + * length, as their elements correspond to each other. Variables whose coefficients are zero + * need not be mentioned. Variables that are mentioned must have previously been added via + * {@link #addBooleanVariable(double)} or {@link #addDiscreteVariable(double[])}. The resulting + * constraint has the form:
xi * a >= b
+ * where xi represents the inference variables whose indexes are + * contained in the array i and * represents dot product. + * + * @param i The indexes of the variables with non-zero coefficients. + * @param a The coefficients of the variables with the given indexes. + * @param b The lower bound for the new constraint. + **/ + public void addGreaterThanConstraint(int[] i, double[] a, double b) { + addConstraint(i, a, GREATER_THAN, b); + } + + + /** + * Adds a new upper bounded constraint to the problem. The two array arguments must be the same + * length, as their elements correspond to each other. Variables whose coefficients are zero + * need not be mentioned. Variables that are mentioned must have previously been added via + * {@link #addBooleanVariable(double)} or {@link #addDiscreteVariable(double[])}. The resulting + * constraint has the form:
xi * a <= b
+ * where xi represents the inference variables whose indexes are + * contained in the array i and * represents dot product. + * + * @param i The indexes of the variables with non-zero coefficients. + * @param a The coefficients of the variables with the given indexes. + * @param b The upper bound for the new constraint. + **/ + public void addLessThanConstraint(int[] i, double[] a, double b) { + addConstraint(i, a, LESS_THAN, b); + } + + + /** + * This method evaluates the objective function on a potential (not necessarily feasible) + * solution. + * + * @param x The current settings of the inference variables. + * @return The value of the objective function with these variable settings. + **/ + public double evaluate(int[] x) { + double result = 0; + for (int i = 0; i < x.length; ++i) + result += x[i] * objectiveCoefficients.get(i); + return result; + } + + + /** + * Creates a textual representation of the ILP problem in an algebraic notation. + * + * @param buffer The created textual representation will be appended here. 
+ **/ + public void write(StringBuffer buffer) { + if (maximize) + buffer.append("max"); + else + buffer.append("min"); + + int variables = objectiveCoefficients.size(); + for (int i = 0; i < variables; ++i) { + double c = objectiveCoefficients.get(i); + buffer.append(" "); + if (c >= 0) + buffer.append("+"); + buffer.append(c); + buffer.append(" x_"); + buffer.append(i); + } + + buffer.append("\n"); + + int constraints = Ac.size(); + for (int i = 0; i < constraints; ++i) { + int constraintSize = Ac.size(i); + buffer.append(" "); + + for (int j = 0; j < constraintSize; ++j) { + double c = Ac.get(i, j); + buffer.append(" "); + if (c >= 0) + buffer.append("+"); + buffer.append(c); + buffer.append(" x_"); + buffer.append(Av.get(i, j)); + } + + buffer.append(" "); + buffer.append(boundTypeSymbols[boundTypes.get(i)]); + buffer.append(" "); + buffer.append(bounds.get(i)); + buffer.append("\n"); + } + } + + + /** Returns the representation created by {@link #write(StringBuffer)}. */ + public String toString() { + StringBuffer buffer = new StringBuffer(); + write(buffer); + return buffer.toString(); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/io/ChannelOutputStream.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/io/ChannelOutputStream.java index 07efd2fa..4f1112d5 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/io/ChannelOutputStream.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/io/ChannelOutputStream.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.io; @@ -17,108 +14,107 @@ /** - * This class implements an output stream that buffers output in a directly - * allocated ByteBuffer before writing it to a channel. - * - * @author Nick Rizzolo + * This class implements an output stream that buffers output in a directly allocated + * ByteBuffer before writing it to a channel. + * + * @author Nick Rizzolo **/ -public class ChannelOutputStream extends OutputStream -{ - /** The default capacity of {@link #buffer}. */ - private static int defaultCapacity = 1 << 13; - - /** Holds data until it is written. */ - protected ByteBuffer buffer; - /** The channel where the data will be written. */ - protected WritableByteChannel channel; - - - /** - * Creates the stream from the channel where the data will be written. - * - * @param out The channel where the data will be written. - **/ - public ChannelOutputStream(WritableByteChannel out) { - this(out, defaultCapacity); - } - - /** - * Creates the stream from the channel where the data will be written and a - * buffer size. - * - * @param out The channel where the data will be written. - * @param size The buffer size. - **/ - public ChannelOutputStream(WritableByteChannel out, int size) { - if (size < 0) size = 0; - buffer = ByteBuffer.allocateDirect(size); - channel = out; - } - - - /** - * Writes the specified byte to this channel output stream. - * - * @param b The byte to be written. - * @exception IOException Possible while {@link #flush}ing. 
- **/ - public synchronized void write(int b) throws IOException { - if (buffer.position() == buffer.capacity()) flush(); - buffer.put((byte) b); - } - - - /** - * Writes len bytes from the specified byte array - * starting at offset off to this channel output stream. - * - * @param b The data. - * @param off The start offset in the data. - * @param len The number of bytes to write. - * @exception IOException Possible while {@link #flush}ing. - **/ - public synchronized void write(byte[] b, int off, int len) - throws IOException { - int r = buffer.capacity() - buffer.position(); - if (len > r) { - buffer.put(b, off, r); - flush(); - off += r; - len -= r; - r = buffer.capacity(); +public class ChannelOutputStream extends OutputStream { + /** The default capacity of {@link #buffer}. */ + private static int defaultCapacity = 1 << 13; + + /** Holds data until it is written. */ + protected ByteBuffer buffer; + /** The channel where the data will be written. */ + protected WritableByteChannel channel; + + + /** + * Creates the stream from the channel where the data will be written. + * + * @param out The channel where the data will be written. + **/ + public ChannelOutputStream(WritableByteChannel out) { + this(out, defaultCapacity); } - while (len > r) { - buffer.put(b, off, r); - flush(); - off += r; - len -= r; + /** + * Creates the stream from the channel where the data will be written and a buffer size. + * + * @param out The channel where the data will be written. + * @param size The buffer size. + **/ + public ChannelOutputStream(WritableByteChannel out, int size) { + if (size < 0) + size = 0; + buffer = ByteBuffer.allocateDirect(size); + channel = out; } - if (len > 0) buffer.put(b, off, len); - } - - - /** - * Forces any buffered output bytes to be written to {@link #channel}. - * - * @exception IOException Possible while writing to {@link #channel}. 
- **/ - public synchronized void flush() throws IOException { - buffer.flip(); - channel.write(buffer); - buffer.clear(); - } - - - /** - * Flushes the {@link #buffer} and closes the {@link #channel}. - * - * @exception IOException Possible while closing {@link #channel}. - **/ - public void close() throws IOException { - flush(); - channel.close(); - } -} + /** + * Writes the specified byte to this channel output stream. + * + * @param b The byte to be written. + * @exception IOException Possible while {@link #flush}ing. + **/ + public synchronized void write(int b) throws IOException { + if (buffer.position() == buffer.capacity()) + flush(); + buffer.put((byte) b); + } + + + /** + * Writes len bytes from the specified byte array starting at offset + * off to this channel output stream. + * + * @param b The data. + * @param off The start offset in the data. + * @param len The number of bytes to write. + * @exception IOException Possible while {@link #flush}ing. + **/ + public synchronized void write(byte[] b, int off, int len) throws IOException { + int r = buffer.capacity() - buffer.position(); + if (len > r) { + buffer.put(b, off, r); + flush(); + off += r; + len -= r; + r = buffer.capacity(); + } + + while (len > r) { + buffer.put(b, off, r); + flush(); + off += r; + len -= r; + } + + if (len > 0) + buffer.put(b, off, len); + } + + + /** + * Forces any buffered output bytes to be written to {@link #channel}. + * + * @exception IOException Possible while writing to {@link #channel}. + **/ + public synchronized void flush() throws IOException { + buffer.flip(); + channel.write(buffer); + buffer.clear(); + } + + + /** + * Flushes the {@link #buffer} and closes the {@link #channel}. + * + * @exception IOException Possible while closing {@link #channel}. 
+ **/ + public void close() throws IOException { + flush(); + channel.close(); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/io/HexInputStream.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/io/HexInputStream.java index ef08d1dc..cc90c57f 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/io/HexInputStream.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/io/HexInputStream.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.io; @@ -15,229 +12,226 @@ /** - * This class receives input from another InputStream assuming - * that data is little endian, hexidecimal text, converts that text to bytes, - * and makes those bytes available through its interface. The most common - * usage of this class will involve passing it to the constructor of another - * InputStream. For instance:

- * - *
-  *   ObjectInputStream ois =
-  *     new ObjectInputStream(
-  *       new GZIPInputStream(
-  *         new HexInputStream(new FileInputStream(fileName))));
-  * 
- * - * @see HexOutputStream - * @author Nick Rizzolo + * This class receives input from another InputStream assuming that data is little + * endian, hexidecimal text, converts that text to bytes, and makes those bytes available through + * its interface. The most common usage of this class will involve passing it to the constructor of + * another InputStream. For instance:
+ *
+ * + *
+ * ObjectInputStream ois = new ObjectInputStream(new GZIPInputStream(new HexInputStream(
+ *         new FileInputStream(fileName))));
+ * 
+ * + * @see HexOutputStream + * @author Nick Rizzolo **/ -public class HexInputStream extends InputStream -{ - /** Characters representing a hexidecimal digit. */ - private static final String digits = "0123456789ABCDEF"; - - /** - * The InputStream from which yet-to-be-converted input - * should be received. - **/ - private InputStream in; - - - /** - * Initializes this stream with another input stream. - * - * @param i The input stream from which yet-to-be-converted input should - * be received. - **/ - public HexInputStream(InputStream i) { in = i; } - - - /** - * Reads the next byte of data from the input stream. The value is - * returned as an int in the range 0 to 255. If no byte is - * available because the end of the stream has been reached, the value -1 - * is returned. This method blocks until input data is available, the end - * of the stream is detected, or an exception is thrown. - * - * @return The next byte of data, or -1 if the end of the stream is - * reached. - **/ - public int read() throws IOException { - int d1 = in.read(); - if (d1 == -1) return -1; - int d2 = in.read(); - if (d2 == -1) - throw new IOException("HexInputStream: Unexpected end of file"); - - int i1 = digits.indexOf((char) d1); - if (i1 == -1) - throw new IOException("HexInputStream: Invalid input character: '" - + ((char) d1) + "' (" + d1 + ")"); - int i2 = digits.indexOf((char) d2); - if (i2 == -1) - throw new IOException("HexInputStream: Invalid input character: '" - + ((char) d2) + "' (" + d2 + ")"); - - return (i2 << 4) | i1; - } - - - /** - * This method has the same effect as read(b, 0, b.length). - * - * @param b A buffer in which the converted input is stored. - * @return The total number of bytes read into the buffer, or -1 if there - * is no more data because the end of the stream was previously - * reached. 
- **/ - public int read(byte[] b) throws IOException { - return read(b, 0, b.length); - } - - - /** - * Reads up to len bytes of data from another input stream - * into an array of bytes. An attempt is made to read as many as - * len bytes, but a smaller number may be read, possibly zero. - * The number of bytes actually read is returned as an integer.

- * - * This method blocks until input data is available, end of file is - * detected, or an exception is thrown.

- * - * If b is null, a - * NullPointerException is thrown.

- * - * If off is negative, or len is negative, or - * off+len is greater than the length of the array - * b, then an IndexOutOfBoundsException is - * thrown.

- * - * If len is zero, then no bytes are read and 0 is returned; - * otherwise, there is an attempt to read at least one byte. If no byte is - * available because the stream is at end of file, the value -1 is - * returned; otherwise, at least one byte is read and stored into - * b.

- * - * The first byte read is stored into element b[off], the next - * one into b[off+1], and so on. The number of bytes read is, - * at most, equal to len. Let k be the number of bytes - * actually read; these bytes will be stored in elements - * b[off] through b[off+k-1], leaving elements - * b[off+k] through b[off+len-1] unaffected. - *

- * - * In every case, elements b[0] through b[off-1] - * and elements b[off+len] through b[b.length-1] - * are unaffected.

- * - * If the first byte cannot be read for any reason other than end of file, - * then an IOException is thrown. In particular, an - * IOException is thrown if the input stream has been closed. - * - * @param b A buffer into which the converted input is stored. - * @param off The offset in the buffer at which to begin writing. - * @param len The amount of bytes to be received and written into the - * buffer. - * @return The total number of bytes read into the buffer, or -1 if there - * is no more data because the end of the stream has been reached. - **/ - public int read(byte[] b, int off, int len) throws IOException { - byte[] hex = new byte[2 * len]; - int bytesRead = in.read(hex); - if (bytesRead == -1) return -1; - if (bytesRead % 2 == 1) - throw new IOException("HexInputStream: Unexpected end of file"); - - for (int i = 0; i < bytesRead; i += 2) { - int d1 = digits.indexOf((char) hex[i]); - if (d1 == -1) - throw new IOException("HexInputStream: Invalid input character: '" - + ((char) hex[i]) + "' (" + ((int) hex[i]) - + ")"); - int d2 = digits.indexOf((char) hex[i + 1]); - if (d2 == -1) - throw new IOException("HexInputStream: Invalid input character: '" - + ((char) hex[i + 1]) + "' (" - + ((int) hex[i + 1]) + ")"); - - b[i / 2] = (byte) ((d2 << 4) | d1); +public class HexInputStream extends InputStream { + /** Characters representing a hexidecimal digit. */ + private static final String digits = "0123456789ABCDEF"; + + /** + * The InputStream from which yet-to-be-converted input should be received. + **/ + private InputStream in; + + + /** + * Initializes this stream with another input stream. + * + * @param i The input stream from which yet-to-be-converted input should be received. + **/ + public HexInputStream(InputStream i) { + in = i; + } + + + /** + * Reads the next byte of data from the input stream. The value is returned as an + * int in the range 0 to 255. 
If no byte is available because the end of the stream + * has been reached, the value -1 is returned. This method blocks until input data is available, + * the end of the stream is detected, or an exception is thrown. + * + * @return The next byte of data, or -1 if the end of the stream is reached. + **/ + public int read() throws IOException { + int d1 = in.read(); + if (d1 == -1) + return -1; + int d2 = in.read(); + if (d2 == -1) + throw new IOException("HexInputStream: Unexpected end of file"); + + int i1 = digits.indexOf((char) d1); + if (i1 == -1) + throw new IOException("HexInputStream: Invalid input character: '" + ((char) d1) + + "' (" + d1 + ")"); + int i2 = digits.indexOf((char) d2); + if (i2 == -1) + throw new IOException("HexInputStream: Invalid input character: '" + ((char) d2) + + "' (" + d2 + ")"); + + return (i2 << 4) | i1; + } + + + /** + * This method has the same effect as read(b, 0, b.length). + * + * @param b A buffer in which the converted input is stored. + * @return The total number of bytes read into the buffer, or -1 if there is no more data + * because the end of the stream was previously reached. + **/ + public int read(byte[] b) throws IOException { + return read(b, 0, b.length); + } + + + /** + * Reads up to len bytes of data from another input stream into an array of bytes. + * An attempt is made to read as many as len bytes, but a smaller number may be + * read, possibly zero. The number of bytes actually read is returned as an integer.
+ *
+ * + * This method blocks until input data is available, end of file is detected, or an exception is + * thrown.
+ *
+ * + * If b is null, a NullPointerException is thrown.
+ *
+ * + * If off is negative, or len is negative, or off+len is + * greater than the length of the array b, then an + * IndexOutOfBoundsException is thrown.
+ *
+ * + * If len is zero, then no bytes are read and 0 is returned; otherwise, there is an + * attempt to read at least one byte. If no byte is available because the stream is at end of + * file, the value -1 is returned; otherwise, at least one byte is read and stored into + * b.
+ *
+ * + * The first byte read is stored into element b[off], the next one into + * b[off+1], and so on. The number of bytes read is, at most, equal to + * len. Let k be the number of bytes actually read; these bytes will be + * stored in elements b[off] through b[off+k-1], leaving elements + * b[off+k] through b[off+len-1] unaffected.
+ *
+ * + * In every case, elements b[0] through b[off-1] and elements + * b[off+len] through b[b.length-1] are unaffected.
+ *
+ * + * If the first byte cannot be read for any reason other than end of file, then an + * IOException is thrown. In particular, an IOException is thrown if + * the input stream has been closed. + * + * @param b A buffer into which the converted input is stored. + * @param off The offset in the buffer at which to begin writing. + * @param len The amount of bytes to be received and written into the buffer. + * @return The total number of bytes read into the buffer, or -1 if there is no more data + * because the end of the stream has been reached. + **/ + public int read(byte[] b, int off, int len) throws IOException { + byte[] hex = new byte[2 * len]; + int bytesRead = in.read(hex); + if (bytesRead == -1) + return -1; + if (bytesRead % 2 == 1) + throw new IOException("HexInputStream: Unexpected end of file"); + + for (int i = 0; i < bytesRead; i += 2) { + int d1 = digits.indexOf((char) hex[i]); + if (d1 == -1) + throw new IOException("HexInputStream: Invalid input character: '" + + ((char) hex[i]) + "' (" + ((int) hex[i]) + ")"); + int d2 = digits.indexOf((char) hex[i + 1]); + if (d2 == -1) + throw new IOException("HexInputStream: Invalid input character: '" + + ((char) hex[i + 1]) + "' (" + ((int) hex[i + 1]) + ")"); + + b[i / 2] = (byte) ((d2 << 4) | d1); + } + + return bytesRead / 2; + } + + + /** + * Skips over and discards n bytes of data from this input stream. The skip method + * may, for a variety of reasons, end up skipping over some smaller number of bytes, possibly 0. + * This may result from any of a number of conditions; reaching end of file before + * n bytes have been skipped is only one possibility. The actual number of bytes + * skipped is returned. If n is negative, no bytes are skipped. + * + * @param n The number of bytes to be skipped. + * @return The actual number of bytes skipped. 
+ **/ + public long skip(long n) throws IOException { + return in.skip(n * 2); } - return bytesRead / 2; - } - - - /** - * Skips over and discards n bytes of data from this input - * stream. The skip method may, for a variety of reasons, end up skipping - * over some smaller number of bytes, possibly 0. This may result from any - * of a number of conditions; reaching end of file before n - * bytes have been skipped is only one possibility. The actual number of - * bytes skipped is returned. If n is negative, no bytes are - * skipped. - * - * @param n The number of bytes to be skipped. - * @return The actual number of bytes skipped. - **/ - public long skip(long n) throws IOException { return in.skip(n * 2); } - - - /** - * Returns the number of bytes that can be read (or skipped over) from this - * input stream without blocking by the next caller of a method for this - * input stream. The next caller might be the same thread or or another - * thread. - * - * @return The number of bytes that can be read from this input stream - * without blocking. - **/ - public int available() throws IOException { return in.available() / 2; } - - - /** - * Closes this input stream and releases any system resources associated - * with the stream. - **/ - public void close() throws IOException { in.close(); } - - - /** - * Marks the current position in this input stream. A subsequent call to - * the reset method repositions this stream at the last marked - * position so that subsequent reads re-read the same bytes. - * - * The readlimit argument tells this input stream to allow - * that many bytes to be read before the mark position gets invalidated. - * - * The general contract of mark is that, if the method - * markSupported returns true, the stream somehow - * remembers all the bytes read after the call to mark and stands ready to - * supply those same bytes again if and whenever the method - * reset is called. 
However, the stream is not required to - * remember any data at all if more than readlimit bytes are - * read from the stream before reset is called. - * - * @param readlimit The maximum limit of bytes that can be read before the - * mark position becomes invalid. - **/ - public void mark(int readlimit) { in.mark(readlimit * 2); } - - - /** - * Repositions this stream to the position at the time the - * mark method was last called on this input stream. - **/ - public void reset() throws IOException { in.reset(); } - - - /** - * Tests if this input stream supports the mark and reset methods. Whether - * or not mark and reset are supported is an - * invariant property of the provided input stream instance. - * - * @return true iff the provided input stream instance - * supports the mark and reset methods. - **/ - public boolean markSupported() { return in.markSupported(); } -} + /** + * Returns the number of bytes that can be read (or skipped over) from this input stream without + * blocking by the next caller of a method for this input stream. The next caller might be the + * same thread or or another thread. + * + * @return The number of bytes that can be read from this input stream without blocking. + **/ + public int available() throws IOException { + return in.available() / 2; + } + + + /** + * Closes this input stream and releases any system resources associated with the stream. + **/ + public void close() throws IOException { + in.close(); + } + + + /** + * Marks the current position in this input stream. A subsequent call to the reset + * method repositions this stream at the last marked position so that subsequent reads re-read + * the same bytes. + * + * The readlimit argument tells this input stream to allow that many bytes to be + * read before the mark position gets invalidated. 
+ * + * The general contract of mark is that, if the method markSupported returns + * true, the stream somehow remembers all the bytes read after the call to mark and + * stands ready to supply those same bytes again if and whenever the method reset + * is called. However, the stream is not required to remember any data at all if more than + * readlimit bytes are read from the stream before reset is called. + * + * @param readlimit The maximum limit of bytes that can be read before the mark position becomes + * invalid. + **/ + public void mark(int readlimit) { + in.mark(readlimit * 2); + } + + + /** + * Repositions this stream to the position at the time the mark method was last + * called on this input stream. + **/ + public void reset() throws IOException { + in.reset(); + } + + + /** + * Tests if this input stream supports the mark and reset methods. Whether or not + * mark and reset are supported is an invariant property of the + * provided input stream instance. + * + * @return true iff the provided input stream instance supports the + * mark and reset methods. + **/ + public boolean markSupported() { + return in.markSupported(); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/io/HexOutputStream.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/io/HexOutputStream.java index 21ea3f75..6943a26d 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/io/HexOutputStream.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/io/HexOutputStream.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.io; @@ -15,117 +12,114 @@ /** - * This class will convert whatever data is sent to it into little endian, - * hexidecimal text and send that text on to another - * OutputStream. The most common usage of this class will - * involve passing it to the constructor of another - * OutputStream. For instance:

- * - *
-  *   ObjectOutputStream oos =
-  *     new ObjectOutputStream(
-  *       new GZIPOutputStream(
-  *         new HexOutputStream(new FileOutputStream(fileName))));
-  * 
- * - * @see HexInputStream - * @author Nick Rizzolo + * This class will convert whatever data is sent to it into little endian, hexidecimal text and send + * that text on to another OutputStream. The most common usage of this class will + * involve passing it to the constructor of another OutputStream. For instance:
+ *
+ * + *
+ * ObjectOutputStream oos = new ObjectOutputStream(new GZIPOutputStream(new HexOutputStream(
+ *         new FileOutputStream(fileName))));
+ * 
+ * + * @see HexInputStream + * @author Nick Rizzolo **/ -public class HexOutputStream extends OutputStream -{ - /** Characters representing a hexidecimal digit. */ - private static final String digits = "0123456789ABCDEF"; - - /** - * The OutputStream to which converted output should be sent. - **/ - private OutputStream out; - - - /** - * Initializes this stream with another output stream. - * - * @param o The output stream to which converted output should be sent. - **/ - public HexOutputStream(OutputStream o) { out = o; } - - - /** - * Writes the specified byte to this output stream. The general contract - * for write is that one byte is written to the output stream. - * The byte to be written is the eight low-order bits of the argument - * b. The 24 high-order bits of b are ignored. - * - * @param b The byte to be written. - **/ - public void write(int b) throws IOException { - b &= 255; - out.write(digits.charAt(b & 15)); - out.write(digits.charAt((b & 240) >> 4)); - } - - - /** - * Writes b.length bytes from the specified byte array to this - * output stream. The general contract for write(b) is that - * it should have exactly the same effect as the call write(b, 0, - * b.length). - * - * @param b The bytes to be written. - **/ - public void write(byte[] b) throws IOException { - write(b, 0, b.length); - } - - - /** - * Writes len bytes from the specified byte array starting at - * offset off to this output stream. The general contract for - * write(b, off, len) is that some of the bytes in the array - * b are written to the output stream in order; element - * b[off] is the first byte written and - * b[off+len-1] is the last byte written by this operation. - *

- * - * If b is null, a - * NullPointerException is thrown.

- * - * If off is negative, or len is negative, or - * off+len is greater than the length of the array - * b, then an IndexOutOfBoundsException is - * thrown. - * - * @param b A buffer containing the bytes to be written. - * @param off The offset of the first byte to be written. - * @param len The amount of bytes to be written. - **/ - public void write(byte[] b, int off, int len) throws IOException { - byte[] hex = new byte[2 * len]; - for (int i = 0; i < len; ++i) { - hex[2 * i] = (byte) digits.charAt(b[off + i] & 15); - hex[2 * i + 1] = (byte) digits.charAt((b[off + i] & 240) >> 4); +public class HexOutputStream extends OutputStream { + /** Characters representing a hexidecimal digit. */ + private static final String digits = "0123456789ABCDEF"; + + /** + * The OutputStream to which converted output should be sent. + **/ + private OutputStream out; + + + /** + * Initializes this stream with another output stream. + * + * @param o The output stream to which converted output should be sent. + **/ + public HexOutputStream(OutputStream o) { + out = o; } - out.write(hex); - } + /** + * Writes the specified byte to this output stream. The general contract for write + * is that one byte is written to the output stream. The byte to be written is the eight + * low-order bits of the argument b. The 24 high-order bits of b are + * ignored. + * + * @param b The byte to be written. + **/ + public void write(int b) throws IOException { + b &= 255; + out.write(digits.charAt(b & 15)); + out.write(digits.charAt((b & 240) >> 4)); + } - /** - * Flushes this output stream and forces any buffered output bytes to be - * written out. The general contract of flush is that calling - * it is an indication that, if any bytes previously written have been - * buffered by the implementation of the output stream, such bytes should - * immediately be written to their intended destination. 
- **/ - public void flush() throws IOException { out.flush(); } + /** + * Writes b.length bytes from the specified byte array to this output stream. The + * general contract for write(b) is that it should have exactly the same effect as + * the call write(b, 0, + * b.length). + * + * @param b The bytes to be written. + **/ + public void write(byte[] b) throws IOException { + write(b, 0, b.length); + } - /** - * Closes this output stream and releases any system resources associated - * with this stream. The general contract of close is that it - * closes the output stream. A closed stream cannot perform output - * operations and cannot be reopened. - **/ - public void close() throws IOException { out.close(); } -} + /** + * Writes len bytes from the specified byte array starting at offset + * off to this output stream. The general contract for + * write(b, off, len) is that some of the bytes in the array b are + * written to the output stream in order; element b[off] is the first byte written + * and b[off+len-1] is the last byte written by this operation.
+ *
+ * + * If b is null, a NullPointerException is thrown.
+ *
+ * + * If off is negative, or len is negative, or off+len is + * greater than the length of the array b, then an + * IndexOutOfBoundsException is thrown. + * + * @param b A buffer containing the bytes to be written. + * @param off The offset of the first byte to be written. + * @param len The amount of bytes to be written. + **/ + public void write(byte[] b, int off, int len) throws IOException { + byte[] hex = new byte[2 * len]; + for (int i = 0; i < len; ++i) { + hex[2 * i] = (byte) digits.charAt(b[off + i] & 15); + hex[2 * i + 1] = (byte) digits.charAt((b[off + i] & 240) >> 4); + } + + out.write(hex); + } + + + /** + * Flushes this output stream and forces any buffered output bytes to be written out. The + * general contract of flush is that calling it is an indication that, if any bytes + * previously written have been buffered by the implementation of the output stream, such bytes + * should immediately be written to their intended destination. + **/ + public void flush() throws IOException { + out.flush(); + } + + + /** + * Closes this output stream and releases any system resources associated with this stream. The + * general contract of close is that it closes the output stream. A closed stream + * cannot perform output operations and cannot be reopened. + **/ + public void close() throws IOException { + out.close(); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/io/HexStringInputStream.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/io/HexStringInputStream.java index 6afe6d23..bc91bed9 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/io/HexStringInputStream.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/io/HexStringInputStream.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. 
- * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.io; @@ -16,211 +13,208 @@ /** - * Behaves the same as HexInputStream, except its - * constructor takes a String as input to read. - * - * @see HexInputStream - * @author Nick Rizzolo + * Behaves the same as HexInputStream, except its constructor takes a + * String as input to read. + * + * @see HexInputStream + * @author Nick Rizzolo **/ -public class HexStringInputStream extends InputStream -{ - /** Characters representing a hexidecimal digit. */ - private static final String digits = "0123456789ABCDEF"; - - /** Reads encoded input from a given string. */ - private StringReader in; - - - /** - * Initializes this stream with another input stream. - * - * @param s The string from which yet-to-be-converted input should be - * received. - **/ - public HexStringInputStream(String s) { in = new StringReader(s); } - - - /** - * Reads the next char of data from the input stream. The value is - * returned as an int in the range 0 to 255. If no char is - * available because the end of the stream has been reached, the value -1 - * is returned. This method blocks until input data is available, the end - * of the stream is detected, or an exception is thrown. - * - * @return The next char of data, or -1 if the end of the stream is - * reached. 
- **/ - public int read() throws IOException { - int d1 = in.read(); - if (d1 == -1) return -1; - int d2 = in.read(); - if (d2 == -1) - throw new IOException("HexStringInputStream: Unexpected end of file"); - - int i1 = digits.indexOf((char) d1); - if (i1 == -1) - throw new IOException("HexStringInputStream: Invalid input character: '" - + ((char) d1) + "' (" + d1 + ")"); - int i2 = digits.indexOf((char) d2); - if (i2 == -1) - throw new IOException("HexStringInputStream: Invalid input character: '" - + ((char) d2) + "' (" + d2 + ")"); - - return (i2 << 4) | i1; - } - - - /** - * This method has the same effect as read(b, 0, b.length). - * - * @param b A buffer in which the converted input is stored. - * @return The total number of chars read into the buffer, or -1 if there - * is no more data because the end of the stream was previously - * reached. - **/ - public int read(char[] b) throws IOException { - return read(b, 0, b.length); - } - - - /** - * Reads up to len chars of data from another String - * into an array of chars. An attempt is made to read as many as - * len chars, but a smaller number may be read, possibly zero. - * The number of chars actually read is returned as an integer.

- * - * This method blocks until input data is available, end of file is - * detected, or an exception is thrown.

- * - * If b is null, a - * NullPointerException is thrown.

- * - * If off is negative, or len is negative, or - * off+len is greater than the length of the array - * b, then an IndexOutOfBoundsException is - * thrown.

- * - * If len is zero, then no chars are read and 0 is returned; - * otherwise, there is an attempt to read at least one char. If no char is - * available because the stream is at end of file, the value -1 is - * returned; otherwise, at least one char is read and stored into - * b.

- * - * The first char read is stored into element b[off], the next - * one into b[off+1], and so on. The number of chars read is, - * at most, equal to len. Let k be the number of chars - * actually read; these chars will be stored in elements - * b[off] through b[off+k-1], leaving elements - * b[off+k] through b[off+len-1] unaffected. - *

- * - * In every case, elements b[0] through b[off-1] - * and elements b[off+len] through b[b.length-1] - * are unaffected.

- * - * If the first char cannot be read for any reason other than end of file, - * then an IOException is thrown. In particular, an - * IOException is thrown if the input stream has been closed. - * - * @param b A buffer into which the converted input is stored. - * @param off The offset in the buffer at which to begin writing. - * @param len The amount of chars to be received and written into the - * buffer. - * @return The total number of chars read into the buffer, or -1 if there - * is no more data because the end of the stream has been reached. - **/ - public int read(char[] b, int off, int len) throws IOException { - char[] hex = new char[2 * len]; - int charsRead = in.read(hex); - if (charsRead == -1) return -1; - if (charsRead % 2 == 1) - throw new IOException("HexStringInputStream: Unexpected end of file"); - - for (int i = 0; i < charsRead; i += 2) { - int d1 = digits.indexOf((char) hex[i]); - if (d1 == -1) - throw new IOException( - "HexStringInputStream: Invalid input character: '" - + ((char) hex[i]) + "' (" + ((int) hex[i]) + ")"); - int d2 = digits.indexOf((char) hex[i + 1]); - if (d2 == -1) - throw new IOException( - "HexStringInputStream: Invalid input character: '" - + ((char) hex[i + 1]) + "' (" + ((int) hex[i + 1]) + ")"); - - b[i / 2] = (char) ((d2 << 4) | d1); +public class HexStringInputStream extends InputStream { + /** Characters representing a hexidecimal digit. */ + private static final String digits = "0123456789ABCDEF"; + + /** Reads encoded input from a given string. */ + private StringReader in; + + + /** + * Initializes this stream with another input stream. + * + * @param s The string from which yet-to-be-converted input should be received. + **/ + public HexStringInputStream(String s) { + in = new StringReader(s); } - return charsRead / 2; - } - - - /** - * Skips over and discards n chars of data from this input - * stream. 
The skip method may, for a variety of reasons, end up skipping - * over some smaller number of chars, possibly 0. This may result from any - * of a number of conditions; reaching end of file before n - * chars have been skipped is only one possibility. The actual number of - * chars skipped is returned. If n is negative, no chars are - * skipped. - * - * @param n The number of chars to be skipped. - * @return The actual number of chars skipped. - **/ - public long skip(long n) throws IOException { return in.skip(n * 2); } - - - /** - * Closes this input stream and releases any system resources associated - * with the stream. - **/ - public void close() throws IOException { in.close(); } - - - /** - * Marks the current position in this input stream. A subsequent call to - * the reset method repositions this stream at the last marked - * position so that subsequent reads re-read the same chars. - * - * The readlimit argument tells this input stream to allow - * that many chars to be read before the mark position gets invalidated. - * - * The general contract of mark is that, if the method - * markSupported returns true, the stream somehow - * remembers all the chars read after the call to mark and stands ready to - * supply those same chars again if and whenever the method - * reset is called. However, the stream is not required to - * remember any data at all if more than readlimit chars are - * read from the stream before reset is called. - * - * @param readlimit The maximum limit of chars that can be read before the - * mark position becomes invalid. - **/ - public void mark(int readlimit) { - try { - in.mark(readlimit * 2); + + /** + * Reads the next char of data from the input stream. The value is returned as an + * int in the range 0 to 255. If no char is available because the end of the stream + * has been reached, the value -1 is returned. This method blocks until input data is available, + * the end of the stream is detected, or an exception is thrown. 
+ * + * @return The next char of data, or -1 if the end of the stream is reached. + **/ + public int read() throws IOException { + int d1 = in.read(); + if (d1 == -1) + return -1; + int d2 = in.read(); + if (d2 == -1) + throw new IOException("HexStringInputStream: Unexpected end of file"); + + int i1 = digits.indexOf((char) d1); + if (i1 == -1) + throw new IOException("HexStringInputStream: Invalid input character: '" + ((char) d1) + + "' (" + d1 + ")"); + int i2 = digits.indexOf((char) d2); + if (i2 == -1) + throw new IOException("HexStringInputStream: Invalid input character: '" + ((char) d2) + + "' (" + d2 + ")"); + + return (i2 << 4) | i1; } - catch (Exception e) { - System.err.println(e); + + + /** + * This method has the same effect as read(b, 0, b.length). + * + * @param b A buffer in which the converted input is stored. + * @return The total number of chars read into the buffer, or -1 if there is no more data + * because the end of the stream was previously reached. + **/ + public int read(char[] b) throws IOException { + return read(b, 0, b.length); } - } - /** - * Repositions this stream to the position at the time the - * mark method was last called on this input stream. - **/ - public void reset() throws IOException { in.reset(); } + /** + * Reads up to len chars of data from another String into an array of chars. An + * attempt is made to read as many as len chars, but a smaller number may be read, + * possibly zero. The number of chars actually read is returned as an integer.
+ *
+ * + * This method blocks until input data is available, end of file is detected, or an exception is + * thrown.
+ *
+ * + * If b is null, a NullPointerException is thrown.
+ *
+ * + * If off is negative, or len is negative, or off+len is + * greater than the length of the array b, then an + * IndexOutOfBoundsException is thrown.
+ *
+ * + * If len is zero, then no chars are read and 0 is returned; otherwise, there is an + * attempt to read at least one char. If no char is available because the stream is at end of + * file, the value -1 is returned; otherwise, at least one char is read and stored into + * b.
+ *
+ * + * The first char read is stored into element b[off], the next one into + * b[off+1], and so on. The number of chars read is, at most, equal to + * len. Let k be the number of chars actually read; these chars will be + * stored in elements b[off] through b[off+k-1], leaving elements + * b[off+k] through b[off+len-1] unaffected.
+ *
+ * + * In every case, elements b[0] through b[off-1] and elements + * b[off+len] through b[b.length-1] are unaffected.
+ *
+ * + * If the first char cannot be read for any reason other than end of file, then an + * IOException is thrown. In particular, an IOException is thrown if + * the input stream has been closed. + * + * @param b A buffer into which the converted input is stored. + * @param off The offset in the buffer at which to begin writing. + * @param len The amount of chars to be received and written into the buffer. + * @return The total number of chars read into the buffer, or -1 if there is no more data + * because the end of the stream has been reached. + **/ + public int read(char[] b, int off, int len) throws IOException { + char[] hex = new char[2 * len]; + int charsRead = in.read(hex); + if (charsRead == -1) + return -1; + if (charsRead % 2 == 1) + throw new IOException("HexStringInputStream: Unexpected end of file"); + + for (int i = 0; i < charsRead; i += 2) { + int d1 = digits.indexOf((char) hex[i]); + if (d1 == -1) + throw new IOException("HexStringInputStream: Invalid input character: '" + + ((char) hex[i]) + "' (" + ((int) hex[i]) + ")"); + int d2 = digits.indexOf((char) hex[i + 1]); + if (d2 == -1) + throw new IOException("HexStringInputStream: Invalid input character: '" + + ((char) hex[i + 1]) + "' (" + ((int) hex[i + 1]) + ")"); + + b[i / 2] = (char) ((d2 << 4) | d1); + } + + return charsRead / 2; + } - /** - * Tests if this input stream supports the mark and reset methods. Whether - * or not mark and reset are supported is an - * invariant property of the provided input stream instance. - * - * @return true iff the provided input stream instance - * supports the mark and reset methods. - **/ - public boolean markSupported() { return in.markSupported(); } -} + /** + * Skips over and discards n chars of data from this input stream. The skip method + * may, for a variety of reasons, end up skipping over some smaller number of chars, possibly 0. 
+ * This may result from any of a number of conditions; reaching end of file before + * n chars have been skipped is only one possibility. The actual number of chars + * skipped is returned. If n is negative, no chars are skipped. + * + * @param n The number of chars to be skipped. + * @return The actual number of chars skipped. + **/ + public long skip(long n) throws IOException { + return in.skip(n * 2); + } + + /** + * Closes this input stream and releases any system resources associated with the stream. + **/ + public void close() throws IOException { + in.close(); + } + + + /** + * Marks the current position in this input stream. A subsequent call to the reset + * method repositions this stream at the last marked position so that subsequent reads re-read + * the same chars. + * + * The readlimit argument tells this input stream to allow that many chars to be + * read before the mark position gets invalidated. + * + * The general contract of mark is that, if the method markSupported returns + * true, the stream somehow remembers all the chars read after the call to mark and + * stands ready to supply those same chars again if and whenever the method reset + * is called. However, the stream is not required to remember any data at all if more than + * readlimit chars are read from the stream before reset is called. + * + * @param readlimit The maximum limit of chars that can be read before the mark position becomes + * invalid. + **/ + public void mark(int readlimit) { + try { + in.mark(readlimit * 2); + } catch (Exception e) { + System.err.println(e); + } + } + + + /** + * Repositions this stream to the position at the time the mark method was last + * called on this input stream. + **/ + public void reset() throws IOException { + in.reset(); + } + + + /** + * Tests if this input stream supports the mark and reset methods. Whether or not + * mark and reset are supported is an invariant property of the + * provided input stream instance. 
+ * + * @return true iff the provided input stream instance supports the + * mark and reset methods. + **/ + public boolean markSupported() { + return in.markSupported(); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/io/IOUtilities.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/io/IOUtilities.java index db973723..7be02c0d 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/io/IOUtilities.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/io/IOUtilities.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.io; @@ -18,44 +15,44 @@ public class IOUtilities { - @SuppressWarnings("rawtypes") - public static boolean existsInClasspath(Class clazz, String fileName) { - URL dirURL = clazz.getResource("/" + fileName); - return dirURL != null; - } - - @SuppressWarnings("rawtypes") - public static URL loadFromClasspath(Class clazz, String fileName) { - URL dirURL = clazz.getResource("/" + fileName); - if (dirURL == null) return null; - - URL url = null; - try { - String dirPath = dirURL.getPath(); - - if (dirURL.getProtocol().equals("jar")) { - int exclamation = dirPath.indexOf("!"); - String jarPath = dirPath.substring(5, exclamation); - String jarRoot = dirPath.substring(0, exclamation + 1); - - JarFile jar = new JarFile(URLDecoder.decode(jarPath, "UTF-8")); - Enumeration entries = jar.entries(); - - while 
(entries.hasMoreElements()) { - JarEntry element = entries.nextElement(); - String name = element.getName(); - if (name.equals(fileName)) { - url = new URL("jar:" + jarRoot + "/" + name); - } - } - jar.close(); - } - } - catch (Exception e) { - System.err.println("ERROR: Can't read file : " + fileName + "\n" + e); - e.printStackTrace(); - System.exit(1); - } - return url; - } + @SuppressWarnings("rawtypes") + public static boolean existsInClasspath(Class clazz, String fileName) { + URL dirURL = clazz.getResource("/" + fileName); + return dirURL != null; + } + + @SuppressWarnings("rawtypes") + public static URL loadFromClasspath(Class clazz, String fileName) { + URL dirURL = clazz.getResource("/" + fileName); + if (dirURL == null) + return null; + + URL url = null; + try { + String dirPath = dirURL.getPath(); + + if (dirURL.getProtocol().equals("jar")) { + int exclamation = dirPath.indexOf("!"); + String jarPath = dirPath.substring(5, exclamation); + String jarRoot = dirPath.substring(0, exclamation + 1); + + JarFile jar = new JarFile(URLDecoder.decode(jarPath, "UTF-8")); + Enumeration entries = jar.entries(); + + while (entries.hasMoreElements()) { + JarEntry element = entries.nextElement(); + String name = element.getName(); + if (name.equals(fileName)) { + url = new URL("jar:" + jarRoot + "/" + name); + } + } + jar.close(); + } + } catch (Exception e) { + System.err.println("ERROR: Can't read file : " + fileName + "\n" + e); + e.printStackTrace(); + System.exit(1); + } + return url; + } } diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Accuracy.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Accuracy.java index 89915ecc..9799cba6 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Accuracy.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Accuracy.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. 
See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.learn; @@ -16,56 +13,55 @@ /** - * Returns the accuracy of a discrete classifier with respect to the oracle - * as the fraction of examples for which its prediction was correct. - * - * @author Dan Muriello + * Returns the accuracy of a discrete classifier with respect to the oracle as the fraction of + * examples for which its prediction was correct. + * + * @author Dan Muriello **/ -public class Accuracy implements TestingMetric -{ - /** - * Whether or not to print a table of results to STDOUT when - * {@link #test(Classifier,Classifier,Parser)} is called. - **/ - private boolean print; +public class Accuracy implements TestingMetric { + /** + * Whether or not to print a table of results to STDOUT when + * {@link #test(Classifier,Classifier,Parser)} is called. + **/ + private boolean print; - /** - * Creates an Accuracy testing metric that does not print a - * table of results. - **/ - public Accuracy() { this(false); } + /** + * Creates an Accuracy testing metric that does not print a table of results. + **/ + public Accuracy() { + this(false); + } - /** - * Creates an Accuracy testing metric that prints a table of - * results if requested. - * - * @param p Whether or not to print a table of results when - * {@link #test(Classifier,Classifier,Parser)} is called. - **/ - public Accuracy(boolean p) { print = p; } + /** + * Creates an Accuracy testing metric that prints a table of results if requested. 
+ * + * @param p Whether or not to print a table of results when + * {@link #test(Classifier,Classifier,Parser)} is called. + **/ + public Accuracy(boolean p) { + print = p; + } - /** Returns the name of the testing metric. */ - public String getName() { return "Accuracy"; } + /** Returns the name of the testing metric. */ + public String getName() { + return "Accuracy"; + } - /** - * Evaluates a classifier against an oracle on the data provided by a - * parser. - * - * @param classifier The classifier whose accuracy is being measured. - * @param oracle A classifier that returns the label of each example. - * @param parser A parser to supply the example objects. - * @return The fraction of examples for which the classifier's prediction - * was correct. - **/ - public double test(Classifier classifier, Classifier oracle, Parser parser) - { - TestDiscrete tester = - TestDiscrete.testDiscrete(classifier, oracle, parser); - if (print) tester.printPerformance(System.out); - return tester.getOverallStats()[0]; - } + /** + * Evaluates a classifier against an oracle on the data provided by a parser. + * + * @param classifier The classifier whose accuracy is being measured. + * @param oracle A classifier that returns the label of each example. + * @param parser A parser to supply the example objects. + * @return The fraction of examples for which the classifier's prediction was correct. 
+ **/ + public double test(Classifier classifier, Classifier oracle, Parser parser) { + TestDiscrete tester = TestDiscrete.testDiscrete(classifier, oracle, parser); + if (print) + tester.printPerformance(System.out); + return tester.getOverallStats()[0]; + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/AdaBoost.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/AdaBoost.java index 183e8044..e3dafe03 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/AdaBoost.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/AdaBoost.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.learn; @@ -13,540 +10,532 @@ import java.io.PrintStream; import java.util.Arrays; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.OVector; import edu.illinois.cs.cogcomp.lbjava.classify.Classifier; import edu.illinois.cs.cogcomp.lbjava.classify.DiscretePrimitiveStringFeature; import edu.illinois.cs.cogcomp.lbjava.classify.Feature; import edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector; import edu.illinois.cs.cogcomp.lbjava.classify.ScoreSet; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessInputStream; -import 
edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessOutputStream; -import edu.illinois.cs.cogcomp.lbjava.util.OVector; - /** - * Implementation of the AdaBoost binary classification learning algorithm. - * This implementation samples from its internal distribution, giving the - * weak learner a new set of examples that the weak learner assumes are - * weighted equally. - * - *

Assumptions: - *

    - *
  1. The weak learner is cloneable. - *
  2. The weak learner is specified with the same values list as this - * learner. - *
  3. The weak learning algorithm is trained on objects that are each - * given a single label feature. - *
- * - * @author Nick Rizzolo + * Implementation of the AdaBoost binary classification learning algorithm. This implementation + * samples from its internal distribution, giving the weak learner a new set of examples that the + * weak learner assumes are weighted equally. + * + *

+ * Assumptions: + *

    + *
  1. The weak learner is cloneable. + *
  2. The weak learner is specified with the same values list as this learner. + *
  3. The weak learning algorithm is trained on objects that are each given a single label feature. + *
+ * + * @author Nick Rizzolo **/ -public class AdaBoost extends Learner -{ - /** Default for {@link #weakLearner}. */ - public static final Learner defaultWeakLearner = - new SparseAveragedPerceptron(); - /** Default for {@link #rounds}. */ - public static final int defaultRounds = 10; - - - /** The weak learning algorithm to be boosted. */ - protected Learner weakLearner; - /** The number of times the weak learner will be called. */ - protected int rounds; - /** Will be filled with trained copies of the weak learner. */ - protected Learner[] weakLearners; - /** Parameters associated with the trained copies of the weak learner. */ - protected double[] alpha; - /** All the examples observed by this learner during training. */ - protected OVector allExamples; - /** The label producing classifier's allowable values. */ - protected String[] allowableValues; - - - /** Instantiates member variables. */ - public AdaBoost() { this(""); } - - /** - * Instantiates member variables. - * - * @param w The weak learning algorithm. - **/ - public AdaBoost(Learner w) { this("", w); } - - /** - * Instantiates member variables. - * - * @param r The number of rounds of boosting. - **/ - public AdaBoost(int r) { this("", r); } - - /** - * Instantiates member variables. - * - * @param w The weak learning algorithm. - * @param r The number of rounds of boosting. - **/ - public AdaBoost(Learner w, int r) { this("", w, r); } - - /** - * Initializing constructor. Sets all member variables to their associated - * settings in the {@link AdaBoost.Parameters} object. - * - * @param p The settings of all parameters. - **/ - public AdaBoost(Parameters p) { this("", p); } - - /** - * Instantiates member variables. - * - * @param n The name of the classifier. - **/ - public AdaBoost(String n) { this(n, new Parameters()); } - - /** - * Instantiates member variables. - * - * @param n The name of the classifier. - * @param w The weak learning algorithm. 
- **/ - public AdaBoost(String n, Learner w) { - this(n, w, defaultRounds); - } - - /** - * Instantiates member variables. - * - * @param n The name of the classifier. - * @param r The number of rounds of boosting. - **/ - public AdaBoost(String n, int r) { - this(n, defaultWeakLearner, r); - } - - /** - * Instantiates member variables. - * - * @param n The name of the classifier. - * @param w The weak learning algorithm. - * @param r The number of rounds of boosting. - **/ - public AdaBoost(String n, Learner w, int r) { - super(n); - weakLearner = w; - rounds = r; - allExamples = new OVector(); - allowableValues = new String[]{ "*", "*" }; - } - - /** - * Initializing constructor. Sets all member variables to their associated - * settings in the {@link AdaBoost.Parameters} object. - * - * @param n The name of the classifier. - * @param p The settings of all parameters. - **/ - public AdaBoost(String n, Parameters p) { - super(n); - setParameters(p); - allExamples = new OVector(); - allowableValues = new String[]{ "*", "*" }; - } - - - /** - * Sets the values of parameters that control the behavior of this learning - * algorithm. - * - * @param p The parameters. - **/ - public void setParameters(Parameters p) { - weakLearner = p.weakLearner; - rounds = p.rounds; - } - - - /** - * Retrieves the parameters that are set in this learner. - * - * @return An object containing all the values of the parameters that - * control the behavior of this learning algorithm. - **/ - public Learner.Parameters getParameters() { - Parameters p = new Parameters(super.getParameters()); - p.weakLearner = weakLearner; - p.rounds = rounds; - return p; - } - - - /** - * Returns the array of allowable values that a feature returned by this - * classifier may take. - * - * @return The allowable values of this learner's labeler, or an array of - * length zero if the labeler has not yet been established or does - * not specify allowable values. 
- **/ - public String[] allowableValues() { return allowableValues; } - - - /** - * Sets the labeler. - * - * @param l A labeling classifier. - **/ - public void setLabeler(Classifier l) { - if (l == null || l.allowableValues().length != 2) { - System.err.println( - "Error: " + name - + ": An LTU must be given a single binary label classifier."); - new Exception().printStackTrace(); - System.exit(1); +public class AdaBoost extends Learner { + /** Default for {@link #weakLearner}. */ + public static final Learner defaultWeakLearner = new SparseAveragedPerceptron(); + /** Default for {@link #rounds}. */ + public static final int defaultRounds = 10; + + + /** The weak learning algorithm to be boosted. */ + protected Learner weakLearner; + /** The number of times the weak learner will be called. */ + protected int rounds; + /** Will be filled with trained copies of the weak learner. */ + protected Learner[] weakLearners; + /** Parameters associated with the trained copies of the weak learner. */ + protected double[] alpha; + /** All the examples observed by this learner during training. */ + protected OVector allExamples; + /** The label producing classifier's allowable values. */ + protected String[] allowableValues; + + + /** Instantiates member variables. */ + public AdaBoost() { + this(""); + } + + /** + * Instantiates member variables. + * + * @param w The weak learning algorithm. + **/ + public AdaBoost(Learner w) { + this("", w); + } + + /** + * Instantiates member variables. + * + * @param r The number of rounds of boosting. 
+ **/ + public AdaBoost(int r) { + this("", r); } - super.setLabeler(l); - allowableValues = l.allowableValues(); - labelLexicon.clear(); - labelLexicon.lookup( - new DiscretePrimitiveStringFeature( - l.containingPackage, l.name, "", allowableValues[0], (short) 0, - (short) 2), - true); - labelLexicon.lookup( - new DiscretePrimitiveStringFeature( - l.containingPackage, l.name, "", allowableValues[1], (short) 1, - (short) 2), - true); - createPrediction(0); - createPrediction(1); - } - - - /** - * Initializes the weight vector array to the size of the supplied number - * of features. - **/ - public void initialize(int numExamples, int numFeatures) { - allExamples = new OVector(numExamples); - } - - - /** - * This method adds the example object to the array storing the training - * examples. - * - *

Note that learning does not actually take place until - * {@link #doneLearning()} is called. - * - * @param example The example object. - **/ - public void learn(Object example) { - allExamples.add(getExampleArray(example)); - } - - - /** - * This method adds the example object to the array storing the training - * examples. - * - *

Note that learning does not actually take place until - * {@link #doneLearning()} is called. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @param exampleLabels The example's label(s). - * @param labelValues The labels' values. - **/ - public void learn(int[] exampleFeatures, double[] exampleValues, - int[] exampleLabels, double[] labelValues) { - allExamples.add(new Object[]{ exampleFeatures, exampleValues, - exampleLabels, labelValues }); - } - - - /** - * Performs learning on the examples stored in {@link #allExamples}, if - * they exist; otherwise do nothing. - **/ - public void doneLearning() { - int m = allExamples.size(); - if (m == 0) return; - - double[] D = new double[m]; - Arrays.fill(D, 1 / (double) m); - - weakLearners = new Learner[rounds]; - alpha = new double[rounds]; - - for (int i = 0; i < rounds; ++i) { - Object[][] sample = new Object[m][]; - for (int j = 0; j < m; ++j) { - double p = Math.random(); - double sum = 0; - int k = 0; - while (sum <= p) sum += D[k++]; - sample[j] = (Object[]) allExamples.get(k - 1); - } - - weakLearners[i] = (Learner) weakLearner.clone(); - weakLearners[i].setLabelLexicon(labelLexicon); - weakLearners[i].learn((Object[]) sample); - weakLearners[i].doneLearning(); - - int totalCorrect = 0; - boolean[] correct = new boolean[m]; - for (int j = 0; j < m; ++j) { - String label = - labelLexicon.lookupKey(((int[]) sample[j][2])[0]).getStringValue(); - String prediction = - weakLearners[i].featureValue(sample[j]).getStringValue(); - correct[j] = label.equals(prediction); - if (correct[j]) totalCorrect++; - } - - double x = totalCorrect / (double) (m - totalCorrect); - alpha[i] = Math.log(x) / 2.0; - - if (i + 1 < rounds) { - double multiplier = Math.sqrt(x); - double sum = 0; - - for (int j = 0; j < m; ++j) { - if (correct[j]) D[j] /= multiplier; - else D[j] *= multiplier; - sum += D[j]; + /** + * Instantiates member variables. 
+ * + * @param w The weak learning algorithm. + * @param r The number of rounds of boosting. + **/ + public AdaBoost(Learner w, int r) { + this("", w, r); + } + + /** + * Initializing constructor. Sets all member variables to their associated settings in the + * {@link AdaBoost.Parameters} object. + * + * @param p The settings of all parameters. + **/ + public AdaBoost(Parameters p) { + this("", p); + } + + /** + * Instantiates member variables. + * + * @param n The name of the classifier. + **/ + public AdaBoost(String n) { + this(n, new Parameters()); + } + + /** + * Instantiates member variables. + * + * @param n The name of the classifier. + * @param w The weak learning algorithm. + **/ + public AdaBoost(String n, Learner w) { + this(n, w, defaultRounds); + } + + /** + * Instantiates member variables. + * + * @param n The name of the classifier. + * @param r The number of rounds of boosting. + **/ + public AdaBoost(String n, int r) { + this(n, defaultWeakLearner, r); + } + + /** + * Instantiates member variables. + * + * @param n The name of the classifier. + * @param w The weak learning algorithm. + * @param r The number of rounds of boosting. + **/ + public AdaBoost(String n, Learner w, int r) { + super(n); + weakLearner = w; + rounds = r; + allExamples = new OVector(); + allowableValues = new String[] {"*", "*"}; + } + + /** + * Initializing constructor. Sets all member variables to their associated settings in the + * {@link AdaBoost.Parameters} object. + * + * @param n The name of the classifier. + * @param p The settings of all parameters. + **/ + public AdaBoost(String n, Parameters p) { + super(n); + setParameters(p); + allExamples = new OVector(); + allowableValues = new String[] {"*", "*"}; + } + + + /** + * Sets the values of parameters that control the behavior of this learning algorithm. + * + * @param p The parameters. 
+ **/ + public void setParameters(Parameters p) { + weakLearner = p.weakLearner; + rounds = p.rounds; + } + + + /** + * Retrieves the parameters that are set in this learner. + * + * @return An object containing all the values of the parameters that control the behavior of + * this learning algorithm. + **/ + public Learner.Parameters getParameters() { + Parameters p = new Parameters(super.getParameters()); + p.weakLearner = weakLearner; + p.rounds = rounds; + return p; + } + + + /** + * Returns the array of allowable values that a feature returned by this classifier may take. + * + * @return The allowable values of this learner's labeler, or an array of length zero if the + * labeler has not yet been established or does not specify allowable values. + **/ + public String[] allowableValues() { + return allowableValues; + } + + + /** + * Sets the labeler. + * + * @param l A labeling classifier. + **/ + public void setLabeler(Classifier l) { + if (l == null || l.allowableValues().length != 2) { + System.err.println("Error: " + name + + ": An LTU must be given a single binary label classifier."); + new Exception().printStackTrace(); + System.exit(1); } - for (int j = 0; j < m; ++j) D[j] /= sum; - } + super.setLabeler(l); + allowableValues = l.allowableValues(); + labelLexicon.clear(); + labelLexicon.lookup(new DiscretePrimitiveStringFeature(l.containingPackage, l.name, "", + allowableValues[0], (short) 0, (short) 2), true); + labelLexicon.lookup(new DiscretePrimitiveStringFeature(l.containingPackage, l.name, "", + allowableValues[1], (short) 1, (short) 2), true); + createPrediction(0); + createPrediction(1); } - allExamples = null; - } - - - /** - * Clears weakLearners and alpha, although this - * is not necessary since learn(Object[]) will overwrite them - * fresh each time it is called. 
- **/ - public void forget() { - super.forget(); - weakLearners = null; - alpha = null; - allExamples = new OVector(); - } - - - /** - * Computes the scores corresponding to the two prediction values for the - * given example. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @return The scores corresponding to the values in the - * {@link #labelLexicon} in an array with the same indexes. - **/ - protected double[] sumAlphas(int[] exampleFeatures, double[] exampleValues) - { - double[] sums = new double[2]; - - for (int i = 0; i < rounds; ++i) { - int v = - weakLearners[i].featureValue(exampleFeatures, exampleValues) - .getValueIndex(); - sums[v] += alpha[i]; + + /** + * Initializes the weight vector array to the size of the supplied number of features. + **/ + public void initialize(int numExamples, int numFeatures) { + allExamples = new OVector(numExamples); } - return sums; - } - - - /** - * Produces a set of scores indicating the degree to which each possible - * discrete classification value is associated with the given example - * object. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @return The accumulated alpha values of weak learners that predicted the - * associated classification value. - **/ - public ScoreSet scores(int[] exampleFeatures, double[] exampleValues) { - double[] scores = sumAlphas(exampleFeatures, exampleValues); - String[] values = - new String[]{ labelLexicon.lookupKey(0).getStringValue(), - labelLexicon.lookupKey(1).getStringValue() }; - return new ScoreSet(values, scores); - } - - - /** - * Returns the classification of the given example as a single feature - * instead of a {@link FeatureVector}. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. 
- * @return The classification of the example as a feature. - **/ - public Feature featureValue(int[] exampleFeatures, double[] exampleValues) { - double[] scores = sumAlphas(exampleFeatures, exampleValues); - return predictions.get(scores[0] > scores[1] ? 0 : 1); - } - - - /** - * This method uses the trained parameters to make a binary decision about - * an example object. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @return The decision value. - **/ - public String discreteValue(int[] exampleFeatures, double[] exampleValues) { - double[] scores = sumAlphas(exampleFeatures, exampleValues); - return allowableValues[scores[0] > scores[1] ? 0 : 1]; - } - - - /** - * This method uses the trained parameters to make a binary decision about - * an example object. - * - * @param exampleFeatures The example features. - * @param exampleValues The example values. - * @return A binary DiscreteFeature. - **/ - public FeatureVector classify(int[] exampleFeatures, double[] exampleValues) - { - return new FeatureVector(featureValue(exampleFeatures, exampleValues)); - } - - - /** - * Writes this algorithm's internal representation as text. - * - * @param out The output stream. - **/ - public void write(PrintStream out) { - out.println(name); - - if (rounds > 0) { - out.print(alpha[0]); - for (int i = 1; i < rounds; ++i) out.print(", " + alpha[i]); - out.println(); + + /** + * This method adds the example object to the array storing the training examples. + * + *

+ * Note that learning does not actually take place until {@link #doneLearning()} is called. + * + * @param example The example object. + **/ + public void learn(Object example) { + allExamples.add(getExampleArray(example)); } - else out.println("---"); - - out.println(weakLearner.getClass().getName()); - weakLearner.write(out); - for (int i = 0; i < rounds; ++i) { - weakLearners[i].setLexicon(lexicon); - weakLearners[i].write(out); - weakLearners[i].setLexicon(null); + + + /** + * This method adds the example object to the array storing the training examples. + * + *

+ * Note that learning does not actually take place until {@link #doneLearning()} is called. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @param exampleLabels The example's label(s). + * @param labelValues The labels' values. + **/ + public void learn(int[] exampleFeatures, double[] exampleValues, int[] exampleLabels, + double[] labelValues) { + allExamples.add(new Object[] {exampleFeatures, exampleValues, exampleLabels, labelValues}); } - } - - - /** - * Writes the learned function's internal representation in binary form. - * - * @param out The output stream. - **/ - public void write(ExceptionlessOutputStream out) { - super.write(out); - weakLearner.write(out); - out.writeInt(rounds); - for (int i = 0; i < rounds; ++i) weakLearners[i].write(out); - for (int i = 0; i < rounds; ++i) out.writeDouble(alpha[i]); - out.writeString(allowableValues[0]); - out.writeString(allowableValues[1]); - } - - - /** - * Reads the binary representation of a learner with this object's run-time - * type, overwriting any and all learned or manually specified parameters - * as well as the label lexicon but without modifying the feature lexicon. - * - * @param in The input stream. - **/ - public void read(ExceptionlessInputStream in) { - super.read(in); - weakLearner = Learner.readLearner(in); - rounds = in.readInt(); - for (int i = 0; i < rounds; ++i) - weakLearners[i] = Learner.readLearner(in); - for (int i = 0; i < rounds; ++i) alpha[i] = in.readDouble(); - allowableValues = new String[2]; - allowableValues[0] = in.readString(); - allowableValues[1] = in.readString(); - } - - - /** - * A container for all of {@link AdaBoost}'s configurable parameters. - * Using instances of this class should make code more readable and - * constructors less complicated. 
- * - * @author Nick Rizzolo - **/ - public static class Parameters extends Learner.Parameters - { - /** The weak learning algorithm to be boosted. */ - protected Learner weakLearner; - /** The number of times the weak learner will be called. */ - protected int rounds; - /** Sets all the default values. */ - public Parameters() { - weakLearner = (Learner) defaultWeakLearner.clone(); - rounds = defaultRounds; + /** + * Performs learning on the examples stored in {@link #allExamples}, if they exist; otherwise do + * nothing. + **/ + public void doneLearning() { + int m = allExamples.size(); + if (m == 0) + return; + + double[] D = new double[m]; + Arrays.fill(D, 1 / (double) m); + + weakLearners = new Learner[rounds]; + alpha = new double[rounds]; + + for (int i = 0; i < rounds; ++i) { + Object[][] sample = new Object[m][]; + for (int j = 0; j < m; ++j) { + double p = Math.random(); + double sum = 0; + int k = 0; + while (sum <= p) + sum += D[k++]; + sample[j] = (Object[]) allExamples.get(k - 1); + } + + weakLearners[i] = (Learner) weakLearner.clone(); + weakLearners[i].setLabelLexicon(labelLexicon); + weakLearners[i].learn((Object[]) sample); + weakLearners[i].doneLearning(); + + int totalCorrect = 0; + boolean[] correct = new boolean[m]; + for (int j = 0; j < m; ++j) { + String label = labelLexicon.lookupKey(((int[]) sample[j][2])[0]).getStringValue(); + String prediction = weakLearners[i].featureValue(sample[j]).getStringValue(); + correct[j] = label.equals(prediction); + if (correct[j]) + totalCorrect++; + } + + double x = totalCorrect / (double) (m - totalCorrect); + alpha[i] = Math.log(x) / 2.0; + + if (i + 1 < rounds) { + double multiplier = Math.sqrt(x); + double sum = 0; + + for (int j = 0; j < m; ++j) { + if (correct[j]) + D[j] /= multiplier; + else + D[j] *= multiplier; + sum += D[j]; + } + + for (int j = 0; j < m; ++j) + D[j] /= sum; + } + } + + allExamples = null; } /** - * Sets the parameters from the parent's parameters object, giving - * defaults to 
all parameters declared in this object. + * Clears weakLearners and alpha, although this is not necessary since + * learn(Object[]) will overwrite them fresh each time it is called. **/ - public Parameters(Learner.Parameters p) { - super(p); - weakLearner = (Learner) defaultWeakLearner.clone(); - rounds = defaultRounds; + public void forget() { + super.forget(); + weakLearners = null; + alpha = null; + allExamples = new OVector(); } - /** Copy constructor. */ - public Parameters(Parameters p) { - super(p); - weakLearner = p.weakLearner; - rounds = p.rounds; + /** + * Computes the scores corresponding to the two prediction values for the given example. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @return The scores corresponding to the values in the {@link #labelLexicon} in an array with + * the same indexes. + **/ + protected double[] sumAlphas(int[] exampleFeatures, double[] exampleValues) { + double[] sums = new double[2]; + + for (int i = 0; i < rounds; ++i) { + int v = weakLearners[i].featureValue(exampleFeatures, exampleValues).getValueIndex(); + sums[v] += alpha[i]; + } + + return sums; } /** - * Calls the appropriate Learner.setParameters(Parameters) - * method for this Parameters object. - * - * @param l The learner whose parameters will be set. + * Produces a set of scores indicating the degree to which each possible discrete classification + * value is associated with the given example object. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @return The accumulated alpha values of weak learners that predicted the associated + * classification value. 
**/ - public void setParameters(Learner l) { - ((AdaBoost) l).setParameters(this); + public ScoreSet scores(int[] exampleFeatures, double[] exampleValues) { + double[] scores = sumAlphas(exampleFeatures, exampleValues); + String[] values = + new String[] {labelLexicon.lookupKey(0).getStringValue(), + labelLexicon.lookupKey(1).getStringValue()}; + return new ScoreSet(values, scores); } /** - * Creates a string representation of these parameters in which only - * those parameters that differ from their default values are mentioned. + * Returns the classification of the given example as a single feature instead of a + * {@link FeatureVector}. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @return The classification of the example as a feature. **/ - public String nonDefaultString() { - String result = super.nonDefaultString(); - if (rounds != AdaBoost.defaultRounds) - result += ", rounds = " + rounds; - if (result.startsWith(", ")) result = result.substring(2); - return result; + public Feature featureValue(int[] exampleFeatures, double[] exampleValues) { + double[] scores = sumAlphas(exampleFeatures, exampleValues); + return predictions.get(scores[0] > scores[1] ? 0 : 1); } - } -} + + /** + * This method uses the trained parameters to make a binary decision about an example object. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @return The decision value. + **/ + public String discreteValue(int[] exampleFeatures, double[] exampleValues) { + double[] scores = sumAlphas(exampleFeatures, exampleValues); + return allowableValues[scores[0] > scores[1] ? 0 : 1]; + } + + + /** + * This method uses the trained parameters to make a binary decision about an example object. + * + * @param exampleFeatures The example features. + * @param exampleValues The example values. 
+ * @return A binary DiscreteFeature. + **/ + public FeatureVector classify(int[] exampleFeatures, double[] exampleValues) { + return new FeatureVector(featureValue(exampleFeatures, exampleValues)); + } + + + /** + * Writes this algorithm's internal representation as text. + * + * @param out The output stream. + **/ + public void write(PrintStream out) { + out.println(name); + + if (rounds > 0) { + out.print(alpha[0]); + for (int i = 1; i < rounds; ++i) + out.print(", " + alpha[i]); + out.println(); + } else + out.println("---"); + + out.println(weakLearner.getClass().getName()); + weakLearner.write(out); + for (int i = 0; i < rounds; ++i) { + weakLearners[i].setLexicon(lexicon); + weakLearners[i].write(out); + weakLearners[i].setLexicon(null); + } + } + + + /** + * Writes the learned function's internal representation in binary form. + * + * @param out The output stream. + **/ + public void write(ExceptionlessOutputStream out) { + super.write(out); + weakLearner.write(out); + out.writeInt(rounds); + for (int i = 0; i < rounds; ++i) + weakLearners[i].write(out); + for (int i = 0; i < rounds; ++i) + out.writeDouble(alpha[i]); + out.writeString(allowableValues[0]); + out.writeString(allowableValues[1]); + } + + + /** + * Reads the binary representation of a learner with this object's run-time type, overwriting + * any and all learned or manually specified parameters as well as the label lexicon but without + * modifying the feature lexicon. + * + * @param in The input stream. + **/ + public void read(ExceptionlessInputStream in) { + super.read(in); + weakLearner = Learner.readLearner(in); + rounds = in.readInt(); + for (int i = 0; i < rounds; ++i) + weakLearners[i] = Learner.readLearner(in); + for (int i = 0; i < rounds; ++i) + alpha[i] = in.readDouble(); + allowableValues = new String[2]; + allowableValues[0] = in.readString(); + allowableValues[1] = in.readString(); + } + + + /** + * A container for all of {@link AdaBoost}'s configurable parameters. 
Using instances of this + * class should make code more readable and constructors less complicated. + * + * @author Nick Rizzolo + **/ + public static class Parameters extends Learner.Parameters { + /** The weak learning algorithm to be boosted. */ + protected Learner weakLearner; + /** The number of times the weak learner will be called. */ + protected int rounds; + + + /** Sets all the default values. */ + public Parameters() { + weakLearner = (Learner) defaultWeakLearner.clone(); + rounds = defaultRounds; + } + + + /** + * Sets the parameters from the parent's parameters object, giving defaults to all + * parameters declared in this object. + **/ + public Parameters(Learner.Parameters p) { + super(p); + weakLearner = (Learner) defaultWeakLearner.clone(); + rounds = defaultRounds; + } + + + /** Copy constructor. */ + public Parameters(Parameters p) { + super(p); + weakLearner = p.weakLearner; + rounds = p.rounds; + } + + + /** + * Calls the appropriate Learner.setParameters(Parameters) method for this + * Parameters object. + * + * @param l The learner whose parameters will be set. + **/ + public void setParameters(Learner l) { + ((AdaBoost) l).setParameters(this); + } + + + /** + * Creates a string representation of these parameters in which only those parameters that + * differ from their default values are mentioned. 
+ **/ + public String nonDefaultString() { + String result = super.nonDefaultString(); + if (rounds != AdaBoost.defaultRounds) + result += ", rounds = " + rounds; + if (result.startsWith(", ")) + result = result.substring(2); + return result; + } + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/AdaGrad.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/AdaGrad.java index aa4521eb..5de5dd31 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/AdaGrad.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/AdaGrad.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.learn; @@ -20,22 +17,19 @@ /** * AdaGrad - Adaptive Stochastic Gradient Method * - * AdaGrad alters the update to adapt based on historical information, - * so that frequent occurring features in the gradients get small learning rates - * and infrequent features get higher ones. The learner learns slowly from frequent - * features but "pays attention" to rate but informative features. In practice, this - * means that infrequently occurring features can be learned effectively along side - * more frequently occurring features. + * AdaGrad alters the update to adapt based on historical information, so that frequent occurring + * features in the gradients get small learning rates and infrequent features get higher ones. 
The + * learner learns slowly from frequent features but "pays attention" to rate but informative + * features. In practice, this means that infrequently occurring features can be learned effectively + * along side more frequently occurring features. * - * A good reference for literature is: - * Duchi, John, Elad Hazan, and Yoram Singer. - * "Adaptive subgradient methods for online learning and stochastic optimization." - * The Journal of Machine Learning Research 12 (2011): 2121-2159. - * http://www.magicbroom.info/Papers/DuchiHaSi10.pdf + * A good reference for literature is: Duchi, John, Elad Hazan, and Yoram Singer. + * "Adaptive subgradient methods for online learning and stochastic optimization." The Journal of + * Machine Learning Research 12 (2011): 2121-2159. http://www.magicbroom.info/Papers/DuchiHaSi10.pdf * - * @author Yiming Jiang (yjiang16@illinois.edu) + * @author Yiming Jiang (yjiang16@illinois.edu) */ -public class AdaGrad extends Learner{ +public class AdaGrad extends Learner { /* eventual value AdaGrad uses */ protected double learningRateA; @@ -43,9 +37,9 @@ public class AdaGrad extends Learner{ /* eventual loss function AdaGrad uses */ protected String lossFunctionA; - private double[] diagonalVector; // sum of squares of gradients - private double[] weightVector; // hypothesis vector - private double[] gradientVector; // gradient vector + private double[] diagonalVector; // sum of squares of gradients + private double[] weightVector; // hypothesis vector + private double[] gradientVector; // gradient vector /* default constant learning rate is 0.1 */ public static final double defaultLearningRate = 0.1; @@ -59,8 +53,8 @@ public class AdaGrad extends Learner{ /** * Constructor * - * The learning rate takes the default value, while the name of the - * classifier gets the empty string. + * The learning rate takes the default value, while the name of the classifier gets the empty + * string. 
**/ public AdaGrad() { this(""); @@ -69,10 +63,10 @@ public AdaGrad() { /** * Constructor * - * Sets the learning rate to the specified value, while the name of the - * classifier gets the empty string. + * Sets the learning rate to the specified value, while the name of the classifier gets the + * empty string. * - * @param r The desired learning rate value. + * @param r The desired learning rate value. **/ public AdaGrad(double r) { this("", r); @@ -83,7 +77,7 @@ public AdaGrad(double r) { * * Sets all member variables to their associated settings. * - * @param p The settings of all parameters. + * @param p The settings of all parameters. **/ public AdaGrad(Parameters p) { @@ -95,7 +89,7 @@ public AdaGrad(Parameters p) { * * The learning rate takes the default value. * - * @param n The name of the classifier. + * @param n The name of the classifier. **/ public AdaGrad(String n) { this(n, defaultLearningRate); @@ -106,8 +100,8 @@ public AdaGrad(String n) { * * Set desired learning rate * - * @param n The name of the classifier. - * @param r The desired learning rate value. + * @param n The name of the classifier. + * @param r The desired learning rate value. **/ public AdaGrad(String n, double r) { super(n); @@ -121,8 +115,8 @@ public AdaGrad(String n, double r) { * * Sets all member variables to their associated settings. * - * @param n The name of the classifier. - * @param p The settings of all parameters. + * @param n The name of the classifier. + * @param p The settings of all parameters. **/ public AdaGrad(String n, Parameters p) { super(n); @@ -130,10 +124,9 @@ public AdaGrad(String n, Parameters p) { } /** - * Sets the values of parameters that control the behavior of this learning - * algorithm. + * Sets the values of parameters that control the behavior of this learning algorithm. * - * @param p The parameters. + * @param p The parameters. 
**/ public void setParameters(Parameters p) { learningRateA = p.learningRateP; @@ -142,6 +135,7 @@ public void setParameters(Parameters p) { /** * Getter - get weight vector + * * @return weight vector */ public double[] getWeightVector() { @@ -150,6 +144,7 @@ public double[] getWeightVector() { /** * Getter - get loss function + * * @return "hinge" or "lms" */ public String getLossFunction() { @@ -158,6 +153,7 @@ public String getLossFunction() { /** * Getter - get the constant learning rate + * * @return learning rate */ public double getConstantLearningRate() { @@ -165,29 +161,20 @@ public double getConstantLearningRate() { } /** - * AdaGrad's Learning Function: - * Each row of feature vector + label feed in as arguments; - * Update internal parameters; + * AdaGrad's Learning Function: Each row of feature vector + label feed in as arguments; Update + * internal parameters; * - * Note: - * 1. No bias; No Regularization; are implemented + * Note: 1. No bias; No Regularization; are implemented * - * 2. Loss Function used: - * - Hinge Loss - * Q((x, y), w) = max(0, 1 - y(w * x)) - * - Least Mean Square - * Q((x, y), w) = 1/2 * (y - w * x)^2 + * 2. Loss Function used: - Hinge Loss Q((x, y), w) = max(0, 1 - y(w * x)) - Least Mean Square + * Q((x, y), w) = 1/2 * (y - w * x)^2 * - * 3. Notations Explanations: - * * Feature Vector (exampleValues): feature vector parsed from data set - * * Label (labelValue): label parsed from data set - * * Weight Vector (weightVector): weight vector, internal parameter - * * Gradient (gradientVector): gradient vector, internal parameter - * for Hinge loss function, g_t = - y_t x_t - * for LMS loss function, g_t = (w_t * x_t - y_t) x_t - * where t stands for the t_th iteration - * * Diagonal Matrix (diagonalVector): diagonal matrix, internal parameter - * sum of squares of gradients at feature j until time t; + * 3. 
Notations Explanations: * Feature Vector (exampleValues): feature vector parsed from data + * set * Label (labelValue): label parsed from data set * Weight Vector (weightVector): weight + * vector, internal parameter * Gradient (gradientVector): gradient vector, internal parameter + * for Hinge loss function, g_t = - y_t x_t for LMS loss function, g_t = (w_t * x_t - y_t) x_t + * where t stands for the t_th iteration * Diagonal Matrix (diagonalVector): diagonal matrix, + * internal parameter sum of squares of gradients at feature j until time t; * * @param exampleFeatures indices for feature vector x * @param exampleValues values for feature vector x @@ -195,8 +182,8 @@ public double getConstantLearningRate() { * @param labelValues value for label y */ @Override - public void learn(int[] exampleFeatures, double[] exampleValues, - int[] exampleLabels, double[] labelValues) { + public void learn(int[] exampleFeatures, double[] exampleValues, int[] exampleLabels, + double[] labelValues) { /* add an additional dimension to feature dimension on W to reduce computation complexities */ int featureDimension = exampleFeatures.length + 1; @@ -212,17 +199,16 @@ public void learn(int[] exampleFeatures, double[] exampleValues, /* compute (w * x + theta) */ double wDotProductX = 0.0; - for (int i = 0; i < featureDimension-1; i++) { + for (int i = 0; i < featureDimension - 1; i++) { wDotProductX += weightVector[i] * exampleValues[i]; } - wDotProductX += weightVector[featureDimension-1]; + wDotProductX += weightVector[featureDimension - 1]; /* - check if a mistake is made - - if y(w * x + theta) > 1, no mistake, g = 0 - otherwise, made a mistake, g = -y*x - note: for the first n features, the gradient is -yx, for theta, it is -y + * check if a mistake is made + * + * if y(w * x + theta) > 1, no mistake, g = 0 otherwise, made a mistake, g = -y*x note: for + * the first n features, the gradient is -yx, for theta, it is -y */ boolean didMakeAMistake = true; @@ -231,19 +217,17 @@ 
public void learn(int[] exampleFeatures, double[] exampleValues, } /* compute gradient vector */ - for (int i = 0; i < featureDimension-1; i++) { + for (int i = 0; i < featureDimension - 1; i++) { if (didMakeAMistake) { gradientVector[i] = (-1) * labelValue * exampleValues[i]; - } - else { + } else { gradientVector[i] = 0; } } if (didMakeAMistake) { - gradientVector[featureDimension-1] = (-1) * labelValue; - } - else { - gradientVector[featureDimension-1] = 0; + gradientVector[featureDimension - 1] = (-1) * labelValue; + } else { + gradientVector[featureDimension - 1] = 0; } /* compute diagonal vector, aka squares of gradient vector */ @@ -254,20 +238,21 @@ public void learn(int[] exampleFeatures, double[] exampleValues, double denominator = Math.sqrt(diagonalVector[i]); if (denominator == 0) { - denominator = Math.pow(10, -100); // avoid denominator being 0 + denominator = Math.pow(10, -100); // avoid denominator being 0 } /* update weight vector */ if (didMakeAMistake) { /* w_(t+1) = w_t - g_t * r/(G_t)^(1/2) */ - weightVector[i] = weightVector[i] - - (gradientVector[i] * learningRateA / denominator); + weightVector[i] = + weightVector[i] - (gradientVector[i] * learningRateA / denominator); } } } /** * Initialize internal parameters vector + * * @param size feature dimension */ private void initializeVectors(int size) { @@ -284,41 +269,39 @@ private void initializeVectors(int size) { /** * Simply computes the dot product of the weight vector and the example * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. * @return The computed real value. 
**/ @Override public double realValue(int[] exampleFeatures, double[] exampleValues) { double weightDotProductX = 0.0; - for(int i = 0; i < exampleFeatures.length; i++) { + for (int i = 0; i < exampleFeatures.length; i++) { weightDotProductX += weightVector[i] * exampleValues[i]; } - weightDotProductX += weightVector[weightVector.length-1]; + weightDotProductX += weightVector[weightVector.length - 1]; return weightDotProductX; } /** - * Returns the classification of the given example as a single feature - * instead of a {@link FeatureVector}. + * Returns the classification of the given example as a single feature instead of a + * {@link FeatureVector}. * - * @param f The features array. - * @param v The values array. + * @param f The features array. + * @param v The values array. * @return The classification of the example as a feature. **/ @Override public Feature featureValue(int[] f, double[] v) { - return - new RealPrimitiveStringFeature(containingPackage, name, "", - realValue(f, v)); + return new RealPrimitiveStringFeature(containingPackage, name, "", realValue(f, v)); } /** - * Simply computes the dot product of the weight vector and the feature - * vector extracted from the example object. + * Simply computes the dot product of the weight vector and the feature vector extracted from + * the example object. * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. * @return The computed feature (in a vector). **/ @Override @@ -327,17 +310,15 @@ public FeatureVector classify(int[] exampleFeatures, double[] exampleValues) { } /** - * Produces a set of scores indicating the degree to which each possible - * discrete classification value is associated with the given example - * object. 
Learners that return a real feature or more than - * one feature may implement this method by simply returning + * Produces a set of scores indicating the degree to which each possible discrete classification + * value is associated with the given example object. Learners that return a real + * feature or more than one feature may implement this method by simply returning * null. * - * @param exampleFeatures The example's array of feature indices - * @param exampleValues The example's array of values - * @return A set of scores indicating the degree to which each possible - * discrete classification value is associated with the given - * example object. + * @param exampleFeatures The example's array of feature indices + * @param exampleValues The example's array of values + * @return A set of scores indicating the degree to which each possible discrete classification + * value is associated with the given example object. **/ @Override public ScoreSet scores(int[] exampleFeatures, double[] exampleValues) { @@ -347,7 +328,7 @@ public ScoreSet scores(int[] exampleFeatures, double[] exampleValues) { /** * Writes the learned function's internal representation as text. * - * @param printStream The output stream. + * @param printStream The output stream. **/ @Override public void write(PrintStream printStream) { @@ -365,9 +346,8 @@ public String getOutputType() { } /** - * A container for all of AdaGrad's configurable - * parameters. Using instances of this class should make code - * more readable and constructors less complicated. + * A container for all of AdaGrad's configurable parameters. Using instances of + * this class should make code more readable and constructors less complicated. 
* * @author Yiming Jiang */ @@ -386,4 +366,4 @@ public Parameters() { lossFunctionP = defaultLossFunction; } } -} \ No newline at end of file +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/BatchTrainer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/BatchTrainer.java index 79cfe022..fd281fec 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/BatchTrainer.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/BatchTrainer.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.learn; @@ -22,1416 +19,1338 @@ import java.util.zip.ZipEntry; import java.util.zip.ZipOutputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.Sort; import edu.illinois.cs.cogcomp.lbjava.parse.ArrayFileParser; import edu.illinois.cs.cogcomp.lbjava.parse.FoldParser; import edu.illinois.cs.cogcomp.lbjava.parse.FoldSeparator; import edu.illinois.cs.cogcomp.lbjava.parse.Parser; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessInputStream; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessOutputStream; -import edu.illinois.cs.cogcomp.lbjava.util.Sort; import edu.illinois.cs.cogcomp.lbjava.util.StudentT; import 
edu.illinois.cs.cogcomp.lbjava.util.TableFormat; /** - * Use this class to batch train a {@link Learner}. - * - * @author Nick Rizzolo + * Use this class to batch train a {@link Learner}. + * + * @author Nick Rizzolo **/ -public class BatchTrainer -{ - /** - * Writes an example vector to the specified stream, with all features - * being written in the order they appear in the vector. - * - * @param out The output stream. - * @param featureIndexes The lexicon indexes of the features. - * @param featureValues The values or "strengths" of the features. - * @param labelIndexes The lexicon indexes of the labels. - * @param labelValues The values or "strengths" of the labels. - **/ - public static void writeExample(ExceptionlessOutputStream out, - int[] featureIndexes, - double[] featureValues, int[] labelIndexes, - double[] labelValues) { - writeExample(out, featureIndexes, featureValues, labelIndexes, - labelValues, featureIndexes.length, null); - } - - - /** - * Writes an example vector to the specified stream, with all features - * being written in the order they appear in the vector. - * - * @param out The output stream. - * @param featureIndexes The lexicon indexes of the features. - * @param featureValues The values or "strengths" of the features. - * @param labelIndexes The lexicon indexes of the labels. - * @param labelValues The values or "strengths" of the labels. - * @param unpruned The number of features in the vector that aren't - * pruned. 
- **/ - public static void writeExample(ExceptionlessOutputStream out, - int[] featureIndexes, - double[] featureValues, int[] labelIndexes, - double[] labelValues, int unpruned) { - writeExample(out, featureIndexes, featureValues, labelIndexes, - labelValues, unpruned, null); - } - - - /** - * Writes an example vector contained in an object array to the underlying - * output stream, with features sorted according to their representations - * in the given lexicon if present, or in the order they appear in the - * vector otherwise. - * - * @param out The output stream. - * @param featureIndexes The lexicon indexes of the features. - * @param featureValues The values or "strengths" of the features. - * @param labelIndexes The lexicon indexes of the labels. - * @param labelValues The values or "strengths" of the labels. - * @param lex A lexicon. - **/ - public static void writeExample(ExceptionlessOutputStream out, - int[] featureIndexes, - double[] featureValues, int[] labelIndexes, - double[] labelValues, Lexicon lex) { - writeExample(out, featureIndexes, featureValues, labelIndexes, - labelValues, featureIndexes.length, lex); - } - - - /** - * Writes an example vector contained in an object array to the underlying - * output stream, with features sorted according to their representations - * in the given lexicon if present, or in the order they appear in the - * vector otherwise. - * - * @param out The output stream. - * @param featureIndexes The lexicon indexes of the features. - * @param featureValues The values or "strengths" of the features. - * @param labelIndexes The lexicon indexes of the labels. - * @param labelValues The values or "strengths" of the labels. - * @param unpruned The number of features in the vector that aren't - * pruned. - * @param lexicon A lexicon. 
- **/ - public static void writeExample(ExceptionlessOutputStream out, - final int[] featureIndexes, - double[] featureValues, int[] labelIndexes, - double[] labelValues, int unpruned, - final Lexicon lexicon) { - int[] I = null; - if (lexicon != null) { - I = new int[featureIndexes.length]; - for (int i = 0; i < I.length; ++i) I[i] = i; - Sort.sort(I, 0, unpruned, - new Sort.IntComparator() { - public int compare(int i1, int i2) { - return lexicon.lookupKey(featureIndexes[i1]) - .compareTo(lexicon.lookupKey(featureIndexes[i2])); - } - }); +public class BatchTrainer { + /** + * Writes an + * example vector to the specified stream, with all features being written in the order they + * appear in the vector. + * + * @param out The output stream. + * @param featureIndexes The lexicon indexes of the features. + * @param featureValues The values or "strengths" of the features. + * @param labelIndexes The lexicon indexes of the labels. + * @param labelValues The values or "strengths" of the labels. + **/ + public static void writeExample(ExceptionlessOutputStream out, int[] featureIndexes, + double[] featureValues, int[] labelIndexes, double[] labelValues) { + writeExample(out, featureIndexes, featureValues, labelIndexes, labelValues, + featureIndexes.length, null); + } + + + /** + * Writes an + * example vector to the specified stream, with all features being written in the order they + * appear in the vector. + * + * @param out The output stream. + * @param featureIndexes The lexicon indexes of the features. + * @param featureValues The values or "strengths" of the features. + * @param labelIndexes The lexicon indexes of the labels. + * @param labelValues The values or "strengths" of the labels. + * @param unpruned The number of features in the vector that aren't pruned. 
+ **/ + public static void writeExample(ExceptionlessOutputStream out, int[] featureIndexes, + double[] featureValues, int[] labelIndexes, double[] labelValues, int unpruned) { + writeExample(out, featureIndexes, featureValues, labelIndexes, labelValues, unpruned, null); + } + + + /** + * Writes + * an example vector contained in an object array to the underlying output stream, with features + * sorted according to their representations in the given lexicon if present, or in the order + * they appear in the vector otherwise. + * + * @param out The output stream. + * @param featureIndexes The lexicon indexes of the features. + * @param featureValues The values or "strengths" of the features. + * @param labelIndexes The lexicon indexes of the labels. + * @param labelValues The values or "strengths" of the labels. + * @param lex A lexicon. + **/ + public static void writeExample(ExceptionlessOutputStream out, int[] featureIndexes, + double[] featureValues, int[] labelIndexes, double[] labelValues, Lexicon lex) { + writeExample(out, featureIndexes, featureValues, labelIndexes, labelValues, + featureIndexes.length, lex); + } + + + /** + * + * Writes an example vector contained in an object array to the underlying output stream, with + * features sorted according to their representations in the given lexicon if present, or in the + * order they appear in the vector otherwise. + * + * @param out The output stream. + * @param featureIndexes The lexicon indexes of the features. + * @param featureValues The values or "strengths" of the features. + * @param labelIndexes The lexicon indexes of the labels. + * @param labelValues The values or "strengths" of the labels. + * @param unpruned The number of features in the vector that aren't pruned. + * @param lexicon A lexicon. 
+ **/ + public static void writeExample(ExceptionlessOutputStream out, final int[] featureIndexes, + double[] featureValues, int[] labelIndexes, double[] labelValues, int unpruned, + final Lexicon lexicon) { + int[] I = null; + if (lexicon != null) { + I = new int[featureIndexes.length]; + for (int i = 0; i < I.length; ++i) + I[i] = i; + Sort.sort(I, 0, unpruned, new Sort.IntComparator() { + public int compare(int i1, int i2) { + return lexicon.lookupKey(featureIndexes[i1]).compareTo( + lexicon.lookupKey(featureIndexes[i2])); + } + }); + } + + out.writeInt(labelIndexes.length); + for (int i = 0; i < labelIndexes.length; ++i) { + out.writeInt(labelIndexes[i]); + out.writeDouble(labelValues[i]); + } + + out.writeInt(unpruned); + out.writeInt(featureIndexes.length - unpruned); + + if (lexicon == null) { + for (int i = 0; i < featureIndexes.length; ++i) { + out.writeInt(featureIndexes[i]); + out.writeDouble(featureValues[i]); + } + } else { + for (int i = 0; i < featureIndexes.length; ++i) { + out.writeInt(featureIndexes[I[i]]); + out.writeDouble(featureValues[I[i]]); + } + } + } + + + // Instance member variables. + /** The learning classifier being trained. */ + protected Learner learner; + /** The parser from which training data for {@link #learner} is received. */ + protected Parser parser; + /** + * The number of training examples in between status messages printed to STDOUT, or + * 0 to suppress these messages. + **/ + protected int progressOutput; + /** Spacing for making status messages prettier. */ + protected String messageIndent; + /** {@link #learner}'s class. */ + protected Class learnerClass; + /** {@link #learner}'s isTraining field. */ + protected Field fieldIsTraining; + /** The number of examples extracted during pre-extraction. */ + protected int examples; + /** The number of features extracted during pre-extraction. */ + protected int lexiconSize; + + + // Constructors. + /** + * Creates a new trainer that doesn't produce status messages. 
+ * + * @param l The learner to be trained. + * @param p The path to an example file. + **/ + public BatchTrainer(Learner l, String p) { + this(l, p, true); } - out.writeInt(labelIndexes.length); - for (int i = 0; i < labelIndexes.length; ++i) { - out.writeInt(labelIndexes[i]); - out.writeDouble(labelValues[i]); + /** + * Creates a new trainer that produces status messages. + * + * @param l The learner to be trained. + * @param p The path to an example file. + * @param o The number of examples in between status messages on STDOUT. + **/ + public BatchTrainer(Learner l, String p, int o) { + this(l, p, true, o); } - out.writeInt(unpruned); - out.writeInt(featureIndexes.length - unpruned); + /** + * Creates a new trainer that produces status + * messages with the specified indentation spacing for status messages. + * + * @param l The learner to be trained. + * @param p The path to an example file. + * @param o The number of examples in between status messages on STDOUT. + * @param i The indentation spacing for status messages. + **/ + public BatchTrainer(Learner l, String p, int o, String i) { + this(l, p, true, o, i); + } - if (lexicon == null) { - for (int i = 0; i < featureIndexes.length; ++i) { - out.writeInt(featureIndexes[i]); - out.writeDouble(featureValues[i]); - } + /** + * Creates a new trainer that doesn't produce status + * messages. + * + * @param l The learner to be trained. + * @param p The path to an example file. + * @param z Whether or not the example file is compressed. + **/ + public BatchTrainer(Learner l, String p, boolean z) { + this(l, new ArrayFileParser(p, z)); } - else { - for (int i = 0; i < featureIndexes.length; ++i) { - out.writeInt(featureIndexes[I[i]]); - out.writeDouble(featureValues[I[i]]); - } + + /** + * Creates a new trainer that produces status + * messages. + * + * @param l The learner to be trained. + * @param p The path to an example file. + * @param z Whether or not the example file is compressed. 
+ * @param o The number of examples in between status messages on STDOUT. + **/ + public BatchTrainer(Learner l, String p, boolean z, int o) { + this(l, new ArrayFileParser(p, z), o); } - } - - - // Instance member variables. - /** The learning classifier being trained. */ - protected Learner learner; - /** The parser from which training data for {@link #learner} is received. */ - protected Parser parser; - /** - * The number of training examples in between status messages printed to - * STDOUT, or 0 to suppress these messages. - **/ - protected int progressOutput; - /** Spacing for making status messages prettier. */ - protected String messageIndent; - /** {@link #learner}'s class. */ - protected Class learnerClass; - /** {@link #learner}'s isTraining field. */ - protected Field fieldIsTraining; - /** The number of examples extracted during pre-extraction. */ - protected int examples; - /** The number of features extracted during pre-extraction. */ - protected int lexiconSize; - - - // Constructors. - /** - * Creates a new trainer that doesn't produce status messages. - * - * @param l The learner to be trained. - * @param p The path to an example file. - **/ - public BatchTrainer(Learner l, String p) { this(l, p, true); } - - /** - * Creates a new trainer that produces status messages. - * - * @param l The learner to be trained. - * @param p The path to an example file. - * @param o The number of examples in between status messages on STDOUT. - **/ - public BatchTrainer(Learner l, String p, int o) { this(l, p, true, o); } - - /** - * Creates a new trainer that produces status messages with the specified - * indentation spacing for status messages. - * - * @param l The learner to be trained. - * @param p The path to an example file. - * @param o The number of examples in between status messages on STDOUT. - * @param i The indentation spacing for status messages. 
- **/ - public BatchTrainer(Learner l, String p, int o, String i) { - this(l, p, true, o, i); - } - - /** - * Creates a new trainer that doesn't produce status messages. - * - * @param l The learner to be trained. - * @param p The path to an example file. - * @param z Whether or not the example file is compressed. - **/ - public BatchTrainer(Learner l, String p, boolean z) { - this(l, new ArrayFileParser(p, z)); - } - - /** - * Creates a new trainer that produces status messages. - * - * @param l The learner to be trained. - * @param p The path to an example file. - * @param z Whether or not the example file is compressed. - * @param o The number of examples in between status messages on STDOUT. - **/ - public BatchTrainer(Learner l, String p, boolean z, int o) { - this(l, new ArrayFileParser(p, z), o); - } - - /** - * Creates a new trainer that produces status messages with the specified - * indentation spacing for status messages. - * - * @param l The learner to be trained. - * @param p The path to an example file. - * @param z Whether or not the example file is compressed. - * @param o The number of examples in between status messages on STDOUT. - * @param i The indentation spacing for status messages. - **/ - public BatchTrainer(Learner l, String p, boolean z, int o, String i) { - this(l, new ArrayFileParser(p, z), o, i); - } - - /** - * Creates a new trainer that doesn't produce status messages. - * - * @param l The learner to be trained. - * @param p The parser from which training data is received. - **/ - public BatchTrainer(Learner l, Parser p) { this(l, p, 0); } - - /** - * Creates a new trainer that produces status messages. - * - * @param l The learner to be trained. - * @param p The parser from which training data is received. - * @param o The number of examples in between status messages on STDOUT. 
- **/ - public BatchTrainer(Learner l, Parser p, int o) { this(l, p, o, ""); } - - /** - * Creates a new trainer that produces status messages with the specified - * indentation spacing for status messages. - * - * @param l The learner to be trained. - * @param p The parser from which training data is received. - * @param o The number of examples in between status messages on STDOUT. - * @param i The indentation spacing for status messages. - **/ - public BatchTrainer(Learner l, Parser p, int o, String i) { - learner = l; - parser = p; - progressOutput = o; - messageIndent = i; - - learnerClass = learner.getClass(); - try { fieldIsTraining = learnerClass.getField("isTraining"); } - catch (Exception e) { - System.err.println("Can't access " + learnerClass - + "'s 'isTraining' field: " + e); - System.exit(1); + + /** + * Creates a new trainer that produces status + * messages with the specified indentation spacing for status messages. + * + * @param l The learner to be trained. + * @param p The path to an example file. + * @param z Whether or not the example file is compressed. + * @param o The number of examples in between status messages on STDOUT. + * @param i The indentation spacing for status messages. + **/ + public BatchTrainer(Learner l, String p, boolean z, int o, String i) { + this(l, new ArrayFileParser(p, z), o, i); } - } - - - /** Returns the value of {@link #progressOutput}. */ - public int getProgressOutput() { return progressOutput; } - /** Returns the value of {@link #parser}. */ - public Parser getParser() { return parser; } - - - /** - * Sets the static isTraining flag inside {@link #learner}'s - * runtime class to the specified value. This probably doesn't need to - * be tinkered with after pre-extraction, since it can only affect the - * code that does the extraction. - * - * @param b The new value for the flag. 
- **/ - protected void setIsTraining(boolean b) { - try { fieldIsTraining.setBoolean(null, b); } - catch (Exception e) { - System.err.println("Can't set " + learnerClass - + "'s 'isTraining' field: " + e); - System.exit(1); + + /** + * Creates a new trainer that doesn't produce status messages. + * + * @param l The learner to be trained. + * @param p The parser from which training data is received. + **/ + public BatchTrainer(Learner l, Parser p) { + this(l, p, 0); } - } - - - /** - * Returns the value of the static isTraining flag inside - * {@link #learner}'s runtime class. - **/ - protected boolean getIsTraining() { - try { return fieldIsTraining.getBoolean(null); } - catch (Exception e) { - System.err.println("Can't get " + learnerClass - + "'s 'isTraining' field: " + e); - System.exit(1); + + /** + * Creates a new trainer that produces status messages. + * + * @param l The learner to be trained. + * @param p The parser from which training data is received. + * @param o The number of examples in between status messages on STDOUT. + **/ + public BatchTrainer(Learner l, Parser p, int o) { + this(l, p, o, ""); } - return false; - } - - - /** - * Performs labeled feature vector pre-extraction into the specified file - * (or memory), replacing {@link #parser} with one that reads from that - * file (or memory). After pre-extraction, the lexicon is written to disk. - * It is assumed that {@link #learner} already knows where to write the - * lexicon. If it doesn't, call {@link Learner#setLexiconLocation(String)} - * or {@link Learner#setLexiconLocation(java.net.URL)} on that object - * before calling this method. - * - *

Calling this method is equivalent to calling - * {@link #preExtract(String,boolean)} with the second argument - * true. - * - * @param exampleFile The full path to a file into which examples will be - * written, or null to extract into - * memory. - * @return The resulting lexicon. - **/ - public Lexicon preExtract(String exampleFile) { - return preExtract(exampleFile, true); - } - - - /** - * Performs labeled feature vector pre-extraction into the specified file - * (or memory), replacing {@link #parser} with one that reads from that - * file (or memory). After pre-extraction, the lexicon is written to disk. - * It is assumed that {@link #learner} already knows where to write the - * lexicon. If it doesn't, call {@link Learner#setLexiconLocation(String)} - * or {@link Learner#setLexiconLocation(java.net.URL)} on that object - * before calling this method. - * - * @param exampleFile The full path to a file into which examples will be - * written, or null to extract into - * memory. - * @param zip Whether or not to compress the extracted examples. - * @return The resulting lexicon. - **/ - public Lexicon preExtract(String exampleFile, boolean zip) { - Learner preExtractLearner = - preExtract(exampleFile, zip, Lexicon.CountPolicy.none); - preExtractLearner.saveLexicon(); - return preExtractLearner.getLexicon(); - } - - - /** - * Performs labeled feature vector pre-extraction into the specified file - * (or memory), replacing {@link #parser} with one that reads from that - * file (or memory). If exampleFile already exists, this - * method writes the examples to a temporary file, then copies the contents - * to the existing file after pre-extraction completes. This is done in - * case the parser providing the examples to this method is reading the - * existing file. - * - *

Note that this method does not write the feature lexicon it - * produces to disk. Calling this method is equivalent to calling - * {@link #preExtract(String,boolean,Lexicon.CountPolicy)} with the second - * argument true. - * - * @param exampleFile The full path to a file into which examples will be - * written, or null to extract into - * memory. - * @param countPolicy The feature counting policy for the learner's - * feature lexicon. - * @return A new learning classifier containing the lexicon built during - * pre-extraction. - **/ - public Learner preExtract(String exampleFile, - Lexicon.CountPolicy countPolicy) { - return preExtract(exampleFile, true, countPolicy); - } - - - /** - * Performs labeled feature vector pre-extraction into the specified file - * (or memory), replacing {@link #parser} with one that reads from that - * file (or memory). If exampleFile already exists, this - * method writes the examples to a temporary file, then copies the contents - * to the existing file after pre-extraction completes. This is done in - * case the parser providing the examples to this method is reading the - * existing file. - * - *

Note that this method does not write the feature lexicon it - * produces to disk. - * - * @param exampleFile The full path to a file into which examples will be - * written, or null to extract into - * memory. - * @param zip Whether or not to compress the extracted examples. - * @param countPolicy The feature counting policy for the learner's - * feature lexicon. - * @return A new learning classifier containing the lexicon built during - * pre-extraction. - **/ - public Learner preExtract(String exampleFile, boolean zip, - Lexicon.CountPolicy countPolicy) { - Learner preExtractLearner = learner.emptyClone(); - preExtractLearner.setLabelLexicon(learner.getLabelLexicon()); - Lexicon lexicon = learner.getLexicon(); - preExtractLearner.setLexicon(lexicon); - preExtractLearner.countFeatures(countPolicy); - learner.setLexicon(null); - setIsTraining(true); - examples = 0; - - // Establish an output stream for writing examples. - ExceptionlessOutputStream eos = null; - ByteArrayOutputStream baos = null; - File fExampleFile = null; - File fTempFile = null; - boolean copy = false; - - if (exampleFile != null) { - fExampleFile = new File(exampleFile); - if (fExampleFile.exists()) { - int lastSlash = exampleFile.lastIndexOf(File.separatorChar); + /** + * Creates a new trainer that produces status + * messages with the specified indentation spacing for status messages. + * + * @param l The learner to be trained. + * @param p The parser from which training data is received. + * @param o The number of examples in between status messages on STDOUT. + * @param i The indentation spacing for status messages. 
+ **/ + public BatchTrainer(Learner l, Parser p, int o, String i) { + learner = l; + parser = p; + progressOutput = o; + messageIndent = i; + + learnerClass = learner.getClass(); try { - if (lastSlash == -1) fTempFile = File.createTempFile("LBJ", null); - else - fTempFile = - File.createTempFile( - "LBJ", null, new File(exampleFile.substring(0, lastSlash))); - } - catch (Exception e) { - System.err.println( - "LBJava ERROR: BatchTrainer.preExtract: Can't create temporary " - + "file: " + e); - System.exit(1); + fieldIsTraining = learnerClass.getField("isTraining"); + } catch (Exception e) { + System.err.println("Can't access " + learnerClass + "'s 'isTraining' field: " + e); + System.exit(1); } + } - fTempFile.deleteOnExit(); - copy = true; - } - else fTempFile = fExampleFile; - try { - if (zip) - eos = - ExceptionlessOutputStream.openCompressedStream( - fTempFile.toURI().toURL()); - else - eos = - ExceptionlessOutputStream.openBufferedStream( - fTempFile.toURI().toURL()); - } - catch (Exception e) { - System.err.println( - "LBJava ERROR: BatchTrainer.preExtract: Can't convert file name '" - + fTempFile + "' to URL: " + e); - System.exit(1); - } + /** Returns the value of {@link #progressOutput}. */ + public int getProgressOutput() { + return progressOutput; } - else { - baos = new ByteArrayOutputStream(1 << 18); - if (zip) { - ZipOutputStream zos = new ZipOutputStream(baos); + + /** Returns the value of {@link #parser}. */ + public Parser getParser() { + return parser; + } + + + /** + * Sets the static isTraining flag inside + * {@link #learner}'s runtime class to the specified value. This probably doesn't need to be + * tinkered with after pre-extraction, since it can only affect the code that does the + * extraction. + * + * @param b The new value for the flag. 
+ **/ + protected void setIsTraining(boolean b) { try { - zos.putNextEntry( - new ZipEntry(ExceptionlessInputStream.zipEntryName)); + fieldIsTraining.setBoolean(null, b); + } catch (Exception e) { + System.err.println("Can't set " + learnerClass + "'s 'isTraining' field: " + e); + System.exit(1); } - catch (Exception e) { - System.err.println("ERROR: Can't create in-memory zip data:"); - e.printStackTrace(); - System.exit(1); - } - eos = new ExceptionlessOutputStream(new BufferedOutputStream(zos)); - } - else eos = new ExceptionlessOutputStream(baos); } - // Write examples to the output stream. - boolean alreadyExtracted = parser instanceof ArrayFileParser; - if (alreadyExtracted) ((ArrayFileParser) parser).setIncludePruned(true); - - for (Object example = parser.next(); example != null; - example = parser.next()) { - if (progressOutput > 0 && examples % progressOutput == 0) - System.out.println( - " " + learner.name + ", pre-extract: " + messageIndent + examples - + " examples at " + new Date()); - - if (example == FoldSeparator.separator) eos.writeInt(-1); - else { - ++examples; - Object[] exampleArray = - alreadyExtracted ? (Object[]) example - : preExtractLearner.getExampleArray(example); - - int[] featureIndexes = (int[]) exampleArray[0]; - double[] featureValues = (double[]) exampleArray[1]; - int[] labelIndexes = (int[]) exampleArray[2]; - double[] labelValues = (double[]) exampleArray[3]; - - if (alreadyExtracted && countPolicy != Lexicon.CountPolicy.none) { - int labelIndex = - countPolicy == Lexicon.CountPolicy.perClass - ? labelIndexes[0] : -1; - for (int i = 0; i < featureIndexes.length; ++i) { - lexicon.lookup(lexicon.lookupKey(featureIndexes[i]), true, - labelIndex); - } + + /** + * Returns the value of the static isTraining flag inside + * {@link #learner}'s runtime class. 
+ **/ + protected boolean getIsTraining() { + try { + return fieldIsTraining.getBoolean(null); + } catch (Exception e) { + System.err.println("Can't get " + learnerClass + "'s 'isTraining' field: " + e); + System.exit(1); } + return false; + } - writeExample(eos, featureIndexes, featureValues, labelIndexes, - labelValues, lexicon); - } + + /** + * Performs labeled feature vector pre-extraction into the specified + * file (or memory), replacing {@link #parser} with one that reads from that file (or memory). + * After pre-extraction, the lexicon is written to disk. It is assumed that {@link #learner} + * already knows where to write the lexicon. If it doesn't, call + * {@link Learner#setLexiconLocation(String)} or + * {@link Learner#setLexiconLocation(java.net.URL)} on that object before calling this method. + * + *

+ * Calling this method is equivalent to calling {@link #preExtract(String,boolean)} with the + * second argument true. + * + * @param exampleFile The full path to a file into which examples will be written, or + * null to extract into memory. + * @return The resulting lexicon. + **/ + public Lexicon preExtract(String exampleFile) { + return preExtract(exampleFile, true); } - if (progressOutput > 0) - System.out.println( - " " + learner.name + ", pre-extract: " + messageIndent + examples - + " examples at " + new Date()); - - parser.close(); - eos.close(); - - if (copy) { - try { - FileChannel in = (new FileInputStream(fTempFile)).getChannel(); - FileChannel out = (new FileOutputStream(fExampleFile)).getChannel(); - in.transferTo(0, fTempFile.length(), out); - in.close(); - out.close(); - } - catch (Exception e) { - System.err.println("LBJava ERROR: Can't copy example file:"); - e.printStackTrace(); - System.exit(1); - } + + /** + * Performs labeled feature vector pre-extraction into the + * specified file (or memory), replacing {@link #parser} with one that reads from that file (or + * memory). After pre-extraction, the lexicon is written to disk. It is assumed that + * {@link #learner} already knows where to write the lexicon. If it doesn't, call + * {@link Learner#setLexiconLocation(String)} or + * {@link Learner#setLexiconLocation(java.net.URL)} on that object before calling this method. + * + * @param exampleFile The full path to a file into which examples will be written, or + * null to extract into memory. + * @param zip Whether or not to compress the extracted examples. + * @return The resulting lexicon. 
+ **/ + public Lexicon preExtract(String exampleFile, boolean zip) { + Learner preExtractLearner = preExtract(exampleFile, zip, Lexicon.CountPolicy.none); + preExtractLearner.saveLexicon(); + return preExtractLearner.getLexicon(); } - setIsTraining(false); - lexiconSize = preExtractLearner.getLexicon().size(); - - // Set up a new parser to read the pre-extracted examples. - if (fTempFile != null) - parser = new ArrayFileParser(fTempFile.getPath(), zip); - else parser = new ArrayFileParser(baos.toByteArray(), zip); - - learner.setLabelLexicon(preExtractLearner.getLabelLexicon()); - return preExtractLearner; - } - - - /** - * This method sets the {@link #examples} and {@link #lexiconSize} - * variables by querying {@link #parser} and {@link #learner} respectively. - * It sets {@link #examples} to 0 if {@link #parser} is not an - * {@link edu.illinois.cs.cogcomp.lbjava.parse.ArrayFileParser} and {@link #lexiconSize} to 0 if - * {@link #learner} doesn't either have the lexicon loaded or know where to - * find it. - **/ - public void fillInSizes() { - if (parser instanceof ArrayFileParser) { - ArrayFileParser afp = (ArrayFileParser) parser; - examples = afp.getNumExamples(); + + /** + * Performs labeled feature vector + * pre-extraction into the specified file (or memory), replacing {@link #parser} with one that + * reads from that file (or memory). If exampleFile already exists, this method + * writes the examples to a temporary file, then copies the contents to the existing file after + * pre-extraction completes. This is done in case the parser providing the examples to this + * method is reading the existing file. + * + *

+ * Note that this method does not write the feature lexicon it produces to disk. Calling + * this method is equivalent to calling {@link #preExtract(String,boolean,Lexicon.CountPolicy)} + * with the second argument true. + * + * @param exampleFile The full path to a file into which examples will be written, or + * null to extract into memory. + * @param countPolicy The feature counting policy for the learner's feature lexicon. + * @return A new learning classifier containing the lexicon built during pre-extraction. + **/ + public Learner preExtract(String exampleFile, Lexicon.CountPolicy countPolicy) { + return preExtract(exampleFile, true, countPolicy); } - else examples = 0; - lexiconSize = learner.getPrunedLexiconSize(); - } - - - /** - * Prunes the data returned by {@link #parser} according to the given - * policy, under the assumption that feature counts have already been - * compiled in the given learner's lexicon. The pruned data is written to - * the given file (or memory), and at the end of the method, - * {@link #parser} is replaced with a new parser that reads from that file - * (or memory). The pruned lexicon is also written to disk. - * - *

If exampleFile already exists, this method writes the - * examples to a temporary file, then copies the contents to the existing - * file after pruning completes. This is done in case the parser providing - * the examples to this method is reading the existing file. - * - *

When calling this method, it must be the case that {@link #parser} - * is a {@link edu.illinois.cs.cogcomp.lbjava.parse.ArrayFileParser}. This condition is easy to - * satisfy, since the - * {@link #preExtract(String,boolean,Lexicon.CountPolicy)} method will - * usually be called prior to this method to count the features in the - * dataset, and this method also replaces {@link #parser} with a - * {@link edu.illinois.cs.cogcomp.lbjava.parse.ArrayFileParser}. - * - *

It is assumed that preExtractLearner already knows - * where to write the lexicon. If it doesn't, call - * {@link Learner#setLexiconLocation(String)} or - * {@link Learner#setLexiconLocation(java.net.URL)} on that object before - * calling this method. - * - *

Calling this method is equivalent to calling - * {@link #pruneDataset(String,boolean,Lexicon.PruningPolicy,Learner)} with - * the second argument true. - * - * @param exampleFile The full path to a file into which examples - * will be written, or null to - * extract into memory. - * @param policy The type of feature pruning. - * @param preExtractLearner A learner whose lexicon contains all the - * necessary feature count information. - **/ - public void pruneDataset(String exampleFile, Lexicon.PruningPolicy policy, - Learner preExtractLearner) { - pruneDataset(exampleFile, true, policy, preExtractLearner); - } - - - /** - * Prunes the data returned by {@link #parser} according to the given - * policy, under the assumption that feature counts have already been - * compiled in the given learner's lexicon. The pruned data is written to - * the given file (or memory), and at the end of the method, - * {@link #parser} is replaced with a new parser that reads from that file - * (or memory). The pruned lexicon is also written to disk. - * - *

If exampleFile already exists, this method writes the - * examples to a temporary file, then copies the contents to the existing - * file after pruning completes. This is done in case the parser providing - * the examples to this method is reading the existing file. - * - *

When calling this method, it must be the case that {@link #parser} - * is an {@link edu.illinois.cs.cogcomp.lbjava.parse.ArrayFileParser ArrayFileParser}. This - * condition is easy to satisfy, since the - * {@link #preExtract(String,boolean,Lexicon.CountPolicy)} method will - * usually be called prior to this method to count the features in the - * dataset, and this method also replaces {@link #parser} with an - * {@link edu.illinois.cs.cogcomp.lbjava.parse.ArrayFileParser ArrayFileParser}. - * - *

It is assumed that preExtractLearner already knows - * where to write the lexicon. If it doesn't, call - * {@link Learner#setLexiconLocation(String)} or - * {@link Learner#setLexiconLocation(java.net.URL)} on that object before - * calling this method. - * - * @param exampleFile The full path to a file into which examples - * will be written, or null to - * extract into memory. - * @param zip Whether or not to compress the extracted - * examples. - * @param policy The type of feature pruning. - * @param preExtractLearner A learner whose lexicon contains all the - * necessary feature count information. - **/ - public void pruneDataset(String exampleFile, boolean zip, - Lexicon.PruningPolicy policy, - Learner preExtractLearner) { - Lexicon lexicon = preExtractLearner.getLexicon(); - - if (!policy.isNone() - && lexicon.getCountPolicy() == Lexicon.CountPolicy.none) - throw new IllegalArgumentException( - "LBJava ERROR: BatchTrainer.pruneDataset: Can't prune with policy '" - + policy + "' if features haven't been counted."); - if (!(parser instanceof ArrayFileParser)) - throw new IllegalArgumentException( - "LBJava ERROR: BatchTrainer.pruneDataset can't be called unless " - + "feature pre-extraction has already been performed."); - ArrayFileParser afp = (ArrayFileParser) parser; - afp.setIncludePruned(true); - - int[] swapMap = lexicon.prune(policy); - - // Establish an output stream for writing examples. 
- ExceptionlessOutputStream eos = null; - ByteArrayOutputStream baos = null; - File fExampleFile = null; - File fTempFile = null; - boolean copy = false; - - if (exampleFile != null) { - fExampleFile = new File(exampleFile); - if (fExampleFile.exists()) { - int lastSlash = exampleFile.lastIndexOf(File.separatorChar); - try { - if (lastSlash == -1) fTempFile = File.createTempFile("LBJ", null); - else - fTempFile = - File.createTempFile( - "LBJ", null, new File(exampleFile.substring(0, lastSlash))); + + /** + * Performs labeled feature vector + * pre-extraction into the specified file (or memory), replacing {@link #parser} with one that + * reads from that file (or memory). If exampleFile already exists, this method + * writes the examples to a temporary file, then copies the contents to the existing file after + * pre-extraction completes. This is done in case the parser providing the examples to this + * method is reading the existing file. + * + *

+ * Note that this method does not write the feature lexicon it produces to disk. + * + * @param exampleFile The full path to a file into which examples will be written, or + * null to extract into memory. + * @param zip Whether or not to compress the extracted examples. + * @param countPolicy The feature counting policy for the learner's feature lexicon. + * @return A new learning classifier containing the lexicon built during pre-extraction. + **/ + public Learner preExtract(String exampleFile, boolean zip, Lexicon.CountPolicy countPolicy) { + Learner preExtractLearner = learner.emptyClone(); + preExtractLearner.setLabelLexicon(learner.getLabelLexicon()); + Lexicon lexicon = learner.getLexicon(); + preExtractLearner.setLexicon(lexicon); + preExtractLearner.countFeatures(countPolicy); + learner.setLexicon(null); + setIsTraining(true); + examples = 0; + + // Establish an output stream for writing examples. + ExceptionlessOutputStream eos = null; + ByteArrayOutputStream baos = null; + File fExampleFile = null; + File fTempFile = null; + boolean copy = false; + + if (exampleFile != null) { + fExampleFile = new File(exampleFile); + if (fExampleFile.exists()) { + int lastSlash = exampleFile.lastIndexOf(File.separatorChar); + + try { + if (lastSlash == -1) + fTempFile = File.createTempFile("LBJ", null); + else + fTempFile = + File.createTempFile("LBJ", null, + new File(exampleFile.substring(0, lastSlash))); + } catch (Exception e) { + System.err + .println("LBJava ERROR: BatchTrainer.preExtract: Can't create temporary " + + "file: " + e); + System.exit(1); + } + + fTempFile.deleteOnExit(); + copy = true; + } else + fTempFile = fExampleFile; + + try { + if (zip) + eos = ExceptionlessOutputStream.openCompressedStream(fTempFile.toURI().toURL()); + else + eos = ExceptionlessOutputStream.openBufferedStream(fTempFile.toURI().toURL()); + } catch (Exception e) { + System.err + .println("LBJava ERROR: BatchTrainer.preExtract: Can't convert file name '" + + fTempFile + "' to URL: 
" + e); + System.exit(1); + } + } else { + baos = new ByteArrayOutputStream(1 << 18); + if (zip) { + ZipOutputStream zos = new ZipOutputStream(baos); + try { + zos.putNextEntry(new ZipEntry(ExceptionlessInputStream.zipEntryName)); + } catch (Exception e) { + System.err.println("ERROR: Can't create in-memory zip data:"); + e.printStackTrace(); + System.exit(1); + } + eos = new ExceptionlessOutputStream(new BufferedOutputStream(zos)); + } else + eos = new ExceptionlessOutputStream(baos); + } + + // Write examples to the output stream. + boolean alreadyExtracted = parser instanceof ArrayFileParser; + if (alreadyExtracted) + ((ArrayFileParser) parser).setIncludePruned(true); + + for (Object example = parser.next(); example != null; example = parser.next()) { + if (progressOutput > 0 && examples % progressOutput == 0) + System.out.println(" " + learner.name + ", pre-extract: " + messageIndent + + examples + " examples at " + new Date()); + + if (example == FoldSeparator.separator) + eos.writeInt(-1); + else { + ++examples; + Object[] exampleArray = + alreadyExtracted ? (Object[]) example : preExtractLearner + .getExampleArray(example); + + int[] featureIndexes = (int[]) exampleArray[0]; + double[] featureValues = (double[]) exampleArray[1]; + int[] labelIndexes = (int[]) exampleArray[2]; + double[] labelValues = (double[]) exampleArray[3]; + + if (alreadyExtracted && countPolicy != Lexicon.CountPolicy.none) { + int labelIndex = + countPolicy == Lexicon.CountPolicy.perClass ? 
labelIndexes[0] : -1; + for (int i = 0; i < featureIndexes.length; ++i) { + lexicon.lookup(lexicon.lookupKey(featureIndexes[i]), true, labelIndex); + } + } + + writeExample(eos, featureIndexes, featureValues, labelIndexes, labelValues, lexicon); + } } - catch (Exception e) { - System.err.println( - "LBJava ERROR: BatchTrainer.preExtract: Can't create temporary " - + "file: " + e); - System.exit(1); + + if (progressOutput > 0) + System.out.println(" " + learner.name + ", pre-extract: " + messageIndent + examples + + " examples at " + new Date()); + + parser.close(); + eos.close(); + + if (copy) { + try { + FileChannel in = (new FileInputStream(fTempFile)).getChannel(); + FileChannel out = (new FileOutputStream(fExampleFile)).getChannel(); + in.transferTo(0, fTempFile.length(), out); + in.close(); + out.close(); + } catch (Exception e) { + System.err.println("LBJava ERROR: Can't copy example file:"); + e.printStackTrace(); + System.exit(1); + } } - fTempFile.deleteOnExit(); - copy = true; - } - else fTempFile = fExampleFile; + setIsTraining(false); + lexiconSize = preExtractLearner.getLexicon().size(); - try { - if (zip) - eos = - ExceptionlessOutputStream.openCompressedStream( - fTempFile.toURI().toURL()); + // Set up a new parser to read the pre-extracted examples. 
+ if (fTempFile != null) + parser = new ArrayFileParser(fTempFile.getPath(), zip); else - eos = - ExceptionlessOutputStream.openBufferedStream( - fTempFile.toURI().toURL()); - } - catch (Exception e) { - System.err.println( - "LBJava ERROR: BatchTrainer.preExtract: Can't convert file name '" - + fTempFile + "' to URL: " + e); - System.exit(1); - } + parser = new ArrayFileParser(baos.toByteArray(), zip); + + learner.setLabelLexicon(preExtractLearner.getLabelLexicon()); + return preExtractLearner; } - else { - baos = new ByteArrayOutputStream(1 << 18); - if (zip) { - ZipOutputStream zos = new ZipOutputStream(baos); - try { - zos.putNextEntry( - new ZipEntry(ExceptionlessInputStream.zipEntryName)); + + + /** + * This method sets the {@link #examples} and {@link #lexiconSize} + * variables by querying {@link #parser} and {@link #learner} respectively. It sets + * {@link #examples} to 0 if {@link #parser} is not an + * {@link edu.illinois.cs.cogcomp.lbjava.parse.ArrayFileParser} and {@link #lexiconSize} to 0 if + * {@link #learner} doesn't either have the lexicon loaded or know where to find it. + **/ + public void fillInSizes() { + if (parser instanceof ArrayFileParser) { + ArrayFileParser afp = (ArrayFileParser) parser; + examples = afp.getNumExamples(); + } else + examples = 0; + lexiconSize = learner.getPrunedLexiconSize(); + } + + + /** + * Prunes the data returned by + * {@link #parser} according to the given policy, under the assumption that feature counts have + * already been compiled in the given learner's lexicon. The pruned data is written to the given + * file (or memory), and at the end of the method, {@link #parser} is replaced with a new parser + * that reads from that file (or memory). The pruned lexicon is also written to disk. + * + *

+ * If exampleFile already exists, this method writes the examples to a temporary + * file, then copies the contents to the existing file after pruning completes. This is done in + * case the parser providing the examples to this method is reading the existing file. + * + *

+ * When calling this method, it must be the case that {@link #parser} is a + * {@link edu.illinois.cs.cogcomp.lbjava.parse.ArrayFileParser}. This condition is easy to + * satisfy, since the {@link #preExtract(String,boolean,Lexicon.CountPolicy)} method will + * usually be called prior to this method to count the features in the dataset, and this method + * also replaces {@link #parser} with a + * {@link edu.illinois.cs.cogcomp.lbjava.parse.ArrayFileParser}. + * + *

+ * It is assumed that preExtractLearner already knows where to write the lexicon. + * If it doesn't, call {@link Learner#setLexiconLocation(String)} or + * {@link Learner#setLexiconLocation(java.net.URL)} on that object before calling this method. + * + *

+ * Calling this method is equivalent to calling + * {@link #pruneDataset(String,boolean,Lexicon.PruningPolicy,Learner)} with the second argument + * true. + * + * @param exampleFile The full path to a file into which examples will be written, or + * null to extract into memory. + * @param policy The type of feature pruning. + * @param preExtractLearner A learner whose lexicon contains all the necessary feature count + * information. + **/ + public void pruneDataset(String exampleFile, Lexicon.PruningPolicy policy, + Learner preExtractLearner) { + pruneDataset(exampleFile, true, policy, preExtractLearner); + } + + + /** + * Prunes the data returned + * by {@link #parser} according to the given policy, under the assumption that feature counts + * have already been compiled in the given learner's lexicon. The pruned data is written to the + * given file (or memory), and at the end of the method, {@link #parser} is replaced with a new + * parser that reads from that file (or memory). The pruned lexicon is also written to disk. + * + *

+ * If exampleFile already exists, this method writes the examples to a temporary + * file, then copies the contents to the existing file after pruning completes. This is done in + * case the parser providing the examples to this method is reading the existing file. + * + *

+ * When calling this method, it must be the case that {@link #parser} is an + * {@link edu.illinois.cs.cogcomp.lbjava.parse.ArrayFileParser ArrayFileParser}. This condition + * is easy to satisfy, since the {@link #preExtract(String,boolean,Lexicon.CountPolicy)} method + * will usually be called prior to this method to count the features in the dataset, and this + * method also replaces {@link #parser} with an + * {@link edu.illinois.cs.cogcomp.lbjava.parse.ArrayFileParser ArrayFileParser}. + * + *

+ * It is assumed that preExtractLearner already knows where to write the lexicon. + * If it doesn't, call {@link Learner#setLexiconLocation(String)} or + * {@link Learner#setLexiconLocation(java.net.URL)} on that object before calling this method. + * + * @param exampleFile The full path to a file into which examples will be written, or + * null to extract into memory. + * @param zip Whether or not to compress the extracted examples. + * @param policy The type of feature pruning. + * @param preExtractLearner A learner whose lexicon contains all the necessary feature count + * information. + **/ + public void pruneDataset(String exampleFile, boolean zip, Lexicon.PruningPolicy policy, + Learner preExtractLearner) { + Lexicon lexicon = preExtractLearner.getLexicon(); + + if (!policy.isNone() && lexicon.getCountPolicy() == Lexicon.CountPolicy.none) + throw new IllegalArgumentException( + "LBJava ERROR: BatchTrainer.pruneDataset: Can't prune with policy '" + policy + + "' if features haven't been counted."); + if (!(parser instanceof ArrayFileParser)) + throw new IllegalArgumentException( + "LBJava ERROR: BatchTrainer.pruneDataset can't be called unless " + + "feature pre-extraction has already been performed."); + ArrayFileParser afp = (ArrayFileParser) parser; + afp.setIncludePruned(true); + + int[] swapMap = lexicon.prune(policy); + + // Establish an output stream for writing examples. 
+ ExceptionlessOutputStream eos = null; + ByteArrayOutputStream baos = null; + File fExampleFile = null; + File fTempFile = null; + boolean copy = false; + + if (exampleFile != null) { + fExampleFile = new File(exampleFile); + if (fExampleFile.exists()) { + int lastSlash = exampleFile.lastIndexOf(File.separatorChar); + + try { + if (lastSlash == -1) + fTempFile = File.createTempFile("LBJ", null); + else + fTempFile = + File.createTempFile("LBJ", null, + new File(exampleFile.substring(0, lastSlash))); + } catch (Exception e) { + System.err + .println("LBJava ERROR: BatchTrainer.preExtract: Can't create temporary " + + "file: " + e); + System.exit(1); + } + + fTempFile.deleteOnExit(); + copy = true; + } else + fTempFile = fExampleFile; + + try { + if (zip) + eos = ExceptionlessOutputStream.openCompressedStream(fTempFile.toURI().toURL()); + else + eos = ExceptionlessOutputStream.openBufferedStream(fTempFile.toURI().toURL()); + } catch (Exception e) { + System.err + .println("LBJava ERROR: BatchTrainer.preExtract: Can't convert file name '" + + fTempFile + "' to URL: " + e); + System.exit(1); + } + } else { + baos = new ByteArrayOutputStream(1 << 18); + if (zip) { + ZipOutputStream zos = new ZipOutputStream(baos); + try { + zos.putNextEntry(new ZipEntry(ExceptionlessInputStream.zipEntryName)); + } catch (Exception e) { + System.err.println("ERROR: Can't create in-memory zip data:"); + e.printStackTrace(); + System.exit(1); + } + eos = new ExceptionlessOutputStream(new BufferedOutputStream(zos)); + } else + eos = new ExceptionlessOutputStream(baos); } - catch (Exception e) { - System.err.println("ERROR: Can't create in-memory zip data:"); - e.printStackTrace(); - System.exit(1); + + // Write examples to the output stream. 
+ examples = 0; + + for (Object example = afp.next(); example != null; example = afp.next()) { + if (progressOutput > 0 && examples % progressOutput == 0) + System.out.println(" " + learner.name + ", pruning: " + examples + " examples at " + + new Date()); + + if (example == FoldSeparator.separator) + eos.writeInt(-1); + else { + ++examples; + Object[] exampleArray = (Object[]) example; + + int[] featureIndexes = (int[]) exampleArray[0]; + double[] featureValues = (double[]) exampleArray[1]; + int[] labelIndexes = (int[]) exampleArray[2]; + double[] labelValues = (double[]) exampleArray[3]; + + int unpruned = featureIndexes.length; + if (swapMap != null) { + // First, map the old feature indexes to the new ones. + for (int i = 0; i < featureIndexes.length; ++i) + featureIndexes[i] = swapMap[featureIndexes[i]]; + + // Second, put the pruned features at the end of the example array. + while (unpruned > 0 + && lexicon.isPruned(featureIndexes[unpruned - 1], labelIndexes[0], + policy)) + --unpruned; + + for (int i = unpruned - 2; i >= 0; --i) + if (lexicon.isPruned(featureIndexes[i], labelIndexes[0], policy)) { + int t = featureIndexes[i]; + featureIndexes[i] = featureIndexes[--unpruned]; + featureIndexes[unpruned] = t; + + double d = featureValues[i]; + featureValues[i] = featureValues[unpruned]; + featureValues[unpruned] = d; + } + } + + writeExample(eos, featureIndexes, featureValues, labelIndexes, labelValues, + unpruned, lexicon); + } } - eos = new ExceptionlessOutputStream(new BufferedOutputStream(zos)); - } - else eos = new ExceptionlessOutputStream(baos); - } - // Write examples to the output stream. 
- examples = 0; - - for (Object example = afp.next(); example != null; example = afp.next()) { - if (progressOutput > 0 && examples % progressOutput == 0) - System.out.println(" " + learner.name + ", pruning: " + examples - + " examples at " + new Date()); - - if (example == FoldSeparator.separator) eos.writeInt(-1); - else { - ++examples; - Object[] exampleArray = (Object[]) example; - - int[] featureIndexes = (int[]) exampleArray[0]; - double[] featureValues = (double[]) exampleArray[1]; - int[] labelIndexes = (int[]) exampleArray[2]; - double[] labelValues = (double[]) exampleArray[3]; - - int unpruned = featureIndexes.length; - if (swapMap != null) { - // First, map the old feature indexes to the new ones. - for (int i = 0; i < featureIndexes.length; ++i) - featureIndexes[i] = swapMap[featureIndexes[i]]; - - // Second, put the pruned features at the end of the example array. - while (unpruned > 0 - && lexicon.isPruned(featureIndexes[unpruned - 1], - labelIndexes[0], policy)) - --unpruned; - - for (int i = unpruned - 2; i >= 0; --i) - if (lexicon.isPruned(featureIndexes[i], labelIndexes[0], policy)) - { - int t = featureIndexes[i]; - featureIndexes[i] = featureIndexes[--unpruned]; - featureIndexes[unpruned] = t; - - double d = featureValues[i]; - featureValues[i] = featureValues[unpruned]; - featureValues[unpruned] = d; + if (progressOutput > 0) + System.out.println(" " + learner.name + ", pruning: " + examples + " examples at " + + new Date()); + + parser.close(); + eos.close(); + + if (copy) { + try { + FileChannel in = (new FileInputStream(fTempFile)).getChannel(); + FileChannel out = (new FileOutputStream(fExampleFile)).getChannel(); + in.transferTo(0, fTempFile.length(), out); + in.close(); + out.close(); + } catch (Exception e) { + System.err.println("LBJava ERROR: Can't copy example file:"); + e.printStackTrace(); + System.exit(1); } } - writeExample(eos, featureIndexes, featureValues, labelIndexes, - labelValues, unpruned, lexicon); - } + lexiconSize = 
lexicon.getCutoff(); + preExtractLearner.saveLexicon(); + + // Set up a new parser to read the pre-extracted and pruned examples. + if (fTempFile != null) + parser = new ArrayFileParser(fTempFile.getPath(), zip); + else + parser = new ArrayFileParser(baos.toByteArray(), zip); } - if (progressOutput > 0) - System.out.println(" " + learner.name + ", pruning: " + examples - + " examples at " + new Date()); - - parser.close(); - eos.close(); - - if (copy) { - try { - FileChannel in = (new FileInputStream(fTempFile)).getChannel(); - FileChannel out = (new FileOutputStream(fExampleFile)).getChannel(); - in.transferTo(0, fTempFile.length(), out); - in.close(); - out.close(); - } - catch (Exception e) { - System.err.println("LBJava ERROR: Can't copy example file:"); - e.printStackTrace(); - System.exit(1); - } + + /** + * Provides access to a hook into {@link #train(int)} so that + * additional processing can be performed at the end of each round. This processing supplements + * the processing in {@link Learner#doneWithRound()} which is already called from withink + * {@link #train(int)}. + **/ + public static interface DoneWithRound { + /** + * The hook into {@link #train(int)} as described above. + * + * @param r The 1-based number of the training round that just completed. + **/ + public void doneWithRound(int r); } - lexiconSize = lexicon.getCutoff(); - preExtractLearner.saveLexicon(); - // Set up a new parser to read the pre-extracted and pruned examples. - if (fTempFile != null) - parser = new ArrayFileParser(fTempFile.getPath(), zip); - else parser = new ArrayFileParser(baos.toByteArray(), zip); - } + /** + * Trains {@link #learner} for the specified number of rounds. This learning + * happens on top of any learning that {@link #learner} may have already done. + * + * @param rounds The number of passes to make over the training data. 
+ **/ + public void train(int rounds) { + train(1, rounds); + } - /** - * Provides access to a hook into {@link #train(int)} so that additional - * processing can be performed at the end of each round. This processing - * supplements the processing in {@link Learner#doneWithRound()} which is - * already called from withink {@link #train(int)}. - **/ - public static interface DoneWithRound - { /** - * The hook into {@link #train(int)} as described above. - * - * @param r The 1-based number of the training round that just - * completed. + * Trains {@link #learner} for the specified number of rounds. This + * learning happens on top of any learning that {@link #learner} may have already done. + * + * @param start The 1-based number of the first training round. + * @param rounds The total number of training rounds including those before start. **/ - public void doneWithRound(int r); - } - - - /** - * Trains {@link #learner} for the specified number of rounds. This - * learning happens on top of any learning that {@link #learner} may have - * already done. - * - * @param rounds The number of passes to make over the training data. - **/ - public void train(int rounds) { train(1, rounds); } - - - /** - * Trains {@link #learner} for the specified number of rounds. This - * learning happens on top of any learning that {@link #learner} may have - * already done. - * - * @param start The 1-based number of the first training round. - * @param rounds The total number of training rounds including those before - * start. - **/ - public void train(int start, int rounds) { - train(start, rounds, - new DoneWithRound() { public void doneWithRound(int r) { } }); - } - - - /** - * Trains {@link #learner} for the specified number of rounds. This - * learning happens on top of any learning that {@link #learner} may have - * already done. - * - * @param rounds The number of passes to make over the training data. - * @param dwr Performs post processing at the end of each round. 
- **/ - public void train(int rounds, DoneWithRound dwr) { - train(1, rounds, dwr); - } - - - /** - * Trains {@link #learner} for the specified number of rounds. This - * learning happens on top of any learning that {@link #learner} may have - * already done. - * - * @param start The 1-based number of the first training round. - * @param rounds The total number of training rounds including those before - * start. - * @param dwr Performs post processing at the end of each round. - **/ - public void train(int start, int rounds, DoneWithRound dwr) { - if (lexiconSize > 0) { - // If the parser is a FoldParser, it means we're doing cross validation - // in which we train on just part of the data. So the examples variable - // doesn't accurately reflect how many training examples we'll see in - // this episode of training. - learner.initialize(parser instanceof FoldParser ? 0 : examples, - lexiconSize); + public void train(int start, int rounds) { + train(start, rounds, new DoneWithRound() { + public void doneWithRound(int r) {} + }); } - else setIsTraining(true); - for (int i = start; i <= rounds; ++i) { - int examples = 0; - for (Object example = parser.next(); example != null; - example = parser.next()) { - if (example == FoldSeparator.separator) continue; + /** + * Trains {@link #learner} for the specified number of rounds. + * This learning happens on top of any learning that {@link #learner} may have already done. + * + * @param rounds The number of passes to make over the training data. + * @param dwr Performs post processing at the end of each round. 
+ **/ + public void train(int rounds, DoneWithRound dwr) { + train(1, rounds, dwr); + } + - if (progressOutput > 0 && examples % progressOutput == 0) { - System.out.print(" " + learner.name + ": " + messageIndent); - if (rounds != 1) System.out.print("Round " + i + ", "); - System.out.println(examples + " examples processed at " - + new Date()); - } + /** + * Trains {@link #learner} for the specified number of + * rounds. This learning happens on top of any learning that {@link #learner} may have already + * done. + * + * @param start The 1-based number of the first training round. + * @param rounds The total number of training rounds including those before start. + * @param dwr Performs post processing at the end of each round. + **/ + public void train(int start, int rounds, DoneWithRound dwr) { + if (lexiconSize > 0) { + // If the parser is a FoldParser, it means we're doing cross validation + // in which we train on just part of the data. So the examples variable + // doesn't accurately reflect how many training examples we'll see in + // this episode of training. + learner.initialize(parser instanceof FoldParser ? 
0 : examples, lexiconSize); + } else + setIsTraining(true); + + for (int i = start; i <= rounds; ++i) { + int examples = 0; + + for (Object example = parser.next(); example != null; example = parser.next()) { + if (example == FoldSeparator.separator) + continue; + + if (progressOutput > 0 && examples % progressOutput == 0) { + System.out.print(" " + learner.name + ": " + messageIndent); + if (rounds != 1) + System.out.print("Round " + i + ", "); + System.out.println(examples + " examples processed at " + new Date()); + } + + learner.learn(example); + ++examples; + } - learner.learn(example); - ++examples; - } + if (progressOutput > 0) { + System.out.print(" " + learner.name + ": " + messageIndent); + if (rounds != 1) + System.out.print("Round " + i + ", "); + System.out.println(examples + " examples processed at " + new Date()); + } - if (progressOutput > 0) { - System.out.print(" " + learner.name + ": " + messageIndent); - if (rounds != 1) System.out.print("Round " + i + ", "); - System.out.println(examples + " examples processed at " + new Date()); - } + parser.reset(); + learner.doneWithRound(); + dwr.doneWithRound(i); + } - parser.reset(); - learner.doneWithRound(); - dwr.doneWithRound(i); + learner.doneLearning(); + if (lexiconSize == 0) + setIsTraining(false); } - learner.doneLearning(); - if (lexiconSize == 0) setIsTraining(false); - } - - - /** - * Performs cross validation, computing a confidence interval on the - * performance of the learner after each of the specified rounds of - * training. This method assumes that {@link #learner} has not yet done - * any learning. The learner will again be empty in this sense when the - * method exits, except that any label lexicon present before the method - * was called will be restored. The label lexicon needs to persist in this - * way so that it can ultimately be written into the model file. 
- * - * @param rounds An array of training rounds after which - * performance of the learner should be evaluated on - * the testing data. - * @param k The number of folds. - * @param splitPolicy The policy according to which the data is split - * up. - * @param alpha The fraction of the distribution to leave outside - * the confidence interval. For example, alpha - * = .05 gives a 95% confidence interval. - * @param metric A metric with which to evaluate the learner on - * testing data. - * @param statusMessages If set true status messages will be - * produced, even if {@link #progressOutput} is zero. - * @return A 2D array results where results[i][0] - * is the average performance of the learner after - * rounds[i] rounds of training and - * results[i][1] is half the size of the corresponding - * confidence interval. - **/ - public double[][] crossValidation(final int[] rounds, - int k, - FoldParser.SplitPolicy splitPolicy, - double alpha, - final TestingMetric metric, - boolean statusMessages) { - if (!(k > 1 || splitPolicy == FoldParser.SplitPolicy.manual)) - throw new IllegalArgumentException( - "LBJava ERROR: BatchTrainer.crossValidation: if the data splitting " - + "policy is not 'Manual', the number of folds must be greater " - + "than 1."); - if (splitPolicy == FoldParser.SplitPolicy.manual) k = -1; - Arrays.sort(rounds); - final int totalRounds = rounds[rounds.length - 1]; - - // Status messages. - if (statusMessages || progressOutput > 0) { - System.out.print(" " + learner.name + ": " + messageIndent - + "Cross Validation: "); - if (k != -1) System.out.print("k = " + k + ", "); - System.out.print("Split = " + splitPolicy); - if (totalRounds != 1) System.out.print(", Rounds = " + totalRounds); - System.out.println(); - } - // Instantiate a fold parser. - final FoldParser foldParser; - // If we pre-extracted, we know how many examples there are already; - // otherwise FoldParser will have to compute it. 
- if (examples > 0) - foldParser = new FoldParser(parser, k, splitPolicy, 0, false, examples); - else foldParser = new FoldParser(parser, k, splitPolicy, 0, false); - parser = foldParser; - - if (splitPolicy == FoldParser.SplitPolicy.manual) k = foldParser.getK(); - - final double[][] performances = new double[rounds.length][k]; - Lexicon labelLexicon = learner.getLabelLexicon(); - - // Train and get testing performances for each fold. - for (int i = 0; i < k; foldParser.setPivot(++i)) { - if (statusMessages || progressOutput > 0) - System.out.println( - " " + learner.name + ": " + messageIndent - + "Training against subset " + i + " at " + new Date()); - final int fold = i; - messageIndent += " "; - - train(totalRounds, - new DoneWithRound() { - int r = 0; - public void doneWithRound(int round) { - if (round < totalRounds && rounds[r] == round) - performances[r++][fold] = - crossValidationTesting(foldParser, metric, true, false); - } + /** + * + * Performs cross validation, computing a confidence interval on the performance of the learner + * after each of the specified rounds of training. This method assumes that {@link #learner} has + * not yet done any learning. The learner will again be empty in this sense when the method + * exits, except that any label lexicon present before the method was called will be restored. + * The label lexicon needs to persist in this way so that it can ultimately be written into the + * model file. + * + * @param rounds An array of training rounds after which performance of the learner should be + * evaluated on the testing data. + * @param k The number of folds. + * @param splitPolicy The policy according to which the data is split up. + * @param alpha The fraction of the distribution to leave outside the confidence interval. For + * example, alpha + * = .05 gives a 95% confidence interval. + * @param metric A metric with which to evaluate the learner on testing data. 
+ * @param statusMessages If set true status messages will be produced, even if + * {@link #progressOutput} is zero. + * @return A 2D array results where results[i][0] is the average + * performance of the learner after rounds[i] rounds of training and + * results[i][1] is half the size of the corresponding confidence interval. + **/ + public double[][] crossValidation(final int[] rounds, int k, + FoldParser.SplitPolicy splitPolicy, double alpha, final TestingMetric metric, + boolean statusMessages) { + if (!(k > 1 || splitPolicy == FoldParser.SplitPolicy.manual)) + throw new IllegalArgumentException( + "LBJava ERROR: BatchTrainer.crossValidation: if the data splitting " + + "policy is not 'Manual', the number of folds must be greater " + + "than 1."); + if (splitPolicy == FoldParser.SplitPolicy.manual) + k = -1; + Arrays.sort(rounds); + final int totalRounds = rounds[rounds.length - 1]; + + // Status messages. + if (statusMessages || progressOutput > 0) { + System.out.print(" " + learner.name + ": " + messageIndent + "Cross Validation: "); + if (k != -1) + System.out.print("k = " + k + ", "); + System.out.print("Split = " + splitPolicy); + if (totalRounds != 1) + System.out.print(", Rounds = " + totalRounds); + System.out.println(); + } + + // Instantiate a fold parser. + final FoldParser foldParser; + // If we pre-extracted, we know how many examples there are already; + // otherwise FoldParser will have to compute it. + if (examples > 0) + foldParser = new FoldParser(parser, k, splitPolicy, 0, false, examples); + else + foldParser = new FoldParser(parser, k, splitPolicy, 0, false); + parser = foldParser; + + if (splitPolicy == FoldParser.SplitPolicy.manual) + k = foldParser.getK(); + + final double[][] performances = new double[rounds.length][k]; + Lexicon labelLexicon = learner.getLabelLexicon(); + + // Train and get testing performances for each fold. 
+ for (int i = 0; i < k; foldParser.setPivot(++i)) { + if (statusMessages || progressOutput > 0) + System.out.println(" " + learner.name + ": " + messageIndent + + "Training against subset " + i + " at " + new Date()); + final int fold = i; + messageIndent += " "; + + train(totalRounds, new DoneWithRound() { + int r = 0; + + public void doneWithRound(int round) { + if (round < totalRounds && rounds[r] == round) + performances[r++][fold] = + crossValidationTesting(foldParser, metric, true, false); + } }); - performances[rounds.length - 1][i] = - crossValidationTesting(foldParser, metric, false, statusMessages); - messageIndent = messageIndent.substring(2); + performances[rounds.length - 1][i] = + crossValidationTesting(foldParser, metric, false, statusMessages); + messageIndent = messageIndent.substring(2); - learner.forget(); - if (labelLexicon != null && labelLexicon.size() > 0 - && learner.getLabelLexicon().size() == 0) - learner.setLabelLexicon(labelLexicon); - } + learner.forget(); + if (labelLexicon != null && labelLexicon.size() > 0 + && learner.getLabelLexicon().size() == 0) + learner.setLabelLexicon(labelLexicon); + } - parser = foldParser.getParser(); + parser = foldParser.getParser(); - // Compute the confidence interval. - double[][] results = new double[rounds.length][]; - boolean usingAccuracy = metric instanceof Accuracy; + // Compute the confidence interval. 
+ double[][] results = new double[rounds.length][]; + boolean usingAccuracy = metric instanceof Accuracy; - for (int r = 0; r < rounds.length; ++r) { - results[r] = StudentT.confidenceInterval(performances[r], alpha); + for (int r = 0; r < rounds.length; ++r) { + results[r] = StudentT.confidenceInterval(performances[r], alpha); - if (r == rounds.length - 1 && statusMessages || progressOutput > 0) { - double mean = Math.round(results[r][0] * 100000) / 100000.0; - double half = Math.round(results[r][1] * 100000) / 100000.0; + if (r == rounds.length - 1 && statusMessages || progressOutput > 0) { + double mean = Math.round(results[r][0] * 100000) / 100000.0; + double half = Math.round(results[r][1] * 100000) / 100000.0; - System.out.print( - " " + learner.name + ": " + messageIndent + (100 * (1 - alpha)) - + "% confidence interval after " + rounds[r] + " rounds: " - + mean); - if (usingAccuracy) System.out.print("%"); - System.out.print(" +/- " + half); - if (usingAccuracy) System.out.print("%"); - System.out.println(); - } - } + System.out.print(" " + learner.name + ": " + messageIndent + (100 * (1 - alpha)) + + "% confidence interval after " + rounds[r] + " rounds: " + mean); + if (usingAccuracy) + System.out.print("%"); + System.out.print(" +/- " + half); + if (usingAccuracy) + System.out.print("%"); + System.out.println(); + } + } - return results; - } - - - /** - * Tests the learner as a subroutine inside cross validation. - * - * @param foldParser The cross validation parser that splits up the - * data. - * @param metric The metric used to evaluate the performance of the - * learner. - * @param clone Whether or not the learner should be cloned (and - * it should be cloned if more learning will take - * place after making this call). - * @param statusMessages If set true status messages will be - * produced, even if {@link #progressOutput} is zero. 
- * @return The result produced by the testing metric on the current cross - * validation fold expressed as a percentage (instead of a - * fraction) if the testing metric is {@link Accuracy}. - **/ - protected double crossValidationTesting(FoldParser foldParser, - TestingMetric metric, - boolean clone, - boolean statusMessages) { - Parser originalParser = foldParser.getParser(); - foldParser.setFromPivot(true); - - Learner testLearner = learner; - if (clone) { - testLearner = (Learner) learner.clone(); - testLearner.doneLearning(); + return results; } - double result = 0; - if (originalParser instanceof ArrayFileParser) { - ArrayFileParser afp = (ArrayFileParser) originalParser; - afp.setIncludePruned(true); - result = metric.test(testLearner, null, foldParser); - afp.setIncludePruned(false); - } - else { - setIsTraining(false); - result = metric.test(testLearner, testLearner.getLabeler(), foldParser); - setIsTraining(true); - } + /** + * Tests the learner + * as a subroutine inside cross validation. + * + * @param foldParser The cross validation parser that splits up the data. + * @param metric The metric used to evaluate the performance of the learner. + * @param clone Whether or not the learner should be cloned (and it should be cloned if more + * learning will take place after making this call). + * @param statusMessages If set true status messages will be produced, even if + * {@link #progressOutput} is zero. + * @return The result produced by the testing metric on the current cross validation fold + * expressed as a percentage (instead of a fraction) if the testing metric is + * {@link Accuracy}. 
+ **/ + protected double crossValidationTesting(FoldParser foldParser, TestingMetric metric, + boolean clone, boolean statusMessages) { + Parser originalParser = foldParser.getParser(); + foldParser.setFromPivot(true); + + Learner testLearner = learner; + if (clone) { + testLearner = (Learner) learner.clone(); + testLearner.doneLearning(); + } - foldParser.reset(); - foldParser.setFromPivot(false); - if (metric instanceof Accuracy) result *= 100; - - if (statusMessages || progressOutput > 0) { - double printResult = Math.round(result * 100000) / 100000.0; - System.out.print( - " " + learner.name + ": " + messageIndent + "Subset " - + foldParser.getPivot() + " " + metric.getName() + ": " - + printResult); - if (metric instanceof Accuracy) System.out.print("%"); - System.out.println(); - } + double result = 0; + + if (originalParser instanceof ArrayFileParser) { + ArrayFileParser afp = (ArrayFileParser) originalParser; + afp.setIncludePruned(true); + result = metric.test(testLearner, null, foldParser); + afp.setIncludePruned(false); + } else { + setIsTraining(false); + result = metric.test(testLearner, testLearner.getLabeler(), foldParser); + setIsTraining(true); + } - return result; - } - - - /** - * Tune learning algorithm parameters using cross validation. Note that - * this interface takes both an array of - * {@link edu.illinois.cs.cogcomp.lbjava.learn.Learner.Parameters} objects and an array of rounds. - * As such, the value in the {@link edu.illinois.cs.cogcomp.lbjava.learn.Learner.Parameters#rounds} - * field is ignored during tuning. It is also overwritten in each of the - * {@link edu.illinois.cs.cogcomp.lbjava.learn.Learner.Parameters} objects when the optimal number of - * rounds is determined in terms of the other parameters in each object. - * Finally, in addition to returning the parameters that got the best - * performance, this method also sets {@link #learner} with those - * parameters at the end of the method. - * - *

This method assumes that {@link #learner} has not yet done any - * learning. The learner will again be empty in this sense when the method - * exits, except that any label lexicon present before the method was - * called will be restored. The label lexicon needs to persist in this way - * so that it can ultimately be written into the model file. - * - * @param parameters An array of parameter settings objects. - * @param rounds An array of training rounds after which performance - * of the learner should be evaluated on the testing - * data. - * @param k The number of folds. - * @param splitPolicy The policy according to which the data is split up. - * @param alpha The fraction of the distribution to leave outside - * the confidence interval. For example, alpha = - * .05 gives a 95% confidence interval. - * @param metric A metric with which to evaluate the learner. - * @return The element of parameters that resulted in the best - * performance according to metric. - **/ - public Learner.Parameters tune(Learner.Parameters[] parameters, - int[] rounds, - int k, - FoldParser.SplitPolicy splitPolicy, - double alpha, - TestingMetric metric) { - int best = -1; - String[] parameterStrings = new String[parameters.length]; - double[][] scores = new double[parameters.length][]; - - for (int i = 0; i < parameters.length; ++i) { - parameterStrings[i] = parameters[i].nonDefaultString(); - - // Status message. - if (progressOutput > 0) - System.out.println( - " " + learner.name + ": " + messageIndent + "Trying parameters (" - + parameterStrings[i] + ")"); - - learner.setParameters(parameters[i]); - messageIndent += " "; - double[][] results = - crossValidation(rounds, k, splitPolicy, alpha, metric, false); - messageIndent = messageIndent.substring(2); - - // Update best scores, rounds, and parameters. 
- int bestRounds = 0; - if (best == -1 || results[0][0] > scores[best][0]) best = i; - scores[i] = results[0]; - - for (int j = 1; j < results.length; ++j) - if (results[j][0] > scores[i][0]) { - bestRounds = j; - scores[i] = results[j]; - if (results[j][0] > scores[best][0]) best = i; + foldParser.reset(); + foldParser.setFromPivot(false); + if (metric instanceof Accuracy) + result *= 100; + + if (statusMessages || progressOutput > 0) { + double printResult = Math.round(result * 100000) / 100000.0; + System.out.print(" " + learner.name + ": " + messageIndent + "Subset " + + foldParser.getPivot() + " " + metric.getName() + ": " + printResult); + if (metric instanceof Accuracy) + System.out.print("%"); + System.out.println(); } - parameters[i].rounds = rounds[bestRounds]; + return result; } - if (progressOutput > 0) { - // Print a table of results. - double[][] data = new double[parameters.length][4]; - for (int i = 0; i < parameters.length; ++i) { - data[i][0] = i + 1; - data[i][1] = scores[i][0]; - data[i][2] = scores[i][1]; - data[i][3] = parameters[i].rounds; - } - - String[] columnLabels = { "Set", metric.getName(), "+/-", "Rounds" }; - int[] sigDigits = { 0, 3, 3, 0 }; - String[] s = - TableFormat.tableFormat(columnLabels, null, data, sigDigits, - new int[]{ 0 }); - - System.out.println(" " + learner.name + ": " + messageIndent + "----"); - System.out.println( - " " + learner.name + ": " + messageIndent + "Parameter sets:"); - for (int i = 0; i < parameterStrings.length; ++i) - System.out.println( - " " + learner.name + ": " + messageIndent + (i+1) + ": " - + parameterStrings[i]); - for (int i = 0; i < s.length; ++i) - System.out.println(" " + learner.name + ": " + messageIndent + s[i]); - System.out.println(" " + learner.name + ": " + messageIndent + "----"); - - // Status message. 
- double bestScore = Math.round(scores[best][0] * 100000) / 100000.0; - System.out.println( - " " + learner.name + ": " + messageIndent + "Best " - + metric.getName() + ": " + bestScore); - System.out.print( - " " + learner.name + ": " + messageIndent + "with "); - - if (parameterStrings[best].length() > 0) { - System.out.println(parameterStrings[best]); - System.out.print( - " " + learner.name + ": " + messageIndent + "and "); - } - - System.out.println(parameters[best].rounds + " rounds"); + + /** + * + * Tune learning algorithm parameters using cross validation. Note that this interface takes + * both an array of {@link edu.illinois.cs.cogcomp.lbjava.learn.Learner.Parameters} objects and + * an array of rounds. As such, the value in the + * {@link edu.illinois.cs.cogcomp.lbjava.learn.Learner.Parameters#rounds} field is ignored + * during tuning. It is also overwritten in each of the + * {@link edu.illinois.cs.cogcomp.lbjava.learn.Learner.Parameters} objects when the optimal + * number of rounds is determined in terms of the other parameters in each object. Finally, in + * addition to returning the parameters that got the best performance, this method also sets + * {@link #learner} with those parameters at the end of the method. + * + *

+ * This method assumes that {@link #learner} has not yet done any learning. The learner will + * again be empty in this sense when the method exits, except that any label lexicon present + * before the method was called will be restored. The label lexicon needs to persist in this way + * so that it can ultimately be written into the model file. + * + * @param parameters An array of parameter settings objects. + * @param rounds An array of training rounds after which performance of the learner should be + * evaluated on the testing data. + * @param k The number of folds. + * @param splitPolicy The policy according to which the data is split up. + * @param alpha The fraction of the distribution to leave outside the confidence interval. For + * example, alpha = + * .05 gives a 95% confidence interval. + * @param metric A metric with which to evaluate the learner. + * @return The element of parameters that resulted in the best performance + * according to metric. + **/ + public Learner.Parameters tune(Learner.Parameters[] parameters, int[] rounds, int k, + FoldParser.SplitPolicy splitPolicy, double alpha, TestingMetric metric) { + int best = -1; + String[] parameterStrings = new String[parameters.length]; + double[][] scores = new double[parameters.length][]; + + for (int i = 0; i < parameters.length; ++i) { + parameterStrings[i] = parameters[i].nonDefaultString(); + + // Status message. + if (progressOutput > 0) + System.out.println(" " + learner.name + ": " + messageIndent + + "Trying parameters (" + parameterStrings[i] + ")"); + + learner.setParameters(parameters[i]); + messageIndent += " "; + double[][] results = crossValidation(rounds, k, splitPolicy, alpha, metric, false); + messageIndent = messageIndent.substring(2); + + // Update best scores, rounds, and parameters. 
+ int bestRounds = 0; + if (best == -1 || results[0][0] > scores[best][0]) + best = i; + scores[i] = results[0]; + + for (int j = 1; j < results.length; ++j) + if (results[j][0] > scores[i][0]) { + bestRounds = j; + scores[i] = results[j]; + if (results[j][0] > scores[best][0]) + best = i; + } + + parameters[i].rounds = rounds[bestRounds]; + } + + if (progressOutput > 0) { + // Print a table of results. + double[][] data = new double[parameters.length][4]; + for (int i = 0; i < parameters.length; ++i) { + data[i][0] = i + 1; + data[i][1] = scores[i][0]; + data[i][2] = scores[i][1]; + data[i][3] = parameters[i].rounds; + } + + String[] columnLabels = {"Set", metric.getName(), "+/-", "Rounds"}; + int[] sigDigits = {0, 3, 3, 0}; + String[] s = + TableFormat.tableFormat(columnLabels, null, data, sigDigits, new int[] {0}); + + System.out.println(" " + learner.name + ": " + messageIndent + "----"); + System.out.println(" " + learner.name + ": " + messageIndent + "Parameter sets:"); + for (int i = 0; i < parameterStrings.length; ++i) + System.out.println(" " + learner.name + ": " + messageIndent + (i + 1) + ": " + + parameterStrings[i]); + for (int i = 0; i < s.length; ++i) + System.out.println(" " + learner.name + ": " + messageIndent + s[i]); + System.out.println(" " + learner.name + ": " + messageIndent + "----"); + + // Status message. 
+ double bestScore = Math.round(scores[best][0] * 100000) / 100000.0; + System.out.println(" " + learner.name + ": " + messageIndent + "Best " + + metric.getName() + ": " + bestScore); + System.out.print(" " + learner.name + ": " + messageIndent + "with "); + + if (parameterStrings[best].length() > 0) { + System.out.println(parameterStrings[best]); + System.out.print(" " + learner.name + ": " + messageIndent + "and "); + } + + System.out.println(parameters[best].rounds + " rounds"); + } + + learner.setParameters(parameters[best]); + return parameters[best]; } - learner.setParameters(parameters[best]); - return parameters[best]; - } - - - /** - * Tune learning algorithm parameters against a development set. Note that - * this interface takes both an array of - * {@link edu.illinois.cs.cogcomp.lbjava.learn.Learner.Parameters} objects and an array of rounds. - * As such, the value in the {@link edu.illinois.cs.cogcomp.lbjava.learn.Learner.Parameters#rounds} - * field is ignored during tuning. It is also overwritten in each of the - * {@link edu.illinois.cs.cogcomp.lbjava.learn.Learner.Parameters} objects when the optimal number of - * rounds is determined in terms of the other parameters in each object. - * Finally, in addition to returning the parameters that got the best - * performance, this method also sets {@link #learner} with those - * parameters at the end of the method. - * - *

This method assumes that {@link #learner} has not yet done any - * learning. The learner will again be empty in this sense when the method - * exits, except that any label lexicon present before the method was - * called will be restored. The label lexicon needs to persist in this way - * so that it can ultimately be written into the model file. - * - * @param parameters An array of parameter settings objects. - * @param rounds An array of training rounds after which performance of - * the learner should be evaluated on the testing data. - * @param devParser A parser from which development set examples are - * obtained. - * @param metric A metric with which to evaluate the learner. - * @return The element of parameters that resulted in the best - * performance according to metric. - **/ - public Learner.Parameters tune(Learner.Parameters[] parameters, - final int[] rounds, - final Parser devParser, - final TestingMetric metric) { - int best = -1; - double[] scores = new double[parameters.length]; - String[] parameterStrings = new String[parameters.length]; - Arrays.sort(rounds); - final int totalRounds = rounds[rounds.length - 1]; - Lexicon labelLexicon = learner.getLabelLexicon(); - - for (int i = 0; i < parameters.length; ++i) { - parameterStrings[i] = parameters[i].nonDefaultString(); - // Status message. - if (progressOutput > 0) - System.out.println( - " " + learner.name + ": " + messageIndent + "Trying parameters (" - + parameterStrings[i] + ")"); - - final double[] results = new double[rounds.length]; - learner.setParameters(parameters[i]); - messageIndent += " "; - - train(totalRounds, - new DoneWithRound() { - int r = 0; - public void doneWithRound(int round) { - if (round < totalRounds && rounds[r] == round) - results[r++] = testMidTraining(devParser, metric, true); - } + + /** + * Tune learning algorithm + * parameters against a development set. 
Note that this interface takes both an array of + * {@link edu.illinois.cs.cogcomp.lbjava.learn.Learner.Parameters} objects and an array of + * rounds. As such, the value in the + * {@link edu.illinois.cs.cogcomp.lbjava.learn.Learner.Parameters#rounds} field is ignored + * during tuning. It is also overwritten in each of the + * {@link edu.illinois.cs.cogcomp.lbjava.learn.Learner.Parameters} objects when the optimal + * number of rounds is determined in terms of the other parameters in each object. Finally, in + * addition to returning the parameters that got the best performance, this method also sets + * {@link #learner} with those parameters at the end of the method. + * + *

+ * This method assumes that {@link #learner} has not yet done any learning. The learner will + * again be empty in this sense when the method exits, except that any label lexicon present + * before the method was called will be restored. The label lexicon needs to persist in this way + * so that it can ultimately be written into the model file. + * + * @param parameters An array of parameter settings objects. + * @param rounds An array of training rounds after which performance of the learner should be + * evaluated on the testing data. + * @param devParser A parser from which development set examples are obtained. + * @param metric A metric with which to evaluate the learner. + * @return The element of parameters that resulted in the best performance + * according to metric. + **/ + public Learner.Parameters tune(Learner.Parameters[] parameters, final int[] rounds, + final Parser devParser, final TestingMetric metric) { + int best = -1; + double[] scores = new double[parameters.length]; + String[] parameterStrings = new String[parameters.length]; + Arrays.sort(rounds); + final int totalRounds = rounds[rounds.length - 1]; + Lexicon labelLexicon = learner.getLabelLexicon(); + + for (int i = 0; i < parameters.length; ++i) { + parameterStrings[i] = parameters[i].nonDefaultString(); + // Status message. 
+ if (progressOutput > 0) + System.out.println(" " + learner.name + ": " + messageIndent + + "Trying parameters (" + parameterStrings[i] + ")"); + + final double[] results = new double[rounds.length]; + learner.setParameters(parameters[i]); + messageIndent += " "; + + train(totalRounds, new DoneWithRound() { + int r = 0; + + public void doneWithRound(int round) { + if (round < totalRounds && rounds[r] == round) + results[r++] = testMidTraining(devParser, metric, true); + } }); - results[rounds.length - 1] = testMidTraining(devParser, metric, false); - messageIndent = messageIndent.substring(2); + results[rounds.length - 1] = testMidTraining(devParser, metric, false); + messageIndent = messageIndent.substring(2); + + // Update best scores, rounds, and parameters. + int bestRounds = 0; + if (best == -1 || results[0] > scores[best]) + best = i; + scores[i] = results[0]; + + for (int j = 1; j < results.length; ++j) + if (results[j] > scores[i]) { + bestRounds = j; + scores[i] = results[j]; + if (results[j] > scores[best]) + best = i; + } + + parameters[i].rounds = rounds[bestRounds]; + + learner.forget(); + if (labelLexicon != null && labelLexicon.size() > 0 + && learner.getLabelLexicon().size() == 0) + learner.setLabelLexicon(labelLexicon); + } - // Update best scores, rounds, and parameters. - int bestRounds = 0; - if (best == -1 || results[0] > scores[best]) best = i; - scores[i] = results[0]; + if (progressOutput > 0) { + // Print a table of results. 
+ double[][] data = new double[parameters.length][3]; + for (int i = 0; i < parameters.length; ++i) { + data[i][0] = i + 1; + data[i][1] = scores[i]; + data[i][2] = parameters[i].rounds; + } - for (int j = 1; j < results.length; ++j) - if (results[j] > scores[i]) { - bestRounds = j; - scores[i] = results[j]; - if (results[j] > scores[best]) best = i; - } + String[] columnLabels = {"Set", metric.getName(), "Rounds"}; + int[] sigDigits = {0, 3, 0}; + String[] s = + TableFormat.tableFormat(columnLabels, null, data, sigDigits, new int[] {0}); + + System.out.println(" " + learner.name + ": " + messageIndent + "----"); + System.out.println(" " + learner.name + ": " + messageIndent + "Parameter sets:"); + for (int i = 0; i < parameterStrings.length; ++i) + System.out.println(" " + learner.name + ": " + messageIndent + (i + 1) + ": " + + parameterStrings[i]); + for (int i = 0; i < s.length; ++i) + System.out.println(" " + learner.name + ": " + messageIndent + s[i]); + System.out.println(" " + learner.name + ": " + messageIndent + "----"); + + // Status message. + double bestScore = Math.round(scores[best] * 100000) / 100000.0; + System.out.println(" " + learner.name + ": " + messageIndent + "Best " + + metric.getName() + ": " + bestScore); + System.out.print(" " + learner.name + ": " + messageIndent + "with "); + + if (parameterStrings[best].length() > 0) { + System.out.println(parameterStrings[best]); + System.out.print(" " + learner.name + ": " + messageIndent + "and "); + } - parameters[i].rounds = rounds[bestRounds]; + System.out.println(parameters[best].rounds + " rounds"); + } - learner.forget(); - if (labelLexicon != null && labelLexicon.size() > 0 - && learner.getLabelLexicon().size() == 0) - learner.setLabelLexicon(labelLexicon); + learner.setParameters(parameters[best]); + return parameters[best]; } - if (progressOutput > 0) { - // Print a table of results. 
- double[][] data = new double[parameters.length][3]; - for (int i = 0; i < parameters.length; ++i) { - data[i][0] = i + 1; - data[i][1] = scores[i]; - data[i][2] = parameters[i].rounds; - } - - String[] columnLabels = { "Set", metric.getName(), "Rounds" }; - int[] sigDigits = { 0, 3, 0 }; - String[] s = - TableFormat.tableFormat(columnLabels, null, data, sigDigits, - new int[]{ 0 }); - - System.out.println(" " + learner.name + ": " + messageIndent + "----"); - System.out.println( - " " + learner.name + ": " + messageIndent + "Parameter sets:"); - for (int i = 0; i < parameterStrings.length; ++i) - System.out.println( - " " + learner.name + ": " + messageIndent + (i+1) + ": " - + parameterStrings[i]); - for (int i = 0; i < s.length; ++i) - System.out.println(" " + learner.name + ": " + messageIndent + s[i]); - System.out.println(" " + learner.name + ": " + messageIndent + "----"); - - // Status message. - double bestScore = Math.round(scores[best] * 100000) / 100000.0; - System.out.println( - " " + learner.name + ": " + messageIndent + "Best " - + metric.getName() + ": " + bestScore); - System.out.print( - " " + learner.name + ": " + messageIndent + "with "); - - if (parameterStrings[best].length() > 0) { - System.out.println(parameterStrings[best]); - System.out.print( - " " + learner.name + ": " + messageIndent + "and "); - } - - System.out.println(parameters[best].rounds + " rounds"); - } - learner.setParameters(parameters[best]); - return parameters[best]; - } - - - /** - * Tests {@link #learner} on the specified data while making provisions - * under the assumption that this test happens in between rounds of - * training. - * - * @param testParser A parser producing labeled testing examples. - * @param metric The metric used to evaluate the performance of the - * learner. - * @param clone Whether or not the learner should be cloned (and it - * should be cloned if more learning will take place - * after making this call). 
- * @return The result produced by the testing metric on the testing data - * expressed as a percentage (instead of a fraction) if the testing - * metric is {@link Accuracy}. - **/ - protected double testMidTraining(Parser testParser, - TestingMetric metric, - boolean clone) { - Learner testLearner = clone ? (Learner) learner.clone() : learner; - testLearner.doneLearning(); - double result = 0; - - if (testParser instanceof ArrayFileParser) { - ArrayFileParser afp = (ArrayFileParser) testParser; - afp.setIncludePruned(true); - result = metric.test(testLearner, null, testParser); - afp.setIncludePruned(false); - } - else { - setIsTraining(false); - result = metric.test(testLearner, testLearner.getLabeler(), testParser); - setIsTraining(true); - } + /** + * Tests {@link #learner} on the + * specified data while making provisions under the assumption that this test happens in between + * rounds of training. + * + * @param testParser A parser producing labeled testing examples. + * @param metric The metric used to evaluate the performance of the learner. + * @param clone Whether or not the learner should be cloned (and it should be cloned if more + * learning will take place after making this call). + * @return The result produced by the testing metric on the testing data expressed as a + * percentage (instead of a fraction) if the testing metric is {@link Accuracy}. + **/ + protected double testMidTraining(Parser testParser, TestingMetric metric, boolean clone) { + Learner testLearner = clone ? 
(Learner) learner.clone() : learner; + testLearner.doneLearning(); + double result = 0; + + if (testParser instanceof ArrayFileParser) { + ArrayFileParser afp = (ArrayFileParser) testParser; + afp.setIncludePruned(true); + result = metric.test(testLearner, null, testParser); + afp.setIncludePruned(false); + } else { + setIsTraining(false); + result = metric.test(testLearner, testLearner.getLabeler(), testParser); + setIsTraining(true); + } - testParser.reset(); - if (metric instanceof Accuracy) result *= 100; + testParser.reset(); + if (metric instanceof Accuracy) + result *= 100; + + if (progressOutput > 0) { + double printResult = Math.round(result * 100000) / 100000.0; + System.out.print(" " + learner.name + ": " + messageIndent + metric.getName() + ": " + + printResult); + if (metric instanceof Accuracy) + System.out.print("%"); + System.out.println(); + } - if (progressOutput > 0) { - double printResult = Math.round(result * 100000) / 100000.0; - System.out.print( - " " + learner.name + ": " + messageIndent + metric.getName() + ": " - + printResult); - if (metric instanceof Accuracy) System.out.print("%"); - System.out.println(); + return result; } - - return result; - } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/BiasedRandomWeightVector.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/BiasedRandomWeightVector.java index c7c3371d..81db3773 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/BiasedRandomWeightVector.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/BiasedRandomWeightVector.java @@ -1,151 +1,137 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.learn; import java.io.PrintStream; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessOutputStream; - +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream; /** - * Same as the RandomWeightVector class that it extends, except - * that this vector also contains a bias term (also initialized randomly) - * which is added to every dot product and affected by every vector addition - * operation. - * - * @author Nick Rizzolo + * Same as the RandomWeightVector class that it extends, except that this vector also + * contains a bias term (also initialized randomly) which is added to every dot product and affected + * by every vector addition operation. + * + * @author Nick Rizzolo **/ -public class BiasedRandomWeightVector extends RandomWeightVector -{ - /** The current bias weight. */ - protected double bias; - - - /** Instantiates this biased vector with a random bias. */ - public BiasedRandomWeightVector() { - this(RandomWeightVector.defaultStddev); - } - - /** - * Sets the specified standard deviation and a random bias. - * - * @param s The standard deviation. - **/ - public BiasedRandomWeightVector(double s) { - super(s); - bias = random.nextGaussian() * stddev; - } - - - /** - * Takes the dot product of this BiasedRandomWeightVector with - * the argument vector, using the specified default weight when one is not - * yet present in this vector. - * - * @param exampleFeatures The example's array of feature indices - * @param exampleValues The example's array of feature values - * @param defaultW The default weight. - * @return The computed dot product. 
- **/ - public double dot(int[] exampleFeatures, double[] exampleValues, - double defaultW) { - return super.dot(exampleFeatures, exampleValues, defaultW) + bias; - } - - - /** - * Self-modifying vector addition where the argument vector is first scaled - * by the given factor. - * - * @param exampleFeatures The example's array of feature indices - * @param exampleValues The example's array of feature values - * @param factor The scaling factor. - * @param defaultW An initial weight for previously unseen features. - **/ - public void scaledAdd(int[] exampleFeatures, double[] exampleValues, - double factor, double defaultW) { - super.scaledAdd(exampleFeatures, exampleValues, factor, defaultW); - bias += factor; - } - - - /** - * Empties the weight map and resets the random number generator. This - * means that the same "random" values will be filled in for the weights if - * the same calls to {@link #dot(int[],double[],double)} and - * {@link #scaledAdd(int[],double[],double,double)} are made in the same - * order. - **/ - public void clear() { - super.clear(); - bias = random.nextGaussian() * stddev; - } - - - /** - * Outputs a textual representation of this vector to the specified stream. - * The string representation is the same as in - * {@link SparseWeightVector#write(PrintStream)}, with two added lines just - * after the "Begin" annotation that give the values of - * {@link #stddev} and {@link #bias}. - * - * @param out The stream to write to. - **/ - public void write(PrintStream out) { - out.println("Begin BiasedRandomWeightVector"); - out.println("stddev = " + stddev); - out.println("bias = " + bias); - toStringJustWeights(out); - out.println("End BiasedRandomWeightVector"); - } - - - /** - * Outputs a textual representation of this vector to the specified stream. 
- * The string representation is the same as in - * {@link SparseWeightVector#write(PrintStream)}, with two added lines just - * after the "Begin" annotation that give the values of - * {@link #stddev} and {@link #bias}. - * - * @param out The stream to write to. - * @param lex The feature lexicon. - **/ - public void write(PrintStream out, Lexicon lex) { - out.println("Begin BiasedRandomWeightVector"); - out.println("stddev = " + stddev); - out.println("bias = " + bias); - toStringJustWeights(out, 0, lex); - out.println("End BiasedRandomWeightVector"); - } - - - /** - * Writes the weight vector's internal representation in binary form. - * - * @param out The output stream. - **/ - public void write(ExceptionlessOutputStream out) { - super.write(out); - out.writeDouble(bias); - } - - - /** - * Returns a new, empty weight vector with the same parameter settings as - * this one. - * - * @return An empty weight vector. - **/ - public SparseWeightVector emptyClone() { - return new BiasedRandomWeightVector(stddev); - } +public class BiasedRandomWeightVector extends RandomWeightVector { + /** The current bias weight. */ + protected double bias; + + + /** Instantiates this biased vector with a random bias. */ + public BiasedRandomWeightVector() { + this(RandomWeightVector.defaultStddev); + } + + /** + * Sets the specified standard deviation and a random bias. + * + * @param s The standard deviation. + **/ + public BiasedRandomWeightVector(double s) { + super(s); + bias = random.nextGaussian() * stddev; + } + + + /** + * Takes the dot product of this BiasedRandomWeightVector with the argument vector, + * using the specified default weight when one is not yet present in this vector. + * + * @param exampleFeatures The example's array of feature indices + * @param exampleValues The example's array of feature values + * @param defaultW The default weight. + * @return The computed dot product. 
+ **/ + public double dot(int[] exampleFeatures, double[] exampleValues, double defaultW) { + return super.dot(exampleFeatures, exampleValues, defaultW) + bias; + } + + + /** + * Self-modifying vector addition where the argument vector is first scaled by the given factor. + * + * @param exampleFeatures The example's array of feature indices + * @param exampleValues The example's array of feature values + * @param factor The scaling factor. + * @param defaultW An initial weight for previously unseen features. + **/ + public void scaledAdd(int[] exampleFeatures, double[] exampleValues, double factor, + double defaultW) { + super.scaledAdd(exampleFeatures, exampleValues, factor, defaultW); + bias += factor; + } + + + /** + * Empties the weight map and resets the random number generator. This means that the same + * "random" values will be filled in for the weights if the same calls to + * {@link #dot(int[],double[],double)} and {@link #scaledAdd(int[],double[],double,double)} are + * made in the same order. + **/ + public void clear() { + super.clear(); + bias = random.nextGaussian() * stddev; + } + + + /** + * Outputs a textual representation of this vector to the specified stream. The string + * representation is the same as in {@link SparseWeightVector#write(PrintStream)}, with two + * added lines just after the "Begin" annotation that give the values of + * {@link #stddev} and {@link #bias}. + * + * @param out The stream to write to. + **/ + public void write(PrintStream out) { + out.println("Begin BiasedRandomWeightVector"); + out.println("stddev = " + stddev); + out.println("bias = " + bias); + toStringJustWeights(out); + out.println("End BiasedRandomWeightVector"); + } + + + /** + * Outputs a textual representation of this vector to the specified stream. 
The string + * representation is the same as in {@link SparseWeightVector#write(PrintStream)}, with two + * added lines just after the "Begin" annotation that give the values of + * {@link #stddev} and {@link #bias}. + * + * @param out The stream to write to. + * @param lex The feature lexicon. + **/ + public void write(PrintStream out, Lexicon lex) { + out.println("Begin BiasedRandomWeightVector"); + out.println("stddev = " + stddev); + out.println("bias = " + bias); + toStringJustWeights(out, 0, lex); + out.println("End BiasedRandomWeightVector"); + } + + + /** + * Writes the weight vector's internal representation in binary form. + * + * @param out The output stream. + **/ + public void write(ExceptionlessOutputStream out) { + super.write(out); + out.writeDouble(bias); + } + + + /** + * Returns a new, empty weight vector with the same parameter settings as this one. + * + * @return An empty weight vector. + **/ + public SparseWeightVector emptyClone() { + return new BiasedRandomWeightVector(stddev); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/BiasedWeightVector.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/BiasedWeightVector.java index 6a905c97..60b04620 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/BiasedWeightVector.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/BiasedWeightVector.java @@ -1,163 +1,156 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.learn; import java.io.PrintStream; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessInputStream; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessOutputStream; - +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream; /** - * Same as the SparseWeightVector class that it extends, except - * that this vector also contains a bias term which is added to every dot - * product and affected by every vector addition operation. - * - * @author Nick Rizzolo + * Same as the SparseWeightVector class that it extends, except that this vector also + * contains a bias term which is added to every dot product and affected by every vector addition + * operation. + * + * @author Nick Rizzolo **/ -public class BiasedWeightVector extends SparseWeightVector -{ - /** Default value for {@link #initialBias}. */ - protected static final double defaultInitialBias = 0; - - - /** The first value for {@link #bias}. */ - protected double initialBias; - /** The current bias weight. */ - protected double bias; - - - /** Instantiates this biased vector with default parameter values. */ - public BiasedWeightVector() { this(defaultInitialBias); } - - /** - * Instantiates this biased vector with the specified initial bias. - * - * @param b The inital bias. - **/ - public BiasedWeightVector(double b) { initialBias = bias = b; } - - - /** - * Takes the dot product of this BiasedWeightVector with the - * argument vector, using the specified default weight when one is not yet - * present in this vector. 
- * - * @param exampleFeatures The example's array of feature indices - * @param exampleValues The example's array of feature values - * @param defaultW The default weight. - * @return The computed dot product. - **/ - public double dot(int[] exampleFeatures, double[] exampleValues, - double defaultW) { - return super.dot(exampleFeatures, exampleValues, defaultW) + bias; - } - - - /** - * Self-modifying vector addition where the argument vector is first scaled - * by the given factor. - * - * @param exampleFeatures The example's array of feature indices - * @param exampleValues The example's array of feature values - * @param factor The scaling factor. - * @param defaultW An initial weight for previously unseen features. - **/ - public void scaledAdd(int[] exampleFeatures, double[] exampleValues, - double factor, double defaultW) { - super.scaledAdd(exampleFeatures, exampleValues, factor, defaultW); - bias += factor; - } - - - /** Empties the weight map. */ - public void clear() { - super.clear(); - bias = initialBias; - } - - - /** - * Outputs the contents of this BiasedWeightVector into the - * specified PrintStream. The string representation is the - * same as in the super class, with two added lines just after the - * "Begin" annotation that give the values of {@link #bias} - * and {@link #initialBias}. - * - * @param out The stream to write to. - **/ - public void write(PrintStream out) { - out.println("Begin BiasedWeightVector"); - out.println("bias = " + bias); - out.println("initialBias = " + initialBias); - toStringJustWeights(out); - out.println("End BiasedWeightVector"); - } - - - /** - * Outputs the contents of this BiasedWeightVector into the - * specified PrintStream. The string representation is the - * same as in the super class, with two added lines just after the - * "Begin" annotation that give the values of {@link #bias} - * and {@link #initialBias}. - * - * @param out The stream to write to. - * @param lex The feature lexicon. 
- **/ - public void write(PrintStream out, Lexicon lex) { - out.println("Begin BiasedWeightVector"); - out.println("bias = " + bias); - out.println("initialBias = " + initialBias); - toStringJustWeights(out, 0, lex); - out.println("End BiasedWeightVector"); - } - - - /** - * Writes the weight vector's internal representation in binary form. - * - * @param out The output stream. - **/ - public void write(ExceptionlessOutputStream out) { - super.write(out); - out.writeDouble(initialBias); - out.writeDouble(bias); - } - - - /** - * Reads the representation of a weight vector with this object's run-time - * type from the given stream, overwriting the data in this object. - * - *

This method is appropriate for reading weight vectors as written by - * {@link #write(ExceptionlessOutputStream)}. - * - * @param in The input stream. - **/ - public void read(ExceptionlessInputStream in) { - super.read(in); - initialBias = in.readDouble(); - bias = in.readDouble(); - } - - - /** - * Returns a new, empty weight vector with the same parameter settings as - * this one. - * - * @return An empty weight vector. - **/ - public SparseWeightVector emptyClone() { - return new BiasedWeightVector(initialBias); - } +public class BiasedWeightVector extends SparseWeightVector { + /** Default value for {@link #initialBias}. */ + protected static final double defaultInitialBias = 0; + + + /** The first value for {@link #bias}. */ + protected double initialBias; + /** The current bias weight. */ + protected double bias; + + + /** Instantiates this biased vector with default parameter values. */ + public BiasedWeightVector() { + this(defaultInitialBias); + } + + /** + * Instantiates this biased vector with the specified initial bias. + * + * @param b The inital bias. + **/ + public BiasedWeightVector(double b) { + initialBias = bias = b; + } + + + /** + * Takes the dot product of this BiasedWeightVector with the argument vector, using + * the specified default weight when one is not yet present in this vector. + * + * @param exampleFeatures The example's array of feature indices + * @param exampleValues The example's array of feature values + * @param defaultW The default weight. + * @return The computed dot product. + **/ + public double dot(int[] exampleFeatures, double[] exampleValues, double defaultW) { + return super.dot(exampleFeatures, exampleValues, defaultW) + bias; + } + + + /** + * Self-modifying vector addition where the argument vector is first scaled by the given factor. + * + * @param exampleFeatures The example's array of feature indices + * @param exampleValues The example's array of feature values + * @param factor The scaling factor. 
+ * @param defaultW An initial weight for previously unseen features. + **/ + public void scaledAdd(int[] exampleFeatures, double[] exampleValues, double factor, + double defaultW) { + super.scaledAdd(exampleFeatures, exampleValues, factor, defaultW); + bias += factor; + } + + + /** Empties the weight map. */ + public void clear() { + super.clear(); + bias = initialBias; + } + + + /** + * Outputs the contents of this BiasedWeightVector into the specified + * PrintStream. The string representation is the same as in the super class, with + * two added lines just after the "Begin" annotation that give the values of + * {@link #bias} and {@link #initialBias}. + * + * @param out The stream to write to. + **/ + public void write(PrintStream out) { + out.println("Begin BiasedWeightVector"); + out.println("bias = " + bias); + out.println("initialBias = " + initialBias); + toStringJustWeights(out); + out.println("End BiasedWeightVector"); + } + + + /** + * Outputs the contents of this BiasedWeightVector into the specified + * PrintStream. The string representation is the same as in the super class, with + * two added lines just after the "Begin" annotation that give the values of + * {@link #bias} and {@link #initialBias}. + * + * @param out The stream to write to. + * @param lex The feature lexicon. + **/ + public void write(PrintStream out, Lexicon lex) { + out.println("Begin BiasedWeightVector"); + out.println("bias = " + bias); + out.println("initialBias = " + initialBias); + toStringJustWeights(out, 0, lex); + out.println("End BiasedWeightVector"); + } + + + /** + * Writes the weight vector's internal representation in binary form. + * + * @param out The output stream. + **/ + public void write(ExceptionlessOutputStream out) { + super.write(out); + out.writeDouble(initialBias); + out.writeDouble(bias); + } + + + /** + * Reads the representation of a weight vector with this object's run-time type from the given + * stream, overwriting the data in this object. + * + *

+ * This method is appropriate for reading weight vectors as written by + * {@link #write(ExceptionlessOutputStream)}. + * + * @param in The input stream. + **/ + public void read(ExceptionlessInputStream in) { + super.read(in); + initialBias = in.readDouble(); + bias = in.readDouble(); + } + + + /** + * Returns a new, empty weight vector with the same parameter settings as this one. + * + * @return An empty weight vector. + **/ + public SparseWeightVector emptyClone() { + return new BiasedWeightVector(initialBias); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/BinaryMIRA.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/BinaryMIRA.java index 5d43cd9f..8691dbe6 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/BinaryMIRA.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/BinaryMIRA.java @@ -1,457 +1,447 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.learn; import java.io.PrintStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream; import edu.illinois.cs.cogcomp.lbjava.classify.Classifier; import edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessInputStream; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessOutputStream; - /** - * The Binary MIRA learning algorithm implementation. This algorithm - * operates very similarly to {@link SparsePerceptron} with a thick - * separator, except the learning rate is a function of each training - * example's margin. When the weight vector has made a mistake, the full - * {@link LinearThresholdUnit#learningRate} will be used. When the weight - * vector did not make a mistake, {@link LinearThresholdUnit#learningRate} is - * multiplied by the following value before the update takes place. - * - *

- *

- * (beta/2 - y(w*x)) / ||x||2 - *
- * - *

In the expression above, w is the weight vector, y - * represents the label of the example vector x, * stands for - * inner product, and beta is a user supplied parameter. If this - * expression turns out to be non-positive (i.e., if y(w*x) >= - * beta/2), no update is made for that example. - * - *

It is assumed that {@link Learner#labeler} is a single discrete - * classifier that produces the same feature for every example object and - * that the values that feature may take are available through the - * {@link Classifier#allowableValues()} method. The second value returned - * from {@link Classifier#allowableValues()} is treated as "positive", and it - * is assumed there are exactly 2 allowable values. Assertions will produce - * error messages if these assumptions do not hold. - * - *

This algorithm's user-configurable parameters are stored in member - * fields of this class. They may be set via either a constructor that names - * each parameter explicitly or a constructor that takes an instance of - * {@link edu.illinois.cs.cogcomp.lbjava.learn.BinaryMIRA.Parameters Parameters} as input. - * The documentation in each member field in this class indicates the default - * value of the associated parameter when using the former type of - * constructor. The documentation of the associated member field in the - * {@link edu.illinois.cs.cogcomp.lbjava.learn.BinaryMIRA.Parameters Parameters} class - * indicates the default value of the parameter when using the latter type of - * constructor. - * - * @author Arindam Saha + * The Binary MIRA learning algorithm implementation. This algorithm operates very similarly to + * {@link SparsePerceptron} with a thick separator, except the learning rate is a function of each + * training example's margin. When the weight vector has made a mistake, the full + * {@link LinearThresholdUnit#learningRate} will be used. When the weight vector did not make a + * mistake, {@link LinearThresholdUnit#learningRate} is multiplied by the following value before the + * update takes place. + * + *

+ *

(beta/2 - y(w*x)) / ||x||2
+ * + *

+ * In the expression above, w is the weight vector, y represents the label of the + * example vector x, * stands for inner product, and beta is a user supplied + * parameter. If this expression turns out to be non-positive (i.e., if y(w*x) >= beta/2), + * no update is made for that example. + * + *

+ * It is assumed that {@link Learner#labeler} is a single discrete classifier that produces the same + * feature for every example object and that the values that feature may take are available through + * the {@link Classifier#allowableValues()} method. The second value returned from + * {@link Classifier#allowableValues()} is treated as "positive", and it is assumed there are + * exactly 2 allowable values. Assertions will produce error messages if these assumptions do not + * hold. + * + *

+ * This algorithm's user-configurable parameters are stored in member fields of this class. They may + * be set via either a constructor that names each parameter explicitly or a constructor that takes + * an instance of {@link edu.illinois.cs.cogcomp.lbjava.learn.BinaryMIRA.Parameters Parameters} as + * input. The documentation in each member field in this class indicates the default value of the + * associated parameter when using the former type of constructor. The documentation of the + * associated member field in the {@link edu.illinois.cs.cogcomp.lbjava.learn.BinaryMIRA.Parameters + * Parameters} class indicates the default value of the parameter when using the latter type of + * constructor. + * + * @author Arindam Saha **/ public class BinaryMIRA extends SparsePerceptron { - /** - * Used to decide if two values are nearly equal to each other. - * @see #nearlyEqualTo(double,double) - **/ - public static final double TOLERANCE = 0.000000001; - /** Default value for {@link #beta}. */ - public static final double defaultBeta = 2; - /** Default value for {@link #learningRate}. */ - public static final double defaultLearningRate = 1; - - - /** - * The user supplied learning algorithm parameter; default - * {@link #defaultBeta}. The learning rate changes as a function of - * beta. - **/ - protected double beta; - - - /** - * The learning rate and beta take default values while the name of - * the classifier takes the empty string. - **/ - public BinaryMIRA() { this(""); } - - /** - * Sets the learning rate to the specified value, and beta to the - * default, while the name of the classifier takes the empty string. - * - * @param r The desired learning rate value. - **/ - public BinaryMIRA(double r) { this("", r); } - - /** - * Sets the learning rate and beta to the specified values, while - * the name of the classifier takes the empty string. - * - * @param r The desired learning rate value. - * @param B the desired beta value. 
- **/ - public BinaryMIRA(double r, double B) { this("", r, B); } - - /** - * Sets the learning rate, beta and the weight vector to the specified - * values. - * - * @param r The desired learning rate. - * @param B The desired beta value. - * @param v The desired weight vector. - **/ - public BinaryMIRA(double r, double B, SparseWeightVector v) { - this("", r, B, v); - } - - /** - * Initializing constructor. Sets all member variables to their associated - * settings in the {@link BinaryMIRA.Parameters} object. - * - * @param p The settings of all parameters. - **/ - public BinaryMIRA(Parameters p) { this("", p); } - - - /** - * Sets the name of the classifier to the specified value, while the - * learning rate and beta take default values. - * - * @param n The name of the classifier. - **/ - public BinaryMIRA(String n) { this(n, defaultLearningRate); } - - /** - * Sets the name of the classifier and learning rate to the specified - * values, while beta takes the default value. - * - * @param n The name of the classifier. - * @param r The desired learning rate. - **/ - public BinaryMIRA(String n, double r) { this(n, r, defaultBeta); } - - /** - * Sets the name of the classifier, the learning rate and beta to - * the specified values. - * - * @param n The name of the classifier. - * @param r The desired learning rate. - * @param B The desired beta value. - */ - public BinaryMIRA(String n, double r, double B) { - this(n, r, B, - (SparseWeightVector) - LinearThresholdUnit.defaultWeightVector.clone()); - } - - /** - * Sets the name of the classifier, the learning rate, beta and the - * weight vector to the specified values. Use this constructor to specify - * an alternative subclass of {@link SparseWeightVector}. - * - * @param n The name of the classifier. - * @param r The desired learning rate. - * @param B The desired beta value. - * @param v The desired weight vector. 
- */ - public BinaryMIRA(String n, double r, double B, SparseWeightVector v) { - super(n); - Parameters p = new Parameters(); - p.learningRate = r; - p.weightVector = v; - p.beta = B; - setParameters(p); - } - - /** - * Initializing constructor. Sets all member variables to their associated - * settings in the {@link BinaryMIRA.Parameters} object. - * - * @param n The name of the classifier. - * @param p The settings of all parameters. - **/ - public BinaryMIRA(String n, Parameters p) { - super(n); - setParameters(p); - } - - - /** - * Sets the values of parameters that control the behavior of this learning - * algorithm. - * - * @param p The parameters. - **/ - public void setParameters(Parameters p) { - super.setParameters(p); - beta = p.beta; - } - - - /** - * Retrieves the parameters that are set in this learner. - * - * @return An object containing all the values of the parameters that - * control the behavior of this learning algorithm. - **/ - public Learner.Parameters getParameters() { - Parameters p = - new Parameters((SparsePerceptron.Parameters) super.getParameters()); - p.beta = beta; - return p; - } - - - /** - * Returns the current value of the {@link #beta} member variable. - * - * @return The value of the {@link #beta} variable. - **/ - public double getBeta() { return beta; } - - - /** - * Sets the {@link #beta} member variable to the specified value. - * - * @param B The new value for {@link #beta}. - **/ - public void setBeta(double B) { beta = B; } - - - /** - * Determines if the weights should be promoted. - * - * @param label The label of the example object. - * @param s The score of the example object. - * @param threshold The LTU threshold. - * @param positiveThickness The thickness of the hyperplane on the - * positive side. - * @return true iff the weights should be promoted. 
- **/ - public boolean shouldPromote(boolean label, double s, double threshold, - double positiveThickness) { - return label; - } - - /** - * Determines if the weights should be promoted. - * - * @param label The label of the example object. - * @param s The score of the example object. - * @param threshold The LTU threshold. - * @param negativeThickness The thickness of the hyperplane on the - * negative side. - * @return true iff the weights should be demoted. - **/ - public boolean shouldDemote(boolean label, double s, double threshold, - double negativeThickness) { - return !label; - } - - - /** - * Scales the feature vector produced by the extractor by the learning rate - * and adds it to the weight vector. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @param rate The learning rate at which the weights are - * updated. - **/ - public void promote(int[] exampleFeatures, double[] exampleValues, - double rate) { - if (!nearlyEqualTo(rate, 0.0)) { - super.promote(exampleFeatures, exampleValues, rate); + /** + * Used to decide if two values are nearly equal to each other. + * + * @see #nearlyEqualTo(double,double) + **/ + public static final double TOLERANCE = 0.000000001; + /** Default value for {@link #beta}. */ + public static final double defaultBeta = 2; + /** Default value for {@link #learningRate}. */ + public static final double defaultLearningRate = 1; + + + /** + * The user supplied learning algorithm parameter; default {@link #defaultBeta}. The learning + * rate changes as a function of beta. + **/ + protected double beta; + + + /** + * The learning rate and beta take default values while the name of the classifier takes the + * empty string. + **/ + public BinaryMIRA() { + this(""); + } + + /** + * Sets the learning rate to the specified value, and beta to the default, while the name of the + * classifier takes the empty string. 
+ * + * @param r The desired learning rate value. + **/ + public BinaryMIRA(double r) { + this("", r); + } + + /** + * Sets the learning rate and beta to the specified values, while the name of the classifier + * takes the empty string. + * + * @param r The desired learning rate value. + * @param B the desired beta value. + **/ + public BinaryMIRA(double r, double B) { + this("", r, B); + } + + /** + * Sets the learning rate, beta and the weight vector to the specified values. + * + * @param r The desired learning rate. + * @param B The desired beta value. + * @param v The desired weight vector. + **/ + public BinaryMIRA(double r, double B, SparseWeightVector v) { + this("", r, B, v); + } + + /** + * Initializing constructor. Sets all member variables to their associated settings in the + * {@link BinaryMIRA.Parameters} object. + * + * @param p The settings of all parameters. + **/ + public BinaryMIRA(Parameters p) { + this("", p); + } + + + /** + * Sets the name of the classifier to the specified value, while the learning rate and beta take + * default values. + * + * @param n The name of the classifier. + **/ + public BinaryMIRA(String n) { + this(n, defaultLearningRate); + } + + /** + * Sets the name of the classifier and learning rate to the specified values, while beta takes + * the default value. + * + * @param n The name of the classifier. + * @param r The desired learning rate. + **/ + public BinaryMIRA(String n, double r) { + this(n, r, defaultBeta); } - } - - - /** - * Scales the feature vector produced by the extractor by the learning rate - * and subtracts it from the weight vector. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @param rate The learning rate at which the weights are - * updated. 
- **/ - public void demote(int[] exampleFeatures, double[] exampleValues, - double rate) { - if (!nearlyEqualTo(rate, 0.0)) { - super.demote(exampleFeatures, exampleValues, rate); + + /** + * Sets the name of the classifier, the learning rate and beta to the specified values. + * + * @param n The name of the classifier. + * @param r The desired learning rate. + * @param B The desired beta value. + */ + public BinaryMIRA(String n, double r, double B) { + this(n, r, B, (SparseWeightVector) LinearThresholdUnit.defaultWeightVector.clone()); } - } - - - /** - * Determines if a is nearly equal to b based on - * the value of the TOLERANCE member variable. - * - * @param a The first value. - * @param b The second value. - * @return true iff they are nearly equal. - **/ - private static boolean nearlyEqualTo(double a, double b) { - return -TOLERANCE < a - b && a - b < TOLERANCE; - } - - - /** - * Computes the learning rate for this example. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @param s The score for the example object. - * @param label The label. - * @return The new learning rate. - **/ - public double computeLearningRate(int[] exampleFeatures, - double[] exampleValues, double s, - boolean label) { - double labelVal = label? 1: -1; - - double x = (beta / 2 - labelVal * s) - / (FeatureVector.L2NormSquared(exampleValues) + 1); - - double rate = 1; - if (x < 0) rate = 0; - else if (x < 1) rate = x; - - rate *= learningRate; - - return rate; - } - - - /** - * Returns the original value of the {@link #learningRate} variable. - * - * @return The value of the {@link #learningRate} variable. - **/ - public double getLearningRate() { return learningRate; } - - - /** - * Writes the algorithm's internal representation as text. 
In the first - * line of output, the name of the classifier is printed, followed by - * {@link #learningRate}, {@link #beta}, - * {@link LinearThresholdUnit#initialWeight}, - * {@link LinearThresholdUnit#threshold}, - * {@link LinearThresholdUnit#positiveThickness}, - * {@link LinearThresholdUnit#negativeThickness}, - * and finally {@link LinearThresholdUnit#bias}. - * - * @param out The output stream. - **/ - public void write(PrintStream out) { - out.println(name + ": " + learningRate + ", " + beta + ", " - + initialWeight + ", " + threshold + ", " + positiveThickness - + ", " + negativeThickness + ", " + bias); - if (lexicon.size() == 0) weightVector.write(out); - else weightVector.write(out, lexicon); - } - - - /** - * Writes the learned function's internal representation in binary form. - * - * @param out The output stream. - **/ - public void write(ExceptionlessOutputStream out) { - super.write(out); - out.writeDouble(beta); - } - - - /** - * Reads the binary representation of a learner with this object's run-time - * type, overwriting any and all learned or manually specified parameters - * as well as the label lexicon but without modifying the feature lexicon. - * - * @param in The input stream. - **/ - public void read(ExceptionlessInputStream in) { - super.read(in); - beta = in.readDouble(); - } - - - /** - * Simply a container for all of {@link BinaryMIRA}'s configurable - * parameters. Using instances of this class should make code more - * readable and constructors less complicated. - * - * @author Nick Rizzolo - **/ - public static class Parameters extends SparsePerceptron.Parameters { + /** - * The user supplied learning algorithm parameter; default - * {@link #defaultBeta}. The learning rate changes as a function of - * beta. + * Sets the name of the classifier, the learning rate, beta and the weight vector to the + * specified values. Use this constructor to specify an alternative subclass of + * {@link SparseWeightVector}. 
+ * + * @param n The name of the classifier. + * @param r The desired learning rate. + * @param B The desired beta value. + * @param v The desired weight vector. */ - public double beta; + public BinaryMIRA(String n, double r, double B, SparseWeightVector v) { + super(n); + Parameters p = new Parameters(); + p.learningRate = r; + p.weightVector = v; + p.beta = B; + setParameters(p); + } + + /** + * Initializing constructor. Sets all member variables to their associated settings in the + * {@link BinaryMIRA.Parameters} object. + * + * @param n The name of the classifier. + * @param p The settings of all parameters. + **/ + public BinaryMIRA(String n, Parameters p) { + super(n); + setParameters(p); + } + + + /** + * Sets the values of parameters that control the behavior of this learning algorithm. + * + * @param p The parameters. + **/ + public void setParameters(Parameters p) { + super.setParameters(p); + beta = p.beta; + } - /** Sets all the default values. */ - public Parameters() { - beta = defaultBeta; - learningRate = defaultLearningRate; + /** + * Retrieves the parameters that are set in this learner. + * + * @return An object containing all the values of the parameters that control the behavior of + * this learning algorithm. + **/ + public Learner.Parameters getParameters() { + Parameters p = new Parameters((SparsePerceptron.Parameters) super.getParameters()); + p.beta = beta; + return p; } /** - * Sets the parameters from the parent's parameters object, giving - * defaults to all parameters declared in this object. + * Returns the current value of the {@link #beta} member variable. + * + * @return The value of the {@link #beta} variable. **/ - public Parameters(SparsePerceptron.Parameters p) { - super(p); - beta = defaultBeta; + public double getBeta() { + return beta; } - /** Copy constructor. */ - public Parameters(Parameters p) { - super(p); - beta = p.beta; + /** + * Sets the {@link #beta} member variable to the specified value. 
+ * + * @param B The new value for {@link #beta}. + **/ + public void setBeta(double B) { + beta = B; + } + + + /** + * Determines if the weights should be promoted. + * + * @param label The label of the example object. + * @param s The score of the example object. + * @param threshold The LTU threshold. + * @param positiveThickness The thickness of the hyperplane on the positive side. + * @return true iff the weights should be promoted. + **/ + public boolean shouldPromote(boolean label, double s, double threshold, double positiveThickness) { + return label; + } + + /** + * Determines if the weights should be demoted. + * + * @param label The label of the example object. + * @param s The score of the example object. + * @param threshold The LTU threshold. + * @param negativeThickness The thickness of the hyperplane on the negative side. + * @return true iff the weights should be demoted. + **/ + public boolean shouldDemote(boolean label, double s, double threshold, double negativeThickness) { + return !label; } /** - * Calls the appropriate Learner.setParameters(Parameters) - * method for this Parameters object. - * - * @param l The learner whose parameters will be set. + * Scales the feature vector produced by the extractor by the learning rate and adds it to the + * weight vector. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @param rate The learning rate at which the weights are updated. **/ - public void setParameters(Learner l) { - ((BinaryMIRA) l).setParameters(this); + public void promote(int[] exampleFeatures, double[] exampleValues, double rate) { + if (!nearlyEqualTo(rate, 0.0)) { + super.promote(exampleFeatures, exampleValues, rate); + } } /** - * Creates a string representation of these parameters in which only - * those parameters that differ from their default values are mentioned. 
+ * Scales the feature vector produced by the extractor by the learning rate and subtracts it + * from the weight vector. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @param rate The learning rate at which the weights are updated. **/ - public String nonDefaultString() { - String result = super.nonDefaultString(); + public void demote(int[] exampleFeatures, double[] exampleValues, double rate) { + if (!nearlyEqualTo(rate, 0.0)) { + super.demote(exampleFeatures, exampleValues, rate); + } + } - if (beta != BinaryMIRA.defaultBeta) - result += ", beta = " + beta; - if (result.startsWith(", ")) result = result.substring(2); - return result; + /** + * Determines if a is nearly equal to b based on the value of the + * TOLERANCE member variable. + * + * @param a The first value. + * @param b The second value. + * @return true iff they are nearly equal. + **/ + private static boolean nearlyEqualTo(double a, double b) { + return -TOLERANCE < a - b && a - b < TOLERANCE; } - } -} + /** + * Computes the learning rate for this example. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @param s The score for the example object. + * @param label The label. + * @return The new learning rate. + **/ + public double computeLearningRate(int[] exampleFeatures, double[] exampleValues, double s, + boolean label) { + double labelVal = label ? 1 : -1; + + double x = (beta / 2 - labelVal * s) / (FeatureVector.L2NormSquared(exampleValues) + 1); + + double rate = 1; + if (x < 0) + rate = 0; + else if (x < 1) + rate = x; + + rate *= learningRate; + + return rate; + } + + + /** + * Returns the original value of the {@link #learningRate} variable. + * + * @return The value of the {@link #learningRate} variable. 
+ **/ + public double getLearningRate() { + return learningRate; + } + + + /** + * Writes the algorithm's internal representation as text. In the first line of output, the name + * of the classifier is printed, followed by {@link #learningRate}, {@link #beta}, + * {@link LinearThresholdUnit#initialWeight}, {@link LinearThresholdUnit#threshold}, + * {@link LinearThresholdUnit#positiveThickness}, {@link LinearThresholdUnit#negativeThickness}, + * and finally {@link LinearThresholdUnit#bias}. + * + * @param out The output stream. + **/ + public void write(PrintStream out) { + out.println(name + ": " + learningRate + ", " + beta + ", " + initialWeight + ", " + + threshold + ", " + positiveThickness + ", " + negativeThickness + ", " + bias); + if (lexicon.size() == 0) + weightVector.write(out); + else + weightVector.write(out, lexicon); + } + + + /** + * Writes the learned function's internal representation in binary form. + * + * @param out The output stream. + **/ + public void write(ExceptionlessOutputStream out) { + super.write(out); + out.writeDouble(beta); + } + + /** + * Reads the binary representation of a learner with this object's run-time type, overwriting + * any and all learned or manually specified parameters as well as the label lexicon but without + * modifying the feature lexicon. + * + * @param in The input stream. + **/ + public void read(ExceptionlessInputStream in) { + super.read(in); + beta = in.readDouble(); + } + + + /** + * Simply a container for all of {@link BinaryMIRA}'s configurable parameters. Using instances + * of this class should make code more readable and constructors less complicated. + * + * @author Nick Rizzolo + **/ + public static class Parameters extends SparsePerceptron.Parameters { + /** + * The user supplied learning algorithm parameter; default {@link #defaultBeta}. The + * learning rate changes as a function of beta. + */ + public double beta; + + + /** Sets all the default values. 
*/ + public Parameters() { + beta = defaultBeta; + learningRate = defaultLearningRate; + } + + + /** + * Sets the parameters from the parent's parameters object, giving defaults to all + * parameters declared in this object. + **/ + public Parameters(SparsePerceptron.Parameters p) { + super(p); + beta = defaultBeta; + } + + + /** Copy constructor. */ + public Parameters(Parameters p) { + super(p); + beta = p.beta; + } + + + /** + * Calls the appropriate Learner.setParameters(Parameters) method for this + * Parameters object. + * + * @param l The learner whose parameters will be set. + **/ + public void setParameters(Learner l) { + ((BinaryMIRA) l).setParameters(this); + } + + + /** + * Creates a string representation of these parameters in which only those parameters that + * differ from their default values are mentioned. + **/ + public String nonDefaultString() { + String result = super.nonDefaultString(); + + if (beta != BinaryMIRA.defaultBeta) + result += ", beta = " + beta; + + if (result.startsWith(", ")) + result = result.substring(2); + return result; + } + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/ChildLexicon.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/ChildLexicon.java index be0e6f7f..d94ed353 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/ChildLexicon.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/ChildLexicon.java @@ -1,375 +1,362 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.learn; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.IVector; import edu.illinois.cs.cogcomp.lbjava.classify.DiscreteConjunctiveFeature; import edu.illinois.cs.cogcomp.lbjava.classify.DiscreteReferrer; import edu.illinois.cs.cogcomp.lbjava.classify.Feature; import edu.illinois.cs.cogcomp.lbjava.classify.RealConjunctiveFeature; import edu.illinois.cs.cogcomp.lbjava.classify.RealReferrer; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessInputStream; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessOutputStream; -import edu.illinois.cs.cogcomp.lbjava.util.IVector; - /** - * Instances of this class are intended to store features that are children - * of other features and which do not correspond to their own weights in any - * learner's weight vector. While a {@link Lexicon} will store an instance - * of this class in its {@link Lexicon#lexiconChildren} field, an instance of - * this class will never do so. Also, the {@link #lookupChild(Feature)} - * method behaves differently in this class, since it is assumed that - * children are stored here. + * Instances of this class are intended to store features that are children of other features and + * which do not correspond to their own weights in any learner's weight vector. While a + * {@link Lexicon} will store an instance of this class in its {@link Lexicon#lexiconChildren} + * field, an instance of this class will never do so. 
Also, the {@link #lookupChild(Feature)} method + * behaves differently in this class, since it is assumed that children are stored here. **/ -public class ChildLexicon extends Lexicon -{ - /** - * The elements of this vector (which correspond to the features in - * {@link #lexiconInv}) serve a dual purpose; first, to indicate by - * absolute value the number of other features currently stored in this - * object that have the corresponding feature as a child, and second, to - * indicate by sign if the corresponding feature has been marked for - * removal. - **/ - private IVector parents; // Initialization happens in clear() - /** - * A reference to the lexicon that uses this lexicon as its child lexicon. - **/ - private Lexicon parentLexicon; - - - /** Creates an empty lexicon. */ - public ChildLexicon() { } - - /** - * Creates an empty lexicon. - * - * @param p The lexicon that uses this lexicon as its child lexicon. - **/ - public ChildLexicon(Lexicon p) { parentLexicon = p; } - // Lexicon's constructor will call clear(), so there's no need to initialize - // parents. - - /** - * Creates an empty lexicon with the given encoding. - * - * @param p The lexicon that uses this lexicon as its child lexicon. - * @param e The encoding to use when adding features to this lexicon. - **/ - public ChildLexicon(Lexicon p, String e) { - super(e); - // The super constructor will call clear(), so there's no need to - // initialize parents. - parentLexicon = p; - } - - - /** Clears the data structures associated with this instance. */ - public void clear() { - super.clear(); - parents = new IVector(); - } - - - /** - * Sets the value of {@link #parentLexicon} and makes sure that any - * features marked for removal in this lexicon are the identical objects - * also present in the parent. This is useful in particular just after - * lexicons have been read from disk. - * - * @param p The new parent lexicon. 
- **/ - public void setParent(Lexicon p) { - parentLexicon = p; - int N = lexiconInv.size(); - - for (int i = 0; i < N; ++i) { - Feature f = lexiconInv.get(i); - - if (f != null && parents.get(i) < 0) { - Feature pf = p.lookupKey(p.lookup(f)); - if (pf == null) { - System.err.println("LBJava ERROR: Can't find feature " + f - + " in parent lexicon."); - new Exception().printStackTrace(); - System.exit(1); +public class ChildLexicon extends Lexicon { + /** + * The elements of this vector (which correspond to the features in {@link #lexiconInv}) serve a + * dual purpose; first, to indicate by absolute value the number of other features currently + * stored in this object that have the corresponding feature as a child, and second, to indicate + * by sign if the corresponding feature has been marked for removal. + **/ + private IVector parents; // Initialization happens in clear() + /** + * A reference to the lexicon that uses this lexicon as its child lexicon. + **/ + private Lexicon parentLexicon; + + + /** Creates an empty lexicon. */ + public ChildLexicon() {} + + /** + * Creates an empty lexicon. + * + * @param p The lexicon that uses this lexicon as its child lexicon. + **/ + public ChildLexicon(Lexicon p) { + parentLexicon = p; + } + + // Lexicon's constructor will call clear(), so there's no need to initialize + // parents. + + /** + * Creates an empty lexicon with the given encoding. + * + * @param p The lexicon that uses this lexicon as its child lexicon. + * @param e The encoding to use when adding features to this lexicon. + **/ + public ChildLexicon(Lexicon p, String e) { + super(e); + // The super constructor will call clear(), so there's no need to + // initialize parents. + parentLexicon = p; + } + + + /** Clears the data structures associated with this instance. 
*/ + public void clear() { + super.clear(); + parents = new IVector(); + } + + + /** + * Sets the value of {@link #parentLexicon} and makes sure that any features marked for removal + * in this lexicon are the identical objects also present in the parent. This is useful in + * particular just after lexicons have been read from disk. + * + * @param p The new parent lexicon. + **/ + public void setParent(Lexicon p) { + parentLexicon = p; + int N = lexiconInv.size(); + + for (int i = 0; i < N; ++i) { + Feature f = lexiconInv.get(i); + + if (f != null && parents.get(i) < 0) { + Feature pf = p.lookupKey(p.lookup(f)); + if (pf == null) { + System.err.println("LBJava ERROR: Can't find feature " + f + + " in parent lexicon."); + new Exception().printStackTrace(); + System.exit(1); + } + + lexiconInv.set(i, pf); + if (lexicon != null) + lexicon.put(pf, lexicon.remove(f)); + } } + } + + + /** + * Removes the mapping for the given feature from this lexicon and returns the feature object + * representing it that was stored here. + * + * @param f The feature to remove. + * @return The representation of f that used to be stored here, or + * null if it wasn't present. + **/ + public Feature remove(Feature f) { + if (contains(f)) { // contains(Feature) calls lazyMapCreation() + int index = lookup(f); + int count = parents.get(index); + if (count == 0) { + f.removeFromChildLexicon(this); // Calls decrementParentCounts + lexicon.remove(f); + return lexiconInv.set(index, null); + } else if (count > 0) { + parents.set(index, -count); + return lexiconInv.get(index); + } else { + System.err + .println("LBJava ERROR: Marking feature as removable for the second time: " + + f); + new Exception().printStackTrace(); + System.exit(1); + } + } + + return null; + } + + + /** + * The parent of feature f is being removed, so we decrement f's + * parent counts and remove it if it's ready. + * + * @param f The child feature whose parent counts need updating and which may be removed as + * well. 
+ **/ + public void decrementParentCounts(Feature f) { + int index = lookup(f); + int count = parents.get(index); + + if (count == 0) { + System.err.println("LBJava ERROR: Parent count incorrect for feature " + f); + new Exception().printStackTrace(); + System.exit(1); + } else if (count < 0) { + parents.increment(index); + if (count == -1) { + f.removeFromChildLexicon(this); + lexicon.remove(f); + lexiconInv.set(index, null); + } + } else + parents.decrement(index); + } + + + /** + * This method adds the given feature to this lexicon and also recursively adds its children, if + * any. + * + * @param f The feature to look up. + * @param label The label of the example containing this feature, or -1 if we aren't doing per + * class feature counting. + * @return A feature equivalent to f that is stored in this lexicon. + **/ + public Feature getChildFeature(Feature f, int label) { + return lexiconInv.get(f.childLexiconLookup(this, label)); + } + + + /** + * Updates the counts in {@link #parents} for the children of f. + * + * @param f The feature to look up. + * @param label The label of the example containing this feature, or -1 if we aren't doing per + * class feature counting. + * @return The index of f in this lexicon. + **/ + public int childLexiconLookup(Feature f, int label) { + return lookup(f, true, label); + } + - lexiconInv.set(i, pf); - if (lexicon != null) lexicon.put(pf, lexicon.remove(f)); - } + /** + * Updates the counts in {@link #parents} for the children of f. + * + * @param f The feature to look up. + * @param label The label of the example containing this feature, or -1 if we aren't doing per + * class feature counting. + * @return The index of f in this lexicon. 
+ **/ + public int childLexiconLookup(DiscreteConjunctiveFeature f, int label) { + int oldSize = lexiconInv.size(); + int result = lookup(f, true, label); + if (oldSize < lexiconInv.size()) { + incrementParentCounts(f.getLeft(), label); + incrementParentCounts(f.getRight(), label); + } + return result; } - } - - - /** - * Removes the mapping for the given feature from this lexicon and returns - * the feature object representing it that was stored here. - * - * @param f The feature to remove. - * @return The representation of f that used to be stored - * here, or null if it wasn't present. - **/ - public Feature remove(Feature f) { - if (contains(f)) { // contains(Feature) calls lazyMapCreation() - int index = lookup(f); - int count = parents.get(index); - if (count == 0) { - f.removeFromChildLexicon(this); // Calls decrementParentCounts - lexicon.remove(f); - return lexiconInv.set(index, null); - } - else if (count > 0) { - parents.set(index, -count); - return lexiconInv.get(index); - } - else { - System.err.println( - "LBJava ERROR: Marking feature as removable for the second time: " - + f); - new Exception().printStackTrace(); - System.exit(1); - } + + + /** + * Updates the counts in {@link #parents} for the children of f. + * + * @param f The feature to look up. + * @param label The label of the example containing this feature, or -1 if we aren't doing per + * class feature counting. + * @return The index of f in this lexicon. + **/ + public int childLexiconLookup(RealConjunctiveFeature f, int label) { + int oldSize = lexiconInv.size(); + int result = lookup(f, true, label); + if (oldSize < lexiconInv.size()) { + incrementParentCounts(f.getLeft(), label); + incrementParentCounts(f.getRight(), label); + } + return result; } - return null; - } - - - /** - * The parent of feature f is being removed, so we decrement - * f's parent counts and remove it if it's ready. 
- * - * @param f The child feature whose parent counts need updating and which - * may be removed as well. - **/ - public void decrementParentCounts(Feature f) { - int index = lookup(f); - int count = parents.get(index); - - if (count == 0) { - System.err.println( - "LBJava ERROR: Parent count incorrect for feature " + f); - new Exception().printStackTrace(); - System.exit(1); + + /** + * Updates the counts in {@link #parents} for the children of f. + * + * @param f The feature to look up. + * @param label The label of the example containing this feature, or -1 if we aren't doing per + * class feature counting. + * @return The index of f in this lexicon. + **/ + public int childLexiconLookup(DiscreteReferrer f, int label) { + int oldSize = lexiconInv.size(); + int result = lookup(f, true, label); + if (oldSize < lexiconInv.size()) + incrementParentCounts(f.getReferent(), label); + return result; } - else if (count < 0) { - parents.increment(index); - if (count == -1) { - f.removeFromChildLexicon(this); - lexicon.remove(f); - lexiconInv.set(index, null); - } + + + /** + * Updates the counts in {@link #parents} for the children of f. + * + * @param f The feature to look up. + * @param label The label of the example containing this feature, or -1 if we aren't doing per + * class feature counting. + * @return The index of f in this lexicon. + **/ + public int childLexiconLookup(RealReferrer f, int label) { + int oldSize = lexiconInv.size(); + int result = lookup(f, true, label); + if (oldSize < lexiconInv.size()) + incrementParentCounts(f.getReferent(), label); + return result; } - else parents.decrement(index); - } - - - /** - * This method adds the given feature to this lexicon and also recursively - * adds its children, if any. - * - * @param f The feature to look up. - * @param label The label of the example containing this feature, or -1 if - * we aren't doing per class feature counting. - * @return A feature equivalent to f that is stored in this - * lexicon. 
- **/ - public Feature getChildFeature(Feature f, int label) { - return lexiconInv.get(f.childLexiconLookup(this, label)); - } - - - /** - * Updates the counts in {@link #parents} for the children of - * f. - * - * @param f The feature to look up. - * @param label The label of the example containing this feature, or -1 if - * we aren't doing per class feature counting. - * @return The index of f in this lexicon. - **/ - public int childLexiconLookup(Feature f, int label) { - return lookup(f, true, label); - } - - - /** - * Updates the counts in {@link #parents} for the children of - * f. - * - * @param f The feature to look up. - * @param label The label of the example containing this feature, or -1 if - * we aren't doing per class feature counting. - * @return The index of f in this lexicon. - **/ - public int childLexiconLookup(DiscreteConjunctiveFeature f, int label) { - int oldSize = lexiconInv.size(); - int result = lookup(f, true, label); - if (oldSize < lexiconInv.size()) { - incrementParentCounts(f.getLeft(), label); - incrementParentCounts(f.getRight(), label); + + + /** + * Helper method for methods like {@link #childLexiconLookup(DiscreteConjunctiveFeature,int)} + * that actually does the work of looking up the child feature and updating its parent counts. + * + * @param f The feature to look up. + * @param label The label of the example containing this feature, or -1 if we aren't doing per + * class feature counting. + **/ + protected void incrementParentCounts(Feature f, int label) { + int index = f.childLexiconLookup(this, label); + // Increment count while preserving sign to indicate mark for removal. + if (parents.get(index) == 0) + parents.set(index, parentLexicon.contains(f) ? -1 : 1); + else if (parents.get(index) > 0) + parents.increment(index); + else + parents.decrement(index); } - return result; - } - - - /** - * Updates the counts in {@link #parents} for the children of - * f. - * - * @param f The feature to look up. 
- * @param label The label of the example containing this feature, or -1 if - * we aren't doing per class feature counting. - * @return The index of f in this lexicon. - **/ - public int childLexiconLookup(RealConjunctiveFeature f, int label) { - int oldSize = lexiconInv.size(); - int result = lookup(f, true, label); - if (oldSize < lexiconInv.size()) { - incrementParentCounts(f.getLeft(), label); - incrementParentCounts(f.getRight(), label); + + + /** + * Unlike the overridden method in {@link Lexicon}, this method simply checks {@link #lexicon} + * for the feature and will throw an exception if it can't be found. + * + * @param f The feature to look up. + * @return If the feature was found in {@link #lexicon}, its associated integer index is + * returned. + * @throws UnsupportedOperationException If the feature isn't found anywhere in the lexicon. + **/ + public int lookupChild(Feature f) { + lazyMapCreation(); + Integer I = (Integer) lexicon.get(f); + if (I != null) + return I.intValue(); + throw new UnsupportedOperationException( + "When calling ChildLexicon.lookupChild(Feature), the feature must " + + "be present in the lexicon. (" + f + ")"); } - return result; - } - - - /** - * Updates the counts in {@link #parents} for the children of - * f. - * - * @param f The feature to look up. - * @param label The label of the example containing this feature, or -1 if - * we aren't doing per class feature counting. - * @return The index of f in this lexicon. - **/ - public int childLexiconLookup(DiscreteReferrer f, int label) { - int oldSize = lexiconInv.size(); - int result = lookup(f, true, label); - if (oldSize < lexiconInv.size()) - incrementParentCounts(f.getReferent(), label); - return result; - } - - - /** - * Updates the counts in {@link #parents} for the children of - * f. - * - * @param f The feature to look up. - * @param label The label of the example containing this feature, or -1 if - * we aren't doing per class feature counting. 
- * @return The index of f in this lexicon. - **/ - public int childLexiconLookup(RealReferrer f, int label) { - int oldSize = lexiconInv.size(); - int result = lookup(f, true, label); - if (oldSize < lexiconInv.size()) - incrementParentCounts(f.getReferent(), label); - return result; - } - - - /** - * Helper method for methods like - * {@link #childLexiconLookup(DiscreteConjunctiveFeature,int)} that - * actually does the work of looking up the child feature and updating its - * parent counts. - * - * @param f The feature to look up. - * @param label The label of the example containing this feature, or -1 if - * we aren't doing per class feature counting. - **/ - protected void incrementParentCounts(Feature f, int label) { - int index = f.childLexiconLookup(this, label); - // Increment count while preserving sign to indicate mark for removal. - if (parents.get(index) == 0) - parents.set(index, parentLexicon.contains(f) ? -1 : 1); - else if (parents.get(index) > 0) parents.increment(index); - else parents.decrement(index); - } - - - /** - * Unlike the overridden method in {@link Lexicon}, this method simply - * checks {@link #lexicon} for the feature and will throw an exception if - * it can't be found. - * - * @param f The feature to look up. - * @return If the feature was found in {@link #lexicon}, its associated - * integer index is returned. - * @throws UnsupportedOperationException If the feature isn't found - * anywhere in the lexicon. - **/ - public int lookupChild(Feature f) { - lazyMapCreation(); - Integer I = (Integer) lexicon.get(f); - if (I != null) return I.intValue(); - throw - new UnsupportedOperationException( - "When calling ChildLexicon.lookupChild(Feature), the feature must " - + "be present in the lexicon. (" + f + ")"); - } - - - /** - * Writes a binary representation of the lexicon. - * - * @param out The output stream. 
- **/ - public void write(ExceptionlessOutputStream out) { - int size = lexiconInv.size(); - int n = 0; while (n < size && lexiconInv.get(n) != null) ++n; - int i = n; while (i < size && lexiconInv.get(i) == null) ++i; - while (i < size) { - parents.set(n++, parents.get(i++)); - while (i < size && lexiconInv.get(i) == null) ++i; + + + /** + * Writes a binary representation of the lexicon. + * + * @param out The output stream. + **/ + public void write(ExceptionlessOutputStream out) { + int size = lexiconInv.size(); + int n = 0; + while (n < size && lexiconInv.get(n) != null) + ++n; + int i = n; + while (i < size && lexiconInv.get(i) == null) + ++i; + while (i < size) { + parents.set(n++, parents.get(i++)); + while (i < size && lexiconInv.get(i) == null) + ++i; + } + + size = parents.size(); + if (n < size) { + for (i = size - 1; i >= n; --i) + parents.remove(i); + parents = new IVector(parents); + } + + lexiconInv.consolidate(); + lexicon = null; + super.write(out); + parents.write(out); } - size = parents.size(); - if (n < size) { - for (i = size - 1; i >= n; --i) parents.remove(i); - parents = new IVector(parents); + + /** + * Reads a binary representation of the lexicon. + * + * @param in The input stream. + * @param readCounts Whether or not to read the feature counts. + **/ + public void read(ExceptionlessInputStream in, boolean readCounts) { + super.read(in, readCounts); + parents.read(in); } - lexiconInv.consolidate(); - lexicon = null; - super.write(out); - parents.write(out); - } - - - /** - * Reads a binary representation of the lexicon. - * - * @param in The input stream. - * @param readCounts Whether or not to read the feature counts. - **/ - public void read(ExceptionlessInputStream in, boolean readCounts) { - super.read(in, readCounts); - parents.read(in); - } - - - /** - * Produces on STDOUT a table of feature counts including a - * line indicating the position of {@link #pruneCutoff}. 
It's probably not - * a good idea to call this method unless you know your lexicon is small. - * - * @param p Whether or not to include package names in the output. - **/ - public void printCountTable(boolean p) { - featureCounts = parents; - super.printCountTable(p); - featureCounts = null; - } -} + /** + * Produces on STDOUT a table of feature counts + * including a line indicating the position of {@link #pruneCutoff}. It's probably not a good + * idea to call this method unless you know your lexicon is small. + * + * @param p Whether or not to include package names in the output. + **/ + public void printCountTable(boolean p) { + featureCounts = parents; + super.printCountTable(p); + featureCounts = null; + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/IdentityNormalizer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/IdentityNormalizer.java index 8ffd05e3..3babdd93 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/IdentityNormalizer.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/IdentityNormalizer.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.learn; @@ -14,19 +11,19 @@ /** - * This Normalizer simply returns the same ScoreSet - * it was passed as input without modifying anything. 
- * - * @author Nick Rizzolo + * This Normalizer simply returns the same ScoreSet it was passed as input + * without modifying anything. + * + * @author Nick Rizzolo **/ -public class IdentityNormalizer extends Normalizer -{ - /** - * Simply returns the argument. - * - * @param scores The set of scores to normalize. - * @return The normalized set of scores. - **/ - public ScoreSet normalize(ScoreSet scores) { return scores; } +public class IdentityNormalizer extends Normalizer { + /** + * Simply returns the argument. + * + * @param scores The set of scores to normalize. + * @return The normalized set of scores. + **/ + public ScoreSet normalize(ScoreSet scores) { + return scores; + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Learner.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Learner.java index 4381f0c4..1728143b 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Learner.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Learner.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.learn; @@ -19,6 +16,8 @@ import java.io.Serializable; import java.net.URL; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream; import edu.illinois.cs.cogcomp.lbjava.classify.Classifier; import edu.illinois.cs.cogcomp.lbjava.classify.DiscreteFeature; import edu.illinois.cs.cogcomp.lbjava.classify.DiscretePrimitiveStringFeature; @@ -29,1473 +28,1513 @@ import edu.illinois.cs.cogcomp.lbjava.classify.RealFeature; import edu.illinois.cs.cogcomp.lbjava.classify.ScoreSet; import edu.illinois.cs.cogcomp.lbjava.util.ClassUtils; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessInputStream; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessOutputStream; import edu.illinois.cs.cogcomp.lbjava.util.FVector; - /** - * Extend this class to create a new {@link Classifier} that learns to mimic - * one an oracle classifier given a feature extracting classifier and example - * objects. - * - * @author Nick Rizzolo + * Extend this class to create a new {@link Classifier} that learns to mimic one an oracle + * classifier given a feature extracting classifier and example objects. + * + * @author Nick Rizzolo **/ -public abstract class Learner extends Classifier -{ - /** Stores the classifier used to produce labels. */ - protected Classifier labeler; - /** Stores the classifiers used to produce features. */ - protected Classifier extractor; - /** Stores the feature {@link Lexicon}. */ - protected Lexicon lexicon; - /** Stores the label {@link Lexicon}. */ - protected Lexicon labelLexicon; - /** The encoding used by this learner's feature lexicon. 
*/ - protected String encoding; - /** - * Stores the set of predictions that this learner will choose from when - * classifying a new example. - **/ - protected FVector predictions; - /** Caches the location of this learner's offline binary representation. */ - protected URL lcFilePath; - /** Caches the location of this learner's offline lexicon. */ - protected URL lexFilePath; - /** - * Informs this learner that it can and should read its feature lexicon on - * demand. - **/ - protected boolean readLexiconOnDemand; - - - /** - * This constructor is used by the LBJava compiler; it should never be called - * by a programmer. - **/ - protected Learner() { } - - /** - * Initializes the name. - * - * @param n The name of the classifier. - **/ - protected Learner(String n) { - super(n); - lexicon = new Lexicon(); - labelLexicon = new Lexicon(); - predictions = new FVector(); - } - - /** - * Constructor for unsupervised learning. - * - * @param n The name of the classifier. - * @param e The feature extracting classifier. - **/ - protected Learner(String n, Classifier e) { this(n, null, e); } - - /** - * Constructor for supervised learning. - * - * @param n The name of the classifier. - * @param l The labeling classifier. - * @param e The feature extracting classifier. - **/ - protected Learner(String n, Classifier l, Classifier e) { - super(n); - setLabeler(l); - setExtractor(e); - lexicon = new Lexicon(); - labelLexicon = new Lexicon(); - predictions = new FVector(); - } - - - /** - * Sets the values of parameters that control the behavior of this learning - * algorithm. - * - * @param p The parameters. - **/ - public void setParameters(Parameters p) { p.setParameters(this); } - /** Retrieves the parameters that are set in this learner. */ - public Parameters getParameters() { return new Parameters(); } - - - /** - * Sets the labeler. - * - * @param l A labeling classifier. - **/ - public void setLabeler(Classifier l) { labeler = l; } - /** Returns the labeler. 
*/ - public Classifier getLabeler() { return labeler; } - - - /** - * Sets the extractor. - * - * @param e A feature extracting classifier. - **/ - public void setExtractor(Classifier e) { extractor = e; } - /** Returns the extractor. */ - public Classifier getExtractor() { return extractor; } - - - /** - * Sets the feature lexicon. If set to null, the JVM's - * garbage collector is invoked. - * - * @param l A feature lexicon. - **/ - public void setLexicon(Lexicon l) { - lexicon = l; - if (l == null) System.gc(); - else l.setEncoding(encoding); - } - - - /** Returns the feature lexicon. */ - public Lexicon getLexicon() { - demandLexicon(); - return lexicon; - } - - /** Returns the feature lexicon in memory, rather than reading from disk */ - public Lexicon getCurrentLexicon() { - return lexicon; - } - - /** - * Sets the label lexicon. - * - * @param l A feature lexicon. - **/ - public void setLabelLexicon(Lexicon l) { - labelLexicon = l; - if (labelLexicon == null) { - predictions = null; - return; - } - - int N = labelLexicon.size(); - predictions = new FVector(N); - for (int i = 0; i < N; ++i) createPrediction(i); - } - - - /** Returns the label lexicon. */ - public Lexicon getLabelLexicon() { return labelLexicon; } - - - /** - * Sets the encoding to use in this learner's feature lexicon. - * - * @param e The encoding. - **/ - public void setEncoding(String e) { - encoding = e; - lexicon.setEncoding(e); - } - - - /** - * Sets the location of the model as a regular file on this file system. - * - * @param p The file's path. - **/ - public void setModelLocation(String p) { - try { lcFilePath = new URL("file:" + p); } - catch (Exception e) { - System.err.println("ERROR: Can't create URL for file '" + p + "':"); - e.printStackTrace(); - System.exit(1); - } - } - - /** - * Sets the location of the model as a URL. - * - * @param u The model's location. - **/ - public void setModelLocation(URL u) { lcFilePath = u; } - - /** Returns the model's location. 
*/ - public URL getModelLocation() { return lcFilePath; } - - - /** - * Sets the location of the lexicon as a regular file on this file system. - * - * @param p The file's path. - **/ - public void setLexiconLocation(String p) { - try { lexFilePath = new URL("file:" + p); } - catch (Exception e) { - System.err.println("ERROR: Can't create URL for file '" + p + "':"); - e.printStackTrace(); - System.exit(1); - } - } - - /** - * Sets the location of the model as a URL. - * - * @param u The model's location. - **/ - public void setLexiconLocation(URL u) { lexFilePath = u; } - - /** Returns the lexicon's location. */ - public URL getLexiconLocation() { return lexFilePath; } - - - /** - * Establishes a new feature counting policy for this learner's lexicon. - * - * @param policy The new feature counting policy. - **/ - public void countFeatures(Lexicon.CountPolicy policy) { - if (policy == Lexicon.CountPolicy.perClass - && !getOutputType().equals("discrete")) - throw new IllegalArgumentException( - "LBJava ERROR: Learner.countFeatures: Can't do 'per class' feature " - + "counting unless the learner is discrete."); - demandLexicon(); - lexicon.countFeatures(policy); - } - - - /** - * Returns this learner's feature lexicon after discarding any feature - * counts it may have been storing. This method is likely only useful when - * the lexicon and its counts are currently stored on disk and - * {@link #readLexiconOnDemand(String)} or - * {@link #readLexiconOnDemand(URL)} has already been called, in which case - * the lexicon is read from disk without wasting time loading the counts. - **/ - public Lexicon getLexiconDiscardCounts() { - if (readLexiconOnDemand && (lexicon == null || lexicon.size() == 0)) - lexicon = Lexicon.readLexicon(lexFilePath, false); - else lexicon.countFeatures(Lexicon.CountPolicy.none); - return lexicon; - } - - - /** - * Returns a new, emtpy learner into which all of the parameters that - * control the behavior of the algorithm have been copied. 
Here, "emtpy" - * means no learning has taken place. - **/ - public Learner emptyClone() { - Learner clone = (Learner) super.clone(); - clone.forget(); - return clone; - } - - - /** - * Trains the learning algorithm given an object as an example. - * By default, this simply converts the example object into arrays - * and passes it to {@link #learn(int[],double[],int[],double[])}. - * - * @param example An example of the desired learned classifier's behavior. - **/ - public void learn(Object example) { - Object[] exampleArray = getExampleArray(example); - learn((int[]) exampleArray[0], (double[]) exampleArray[1], - (int[]) exampleArray[2], (double[]) exampleArray[3]); - } - - - /** - * Trains the learning algorithm given a feature vector as an example. - * This simply converts the example object into arrays and passes it to - * {@link #learn(int[],double[],int[],double[])}. - * - * @param vector An example of the desired learned classifier's behavior. - **/ - public void learn(FeatureVector vector) { - Classifier saveExtractor = getExtractor(); - Classifier saveLabeler = getLabeler(); - setExtractor(new FeatureVectorReturner()); - setLabeler(new LabelVectorReturner()); - - learn((Object) vector); - - setExtractor(saveExtractor); - setLabeler(saveLabeler); - } - - - /** - * Trains the learning algorithm given an example formatted as - * arrays of feature indices, their values, and the example labels. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @param exampleLabels The example's label(s). - * @param labelValues The values of the labels. - **/ - abstract public void learn(int[] exampleFeatures, double[] exampleValues, - int[] exampleLabels, double[] labelValues); - - - /** - * Trains the learning algorithm given many objects as examples. 
This - * implementation simply calls {@link #learn(Object)} on each of the - * objects in the input array and finishes by calling - * {@link #doneLearning()}. It should be overridden if there is a more - * efficient implementation. - * - * @param examples Examples of the desired learned classifier's behavior. - **/ - public void learn(Object[] examples) { - for (int i = 0; i < examples.length; ++i) - learn(examples[i]); - doneLearning(); - } - - - /** - * Trains the learning algorithm given many feature vectors as examples. - * This implementation simply calls {@link #learn(FeatureVector)} on each - * of the vectors in the input array and finishes by calling - * {@link #doneLearning()}. It should be overridden if there is a more - * efficient implementation. - * - * @param examples Examples of the desired learned classifier's behavior. - **/ - public void learn(FeatureVector[] examples) { - for (int i = 0; i < examples.length; ++i) - learn(examples[i]); - doneLearning(); - } - - - /** - * This method makes one or more decisions about a single object, returning - * those decisions as {@link Feature}s in a vector. - * - * @param example The object to make decisions about. - * @return A vector of {@link Feature}s about the input object. - **/ - public FeatureVector classify(Object example) { - Object[] exampleArray = getExampleArray(example, false); - return classify((int[]) exampleArray[0], (double[]) exampleArray[1]); - } - - - /** - * This method makes one or more decisions about a single feature vector, - * returning those decisions as {@link Feature}s in a vector. - * - * @param vector The vector to make decisions about. - * @return A vector of {@link Feature}s about the input vector. 
- **/ - public FeatureVector classify(FeatureVector vector) { - Classifier saveExtractor = getExtractor(); - Classifier saveLabeler = getLabeler(); - setExtractor(new FeatureVectorReturner()); - setLabeler(new LabelVectorReturner()); - - FeatureVector result = classify((Object) vector); - - setExtractor(saveExtractor); - setLabeler(saveLabeler); - return result; - } - - - /** - * This method makes one or more decisions about a single object, returning - * those decisions as {@link Feature}s in a vector. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @return A vector of {@link Feature}s about the input object. - **/ - abstract public FeatureVector classify(int[] exampleFeatures, - double[] exampleValues); - - - /** - * Use this method to make a batch of classification decisions about - * several objects. This function is implemented in the most naive way - * (simply calling {@link #classify(FeatureVector)} repeatedly) and should - * be overridden if there is a more efficient implementation. - * - * @param vectors The vectors to make decisions about. - * @return An array of feature vectors, one per input vector. - **/ - public FeatureVector[] classify(FeatureVector[] vectors) { - FeatureVector[] result = new FeatureVector[vectors.length]; - for (int i = 0; i < vectors.length; ++i) - result[i] = classify(vectors[i]); - return result; - } - - - /** - * Use this method to make a batch of classification decisions about - * several examples. This function is implemented in the most naive way - * (simply calling {@link #classify(int[],double[])} repeatedly) and should - * be overridden if there is a more efficient implementation. - * - * @param e The examples to make decisions about, represented as arrays of - * indices and strengths. - * @return An array of feature vectors, one per input object. 
- **/ - public FeatureVector[] classify(Object[][] e) { - FeatureVector[] result = new FeatureVector[e.length]; - for (int i = 0; i < e.length; ++i) - result[i] = classify((int[]) e[i][0], (double[]) e[i][1]); - return result; - } - - - /** - * Returns the classification of the given example object as a single - * feature instead of a {@link FeatureVector}. - * - * @param example The object to classify. - * @return The classification of example as a feature. - **/ - public Feature featureValue(Object example) { - Object[] exampleArray = getExampleArray(example, false); - return featureValue((int[]) exampleArray[0], (double[]) exampleArray[1]); - } - - - /** - * Returns the classification of the given feature vector as a single - * feature instead of a {@link FeatureVector}. - * - * @param vector The vector to classify. - * @return The classification of vector as a feature. - **/ - public Feature featureValue(FeatureVector vector) { - Classifier saveExtractor = getExtractor(); - Classifier saveLabeler = getLabeler(); - setExtractor(new FeatureVectorReturner()); - setLabeler(new LabelVectorReturner()); - - Feature result = featureValue((Object) vector); - - setExtractor(saveExtractor); - setLabeler(saveLabeler); - return result; - } - - - /** - * Returns the classification of the given example as a single feature - * instead of a {@link FeatureVector}. - * - * @param f The features array. - * @param v The values array. - * @return The classification of o as a feature. - **/ - public Feature featureValue(int[] f, double[] v) { - throw - new UnsupportedOperationException( - "The featureValue(int[], double[]) method has not been overridden in " - + "class '" + getClass().getName() + "'."); - } - - - /** - * Returns the value of the discrete prediction that this learner would - * make, given an example. - * - * @param example The example object. - * @return The discrete value. 
- **/ - public String discreteValue(Object example) { - Object[] exampleArray = getExampleArray(example, false); - return - discreteValue((int[]) exampleArray[0], (double[]) exampleArray[1]); - } - - - /** - * Returns the value of the discrete prediction that this learner would - * make, given a feature vector. - * - * @param vector The example vector. - * @return The discrete value. - **/ - public String discreteValue(FeatureVector vector) { - Classifier saveExtractor = getExtractor(); - Classifier saveLabeler = getLabeler(); - setExtractor(new FeatureVectorReturner()); - setLabeler(new LabelVectorReturner()); - - String result = discreteValue((Object) vector); - - setExtractor(saveExtractor); - setLabeler(saveLabeler); - return result; - } - - - /** - * Returns the value of the discrete feature that would be returned by this - * classifier. This method should only be called when overridden by a - * classifier returning a single discrete feature. - * - * @param f The features array. - * @param v The values array. - * @return The value of the feature produced for the input object. - **/ - public String discreteValue(int[] f, double[] v) { - throw - new UnsupportedOperationException( - "The discreteValue(Object) method has not been overridden in class '" - + getClass().getName() + "'."); - } - - - /** - * Returns the value of the real prediction that this learner would - * make, given an example. - * - * @param example The example object. - * @return The real value. - **/ - public double realValue(Object example) { - Object[] exampleArray = getExampleArray(example, false); - return realValue((int[])exampleArray[0], (double[])exampleArray[1]); - } - - - /** - * Returns the value of the real prediction that this learner would - * make, given a feature vector. - * - * @param vector The example vector. - * @return The real value. 
- **/ - public double realValue(FeatureVector vector) { - Classifier saveExtractor = getExtractor(); - Classifier saveLabeler = getLabeler(); - setExtractor(new FeatureVectorReturner()); - setLabeler(new LabelVectorReturner()); - - double result = realValue((Object) vector); - - setExtractor(saveExtractor); - setLabeler(saveLabeler); - return result; - } - - - /** - * Returns the value of the real feature that would be returned by this - * classifier. This method should only be called when overridden by a - * classifier returning a single real feature. - * - * @param f The features array. - * @param v The values array. - * @return The value of the feature produced for the input object. - **/ - public double realValue(int[] f, double[] v) { - throw - new UnsupportedOperationException( - "The realValue(Object) method has not been overridden in class '" - + getClass().getName() + "'."); - } - - - /** - * Overridden by subclasses to perform any required post-processing - * computations after all training examples have been observed through - * {@link #learn(Object)} and {@link #learn(Object[])}. By default this - * method does nothing. - **/ - public void doneLearning() { - } - - - /** - * This method is sometimes called before training begins, although it is - * not guaranteed to be called at all. It allows the number of examples - * and number of features to be passed to the Learner, in case this - * information is available, such as after pre-extraction. By default this - * method does nothing. - * - * @param numExamples The number of examples that will be observed during - * training. - * @param numFeatures The number of features that will be observed during - * training. - **/ - public void initialize(int numExamples, int numFeatures) { - } - - - /** Called after each round of training. Does nothing by default. */ - public void doneWithRound() { - } - - - /** - * Converts an example object into an array of arrays representing the - * example including its labels. 
The first array contains the integer keys - * of the example's features, as indexed in the lexicon. The second array - * gives the double values corresponding to the strengths of the features - * in the first array. The third and fourth arrays play the same roles as - * the first and second arrays respectively, except they describe the - * labels. - * - * @param example The example object. - * @return The converted example array. - **/ - public Object[] getExampleArray(Object example) { - return getExampleArray(example, true); - } - - - /** - * Converts an example object into an array of arrays representing the - * example. The first array contains the integer keys of the example's - * features, as indexed in the lexicon. The second array gives the double - * values corresponding to the strengths of the features in the first - * array. The third and fourth arrays will only be present if - * training is set to true. They play the same - * roles as the first and second arrays respectively, except they describe - * the labels. - * - * @param example The example object. - * @param training Whether or not labels should be extracted. - * @return The converted example array. 
- **/ - public Object[] getExampleArray(Object example, boolean training) { - if (example instanceof Object[] - && ((Object[]) example)[0] instanceof int[] - && ((Object[]) example)[1] instanceof double[]) - return (Object[]) example; - - if (readLexiconOnDemand && (lexicon == null || lexicon.size() == 0)) { - readLexicon(lexFilePath); - readLexiconOnDemand = false; - } - - Object[] exampleArray = null; - Lexicon.CountPolicy countPolicy = lexicon.getCountPolicy(); - int labelIndex = -1; - - // Get example labels - if (training) { - FeatureVector labelVector = labeler.classify(example); - int F = labelVector.featuresSize(); - int[] labelArray = new int[F]; - double[] labelValues = new double[F]; - - for (int f = 0; f < F; ++f) { - Feature label = labelVector.getFeature(f); - if (label.isDiscrete()) - labelArray[f] = labelLexicon.lookup(label, true); +public abstract class Learner extends Classifier { + /** Stores the classifier used to produce labels. */ + protected Classifier labeler; + /** Stores the classifiers used to produce features. */ + protected Classifier extractor; + /** Stores the feature {@link Lexicon}. */ + protected Lexicon lexicon; + /** Stores the label {@link Lexicon}. */ + protected Lexicon labelLexicon; + /** The encoding used by this learner's feature lexicon. */ + protected String encoding; + /** + * Stores the set of predictions that this learner will choose from when classifying a new + * example. + **/ + protected FVector predictions; + /** Caches the location of this learner's offline binary representation. */ + protected URL lcFilePath; + /** Caches the location of this learner's offline lexicon. */ + protected URL lexFilePath; + /** + * Informs this learner that it can and should read its feature lexicon on demand. + **/ + protected boolean readLexiconOnDemand; + + /** To use a different score function based on the loss, set this flag. 
*/ + protected boolean lossFlag = false; + + /** The number of candidate examples when a global object is passed here. */ + protected int candidates = 1; + + /** this is set while training. */ + protected boolean intraining = false; + + /** + * This constructor is used by the LBJava compiler; it should never be called by a programmer. + **/ + protected Learner() {} + + /** + * Initializes the name. + * + * @param n The name of the classifier. + **/ + protected Learner(String n) { + super(n); + lexicon = new Lexicon(); + labelLexicon = new Lexicon(); + predictions = new FVector(); + } + + /** + * Constructor for unsupervised learning. + * + * @param n The name of the classifier. + * @param e The feature extracting classifier. + **/ + protected Learner(String n, Classifier e) { + this(n, null, e); + } + + /** + * Constructor for supervised learning. + * + * @param n The name of the classifier. + * @param l The labeling classifier. + * @param e The feature extracting classifier. + **/ + protected Learner(String n, Classifier l, Classifier e) { + super(n); + setLabeler(l); + setExtractor(e); + lexicon = new Lexicon(); + labelLexicon = new Lexicon(); + predictions = new FVector(); + } + + + /** + * Sets the values of parameters that control the behavior of this learning algorithm. + * + * @param p The parameters. + **/ + public void setParameters(Parameters p) { + p.setParameters(this); + } + + public void setLossFlag() { + lossFlag = true; + } + + public void unsetLossFlag() { + lossFlag = false; + } + + public void setCandidates(int a) { + candidates = a; + } + + /** Retrieves the parameters that are set in this learner. */ + public Parameters getParameters() { + return new Parameters(); + } + + + /** + * Sets the labeler. + * + * @param l A labeling classifier. + **/ + public void setLabeler(Classifier l) { + labeler = l; + } + + /** Returns the labeler. */ + public Classifier getLabeler() { + return labeler; + } + + + /** + * Sets the extractor. 
+ * + * @param e A feature extracting classifier. + **/ + public void setExtractor(Classifier e) { + extractor = e; + } + + /** Returns the extractor. */ + public Classifier getExtractor() { + return extractor; + } + + + /** + * Sets the feature lexicon. If set to null, the JVM's garbage collector is + * invoked. + * + * @param l A feature lexicon. + **/ + public void setLexicon(Lexicon l) { + lexicon = l; + if (l == null) + System.gc(); else - labelArray[f] = - labelLexicon.lookup(label.getFeatureKey(labelLexicon), true); - labelValues[f] += label.getStrength(); - createPrediction(labelArray[f]); - } - - exampleArray = new Object[]{ null, null, labelArray, labelValues }; - if (countPolicy == Lexicon.CountPolicy.perClass) - //&& labeler.getOutputType().equals("discrete") && F == 1) - // Don't really want to do this comparison for every example; we'll - // trust the user not to do per class feature counting when it isn't - // true. Plus, the countFeatures(CountPolicy) method in this class - // checks for it. - labelIndex = labelArray[0]; - } - else exampleArray = new Object[2]; - - // Get example features. - FeatureVector featureVector = extractor.classify(example); - int F = featureVector.featuresSize(); - int[] exampleArrayFeatures = new int[F]; - double[] exampleArrayValues = new double[F]; - exampleArray[0] = exampleArrayFeatures; - exampleArray[1] = exampleArrayValues; - - for (int f = 0; f < F; ++f) { - Feature feature = featureVector.getFeature(f); - exampleArrayFeatures[f] = - lexicon.lookup(feature.getFeatureKey(lexicon, training, labelIndex), - training, labelIndex); - exampleArrayValues[f] += feature.getStrength(); - } - - return exampleArray; - } - - public void setReadLexiconOnDemand(){ - readLexiconOnDemand = true; - } - - /** - * If it hasn't been created already, this method will create the - * prediction feature in {@link #predictions} associated with the label - * feature at the given index of {@link #labelLexicon}. 
This method does - * not create {@link RealFeature}s in {@link #predictions} since their - * strengths cannot be modified. In association with - * {@link DiscreteFeature}s it creates a - * {@link DiscretePrimitiveStringFeature} with an empty identifier. Its - * value, valueIndex, and - * totalValues fields are filled by calling the label - * feature's {@link Feature#getStringValue() getStringValue()}, - * {@link Feature#getValueIndex() getValueIndex()}, and - * {@link Feature#totalValues() totalValues()} methods respectively. - * - * @param index The index of a label feature in {@link #labelLexicon}. - **/ - protected void createPrediction(int index) { - createPrediction(labelLexicon, index); - } - - - /** - * If it hasn't been created already, this method will create the - * prediction feature in {@link #predictions} associated with the label - * feature at the given index of lex. This method does - * not create {@link RealFeature}s in {@link #predictions} since their - * strengths cannot be modified. In association with - * {@link DiscreteFeature}s it creates a - * {@link DiscretePrimitiveStringFeature} with an empty identifier. Its - * value, valueIndex, and - * totalValues fields are filled by calling the label - * feature's {@link Feature#getStringValue() getStringValue()}, - * {@link Feature#getValueIndex() getValueIndex()}, and - * {@link Feature#totalValues() totalValues()} methods respectively. - * - * @param lex The label lexicon to associate prediction features with. - * @param index The index of a label feature in lex. 
- **/ - protected void createPrediction(Lexicon lex, int index) { - if (predictions.get(index) != null - || !getOutputType().equals("discrete")) - return; - Feature label = lex.lookupKey(index); - predictions.set(index, - new DiscretePrimitiveStringFeature( - containingPackage, name, "", label.getStringValue(), - label.getValueIndex(), label.totalValues())); - } - - - /** - * Reinitializes the learner to the state it started at before any learning - * was performed. By default, this sets the lexicons to blank Lexicon - * objects and calls {@link #initialize(int,int)} to reset the number of - * examples and features to 0, for learners that use this. - **/ - public void forget() { - lexicon = new Lexicon(encoding); - labelLexicon = new Lexicon(); - predictions = new FVector(); - initialize(0, 0); - readLexiconOnDemand = false; - } - - - /** - * Produces a set of scores indicating the degree to which each possible - * discrete classification value is associated with the given example - * object. Learners that return a real feature or more than - * one feature may implement this method by simply returning - * null. - * - * @param example The object to make decisions about. - * @return A set of scores indicating the degree to which each possible - * discrete classification value is associated with the given - * example object. - **/ - public ScoreSet scores(Object example) { - Object[] exampleArray = getExampleArray(example, false); - return scores((int[])exampleArray[0], (double[])exampleArray[1]); - } - - - /** - * Produces a set of scores indicating the degree to which each possible - * discrete classification value is associated with the given feature - * vector. Learners that return a real feature or more than - * one feature may implement this method by simply returning - * null. - * - * @param vector The vector to make decisions about. 
- * @return A set of scores indicating the degree to which each possible - * discrete classification value is associated with the given - * example vector. - **/ - public ScoreSet scores(FeatureVector vector) { - Classifier saveExtractor = getExtractor(); - Classifier saveLabeler = getLabeler(); - setExtractor(new FeatureVectorReturner()); - setLabeler(new LabelVectorReturner()); - - ScoreSet result = scores((Object) vector); - - setExtractor(saveExtractor); - setLabeler(saveLabeler); - return result; - } - - - /** - * Produces a set of scores indicating the degree to which each possible - * discrete classification value is associated with the given example - * object. Learners that return a real feature or more than - * one feature may implement this method by simply returning - * null. - * - * @param exampleFeatures The example's array of feature indices - * @param exampleValues The example's array of values - * @return A set of scores indicating the degree to which each possible - * discrete classification value is associated with the given - * example object. - **/ - abstract public ScoreSet scores(int[] exampleFeatures, - double[] exampleValues); - - - /** - * Writes the learned function's internal representation as text. - * - * @param out The output stream. - **/ - abstract public void write(PrintStream out); - - - /** - * Automatically generated code will override this method to set their - * isClone field to false. This then allows a - * pure java program to read a learner's representation into any instance - * of the learner's class. By default, this method does nothing. - **/ - public void unclone() { } - - - /** - * Returns the size of the lexicon after any pruning that may have taken - * place or 0 if the lexicon's location isn't known. 
- **/ - public int getPrunedLexiconSize() { - if ((lexicon == null || lexicon.size() == 0) && readLexiconOnDemand) { - ExceptionlessInputStream in = - ExceptionlessInputStream.openCompressedStream(lexFilePath); - int result = Lexicon.readPrunedSize(in); - in.close(); - return result; - } - - return lexicon == null ? 0 : lexicon.getCutoff(); - } - - - /** - * Returns a deep (enough) clone of this learner. The following fields are - * cloned themselves: {@link #lexicon}, {@link #labelLexicon}, and - * {@link #predictions}. - * - *

Note that this is an overriding implementation of - * Object's clone() method, and its functionality - * is completely separate from and unrelated to that of this class's - * {@link #unclone()} method. - **/ - public Object clone() { - Learner result = (Learner) super.clone(); - if (lexicon != null) result.lexicon = (Lexicon) lexicon.clone(); - if (labelLexicon != null) - result.labelLexicon = (Lexicon) labelLexicon.clone(); - if (predictions != null) - result.predictions = (FVector) predictions.clone(); - return result; - } - - - /** - * Writes the binary representation of this learned function if there is a - * location cached in {@link #lcFilePath}, and writes the binary - * representation of the feature lexicon if there is a location cached in - * {@link #lexFilePath}. - **/ - public void save() { - if (lcFilePath != null) saveModel(); - if (lexFilePath != null && lexicon != null && lexicon.size() > 0) - saveLexicon(); - } - - - /** - * Writes the binary representation of this learned function to the - * location specified by {@link #lcFilePath}. If {@link #lcFilePath} is - * not set, this method will produce an error message and exit the program. - **/ - public void saveModel() { - if (lcFilePath == null) { - System.err.println( - "LBJava ERROR: saveModel() called without a cached location"); - new Exception().printStackTrace(); - System.exit(1); - } - - ExceptionlessOutputStream out = - ExceptionlessOutputStream.openCompressedStream(lcFilePath); - write(out); - out.close(); - } - - - /** - * Writes the binary representation of the feature lexicon to the location - * specified by {@link #lexFilePath}. If {@link #lexFilePath} is not set, - * this method will produce an error message and exit the program. 
- **/ - public void saveLexicon() { - if (lexFilePath == null) { - System.err.println( - "LBJava ERROR: saveLexicon() called without a cached location"); - new Exception().printStackTrace(); - System.exit(1); - } - - ExceptionlessOutputStream out = - ExceptionlessOutputStream.openCompressedStream(lexFilePath); - if (lexicon == null) out.writeInt(0); - else lexicon.write(out); - out.close(); - } - - - /** - * Writes the learned function's binary internal represetation including - * both its model and lexicons to the specified files. These files are - * then cached in {@link #lcFilePath} and {@link #lexFilePath}. - * - * @param modelFile The name of the file in which to write the model. - * @param lexFile The name of the file in which to write the feature - * lexicon. - **/ - public void write(String modelFile, String lexFile) { - writeModel(modelFile); - if (lexicon != null && lexicon.size() > 0) writeLexicon(lexFile); - } - - - /** - * Writes only the learned function's model (which includes the label - * lexicon) to the specified file in binary form. This file is then cached - * in {@link #lcFilePath}. - * - * @param filename The name of the file in which to write the model. - **/ - public void writeModel(String filename) { - ExceptionlessOutputStream out = - ExceptionlessOutputStream.openCompressedStream(filename); - write(out); - out.close(); - - try { lcFilePath = new URL("file:" + filename); } - catch (Exception e) { - System.err.println("Error constructing URL:"); - e.printStackTrace(); - System.exit(1); - } - } - - - /** - * Writes the learned function's feature lexicon to the specified file. - * This file is then cached in {@link #lexFilePath}. - * - * @param filename The name of the file in which to write the feature - * lexicon. 
- **/ - public void writeLexicon(String filename) { - ExceptionlessOutputStream out = - ExceptionlessOutputStream.openCompressedStream(filename); - if (lexicon == null) out.writeInt(0); - else lexicon.write(out); - out.close(); - - try { lexFilePath = new URL("file:" + filename); } - catch (Exception e) { - System.err.println("Error constructing URL:"); - e.printStackTrace(); - System.exit(1); - } - } - - - /** - * Writes the learned function's internal representation in binary form. - * - * @param out The output stream. - **/ - public void write(ExceptionlessOutputStream out) { - out.writeString(getClass().getName()); - out.writeString(containingPackage); - out.writeString(name); - out.writeString(encoding); - if (labeler == null) out.writeString(null); - else out.writeString(labeler.getClass().getName()); - if (extractor == null) out.writeString(null); - else out.writeString(extractor.getClass().getName()); - if (labelLexicon == null) out.writeInt(0); - else labelLexicon.write(out); - if (predictions == null) out.writeInt(0); - else predictions.write(out); - } - - - /** - * Reads the learned function's binary internal represetation including - * both its model and lexicons from the specified files, overwriting any - * and all data this object may have already contained. These files are - * then cached in {@link #lcFilePath} and {@link #lexFilePath}. - * - * @param modelFile The name of the file from which to read the model. - * @param lexFile The name of the file from which to read the feature - * lexicon. - **/ - public void read(String modelFile, String lexFile) { - readModel(modelFile); - readLexicon(lexFile); - } - - - /** - * Reads only the learned function's model and label lexicon from the - * specified file in binary form, overwriting whatever model data may have - * already existed in this object. This file is then cached in - * {@link #lcFilePath}. - * - * @param filename The name of the file from which to read the model. 
- **/ - public void readModel(String filename) { - try { readModel(new URL("file:" + filename)); } - catch (Exception e) { - System.err.println("Error constructing URL:"); - e.printStackTrace(); - System.exit(1); - } - } - - - /** - * Reads only the learned function's model and label lexicon from the - * specified location in binary form, overwriting whatever model data may - * have already existed in this object. This location is then cached in - * {@link #lcFilePath}. - * - * @param url The location from which to read the model. - **/ - public void readModel(URL url) { - ExceptionlessInputStream in = - ExceptionlessInputStream.openCompressedStream(url); - String s = in.readString(); - String expected = getClass().getName(); - - if (!s.equals(expected)) { - System.err.println("Error reading model from '" + url + "':"); - System.err.println(" Expected '" + expected + "' but received '" + s - + "'"); - new Exception().printStackTrace(); - in.close(); - System.exit(1); - } - - read(in); - in.close(); - lcFilePath = url; - } - - - /** - * Reads the learned function's feature lexicon from the specified file, - * overwriting the lexicon present in this object, if any. This file is - * then cached in {@link #lexFilePath}. - * - * @param filename The name of the file from which to read the feature - * lexicon. - **/ - public void readLexicon(String filename) { - try { readLexicon(new URL("file:" + filename)); } - catch (Exception e) { - System.err.println("Error constructing URL:"); - e.printStackTrace(); - System.exit(1); - } - } - - - /** - * Reads the learned function's feature lexicon from the specified - * location, overwriting the lexicon present in this object, if any. This - * location is then cached in {@link #lexFilePath}. - * - * @param url The location from which to read the feature lexicon. 
- **/ - public void readLexicon(URL url) { - lexicon = Lexicon.readLexicon(url); - lexFilePath = url; - } - - - /** - * Reads the binary representation of any type of learner (including the - * label lexicon, but not including the feature lexicon) from the given - * file. In that file, there should first be stored a string containing - * the fully qualified class name of the learner. If the short - * value -1 appears instead, this method returns - * null. - * - *

This method is appropriate for reading learners as written by - * {@link #write(ExceptionlessOutputStream)}. - * - * @param filename The name of the file from which to read the learner. - * @return The learner read from the file. - **/ - public static Learner readLearner(String filename) { - return readLearner(filename, true); - } - - - /** - * Reads the binary representation of any type of learner (including the - * label lexicon, but not including the feature lexicon), with the option - * of cutting off the reading process after the label lexicon and before - * any learned parameters. When whole is false, - * the reading process is cut off in this way. - * - *

This method is appropriate for reading learners as written by - * {@link #write(ExceptionlessOutputStream)}. - * - * @param filename The name of the file from which to read the learner. - * @param whole Whether or not to read the whole model. - * @return The learner read from the file. - **/ - public static Learner readLearner(String filename, boolean whole) { - URL url = null; - - try { url = new URL("file:" + filename); } - catch (Exception e) { - System.err.println("Error constructing URL:"); - e.printStackTrace(); - System.exit(1); - } - - return readLearner(url, whole); - } - - - /** - * Reads the binary representation of any type of learner (including the - * label lexicon, but not including the feature lexicon) from the given - * location. At that location, there should first be stored a string - * containing the fully qualified class name of the learner. If the - * short value -1 appears instead, this method returns - * null. Finally, the location is cached in - * {@link #lcFilePath}. - * - *

This method is appropriate for reading learners as written by - * {@link #write(ExceptionlessOutputStream)}. - * - * @param url The location from which to read the learner. - * @return The learner read from the location. - **/ - public static Learner readLearner(URL url) { - return readLearner(url, true); - } - - - /** - * Reads the binary representation of any type of learner (including the - * label lexicon, but not including the feature lexicon), with the option - * of cutting off the reading process after the label lexicon and before - * any learned parameters. When whole is false, - * the reading process is cut off in this way. Finally, the location is - * cached in {@link #lcFilePath}. - * - *

This method is appropriate for reading learners as written by - * {@link #write(ExceptionlessOutputStream)}. - * - * @param url The location from which to read the learner. - * @param whole Whether or not to read the whole model. - * @return The learner read from the location. - **/ - public static Learner readLearner(URL url, boolean whole) { - ExceptionlessInputStream in = - ExceptionlessInputStream.openCompressedStream(url); - Learner result = readLearner(in, whole); - in.close(); - result.lcFilePath = url; - return result; - } - - - /** - * Reads the binary representation of any type of learner (including the - * label lexicon, but not including the feature lexicon) from the given - * stream. The stream is expected to first return a string containing the - * fully qualified class name of the learner. If the short value - * -1 appears instead, this method returns null. - * - *

This method is appropriate for reading learners as written by - * {@link #write(ExceptionlessOutputStream)}. - * - * @param in The input stream. - * @return The learner read from the stream. - **/ - public static Learner readLearner(ExceptionlessInputStream in) { - return readLearner(in, true); - } - - - /** - * Reads the binary representation of any type of learner (including the - * label lexicon, but not including the feature lexicon), with the option - * of cutting off the reading process after the label lexicon and before - * any learned parameters. When whole is false, - * the reading process is cut off in this way. - * - *

This method is appropriate for reading learners as written by - * {@link #write(ExceptionlessOutputStream)}. - * - * @param in The input stream. - * @param whole Whether or not to read the whole model. - * @return The learner read from the stream. - **/ - public static Learner readLearner(ExceptionlessInputStream in, - boolean whole) { - String name = in.readString(); - if (name == null) return null; - Learner result = ClassUtils.getLearner(name); - result.unclone(); - if (whole) result.read(in); // Overridden by decendents - else { - result.readLabelLexicon(in); // Should not be overridden by decendents - Lexicon labelLexicon = result.getLabelLexicon(); - result.forget(); - result.setLabelLexicon(labelLexicon); - } - return result; - } - - - /** - * Reads the binary representation of a learner with this object's run-time - * type, overwriting any and all learned or manually specified parameters - * as well as the label lexicon but without modifying the feature lexicon. - * - * @param in The input stream. - **/ - public void read(ExceptionlessInputStream in) { readLabelLexicon(in); } - - - /** - * Reads the initial portion of the model file, including the containing - * package and name strings, the names of the labeler and extractor, and - * finally the label lexicon. This method will not read any further model - * parameters, however. - * - * @param in The input stream. - **/ - public void readLabelLexicon(ExceptionlessInputStream in) { - containingPackage = in.readString().intern(); - name = in.readString().intern(); - encoding = in.readString(); - if (encoding != null) encoding = encoding.intern(); - String s = in.readString(); - labeler = s == null ? null : ClassUtils.getClassifier(s); - s = in.readString(); - extractor = s == null ? 
null : ClassUtils.getClassifier(s); - labelLexicon = Lexicon.readLexicon(in); - if (predictions == null) predictions = new FVector(); - predictions.read(in); - } - - - /** - * Prepares this learner to read in its feature lexicon from the specified - * location on demand; has no effect if this learner already has a - * non-empty lexicon. - * - * @param file The file from which to read the feature lexicon. - **/ - public void readLexiconOnDemand(String file) { - URL url = null; - - try { url = new URL("file:" + file); } - catch (Exception e) { - System.err.println("Error constructing URL:"); - e.printStackTrace(); - System.exit(1); - } - - readLexiconOnDemand(url); - } - - - /** - * Prepares this learner to read in its feature lexicon from the specified - * location on demand; has no effect if this learner already has a - * non-empty lexicon. - * - * @param url The location from which to read the feature lexicon. - **/ - public void readLexiconOnDemand(URL url) { - lexFilePath = url; - readLexiconOnDemand = true; - } - - - /** - * Forces this learner to read in its lexicon representation, but only if - * the lexicon currently available in this object is empty and the learner - * has been scheduled to read its lexicon on demand with - * {@link #readLexiconOnDemand(URL)}. - * - * @see #readLexiconOnDemand - * @return The lexicon just read into {@link #lexicon}. - **/ - public Lexicon demandLexicon() { - if (readLexiconOnDemand && (lexicon == null || lexicon.size() == 0)) { - readLexicon(lexFilePath); - readLexiconOnDemand = false; - } - return lexicon; - } - - - /** - * Serializes a {@link Learner.Parameters} object to the specified file. - * - * @param p The parameters to serialize. - * @param file The file in which to serialize them. 
- **/ - public static void writeParameters(Parameters p, String file) { - ObjectOutputStream oos = null; - try { - oos = - new ObjectOutputStream( - new BufferedOutputStream( - new FileOutputStream(file))); - } - catch (Exception e) { - System.err.println( - "Can't create object output stream in '" + file + "': " + e); - System.exit(1); - } - - try { oos.writeObject(p); } - catch (Exception e) { - System.err.println( - "Can't write to object output stream in '" + file + "': " + e); - System.exit(1); - } - - try { oos.close(); } - catch (Exception e) { - System.err.println("Can't close object stream in '" + file + "': " + e); - System.exit(1); - } - } - - - /** - * Deserializes a {@link Learner.Parameters} object out of the specified - * locaiton. - * - * @param url The location from which to read the object. - * @return The parameters object. - **/ - public static Parameters readParameters(URL url) { - ObjectInputStream ois = null; - - try { - ois = - new ObjectInputStream( - new BufferedInputStream(url.openStream())); - } - catch (Exception e) { - System.err.println("Can't open '" + url + "' for input: " + e); - System.exit(1); - } + l.setEncoding(encoding); + } + + + /** Returns the feature lexicon. */ + public Lexicon getLexicon() { + demandLexicon(); + return lexicon; + } + + /** Returns the feature lexicon in memory, rather than reading from disk */ + public Lexicon getCurrentLexicon() { + return lexicon; + } + + /** + * Sets the label lexicon. + * + * @param l A feature lexicon. + **/ + public void setLabelLexicon(Lexicon l) { + labelLexicon = l; + if (labelLexicon == null) { + predictions = null; + return; + } + + int N = labelLexicon.size(); + predictions = new FVector(N); + for (int i = 0; i < N; ++i) + createPrediction(i); + } + + + /** Returns the label lexicon. */ + public Lexicon getLabelLexicon() { + return labelLexicon; + } + + + /** + * Sets the encoding to use in this learner's feature lexicon. + * + * @param e The encoding. 
+     **/
+    public void setEncoding(String e) {
+        encoding = e;
+        lexicon.setEncoding(e);
+    }
+
+
+    /**
+     * Sets the location of the model as a regular file on this file system.
+     *
+     * @param p The file's path.
+     **/
+    public void setModelLocation(String p) {
+        try {
+            lcFilePath = new URL("file:" + p);
+        } catch (Exception e) {
+            System.err.println("ERROR: Can't create URL for file '" + p + "':");
+            e.printStackTrace();
+            System.exit(1);
+        }
+    }
+
+    /**
+     * Sets the location of the model as a URL.
+     *
+     * @param u The model's location.
+     **/
+    public void setModelLocation(URL u) {
+        lcFilePath = u;
+    }
+
+    /** Returns the model's location. */
+    public URL getModelLocation() {
+        return lcFilePath;
+    }
+
+    /**
+     * Sets the location of the lexicon as a regular file on this file system.
+     *
+     * @param p The file's path.
+     **/
+    public void setLexiconLocation(String p) {
+        try {
+            lexFilePath = new URL("file:" + p);
+        } catch (Exception e) {
+            System.err.println("ERROR: Can't create URL for file '" + p + "':");
+            e.printStackTrace();
+            System.exit(1);
+        }
+    }
+
+    /**
+     * Sets the location of the lexicon as a URL.
+     *
+     * @param u The lexicon's location.
+     **/
+    public void setLexiconLocation(URL u) {
+        lexFilePath = u;
+    }
+
+    /** Returns the lexicon's location. */
+    public URL getLexiconLocation() {
+        return lexFilePath;
+    }
+
+    /**
+     * Establishes a new feature counting policy for this learner's lexicon.
+     *
+     * @param policy The new feature counting policy.
+     **/
+    public void countFeatures(Lexicon.CountPolicy policy) {
+        if (policy == Lexicon.CountPolicy.perClass && !getOutputType().equals("discrete"))
+            throw new IllegalArgumentException(
+                    "LBJava ERROR: Learner.countFeatures: Can't do 'per class' feature "
+                            + "counting unless the learner is discrete.");
+        demandLexicon();
+        lexicon.countFeatures(policy);
+    }
+
+    /**
+     * Returns this learner's feature lexicon after discarding any feature counts it may have been
+     * storing.
This method is likely only useful when the lexicon and its counts are currently + * stored on disk and {@link #readLexiconOnDemand(String)} or {@link #readLexiconOnDemand(URL)} + * has already been called, in which case the lexicon is read from disk without wasting time + * loading the counts. + **/ + public Lexicon getLexiconDiscardCounts() { + if (readLexiconOnDemand && (lexicon == null || lexicon.size() == 0)) + lexicon = Lexicon.readLexicon(lexFilePath, false); + else + lexicon.countFeatures(Lexicon.CountPolicy.none); + return lexicon; + } - Parameters result = null; + /** + * Returns a new, emtpy learner into which all of the parameters that control the behavior of + * the algorithm have been copied. Here, "emtpy" means no learning has taken place. + **/ + public Learner emptyClone() { + Learner clone = (Learner) super.clone(); + clone.forget(); + return clone; + } + + /** + * Trains the learning algorithm given an object as an example. By default, this simply converts + * the example object into arrays and passes it to {@link #learn(int[],double[],int[],double[])} + * . + * + * @param example An example of the desired learned classifier's behavior. + **/ + public void learn(Object example) { + Object[] exampleArray = getExampleArray(example); + learn((int[]) exampleArray[0], (double[]) exampleArray[1], (int[]) exampleArray[2], + (double[]) exampleArray[3]); + } + + /** + * Trains the learning algorithm given a feature vector as an example. This simply converts the + * example object into arrays and passes it to {@link #learn(int[],double[],int[],double[])}. + * + * @param vector An example of the desired learned classifier's behavior. 
+ **/ + public void learn(FeatureVector vector) { + Classifier saveExtractor = getExtractor(); + Classifier saveLabeler = getLabeler(); + setExtractor(new FeatureVectorReturner()); + setLabeler(new LabelVectorReturner()); + + learn((Object) vector); + + setExtractor(saveExtractor); + setLabeler(saveLabeler); + } + + + /** + * Trains the learning algorithm given an example formatted as arrays of feature indices, their + * values, and the example labels. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @param exampleLabels The example's label(s). + * @param labelValues The values of the labels. + **/ + abstract public void learn(int[] exampleFeatures, double[] exampleValues, int[] exampleLabels, + double[] labelValues); + + + /** + * Trains the learning algorithm given many objects as examples. This implementation simply + * calls {@link #learn(Object)} on each of the objects in the input array and finishes by + * calling {@link #doneLearning()}. It should be overridden if there is a more efficient + * implementation. + * + * @param examples Examples of the desired learned classifier's behavior. + **/ + public void learn(Object[] examples) { + for (int i = 0; i < examples.length; ++i) + learn(examples[i]); + doneLearning(); + } + + + /** + * Trains the learning algorithm given many feature vectors as examples. This implementation + * simply calls {@link #learn(FeatureVector)} on each of the vectors in the input array and + * finishes by calling {@link #doneLearning()}. It should be overridden if there is a more + * efficient implementation. + * + * @param examples Examples of the desired learned classifier's behavior. 
+ **/ + public void learn(FeatureVector[] examples) { + for (int i = 0; i < examples.length; ++i) + learn(examples[i]); + doneLearning(); + } + + + /** + * This method makes one or more decisions about a single object, returning those decisions as + * {@link Feature}s in a vector. + * + * @param example The object to make decisions about. + * @return A vector of {@link Feature}s about the input object. + **/ + public FeatureVector classify(Object example) { + Object[] exampleArray = getExampleArray(example, false); + return classify((int[]) exampleArray[0], (double[]) exampleArray[1]); + } + + + /** + * This method makes one or more decisions about a single feature vector, returning those + * decisions as {@link Feature}s in a vector. + * + * @param vector The vector to make decisions about. + * @return A vector of {@link Feature}s about the input vector. + **/ + public FeatureVector classify(FeatureVector vector) { + Classifier saveExtractor = getExtractor(); + Classifier saveLabeler = getLabeler(); + setExtractor(new FeatureVectorReturner()); + setLabeler(new LabelVectorReturner()); + + FeatureVector result = classify((Object) vector); + + setExtractor(saveExtractor); + setLabeler(saveLabeler); + return result; + } + + + /** + * This method makes one or more decisions about a single object, returning those decisions as + * {@link Feature}s in a vector. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @return A vector of {@link Feature}s about the input object. + **/ + abstract public FeatureVector classify(int[] exampleFeatures, double[] exampleValues); + + + /** + * Use this method to make a batch of classification decisions about several objects. This + * function is implemented in the most naive way (simply calling + * {@link #classify(FeatureVector)} repeatedly) and should be overridden if there is a more + * efficient implementation. 
+ * + * @param vectors The vectors to make decisions about. + * @return An array of feature vectors, one per input vector. + **/ + public FeatureVector[] classify(FeatureVector[] vectors) { + FeatureVector[] result = new FeatureVector[vectors.length]; + for (int i = 0; i < vectors.length; ++i) + result[i] = classify(vectors[i]); + return result; + } + + + /** + * Use this method to make a batch of classification decisions about several examples. This + * function is implemented in the most naive way (simply calling + * {@link #classify(int[],double[])} repeatedly) and should be overridden if there is a more + * efficient implementation. + * + * @param e The examples to make decisions about, represented as arrays of indices and + * strengths. + * @return An array of feature vectors, one per input object. + **/ + public FeatureVector[] classify(Object[][] e) { + FeatureVector[] result = new FeatureVector[e.length]; + for (int i = 0; i < e.length; ++i) + result[i] = classify((int[]) e[i][0], (double[]) e[i][1]); + return result; + } - try { result = (Parameters) ois.readObject(); } - catch (Exception e) { - System.err.println("Can't read from '" + url + "': " + e); - System.exit(1); + + /** + * Returns the classification of the given example object as a single feature instead of a + * {@link FeatureVector}. + * + * @param example The object to classify. + * @return The classification of example as a feature. + **/ + public Feature featureValue(Object example) { + Object[] exampleArray = getExampleArray(example, false); + return featureValue((int[]) exampleArray[0], (double[]) exampleArray[1]); } - try { ois.close(); } - catch (Exception e) { - System.err.println("Can't close '" + url + "': " + e); - System.exit(1); + + /** + * Returns the classification of the given feature vector as a single feature instead of a + * {@link FeatureVector}. + * + * @param vector The vector to classify. + * @return The classification of vector as a feature. 
+ **/ + public Feature featureValue(FeatureVector vector) { + Classifier saveExtractor = getExtractor(); + Classifier saveLabeler = getLabeler(); + setExtractor(new FeatureVectorReturner()); + setLabeler(new LabelVectorReturner()); + + Feature result = featureValue((Object) vector); + + setExtractor(saveExtractor); + setLabeler(saveLabeler); + return result; + } + + + /** + * Returns the classification of the given example as a single feature instead of a + * {@link FeatureVector}. + * + * @param f The features array. + * @param v The values array. + * @return The classification of o as a feature. + **/ + public Feature featureValue(int[] f, double[] v) { + throw new UnsupportedOperationException( + "The featureValue(int[], double[]) method has not been overridden in " + "class '" + + getClass().getName() + "'."); } - - return result; - } - - - /** - * Parameters classes are used to hold values for learning - * algorithm parameters, and all learning algorithm implementations must - * provide a constructor that takes such an object as input. All algorithm - * specific Parameters classes extend this class. - * - * @author Nick Rizzolo - **/ - public static class Parameters implements Serializable - { + + /** - * The number of rounds of training; but wait; this parameter doesn't - * actually affect the behavior of any learners as the number of training - * rounds is specified via other mechanisms. Nonetheless, it comes in - * handy to have it here as a communication vehicle when tuning - * parameters. + * Returns the value of the discrete prediction that this learner would make, given an example. + * + * @param example The example object. + * @return The discrete value. **/ - public int rounds; + public String discreteValue(Object example) { + Object[] exampleArray = getExampleArray(example, false); + return discreteValue((int[]) exampleArray[0], (double[]) exampleArray[1]); + } - /** Sets all the default values. 
*/ - public Parameters() { } + /** + * Returns the value of the discrete prediction that this learner would make, given a feature + * vector. + * + * @param vector The example vector. + * @return The discrete value. + **/ + public String discreteValue(FeatureVector vector) { + Classifier saveExtractor = getExtractor(); + Classifier saveLabeler = getLabeler(); + setExtractor(new FeatureVectorReturner()); + setLabeler(new LabelVectorReturner()); + + String result = discreteValue((Object) vector); - /** Copy constructor. */ - public Parameters(Parameters p) { } + setExtractor(saveExtractor); + setLabeler(saveLabeler); + return result; + } /** - * Calls the appropriate Learner.setParameters(Parameters) - * method for this Parameters object. - * - * @param l The learner whose parameters will be set. + * Returns the value of the discrete feature that would be returned by this classifier. This + * method should only be called when overridden by a classifier returning a single discrete + * feature. + * + * @param f The features array. + * @param v The values array. + * @return The value of the feature produced for the input object. **/ - public void setParameters(Learner l) { - Class c = getClass(); - if (Learner.class.equals(c)) + public String discreteValue(int[] f, double[] v) { throw new UnsupportedOperationException( - "LBJava ERROR: Learner.Parameters.setParameters should never be " - + "called."); - else + "The discreteValue(Object) method has not been overridden in class '" + + getClass().getName() + "'."); + } + + + /** + * Returns the value of the real prediction that this learner would make, given an example. + * + * @param example The example object. + * @return The real value. 
+ **/ + public double realValue(Object example) { + Object[] exampleArray = getExampleArray(example, false); + return realValue((int[]) exampleArray[0], (double[]) exampleArray[1]); + } + + + /** + * Returns the value of the real prediction that this learner would make, given a feature + * vector. + * + * @param vector The example vector. + * @return The real value. + **/ + public double realValue(FeatureVector vector) { + Classifier saveExtractor = getExtractor(); + Classifier saveLabeler = getLabeler(); + setExtractor(new FeatureVectorReturner()); + setLabeler(new LabelVectorReturner()); + + double result = realValue((Object) vector); + + setExtractor(saveExtractor); + setLabeler(saveLabeler); + return result; + } + + + /** + * Returns the value of the real feature that would be returned by this classifier. This method + * should only be called when overridden by a classifier returning a single real feature. + * + * @param f The features array. + * @param v The values array. + * @return The value of the feature produced for the input object. + **/ + public double realValue(int[] f, double[] v) { throw new UnsupportedOperationException( - "LBJava ERROR: " + c.getName() + ".Parameters.setParameters has not " - + "been implemented."); + "The realValue(Object) method has not been overridden in class '" + + getClass().getName() + "'."); + } + + /** + * Start training, this might involve training many models, for cross validation, + * parameter tuning and so on. + **/ + public void beginTraining() { + intraining = true; } + /** - * Creates a string representation of these parameters in which only - * those parameters that differ from their default values are mentioned. + * Overridden by subclasses to perform any required post-processing computations after all + * training examples have been observed through {@link #learn(Object)} and + * {@link #learn(Object[])}. By default this method does nothing. 
**/ - public String nonDefaultString() { return ""; } - } -} + public void doneLearning() {} + + + /** + * Overridden by subclasses to perform any required post-training computations optimizations, + * in particular, feature subset reduction. This default method does nothing. + */ + public void doneTraining() { + if (intraining) { + intraining = false; + } else { + throw new RuntimeException("calling doneLearning without previously calling beginTraining" + + " violates the lifecycle contract. Or perhaps the subclass does not call the superclass " + + "method. Contact the developer."); + } + } + + /** + * This method is sometimes called before training begins, although it is not guaranteed to be + * called at all. It allows the number of examples and number of features to be passed to the + * Learner, in case this information is available, such as after pre-extraction. By default this + * method does nothing. + * + * @param numExamples The number of examples that will be observed during training. + * @param numFeatures The number of features that will be observed during training. + **/ + public void initialize(int numExamples, int numFeatures) {} + + + /** Called after each round of training. Does nothing by default. */ + public void doneWithRound() {} + + + /** + * Converts an example object into an array of arrays representing the example including its + * labels. The first array contains the integer keys of the example's features, as indexed in + * the lexicon. The second array gives the double values corresponding to the strengths of the + * features in the first array. The third and fourth arrays play the same roles as the first and + * second arrays respectively, except they describe the labels. + * + * @param example The example object. + * @return The converted example array. 
+ **/ + public Object[] getExampleArray(Object example) { + return getExampleArray(example, true); + } + + + /** + * Converts an example object into an array of arrays representing the example. The first array + * contains the integer keys of the example's features, as indexed in the lexicon. The second + * array gives the double values corresponding to the strengths of the features in the first + * array. The third and fourth arrays will only be present if training is set to + * true. They play the same roles as the first and second arrays respectively, + * except they describe the labels. + * + * @param example The example object. + * @param training Whether or not labels should be extracted. + * @return The converted example array. + **/ + public Object[] getExampleArray(Object example, boolean training) { + if (example instanceof Object[] && ((Object[]) example)[0] instanceof int[] + && ((Object[]) example)[1] instanceof double[]) + return (Object[]) example; + + if (readLexiconOnDemand && (lexicon == null || lexicon.size() == 0)) { + readLexicon(lexFilePath); + readLexiconOnDemand = false; + } + + Object[] exampleArray = null; + Lexicon.CountPolicy countPolicy = lexicon.getCountPolicy(); + int labelIndex = -1; + + // Get example labels + if (training) { + FeatureVector labelVector = labeler.classify(example); + int F = labelVector.featuresSize(); + int[] labelArray = new int[F]; + double[] labelValues = new double[F]; + + for (int f = 0; f < F; ++f) { + Feature label = labelVector.getFeature(f); + if (label.isDiscrete()) + labelArray[f] = labelLexicon.lookup(label, true); + else + labelArray[f] = labelLexicon.lookup(label.getFeatureKey(labelLexicon), true); + labelValues[f] += label.getStrength(); + createPrediction(labelArray[f]); + } + + exampleArray = new Object[] {null, null, labelArray, labelValues}; + if (countPolicy == Lexicon.CountPolicy.perClass) + // && labeler.getOutputType().equals("discrete") && F == 1) + // Don't really want to do this comparison 
for every example; we'll + // trust the user not to do per class feature counting when it isn't + // true. Plus, the countFeatures(CountPolicy) method in this class + // checks for it. + labelIndex = labelArray[0]; + } else + exampleArray = new Object[2]; + + // Get example features. + FeatureVector featureVector = extractor.classify(example); + int F = featureVector.featuresSize(); + int[] exampleArrayFeatures = new int[F]; + double[] exampleArrayValues = new double[F]; + exampleArray[0] = exampleArrayFeatures; + exampleArray[1] = exampleArrayValues; + + for (int f = 0; f < F; ++f) { + Feature feature = featureVector.getFeature(f); + exampleArrayFeatures[f] = + lexicon.lookup(feature.getFeatureKey(lexicon, training, labelIndex), training, + labelIndex); + exampleArrayValues[f] += feature.getStrength(); + } + + return exampleArray; + } + + public void setReadLexiconOnDemand() { + readLexiconOnDemand = true; + } + + /** + * If it hasn't been created already, this method will create the prediction feature in + * {@link #predictions} associated with the label feature at the given index of + * {@link #labelLexicon}. This method does not create {@link RealFeature}s in + * {@link #predictions} since their strengths cannot be modified. In association with + * {@link DiscreteFeature}s it creates a {@link DiscretePrimitiveStringFeature} with an empty + * identifier. Its value, valueIndex, and totalValues + * fields are filled by calling the label feature's {@link Feature#getStringValue() + * getStringValue()}, {@link Feature#getValueIndex() getValueIndex()}, and + * {@link Feature#totalValues() totalValues()} methods respectively. + * + * @param index The index of a label feature in {@link #labelLexicon}. 
+ **/ + protected void createPrediction(int index) { + createPrediction(labelLexicon, index); + } + + + /** + * If it hasn't been created already, this method will create the prediction feature in + * {@link #predictions} associated with the label feature at the given index of lex + * . This method does not create {@link RealFeature}s in {@link #predictions} since their + * strengths cannot be modified. In association with {@link DiscreteFeature}s it creates a + * {@link DiscretePrimitiveStringFeature} with an empty identifier. Its value, + * valueIndex, and totalValues fields are filled by calling the label + * feature's {@link Feature#getStringValue() getStringValue()}, {@link Feature#getValueIndex() + * getValueIndex()}, and {@link Feature#totalValues() totalValues()} methods respectively. + * + * @param lex The label lexicon to associate prediction features with. + * @param index The index of a label feature in lex. + **/ + protected void createPrediction(Lexicon lex, int index) { + if (predictions.get(index) != null || !getOutputType().equals("discrete")) + return; + Feature label = lex.lookupKey(index); + predictions.set(index, new DiscretePrimitiveStringFeature(containingPackage, name, "", + label.getStringValue(), label.getValueIndex(), label.totalValues())); + } + + + /** + * Reinitializes the learner to the state it started at before any learning was performed. By + * default, this sets the lexicons to blank Lexicon objects and calls + * {@link #initialize(int,int)} to reset the number of examples and features to 0, for learners + * that use this. + **/ + public void forget() { + lexicon = new Lexicon(encoding); + labelLexicon = new Lexicon(); + predictions = new FVector(); + initialize(0, 0); + readLexiconOnDemand = false; + } + + + /** + * Produces a set of scores indicating the degree to which each possible discrete classification + * value is associated with the given example object. 
Learners that return a real + * feature or more than one feature may implement this method by simply returning + * null. + * + * @param example The object to make decisions about. + * @return A set of scores indicating the degree to which each possible discrete classification + * value is associated with the given example object. + **/ + public ScoreSet scores(Object example) { + Object[] exampleArray = getExampleArray(example, false); + ScoreSet resultS = scores((int[]) exampleArray[0], (double[]) exampleArray[1]); + if (!lossFlag) + return resultS; + else + return scoresAugmented(example, resultS); + } + + /** + * Update the score of each binary variable (label) based on the gold value of each example for + * that variable. When using a {@code SparseNetworkLearner} to keep the model there is an LTU + * for each label. If the gold is same as a specific label then its binary value for that label + * is 1 and the score for that label will be {@code oldScore - lossOffset}; otherwise it will be + * 0 and the score will be {@code oldScore + lossOffset}. + * + * @param example The object to make decisions about. + * @param resultS The original scores (see {@link #scores(Object)}). + * @return The augmented set of scores. 
+ */ + public ScoreSet scoresAugmented(Object example, ScoreSet resultS) { + ScoreSet augmentedScores = new ScoreSet(); + Lexicon lLexicon = getLabelLexicon(); + String gold = getLabeler().discreteValue(example); + for (int i = 0; i < lLexicon.size(); i++) { + String candidate = lLexicon.lookupKey(i).getStringValue(); + double originalScore = resultS.getScore(candidate).score; + double lossOffset = 1 / (double) (candidates); + if (candidate.equals(gold)) + augmentedScores.put(candidate, originalScore - lossOffset); + else + augmentedScores.put(candidate, originalScore + lossOffset); + } + + return augmentedScores; + } + + /** + * Produces a set of scores indicating the degree to which each possible discrete classification + * value is associated with the given feature vector. Learners that return a real + * feature or more than one feature may implement this method by simply returning + * null. + * + * @param vector The vector to make decisions about. + * @return A set of scores indicating the degree to which each possible discrete classification + * value is associated with the given example vector. + **/ + public ScoreSet scores(FeatureVector vector) { + Classifier saveExtractor = getExtractor(); + Classifier saveLabeler = getLabeler(); + setExtractor(new FeatureVectorReturner()); + setLabeler(new LabelVectorReturner()); + + ScoreSet result = scores((Object) vector); + + setExtractor(saveExtractor); + setLabeler(saveLabeler); + return result; + } + + + /** + * Produces a set of scores indicating the degree to which each possible discrete classification + * value is associated with the given example object. Learners that return a real + * feature or more than one feature may implement this method by simply returning + * null. 
+ * + * @param exampleFeatures The example's array of feature indices + * @param exampleValues The example's array of values + * @return A set of scores indicating the degree to which each possible discrete classification + * value is associated with the given example object. + **/ + abstract public ScoreSet scores(int[] exampleFeatures, double[] exampleValues); + + + /** + * Writes the learned function's internal representation as text. + * + * @param out The output stream. + **/ + abstract public void write(PrintStream out); + + + /** + * Automatically generated code will override this method to set their isClone + * field to false. This then allows a pure java program to read a learner's + * representation into any instance of the learner's class. By default, this method does + * nothing. + **/ + public void unclone() {} + + + /** + * Returns the size of the lexicon after any pruning that may have taken place or 0 if the + * lexicon's location isn't known. + **/ + public int getPrunedLexiconSize() { + if ((lexicon == null || lexicon.size() == 0) && readLexiconOnDemand) { + ExceptionlessInputStream in = + ExceptionlessInputStream.openCompressedStream(lexFilePath); + int result = Lexicon.readPrunedSize(in); + in.close(); + return result; + } + + return lexicon == null ? 0 : lexicon.getCutoff(); + } + + + /** + * Returns a deep (enough) clone of this learner. The following fields are cloned themselves: + * {@link #lexicon}, {@link #labelLexicon}, and {@link #predictions}. + * + *

+ * Note that this is an overriding implementation of Object's clone() + * method, and its functionality is completely separate from and unrelated to that of this + * class's {@link #unclone()} method. + **/ + public Object clone() { + Learner result = (Learner) super.clone(); + if (lexicon != null) + result.lexicon = (Lexicon) lexicon.clone(); + if (labelLexicon != null) + result.labelLexicon = (Lexicon) labelLexicon.clone(); + if (predictions != null) + result.predictions = (FVector) predictions.clone(); + return result; + } + + + /** + * Writes the binary representation of this learned function if there is a location cached in + * {@link #lcFilePath}, and writes the binary representation of the feature lexicon if there is + * a location cached in {@link #lexFilePath}. + **/ + public void save() { + if (lcFilePath != null) + saveModel(); + if (lexFilePath != null && lexicon != null && lexicon.size() > 0) + saveLexicon(); + } + + + /** + * Writes the binary representation of this learned function to the location specified by + * {@link #lcFilePath}. If {@link #lcFilePath} is not set, this method will produce an error + * message and exit the program. + **/ + public void saveModel() { + if (lcFilePath == null) { + System.err.println("LBJava ERROR: saveModel() called without a cached location"); + new Exception().printStackTrace(); + System.exit(1); + } + + ExceptionlessOutputStream out = ExceptionlessOutputStream.openCompressedStream(lcFilePath); + write(out); + out.close(); + } + + + /** + * Writes the binary representation of the feature lexicon to the location specified by + * {@link #lexFilePath}. If {@link #lexFilePath} is not set, this method will produce an error + * message and exit the program. 
+ **/ + public void saveLexicon() { + if (lexFilePath == null) { + System.err.println("LBJava ERROR: saveLexicon() called without a cached location"); + new Exception().printStackTrace(); + System.exit(1); + } + + ExceptionlessOutputStream out = ExceptionlessOutputStream.openCompressedStream(lexFilePath); + if (lexicon == null) + out.writeInt(0); + else + lexicon.write(out); + out.close(); + } + + + /** + * Writes the learned function's binary internal represetation including both its model and + * lexicons to the specified files. These files are then cached in {@link #lcFilePath} and + * {@link #lexFilePath}. + * + * @param modelFile The name of the file in which to write the model. + * @param lexFile The name of the file in which to write the feature lexicon. + **/ + public void write(String modelFile, String lexFile) { + writeModel(modelFile); + if (lexicon != null && lexicon.size() > 0) + writeLexicon(lexFile); + } + + + /** + * Writes only the learned function's model (which includes the label lexicon) to the specified + * file in binary form. This file is then cached in {@link #lcFilePath}. + * + * @param filename The name of the file in which to write the model. + **/ + public void writeModel(String filename) { + ExceptionlessOutputStream out = ExceptionlessOutputStream.openCompressedStream(filename); + write(out); + out.close(); + + try { + lcFilePath = new URL("file:" + filename); + } catch (Exception e) { + System.err.println("Error constructing URL:"); + e.printStackTrace(); + System.exit(1); + } + } + + + /** + * Writes the learned function's feature lexicon to the specified file. This file is then cached + * in {@link #lexFilePath}. + * + * @param filename The name of the file in which to write the feature lexicon. 
+ **/ + public void writeLexicon(String filename) { + ExceptionlessOutputStream out = ExceptionlessOutputStream.openCompressedStream(filename); + if (lexicon == null) + out.writeInt(0); + else + lexicon.write(out); + out.close(); + + try { + lexFilePath = new URL("file:" + filename); + } catch (Exception e) { + System.err.println("Error constructing URL:"); + e.printStackTrace(); + System.exit(1); + } + } + + + /** + * Writes the learned function's internal representation in binary form. + * + * @param out The output stream. + **/ + public void write(ExceptionlessOutputStream out) { + out.writeString(getClass().getName()); + out.writeString(containingPackage); + out.writeString(name); + out.writeString(encoding); + if (labeler == null) + out.writeString(null); + else + out.writeString(labeler.getClass().getName()); + if (extractor == null) + out.writeString(null); + else + out.writeString(extractor.getClass().getName()); + if (labelLexicon == null) + out.writeInt(0); + else + labelLexicon.write(out); + if (predictions == null) + out.writeInt(0); + else + predictions.write(out); + } + + + /** + * Reads the learned function's binary internal represetation including both its model and + * lexicons from the specified files, overwriting any and all data this object may have already + * contained. These files are then cached in {@link #lcFilePath} and {@link #lexFilePath}. + * + * @param modelFile The name of the file from which to read the model. + * @param lexFile The name of the file from which to read the feature lexicon. + **/ + public void read(String modelFile, String lexFile) { + readModel(modelFile); + readLexicon(lexFile); + } + + + /** + * Reads only the learned function's model and label lexicon from the specified file in binary + * form, overwriting whatever model data may have already existed in this object. This file is + * then cached in {@link #lcFilePath}. + * + * @param filename The name of the file from which to read the model. 
+ **/ + public void readModel(String filename) { + try { + readModel(new URL("file:" + filename)); + } catch (Exception e) { + System.err.println("Error constructing URL:"); + e.printStackTrace(); + System.exit(1); + } + } + + + /** + * Reads only the learned function's model and label lexicon from the specified location in + * binary form, overwriting whatever model data may have already existed in this object. This + * location is then cached in {@link #lcFilePath}. + * + * @param url The location from which to read the model. + **/ + public void readModel(URL url) { + ExceptionlessInputStream in = ExceptionlessInputStream.openCompressedStream(url); + String s = in.readString(); + String expected = getClass().getName(); + + if (!s.equals(expected)) { + System.err.println("Error reading model from '" + url + "':"); + System.err.println(" Expected '" + expected + "' but received '" + s + "'"); + new Exception().printStackTrace(); + in.close(); + System.exit(1); + } + + read(in); + in.close(); + lcFilePath = url; + } + + + /** + * Reads the learned function's feature lexicon from the specified file, overwriting the lexicon + * present in this object, if any. This file is then cached in {@link #lexFilePath}. + * + * @param filename The name of the file from which to read the feature lexicon. + **/ + public void readLexicon(String filename) { + try { + readLexicon(new URL("file:" + filename)); + } catch (Exception e) { + System.err.println("Error constructing URL:"); + e.printStackTrace(); + System.exit(1); + } + } + + + /** + * Reads the learned function's feature lexicon from the specified location, overwriting the + * lexicon present in this object, if any. This location is then cached in {@link #lexFilePath}. + * + * @param url The location from which to read the feature lexicon. 
+ **/ + public void readLexicon(URL url) { + lexicon = Lexicon.readLexicon(url); + lexFilePath = url; + } + + + /** + * Reads the binary representation of any type of learner (including the label lexicon, but not + * including the feature lexicon) from the given file. In that file, there should first be + * stored a string containing the fully qualified class name of the learner. If the short + * value -1 appears instead, this method returns null. + * + *

+ * This method is appropriate for reading learners as written by + * {@link #write(ExceptionlessOutputStream)}. + * + * @param filename The name of the file from which to read the learner. + * @return The learner read from the file. + **/ + public static Learner readLearner(String filename) { + return readLearner(filename, true); + } + + + /** + * Reads the binary representation of any type of learner (including the label lexicon, but not + * including the feature lexicon), with the option of cutting off the reading process after the + * label lexicon and before any learned parameters. When whole is + * false, the reading process is cut off in this way. + * + *

+ * This method is appropriate for reading learners as written by + * {@link #write(ExceptionlessOutputStream)}. + * + * @param filename The name of the file from which to read the learner. + * @param whole Whether or not to read the whole model. + * @return The learner read from the file. + **/ + public static Learner readLearner(String filename, boolean whole) { + URL url = null; + + try { + url = new URL("file:" + filename); + } catch (Exception e) { + System.err.println("Error constructing URL:"); + e.printStackTrace(); + System.exit(1); + } + + return readLearner(url, whole); + } + + + /** + * Reads the binary representation of any type of learner (including the label lexicon, but not + * including the feature lexicon) from the given location. At that location, there should first + * be stored a string containing the fully qualified class name of the learner. If the + * short value -1 appears instead, this method returns null. + * Finally, the location is cached in {@link #lcFilePath}. + * + *

+ * This method is appropriate for reading learners as written by + * {@link #write(ExceptionlessOutputStream)}. + * + * @param url The location from which to read the learner. + * @return The learner read from the location. + **/ + public static Learner readLearner(URL url) { + return readLearner(url, true); + } + + + /** + * Reads the binary representation of any type of learner (including the label lexicon, but not + * including the feature lexicon), with the option of cutting off the reading process after the + * label lexicon and before any learned parameters. When whole is + * false, the reading process is cut off in this way. Finally, the location is + * cached in {@link #lcFilePath}. + * + *

+ * This method is appropriate for reading learners as written by + * {@link #write(ExceptionlessOutputStream)}. + * + * @param url The location from which to read the learner. + * @param whole Whether or not to read the whole model. + * @return The learner read from the location. + **/ + public static Learner readLearner(URL url, boolean whole) { + ExceptionlessInputStream in = ExceptionlessInputStream.openCompressedStream(url); + Learner result = readLearner(in, whole); + in.close(); + result.lcFilePath = url; + return result; + } + + + /** + * Reads the binary representation of any type of learner (including the label lexicon, but not + * including the feature lexicon) from the given stream. The stream is expected to first return + * a string containing the fully qualified class name of the learner. If the short value + * -1 appears instead, this method returns null. + * + *

+ * This method is appropriate for reading learners as written by + * {@link #write(ExceptionlessOutputStream)}. + * + * @param in The input stream. + * @return The learner read from the stream. + **/ + public static Learner readLearner(ExceptionlessInputStream in) { + return readLearner(in, true); + } + + + /** + * Reads the binary representation of any type of learner (including the label lexicon, but not + * including the feature lexicon), with the option of cutting off the reading process after the + * label lexicon and before any learned parameters. When whole is + * false, the reading process is cut off in this way. + * + *

+ * This method is appropriate for reading learners as written by + * {@link #write(ExceptionlessOutputStream)}. + * + * @param in The input stream. + * @param whole Whether or not to read the whole model. + * @return The learner read from the stream. + **/ + public static Learner readLearner(ExceptionlessInputStream in, boolean whole) { + String name = in.readString(); + if (name == null) + return null; + Learner result = ClassUtils.getLearner(name); + result.unclone(); + if (whole) + result.read(in); // Overridden by descendants + else { + result.readLabelLexicon(in); // Should not be overridden by descendants + Lexicon labelLexicon = result.getLabelLexicon(); + result.forget(); + result.setLabelLexicon(labelLexicon); + } + return result; + } + + + /** + * Reads the binary representation of a learner with this object's run-time type, overwriting + * any and all learned or manually specified parameters as well as the label lexicon but without + * modifying the feature lexicon. + * + * @param in The input stream. + **/ + public void read(ExceptionlessInputStream in) { + readLabelLexicon(in); + } + + + /** + * Reads the initial portion of the model file, including the containing package and name + * strings, the names of the labeler and extractor, and finally the label lexicon. This method + * will not read any further model parameters, however. + * + * @param in The input stream. + **/ + public void readLabelLexicon(ExceptionlessInputStream in) { + containingPackage = in.readString().intern(); + name = in.readString().intern(); + encoding = in.readString(); + if (encoding != null) + encoding = encoding.intern(); + String s = in.readString(); + labeler = s == null ? null : ClassUtils.getClassifier(s); + s = in.readString(); + extractor = s == null ? 
null : ClassUtils.getClassifier(s); + labelLexicon = Lexicon.readLexicon(in); + if (predictions == null) + predictions = new FVector(); + predictions.read(in); + } + + + /** + * Prepares this learner to read in its feature lexicon from the specified location on demand; + * has no effect if this learner already has a non-empty lexicon. + * + * @param file The file from which to read the feature lexicon. + **/ + public void readLexiconOnDemand(String file) { + URL url = null; + + try { + url = new URL("file:" + file); + } catch (Exception e) { + System.err.println("Error constructing URL:"); + e.printStackTrace(); + System.exit(1); + } + + readLexiconOnDemand(url); + } + + + /** + * Prepares this learner to read in its feature lexicon from the specified location on demand; + * has no effect if this learner already has a non-empty lexicon. + * + * @param url The location from which to read the feature lexicon. + **/ + public void readLexiconOnDemand(URL url) { + lexFilePath = url; + readLexiconOnDemand = true; + } + + + /** + * Forces this learner to read in its lexicon representation, but only if the lexicon currently + * available in this object is empty and the learner has been scheduled to read its lexicon on + * demand with {@link #readLexiconOnDemand(URL)}. + * + * @see #readLexiconOnDemand + * @return The lexicon just read into {@link #lexicon}. + **/ + public Lexicon demandLexicon() { + if (readLexiconOnDemand && (lexicon == null || lexicon.size() == 0)) { + readLexicon(lexFilePath); + readLexiconOnDemand = false; + } + return lexicon; + } + + + /** + * Serializes a {@link Learner.Parameters} object to the specified file. + * + * @param p The parameters to serialize. + * @param file The file in which to serialize them. 
+ **/ + public static void writeParameters(Parameters p, String file) { + ObjectOutputStream oos = null; + try { + oos = new ObjectOutputStream(new BufferedOutputStream(new FileOutputStream(file))); + } catch (Exception e) { + System.err.println("Can't create object output stream in '" + file + "': " + e); + System.exit(1); + } + + try { + oos.writeObject(p); + } catch (Exception e) { + System.err.println("Can't write to object output stream in '" + file + "': " + e); + System.exit(1); + } + + try { + oos.close(); + } catch (Exception e) { + System.err.println("Can't close object stream in '" + file + "': " + e); + System.exit(1); + } + } + + + /** + * Deserializes a {@link Learner.Parameters} object out of the specified location. + * + * @param url The location from which to read the object. + * @return The parameters object. + **/ + public static Parameters readParameters(URL url) { + ObjectInputStream ois = null; + + try { + ois = new ObjectInputStream(new BufferedInputStream(url.openStream())); + } catch (Exception e) { + System.err.println("Can't open '" + url + "' for input: " + e); + System.exit(1); + } + + Parameters result = null; + + try { + result = (Parameters) ois.readObject(); + } catch (Exception e) { + System.err.println("Can't read from '" + url + "': " + e); + System.exit(1); + } + + try { + ois.close(); + } catch (Exception e) { + System.err.println("Can't close '" + url + "': " + e); + System.exit(1); + } + + return result; + } + + + /** + * Parameters classes are used to hold values for learning algorithm parameters, + * and all learning algorithm implementations must provide a constructor that takes such an + * object as input. All algorithm specific Parameters classes extend this class. 
+ * + * @author Nick Rizzolo + **/ + public static class Parameters implements Serializable { + /** + * The number of rounds of training; but wait; this parameter doesn't actually affect the + * behavior of any learners as the number of training rounds is specified via other + * mechanisms. Nonetheless, it comes in handy to have it here as a communication vehicle + * when tuning parameters. + **/ + public int rounds; + + + /** Sets all the default values. */ + public Parameters() {} + + /** Copy constructor. */ + public Parameters(Parameters p) {} + + + /** + * Calls the appropriate Learner.setParameters(Parameters) method for this + * Parameters object. + * + * @param l The learner whose parameters will be set. + **/ + public void setParameters(Learner l) { + Class c = getClass(); + if (Learner.class.equals(c)) + throw new UnsupportedOperationException( + "LBJava ERROR: Learner.Parameters.setParameters should never be " + + "called."); + else + throw new UnsupportedOperationException("LBJava ERROR: " + c.getName() + + ".Parameters.setParameters has not " + "been implemented."); + } + + + /** + * Creates a string representation of these parameters in which only those parameters that + * differ from their default values are mentioned. + **/ + public String nonDefaultString() { + return ""; + } + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/LearnerToText.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/LearnerToText.java index df02bc93..e677797d 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/LearnerToText.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/LearnerToText.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. 
See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.learn; @@ -17,50 +14,42 @@ /** - * This extremely simple class can be used to print a textual representation - * of a trained learner to STDOUT. This is achieved with the - * following line of code: - * - *

learner.write(System.out);
- * - *

Usage

- *
- * - * java edu.illinois.cs.cogcomp.lbjava.learn.LearnerToText <learner> - * - *
- * - *

Input

- * The <learner> parameter must be a fully qualified class - * name (e.g. myPackage.myName) referring to a class that - * extends {@link Learner}. Every learning classifier specified in an LBJava - * source code satisfies this requirement. - * - *

Output

- * A textual representation of the specified learning classifier is produced - * on STDOUT. - * - * @author Nick Rizzolo + * This extremely simple class can be used to print a textual representation of a trained learner to + * STDOUT. This is achieved with the following line of code: + * + *
learner.write(System.out);
+ * + *

Usage

+ * java edu.illinois.cs.cogcomp.lbjava.learn.LearnerToText <learner> + *
+ * + *

Input

The <learner> parameter must be a fully qualified class name + * (e.g. myPackage.myName) referring to a class that extends {@link Learner}. Every + * learning classifier specified in an LBJava source code satisfies this requirement. + * + *

Output

A textual representation of the specified learning classifier is produced on + * STDOUT. + * + * @author Nick Rizzolo **/ -public class LearnerToText -{ - public static void main(String[] args) { - String learnerName = null; +public class LearnerToText { + public static void main(String[] args) { + String learnerName = null; - try { - learnerName = args[0]; - if (args.length > 1) throw new Exception(); - } - catch (Exception e) { - System.err.println("usage: java edu.illinois.cs.cogcomp.lbjava.learn.LearnerToText "); - System.exit(1); - } + try { + learnerName = args[0]; + if (args.length > 1) + throw new Exception(); + } catch (Exception e) { + System.err + .println("usage: java edu.illinois.cs.cogcomp.lbjava.learn.LearnerToText "); + System.exit(1); + } - Learner learner = ClassUtils.getLearner(learnerName); - learner.demandLexicon(); - PrintStream out = new PrintStream(new BufferedOutputStream(System.out)); - learner.write(out); - out.close(); - } + Learner learner = ClassUtils.getLearner(learnerName); + learner.demandLexicon(); + PrintStream out = new PrintStream(new BufferedOutputStream(System.out)); + learner.write(out); + out.close(); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Lexicon.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Lexicon.java index a037c8bf..bb55b6a7 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Lexicon.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Lexicon.java @@ -1,1189 +1,1215 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.learn; import java.io.Serializable; import java.net.URL; +import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.Map; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.*; +import edu.illinois.cs.cogcomp.lbjava.classify.DiscreteConjunctiveFeature; import edu.illinois.cs.cogcomp.lbjava.classify.Feature; +import edu.illinois.cs.cogcomp.lbjava.classify.RealConjunctiveFeature; import edu.illinois.cs.cogcomp.lbjava.util.ByteString; import edu.illinois.cs.cogcomp.lbjava.util.ClassUtils; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessInputStream; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessOutputStream; import edu.illinois.cs.cogcomp.lbjava.util.FVector; -import edu.illinois.cs.cogcomp.lbjava.util.IVector; -import edu.illinois.cs.cogcomp.lbjava.util.IVector2D; -import edu.illinois.cs.cogcomp.lbjava.util.Sort; import edu.illinois.cs.cogcomp.lbjava.util.TableFormat; /** - * A Lexicon contains a mapping from {@link Feature}s to - * integers. The integer key of a feature is returned by the - * {@link #lookup(Feature)} method. If the feature is not already in the - * lexicon, then it will be added to the lexicon, and thus lookup calls can - * be made without the need to check if an entry already exists. The integer - * keys are incremented in ascending order starting from 0 as features are - * added to the lexicon. - * - *

The map is implemented as a HashMap by default and the - * Lexicon class has similar functionality. This class also - * maintains a second Vector of integers to their associated - * features for fast reverse lookup using the {@link #lookupKey(int)} method. - * - * @author Michael Paul + * A Lexicon contains a mapping from {@link Feature}s to integers. The integer key of a + * feature is returned by the {@link #lookup(Feature)} method. If the feature is not already in the + * lexicon, then it will be added to the lexicon, and thus lookup calls can be made without the need + * to check if an entry already exists. The integer keys are incremented in ascending order starting + * from 0 as features are added to the lexicon. + * + *

+ * The map is implemented as a HashMap by default and the Lexicon class + * has similar functionality. This class also maintains a second Vector of integers to + * their associated features for fast reverse lookup using the {@link #lookupKey(int)} method. + * + * @author Michael Paul **/ -public class Lexicon implements Cloneable, Serializable -{ - /** - * The default capacity of {@link #lexiconInv} and {@link #featureCounts}. - **/ - private static final int defaultCapacity = 1 << 10; - - - /** - * Reads and returns a feature lexicon from the specified file. - * - * @param filename The name of the file from which to read the feature - * lexicon. - * @return The lexicon. - **/ - public static Lexicon readLexicon(String filename) { - try { return readLexicon(new URL("file:" + filename)); } - catch (Exception e) { - System.err.println("Error constructing URL:"); - e.printStackTrace(); - System.exit(1); +public class Lexicon implements Cloneable, Serializable { + /** + * The default capacity of {@link #lexiconInv} and {@link #featureCounts}. + **/ + private static final int defaultCapacity = 1 << 10; + + + /** + * Reads and returns a feature lexicon from the specified file. + * + * @param filename The name of the file from which to read the feature lexicon. + * @return The lexicon. + **/ + public static Lexicon readLexicon(String filename) { + try { + return readLexicon(new URL("file:" + filename)); + } catch (Exception e) { + System.err.println("Error constructing URL:"); + e.printStackTrace(); + System.exit(1); + } + return null; } - return null; - } - - - /** - * Reads a feature lexicon from the specified location. - * - * @param url The location from which to read the feature lexicon. - * @return The lexicon. - **/ - public static Lexicon readLexicon(URL url) { - return readLexicon(url, true); - } - - - /** - * Reads a feature lexicon from the specified location, with the option to - * ignore the feature counts by setting the second argument to - * false. 
- * - * @param url The location from which to read the feature lexicon. - * @param readCounts Whether or not to read the feature counts. - * @return The lexicon. - **/ - public static Lexicon readLexicon(URL url, boolean readCounts) { - ExceptionlessInputStream in = - ExceptionlessInputStream.openCompressedStream(url); - Lexicon result = readLexicon(in, readCounts); - in.close(); - return result; - } - - - /** - * Reads a feature lexicon from the specified stream. - * - * @param in The stream from which to read the feature lexicon. - * @return The lexicon. - **/ - public static Lexicon readLexicon(ExceptionlessInputStream in) { - return readLexicon(in, true); - } - - - /** - * Reads a feature lexicon from the specified stream, with the option to - * ignore the feature counts by setting the second argument to - * false. - * - * @param in The stream from which to read the feature lexicon. - * @param readCounts Whether or not to read the feature counts. - * @return The lexicon. - **/ - public static Lexicon readLexicon(ExceptionlessInputStream in, - boolean readCounts) { - String name = in.readString(); - if (name == null) return null; - Class clazz = ClassUtils.getClass(name); - - Lexicon lexicon = null; - try { lexicon = (Lexicon) clazz.newInstance(); } - catch (Exception e) { - System.err.println("Can't instantiate '" + name + "': " + e); - System.exit(1); + + + /** + * Reads a feature lexicon from the specified location. + * + * @param url The location from which to read the feature lexicon. + * @return The lexicon. + **/ + public static Lexicon readLexicon(URL url) { + return readLexicon(url, true); } - lexicon.read(in, readCounts); - return lexicon; - } - - - // Member variables. - /** The map of features to integer keys. */ - protected Map lexicon; - /** The inverted map of integer keys to their features. */ - protected FVector lexiconInv; - /** The encoding to use for new features added to this lexicon. 
*/ - private String encoding; - /** - * This flag remembers whether {@link #encoding} has been assigned a value - * yet or not. Using this flag, we enforce the constraint that once an - * encoding has been set, it can never be changed. This way, a user will - * only be capable of using the same lexicon object in two different - * learners if they have the same encoding. See the implementation of - * {@link Learner#setLexicon(Lexicon)}. - **/ - private boolean encodingSet; - /** Counts the number of occurrences of each feature. */ - protected IVector featureCounts; - /** - * Counts the number of occurrences of each feature on a class-by-class - * basis. - **/ - protected IVector2D perClassFeatureCounts; - /** - * Features at this index in {@link #lexiconInv} or higher have been - * pruned. -1 indicates that no pruning has been done. - **/ - protected int pruneCutoff; - /** - * Stores features that might appear repeatedly as children of other - * features, but which are not themselves given indexes in the lexicon. - **/ - protected ChildLexicon lexiconChildren; - - - /** Creates an empty lexicon. */ - public Lexicon() { clear(); } - - /** - * Creates an empty lexicon with the given encoding. - * - * @param e The encoding to use when adding features to this lexicon. - **/ - public Lexicon(String e) { - encoding = e; - encodingSet = true; - clear(); - } - - - /** Clears the data structures associated with this instance. */ - public void clear() { - lexicon = new HashMap(); - lexiconInv = new FVector(); - lexiconChildren = null; - pruneCutoff = -1; - } - - - /** - * Sets the encoding used when adding features to this lexicon. - * - * @param e The encoding. - **/ - public void setEncoding(String e) { - if (encodingSet && (encoding == null ? 
e != null : !encoding.equals(e))) { - System.err.println( - "LBJava ERROR: Once established, the encoding of a lexicon cannot be " - + "changed."); - new Exception().printStackTrace(); - System.exit(1); + + /** + * Reads a feature lexicon from the specified location, with + * the option to ignore the feature counts by setting the second argument to false. + * + * @param url The location from which to read the feature lexicon. + * @param readCounts Whether or not to read the feature counts. + * @return The lexicon. + **/ + public static Lexicon readLexicon(URL url, boolean readCounts) { + ExceptionlessInputStream in = ExceptionlessInputStream.openCompressedStream(url); + Lexicon result = readLexicon(in, readCounts); + in.close(); + return result; } - encoding = e; - encodingSet = true; - } - - - /** Simply returns the map stored in {@link #lexicon}. */ - public Map getMap() { - lazyMapCreation(); - return Collections.unmodifiableMap(lexicon); - } - - - /** Returns the number of features currently stored in {@link #lexicon}. */ - public int size() { return lexiconInv.size(); } - /** - * Returns the value of {@link #pruneCutoff}, or {@link #size()} if - * {@link #pruneCutoff} is -1. - **/ - public int getCutoff() { return pruneCutoff == -1 ? size() : pruneCutoff; } - - - /** - * Call this method to initialize the lexicon to count feature occurrences - * on each call to lookup(feature, true) (counting still won't - * happen on a call to lookup(feature, false)). - * Alternatively, this method can also cause the lexicon to discard all its - * feature counts and cease counting features at any time in the future. - * The former happens when policy is something other than - * {@link Lexicon.CountPolicy#none}, and the latter happens when - * policy is {@link Lexicon.CountPolicy#none}. - * - * @see #lookup(Feature,boolean) - * @param policy The new feature counting policy. 
- **/ - public void countFeatures(CountPolicy policy) { - featureCounts = null; - perClassFeatureCounts = null; - if (policy == CountPolicy.global) - featureCounts = new IVector(defaultCapacity); - else if (policy == CountPolicy.perClass) - perClassFeatureCounts = new IVector2D(8, defaultCapacity); - } - - - /** - * Returns the feature counting policy currently employed by this lexicon. - **/ - public CountPolicy getCountPolicy() { - if (featureCounts != null) return CountPolicy.global; - if (perClassFeatureCounts != null) return CountPolicy.perClass; - return CountPolicy.none; - } - - - /** - * Collapses per-class feature counts into global counts. - **/ - public void perClassToGlobalCounts() { - if (perClassFeatureCounts == null) - throw new IllegalArgumentException( - "LBJava ERROR: Lexicon.perClassToGlobalCounts: Cannot be called if " - + "there are not per-class counts."); - - int rows = perClassFeatureCounts.size(), columns = 0; - for (int i = 0; i < rows; ++i) - columns = Math.max(columns, perClassFeatureCounts.size(i)); - featureCounts = new IVector(defaultCapacity); - - for (int j = 0; j < columns; ++j) { - int count = 0; - for (int i = 0; i < rows; ++i) count += perClassFeatureCounts.get(i, j); - featureCounts.set(j, count); + + /** + * Reads a feature lexicon from the + * specified stream. + * + * @param in The stream from which to read the feature lexicon. + * @return The lexicon. + **/ + public static Lexicon readLexicon(ExceptionlessInputStream in) { + return readLexicon(in, true); } - perClassFeatureCounts = null; - } - - - /** - * Returns true if the given feature is already in the - * lexicon (whether it's past the {@link #pruneCutoff} or not) and - * false otherwise. This does not alter or add anything to - * the lexicon. - * - * @param f The feature to look up. - * @return A boolean indicating if the given feature is currently in the - * lexicon. 
- **/ - public boolean contains(Feature f) { - lazyMapCreation(); - return lexicon.containsKey(f); - } - - - /** - * Looks up a feature's index by calling lookup(f, false). - * See {@link #lookup(Feature,boolean,int)} for more details. - * - * @param f The feature to look up. - * @return The integer key that the feature maps to. - **/ - public int lookup(Feature f) { return lookup(f, false, -1); } - - - /** - * Looks up a feature's index by calling lookup(f, training, - * -1). See {@link #lookup(Feature,boolean,int)} for more details. - * - * @param f The feature to look up. - * @param training Whether or not the learner is currently training. - * @return The integer key that the feature maps to. - **/ - public int lookup(Feature f, boolean training) { - return lookup(f, training, -1); - } - - - /** - * Looks up the given feature in the lexicon, possibly counting it and/or - * expanding the lexicon to accomodate it. Feature counting and automatic - * lexicon expansion happen when training is - * true. Otherwise, f is not counted even if - * already in the lexicon, and a previously unobserved feature will cause - * this method to return the value of {@link #getCutoff()} without - * expanding the lexicon to accomodate the new feature. - * - * @param f The feature to look up. - * @param training Whether or not the learner is currently training. - * @param label The label of the example containing this feature, or -1 - * if we aren't doing per class feature counting. - * @return The integer key that the feature maps to. - **/ - public int lookup(Feature f, boolean training, int label) { - if (label < 0) { - if (training && perClassFeatureCounts != null) - throw new IllegalArgumentException( - "LBJava ERROR: Lexicon.lookup: Must supply a label when training " - + "with per class feature counts."); + + /** + * Reads a feature lexicon from the + * specified stream, with the option to ignore the feature counts by setting the second argument + * to false. 
+ * + * @param in The stream from which to read the feature lexicon. + * @param readCounts Whether or not to read the feature counts. + * @return The lexicon. + **/ + public static Lexicon readLexicon(ExceptionlessInputStream in, boolean readCounts) { + String name = in.readString(); + if (name == null) + return null; + Class clazz = ClassUtils.getClass(name); + + Lexicon lexicon = null; + try { + lexicon = (Lexicon) clazz.newInstance(); + } catch (Exception e) { + System.err.println("Can't instantiate '" + name + "': " + e); + System.exit(1); + } + + lexicon.read(in, readCounts); + return lexicon; } - else if (!training || perClassFeatureCounts == null) - throw new IllegalArgumentException( - "LBJava ERROR: Lexicon.lookup: A label has been supplied when not " - + "training with per class feature counts."); - lazyMapCreation(); - Integer I = (Integer) lexicon.get(f); - if (I == null) { - if (!training) return getCutoff(); + // Member variables. + /** The map of features to integer keys. */ + protected Map lexicon; + /** The inverted map of integer keys to their features. */ + protected FVector lexiconInv; + /** The encoding to use for new features added to this lexicon. */ + private String encoding; + /** + * This flag remembers whether {@link #encoding} has been assigned a value yet or not. Using + * this flag, we enforce the constraint that once an encoding has been set, it can never be + * changed. This way, a user will only be capable of using the same lexicon object in two + * different learners if they have the same encoding. See the implementation of + * {@link Learner#setLexicon(Lexicon)}. + **/ + private boolean encodingSet; + /** Counts the number of occurrences of each feature. */ + protected IVector featureCounts; + /** + * Counts the number of occurrences of each feature on a class-by-class basis. + **/ + protected IVector2D perClassFeatureCounts; + /** + * Features at this index in {@link #lexiconInv} or higher have been pruned. 
-1 + * indicates that no pruning has been done. + **/ + protected int pruneCutoff; + /** + * Stores features that might appear repeatedly as children of other features, but which are not + * themselves given indexes in the lexicon. + **/ + protected ChildLexicon lexiconChildren; + - f = f.encode(encoding); + /** Creates an empty lexicon. */ + public Lexicon() { + clear(); + } + + /** + * Creates an empty lexicon with the given encoding. + * + * @param e The encoding to use when adding features to this lexicon. + **/ + public Lexicon(String e) { + encoding = e; + encodingSet = true; + clear(); + } - if (lexiconChildren != null) { - Feature c = lexiconChildren.remove(f); - if (c != null) f = c; - } - int key = lexiconInv.size(); - lexicon.put(f, new Integer(key)); - lexiconInv.add(f); - incrementCount(key, label); - return key; + /** Clears the data structures associated with this instance. */ + public void clear() { + lexicon = new HashMap(); + lexiconInv = new FVector(); + lexiconChildren = null; + pruneCutoff = -1; } - int index = I.intValue(); - if (training) incrementCount(index, label); - return index; - } - - - /** - * Used to lookup the children of conjunctive and referring features during - * training, this method checks {@link #lexiconChildren} if the feature - * isn't present in {@link #lexicon} and {@link #lexiconInv}, and then - * stores the given feature in {@link #lexiconChildren} if it wasn't - * present anywhere. - * - * @param f The feature to look up. - * @param label The label of the example containing this feature, or -1 if - * we aren't doing per class feature counting. - * @return A feature equivalent to f that is stored in this - * lexicon. - **/ - public Feature getChildFeature(Feature f, int label) { - lazyMapCreation(); - Integer I = (Integer) lexicon.get(f); - if (I != null) { - int index = I.intValue(); - incrementCount(index, label); - return lexiconInv.get(index); + + /** + * Sets the encoding used when adding features to this lexicon. 
+ * + * @param e The encoding. + **/ + public void setEncoding(String e) { + if (encodingSet && (encoding == null ? e != null : !encoding.equals(e))) { + System.err + .println("LBJava ERROR: Once established, the encoding of a lexicon cannot be " + + "changed."); + new Exception().printStackTrace(); + System.exit(1); + } + + encoding = e; + encodingSet = true; } - if (lexiconChildren == null) lexiconChildren = new ChildLexicon(this); - return lexiconChildren.getChildFeature(f, -1); - } - - - /** - * Increments the count of the feature with the given index(es). - * - * @param index The index of the feature. - * @param label The label of the example containing this feature, which is - * ignored if we aren't doing per class feature counting. - **/ - protected void incrementCount(int index, int label) { - if (featureCounts != null) featureCounts.increment(index); - else if (perClassFeatureCounts != null) - perClassFeatureCounts.increment(label, index); - } - - - /** - * Used to lookup the children of conjunctive and referring features while - * writing the lexicon, this method checks {@link #lexiconChildren} if the - * feature isn't present in {@link #lexicon} and {@link #lexiconInv}, and - * will throw an exception if it still can't be found. - * - * @param f The feature to look up. - * @return If the feature was found in {@link #lexicon}, its associated - * integer index is returned. Otherwise, -i - 1 is - * returned, where i is the index associated with the - * feature in {@link #lexiconChildren}. - * @throws UnsupportedOperationException If the feature isn't found - * anywhere in the lexicon. 
- **/ - public int lookupChild(Feature f) { - lazyMapCreation(); - Integer I = (Integer) lexicon.get(f); - if (I != null) return I.intValue(); - - if (lexiconChildren == null) - throw - new UnsupportedOperationException( - "When calling Lexicon.lookupChild(Feature), the feature must be " - + "present in the lexicon."); - - return -lexiconChildren.lookupChild(f) - 1; - } - - - /** - * Does a reverse lexicon lookup and returns the {@link Feature} associated - * with the given integer key, and null if no such feature - * exists. - * - * @param i The integer key to look up. If i is negative, - * {@link #lexiconChildren} is queried instead of - * {@link #lexiconInv}. - * @return The feature that maps to the given integer. - **/ - public Feature lookupKey(int i) { - if (i < 0) return lexiconChildren.lookupKey(-i - 1); - return lexiconInv.get(i); - } - - - /** - * Determines if the given feature index should be pruned according to the - * given pruning policy, which must have its thresholds set already in the - * case that it represents the "Percentage" policy. This method behaves - * equivalently to isPruned(i, -1, p). - * - * @see #isPruned(int,int,Lexicon.PruningPolicy) - * @param i The feature index. - * @param policy The pruning policy. - * @return true iff the feature should be pruned. - **/ - public boolean isPruned(int i, PruningPolicy policy) { - return isPruned(i, -1, policy); - } - - /** - * Determines if the given feature index should be pruned according to the - * given pruning policy, which must have its thresholds set already in the - * case that it represents the "Percentage" policy. The second argument to - * this method represents the label of the example in which the specified - * feature appeared. It is ignored unless per class feature counts are - * present. If they are, then when the specified label is -1, all counts - * for the given feature must be greater than or equal to the corresponding - * threshold for this method to return true. 
When per class - * feature counts are present and the label is non-negative, only the count - * corresponding to that label must be greater than or equal to its - * corresonding threshold. - * - *

In other words, passing -1 in the second argument gives the behavior - * expected when pruning the lexicon as in - * {@link #prune(Lexicon.PruningPolicy)}. Passing a non-negative label in - * the second argument gives the behavior expected when pruning the actual - * examples. - * - * @param i The feature index. - * @param label The label of the example containing this feature, or -1 if - * we want the lexicon pruning behavior. - * @param policy The pruning policy. - * @return true iff the feature should be pruned. - **/ - public boolean isPruned(int i, int label, PruningPolicy policy) { - if (policy.isNone()) return false; - - if (featureCounts == null && perClassFeatureCounts == null) - throw new IllegalArgumentException( - "LBJava ERROR: Lexicon.isPruned: pruning policy wasn't 'None', but " - + "there are no counts."); - - if (featureCounts != null) // if global counting - return featureCounts.get(i) < policy.getThreshold(0); - // otherwise, per class counting - if (label >= 0) - return - perClassFeatureCounts.get(label, i) < policy.getThreshold(label); - for (int j = 0; j < perClassFeatureCounts.size(); ++j) - if (perClassFeatureCounts.get(j, i) >= policy.getThreshold(j)) - return false; - return true; - } - - - /** - * Rearranges the order in which features appear in the lexicon based on - * the compiled feature counts in {@link #featureCounts} or - * {@link #perClassFeatureCounts} so that pruned features are at the end of - * the feature space. This way, learning algorithms can allocate exactly - * enough space in their weight vectors for the unpruned features. - * - *

This method returns an array of integers which is a permutation of - * the integers from 0 (inclusive) to the number of features in the lexicon - * (exclusive). It represents a map from the features' original indexes to - * their new ones after pruning. The {@link #getCutoff()} method then - * returns the new index of the first pruned feature (or, equivalently, the - * number of unpruned features). All features with a new index greater - * than or equal to this index are considered pruned in the case of global - * pruning. In the case of per-class pruning, the cutoff represents the - * first feature whose count fell below the threshold for every - * class. Thus, in this case, features below the cutoff may still be - * pruned in any given class; just not all of them. - * - * @param policy The type of pruning to perform. - * @return A map from features' original indexes to their new ones, or - * null if policy indicates no pruning. - **/ - public int[] prune(PruningPolicy policy) { - if (policy.isNone()) { - pruneCutoff = -1; - return null; + + /** Simply returns the map stored in {@link #lexicon}. */ + public Map getMap() { + lazyMapCreation(); + return Collections.unmodifiableMap(lexicon); } - if (featureCounts == null && perClassFeatureCounts == null) - throw new UnsupportedOperationException( - "LBJava ERROR: Lexicon.prune: Can't prune if there's no feature " - + "counts."); - - // Set thresholds in the policy. 
- if (policy.isPercentage()) { - if (featureCounts != null) { // if global counting - long t = - Math.round(Math.ceil(featureCounts.max() * policy.getPercentage())); - policy.setThresholds(new int[]{ (int) t }); - } - else { // if per class counting - int[] thresholds = new int[perClassFeatureCounts.size()]; - int size = perClassFeatureCounts.size(); - double p = policy.getPercentage(); - for (int i = 0; i < size; ++i) - thresholds[i] = - (int) Math.round(Math.ceil(perClassFeatureCounts.max(i) * p)); - policy.setThresholds(thresholds); - } + + /** Returns the number of features currently stored in {@link #lexicon}. */ + public int size() { + return lexiconInv.size(); } - // there's no clause for policy.isAbsolute() here since the appropriate - // threshold must already be established in that case. - else if (!policy.isAbsolute()) - throw new UnsupportedOperationException( - "LBJava ERROR: Lexicon.prune: Pruning policy '" + policy - + "' is not supported."); - - // Swap features around, remembering how it was done in swapMap. - pruneCutoff = size(); - int[] swapMap = new int[pruneCutoff]; - - // If features at the end of the space are pruned, there's no need to swap - // anything; just decrement pruneCutoff. - while (pruneCutoff > 0 && isPruned(pruneCutoff - 1, policy)) { - --pruneCutoff; - swapMap[pruneCutoff] = pruneCutoff; + + /** + * Returns the value of {@link #pruneCutoff}, or {@link #size()} if {@link #pruneCutoff} is -1. + **/ + public int getCutoff() { + return pruneCutoff == -1 ? size() : pruneCutoff; } - // Now we know the feature just below the prune cutoff does not need to be - // pruned (otherwise it would have been handled by the loop above), so we - // start the loop at pruneCutoff - 2 and do swaps for any feature that - // needs to be pruned. 
- if (pruneCutoff > 0) swapMap[pruneCutoff - 1] = pruneCutoff - 1; - for (int i = pruneCutoff - 2; i >= 0; --i) { - if (isPruned(i, policy)) { - pruneCutoff--; + /** + * Call this method to initialize the lexicon to count + * feature occurrences on each call to lookup(feature, true) (counting still won't + * happen on a call to lookup(feature, false)). Alternatively, this method can also + * cause the lexicon to discard all its feature counts and cease counting features at any time + * in the future. The former happens when policy is something other than + * {@link Lexicon.CountPolicy#none}, and the latter happens when policy is + * {@link Lexicon.CountPolicy#none}. + * + * @see #lookup(Feature,boolean) + * @param policy The new feature counting policy. + **/ + public void countFeatures(CountPolicy policy) { + featureCounts = null; + perClassFeatureCounts = null; + if (policy == CountPolicy.global) + featureCounts = new IVector(defaultCapacity); + else if (policy == CountPolicy.perClass) + perClassFeatureCounts = new IVector2D(8, defaultCapacity); + } - Feature pruned = lexiconInv.get(i); - Feature f = lexiconInv.get(pruneCutoff); - if (lexicon != null) - lexicon.put(pruned, lexicon.put(f, new Integer(i))); - lexiconInv.set(i, f); - lexiconInv.set(pruneCutoff, pruned); + /** + * Returns the feature counting policy currently employed by this + * lexicon. + **/ + public CountPolicy getCountPolicy() { if (featureCounts != null) - featureCounts.set(i, - featureCounts.set(pruneCutoff, - featureCounts.get(i))); - else { - for (int j = 0; j < perClassFeatureCounts.size(); ++j) - perClassFeatureCounts.set( - j, i, - perClassFeatureCounts.set(j, pruneCutoff, - perClassFeatureCounts.get(j, i))); + return CountPolicy.global; + if (perClassFeatureCounts != null) + return CountPolicy.perClass; + return CountPolicy.none; + } + + + /** + * Collapses per-class feature counts into global counts. 
+ **/ + public void perClassToGlobalCounts() { + if (perClassFeatureCounts == null) + throw new IllegalArgumentException( + "LBJava ERROR: Lexicon.perClassToGlobalCounts: Cannot be called if " + + "there are not per-class counts."); + + int rows = perClassFeatureCounts.size(), columns = 0; + for (int i = 0; i < rows; ++i) + columns = Math.max(columns, perClassFeatureCounts.size(i)); + featureCounts = new IVector(defaultCapacity); + + for (int j = 0; j < columns; ++j) { + int count = 0; + for (int i = 0; i < rows; ++i) + count += perClassFeatureCounts.get(i, j); + featureCounts.set(j, count); } - swapMap[i] = swapMap[pruneCutoff]; - swapMap[pruneCutoff] = i; - } - else swapMap[i] = i; + perClassFeatureCounts = null; } - // Invert swapMap. - // swapMap[i] currently stores the original index of the feature whose new - // index is i. but we want the inverse: swapMap[i] should store the new - // index of the feature whose original index was i. we also don't want to - // allocate another array as long as swapMap, even if it's only around - // temporarily. so we do this: - - for (int i = 0; i < swapMap.length; ) { - int newIndex = 0, j = i; - - do { - int original = swapMap[j]; - swapMap[j] = -newIndex; - newIndex = j; - j = original; - } while (j != i); - - swapMap[i] = newIndex; - for (i++; i < swapMap.length && swapMap[i] <= 0; ++i) - swapMap[i] = -swapMap[i]; + + /** + * Returns true if the given feature is already in the + * lexicon (whether it's past the {@link #pruneCutoff} or not) and false otherwise. + * This does not alter or add anything to the lexicon. + * + * @param f The feature to look up. + * @return A boolean indicating if the given feature is currently in the lexicon. + **/ + public boolean contains(Feature f) { + lazyMapCreation(); + return lexicon.containsKey(f); } - return swapMap; - } - - - /** - * Permanently discards any features that have been pruned via - * {@link #prune(Lexicon.PruningPolicy)} as well as all feature counts. 
- **/ - public void discardPrunedFeatures() { - if (pruneCutoff == -1) return; - featureCounts = null; - perClassFeatureCounts = null; - for (int i = lexiconInv.size() - 1; i >= pruneCutoff; --i) { - Feature f = lexiconInv.remove(i); - if (lexicon != null) lexicon.remove(f); + + /** + * Looks up a feature's index by calling lookup(f, false). + * See {@link #lookup(Feature,boolean,int)} for more details. + * + * @param f The feature to look up. + * @return The integer key that the feature maps to. + **/ + public int lookup(Feature f) { + return lookup(f, false, -1); } - lexiconInv = new FVector(lexiconInv); - pruneCutoff = -1; - } - - - /** - * Returns a deep clone of this lexicon implemented as a - * HashMap. - **/ - public Object clone() { - Lexicon clone = null; - try { clone = (Lexicon) super.clone(); } - catch (Exception e) { - System.err.println("Error cloning Lexicon: " + e); - e.printStackTrace(); - System.exit(1); + + + /** + * Looks up a feature's index by calling + * lookup(f, training, + * -1). See {@link #lookup(Feature,boolean,int)} for more details. + * + * @param f The feature to look up. + * @param training Whether or not the learner is currently training. + * @return The integer key that the feature maps to. + **/ + public int lookup(Feature f, boolean training) { + return lookup(f, training, -1); } - if (lexicon != null) { - clone.lexicon = new HashMap(); - clone.lexicon.putAll(lexicon); + + /** + * Looks up the given feature in the lexicon, possibly + * counting it and/or expanding the lexicon to accomodate it. Feature counting and automatic + * lexicon expansion happen when training is true. Otherwise, + * f is not counted even if already in the lexicon, and a previously unobserved + * feature will cause this method to return the value of {@link #getCutoff()} without expanding + * the lexicon to accomodate the new feature. + * + * @param f The feature to look up. + * @param training Whether or not the learner is currently training. 
+ * @param label The label of the example containing this feature, or -1 if we aren't doing per + * class feature counting. + * @return The integer key that the feature maps to. + **/ + public int lookup(Feature f, boolean training, int label) { + if (label < 0) { + if (training && perClassFeatureCounts != null) + throw new IllegalArgumentException( + "LBJava ERROR: Lexicon.lookup: Must supply a label when training " + + "with per class feature counts."); + } else if (!training || perClassFeatureCounts == null) + throw new IllegalArgumentException( + "LBJava ERROR: Lexicon.lookup: A label has been supplied when not " + + "training with per class feature counts."); + + lazyMapCreation(); + Integer I = (Integer) lexicon.get(f); + + if (I == null) { + if (!training) + return getCutoff(); + + f = f.encode(encoding); + + if (lexiconChildren != null) { + Feature c = lexiconChildren.remove(f); + if (c != null) + f = c; + } + + int key = lexiconInv.size(); + lexicon.put(f, new Integer(key)); + lexiconInv.add(f); + incrementCount(key, label); + return key; + } + + int index = I.intValue(); + if (training) + incrementCount(index, label); + return index; } - clone.lexiconInv = (FVector) lexiconInv.clone(); - if (featureCounts != null) - clone.featureCounts = (IVector) featureCounts.clone(); - if (perClassFeatureCounts != null) - clone.perClassFeatureCounts = (IVector2D) perClassFeatureCounts.clone(); - if (lexiconChildren != null) - clone.lexiconChildren = (ChildLexicon) lexiconChildren.clone(); - - return clone; - } - - - /** Returns whether the given Lexicon object is equal to this one. */ - public boolean equals(Object o) { - if (!o.getClass().equals(getClass())) return false; - Lexicon l = (Lexicon) o; - return - pruneCutoff == l.pruneCutoff - && (lexicon == null ? l.lexicon == null : lexicon.equals(l.lexicon)) - && (featureCounts == null - ? l.featureCounts == null : featureCounts.equals(l.featureCounts)) - && (perClassFeatureCounts == null - ? 
l.perClassFeatureCounts == null - : perClassFeatureCounts.equals(l.perClassFeatureCounts)) - && (lexiconChildren == null - ? l.lexiconChildren == null - : lexiconChildren.equals(l.lexiconChildren)); - } - - - /** Returns a hash code for this lexicon. */ - public int hashCode() { return lexiconInv.hashCode(); } - - - /** - * Writes a binary representation of the lexicon. - * - * @param out The output stream. - **/ - public void write(ExceptionlessOutputStream out) { - out.writeString(getClass().getName()); - if (lexiconChildren == null) out.writeString(null); - else lexiconChildren.write(out); - - final FVector inverse = lexiconInv; - int[] indexes = new int[inverse.size()]; - for (int i = 0; i < indexes.length; ++i) indexes[i] = i; - Sort.sort(indexes, - new Sort.IntComparator() { - public int compare(int i1, int i2) { - return inverse.get(i1).compareTo(inverse.get(i2)); - } - }); - - String previousClassName = null; - String previousPackage = null; - String previousClassifier = null; - String previousSIdentifier = null; - ByteString previousBSIdentifier = null; - out.writeInt(indexes.length); - out.writeInt(pruneCutoff); - - for (int i = 0; i < indexes.length; ++i) { - Feature f = inverse.get(indexes[i]); - previousClassName = - f.lexWrite(out, this, previousClassName, previousPackage, - previousClassifier, previousSIdentifier, - previousBSIdentifier); - previousPackage = f.getPackage(); - previousClassifier = f.getGeneratingClassifier(); - if (f.hasStringIdentifier()) - previousSIdentifier = f.getStringIdentifier(); - else if (f.hasByteStringIdentifier()) - previousBSIdentifier = f.getByteStringIdentifier(); - - out.writeInt(indexes[i]); + + + /** + * Used to lookup the children of conjunctive and referring features during training, this + * method checks {@link #lexiconChildren} if the feature isn't present in {@link #lexicon} and + * {@link #lexiconInv}, and then stores the given feature in {@link #lexiconChildren} if it + * wasn't present anywhere. 
+ * + * @param f The feature to look up. + * @param label The label of the example containing this feature, or -1 if we aren't doing per + * class feature counting. + * @return A feature equivalent to f that is stored in this lexicon. + **/ + public Feature getChildFeature(Feature f, int label) { + lazyMapCreation(); + Integer I = (Integer) lexicon.get(f); + if (I != null) { + int index = I.intValue(); + incrementCount(index, label); + return lexiconInv.get(index); + } + + if (lexiconChildren == null) + lexiconChildren = new ChildLexicon(this); + return lexiconChildren.getChildFeature(f, -1); } - if (featureCounts == null) out.writeInt(0); - else featureCounts.write(out); - if (perClassFeatureCounts == null) out.writeInt(0); - else perClassFeatureCounts.write(out); - } - - - /** - * Reads the binary representation of a lexicon from the specified stream, - * overwriting the data in this object. - * - * @param in The input stream. - **/ - public void read(ExceptionlessInputStream in) { read(in, true); } - - - /** - * Reads the binary representation of a lexicon from the specified stream, - * overwriting the data in this object. This method also gives the option - * to ignore any feature counts stored after the feature mappings by - * setting the second argument to false. - * - * @param in The input stream. - * @param readCounts Whether or not to read the feature counts. 
- **/ - public void read(ExceptionlessInputStream in, boolean readCounts) { - lexiconChildren = (ChildLexicon) Lexicon.readLexicon(in, readCounts); - - Class previousClass = null; - String previousPackage = null; - String previousClassifier = null; - String previousSIdentifier = null; - ByteString previousBSIdentifier = null; - int N = in.readInt(); - pruneCutoff = in.readInt(); - lexicon = null; - lexiconInv = new FVector(N); - - for (int i = 0; i < N; ++i) { - Feature f = - Feature.lexReadFeature(in, this, previousClass, previousPackage, - previousClassifier, previousSIdentifier, - previousBSIdentifier); - int index = in.readInt(); - lexiconInv.set(index, f); - - previousClass = f.getClass(); - previousPackage = f.getPackage(); - previousClassifier = f.getGeneratingClassifier(); - if (f.hasStringIdentifier()) - previousSIdentifier = f.getStringIdentifier(); - else if (f.hasByteStringIdentifier()) - previousBSIdentifier = f.getByteStringIdentifier(); + + /** + * Increments the count of the feature with the given index(es). + * + * @param index The index of the feature. + * @param label The label of the example containing this feature, which is ignored if we aren't + * doing per class feature counting. 
+ **/ + protected void incrementCount(int index, int label) { + if (featureCounts != null) + featureCounts.increment(index); + else if (perClassFeatureCounts != null) + perClassFeatureCounts.increment(label, index); } - if (readCounts) { - featureCounts = new IVector(); - featureCounts.read(in); - if (featureCounts.size() == 0) featureCounts = null; - perClassFeatureCounts = new IVector2D(); - perClassFeatureCounts.read(in); - if (perClassFeatureCounts.size() == 0) perClassFeatureCounts = null; + + /** + * Used to lookup the children of conjunctive and referring features while writing the lexicon, + * this method checks {@link #lexiconChildren} if the feature isn't present in {@link #lexicon} + * and {@link #lexiconInv}, and will throw an exception if it still can't be found. + * + * @param f The feature to look up. + * @return If the feature was found in {@link #lexicon}, its associated integer index is + * returned. Otherwise, -i - 1 is returned, where i is the + * index associated with the feature in {@link #lexiconChildren}. + * @throws UnsupportedOperationException If the feature isn't found anywhere in the lexicon. + **/ + public int lookupChild(Feature f) { + lazyMapCreation(); + Integer I = (Integer) lexicon.get(f); + if (I != null) + return I.intValue(); + + if (lexiconChildren == null) + throw new UnsupportedOperationException( + "When calling Lexicon.lookupChild(Feature), the feature must be " + + "present in the lexicon."); + + return -lexiconChildren.lookupChild(f) - 1; } - else { - featureCounts = null; - perClassFeatureCounts = null; + + + /** + * Does a reverse lexicon lookup and returns the {@link Feature} + * associated with the given integer key, and null if no such feature exists. + * + * @param i The integer key to look up. If i is negative, {@link #lexiconChildren} + * is queried instead of {@link #lexiconInv}. + * @return The feature that maps to the given integer. 
+ **/ + public Feature lookupKey(int i) { + if (i < 0) + return lexiconChildren.lookupKey(-i - 1); + return lexiconInv.get(i); } - if (lexiconChildren != null) lexiconChildren.setParent(this); - - // Tom Redman - // This class is not thread-safe, and this patch does NOT fix it, it just circumvents - // the issue for the NER package. In effect, the lexicon will be inited as soon as the - // data is loaded, circumventing a race condition as multiple threads potentially attempt - // to initialize the lexicon hash table. Adding features to the lexicon in the lookup method - // is not thread safe, so any usages of this class that attempt that will eventially fail - // in massively parallel implementations. - lazyMapCreation(); // not so lazy at all. - } - - - /** - * Various other methods in this class call this method to ensure that - * {@link #lexicon} is populated before performing operations on it. The - * only reason it wouldn't be is if it had just been read off disk. - **/ - protected void lazyMapCreation() { - if (lexicon == null) { - lexicon = new HashMap(); - int N = lexiconInv.size(); - for (int i = 0; i < N; ++i) - lexicon.put(lexiconInv.get(i), new Integer(i)); + + /** + * Determines if the given feature index should be pruned + * according to the given pruning policy, which must have its thresholds set already in the case + * that it represents the "Percentage" policy. This method behaves equivalently to + * isPruned(i, -1, p). + * + * @see #isPruned(int,int,Lexicon.PruningPolicy) + * @param i The feature index. + * @param policy The pruning policy. + * @return true iff the feature should be pruned. + **/ + public boolean isPruned(int i, PruningPolicy policy) { + return isPruned(i, -1, policy); } - } - - - /** - * Reads the value of {@link #pruneCutoff} from the specified stream, - * discarding everything else. - * - * @param in The input stream. 
- **/ - public static int readPrunedSize(ExceptionlessInputStream in) { - in.readInt(); - return in.readInt(); - } - - - /** Returns a text representation of this lexicon (for debugging). */ - public String toString() { - StringBuffer result = new StringBuffer(); - - for (int i = 0; i < lexiconInv.size(); ++i) { - result.append(", "); - result.append(i); - result.append(": "); - result.append(lexiconInv.get(i).toString()); + + /** + * Determines if the given feature index should be + * pruned according to the given pruning policy, which must have its thresholds set already in + * the case that it represents the "Percentage" policy. The second argument to this method + * represents the label of the example in which the specified feature appeared. It is ignored + * unless per class feature counts are present. If they are, then when the specified label is + * -1, all counts for the given feature must be greater than or equal to the corresponding + * threshold for this method to return true. When per class feature counts are + * present and the label is non-negative, only the count corresponding to that label must be + * greater than or equal to its corresonding threshold. + * + *

+ * In other words, passing -1 in the second argument gives the behavior expected when pruning + * the lexicon as in {@link #prune(Lexicon.PruningPolicy)}. Passing a non-negative label in the + * second argument gives the behavior expected when pruning the actual examples. + * + * @param i The feature index. + * @param label The label of the example containing this feature, or -1 if we want the lexicon + * pruning behavior. + * @param policy The pruning policy. + * @return true iff the feature should be pruned. + **/ + public boolean isPruned(int i, int label, PruningPolicy policy) { + if (policy.isNone()) + return false; + + if (featureCounts == null && perClassFeatureCounts == null) + throw new IllegalArgumentException( + "LBJava ERROR: Lexicon.isPruned: pruning policy wasn't 'None', but " + + "there are no counts."); + + if (featureCounts != null) // if global counting + return featureCounts.get(i) < policy.getThreshold(0); + // otherwise, per class counting + if (label >= 0) + return perClassFeatureCounts.get(label, i) < policy.getThreshold(label); + for (int j = 0; j < perClassFeatureCounts.size(); ++j) + if (perClassFeatureCounts.get(j, i) >= policy.getThreshold(j)) + return false; + return true; } - if (lexiconInv.size() > 0) return result.substring(2); - return result.toString(); - } - - - /** - * Produces on STDOUT a table of feature counts including a - * line indicating the position of {@link #pruneCutoff}. It's probably not - * a good idea to call this method unless you know your lexicon is small. - * - * @param p Whether or not to include package names in the output. 
- **/ - public void printCountTable(boolean p) { - int rows = lexiconInv.size(); - String[] rowLabels = new String[rows]; - String[] columnLabels = null; - double[][] data = null; - int[] sigDigits = null; - int[] dashRows = { 0, pruneCutoff }; - - if (featureCounts != null) { - data = new double[rows][2]; - - for (int i = 0; i < rows; ++i) { - data[i][0] = i; - data[i][1] = featureCounts.get(i); - rowLabels[i] = - p ? lexiconInv.get(i).toString() - : lexiconInv.get(i).toStringNoPackage(); - } - - columnLabels = new String[]{ "Index", "Count" }; - sigDigits = new int[2]; + + /** + * Rearranges the order in which features appear in the lexicon + * based on the compiled feature counts in {@link #featureCounts} or + * {@link #perClassFeatureCounts} so that pruned features are at the end of the feature space. + * This way, learning algorithms can allocate exactly enough space in their weight vectors for + * the unpruned features. + * + *

+ * This method returns an array of integers which is a permutation of the integers from 0 + * (inclusive) to the number of features in the lexicon (exclusive). It represents a map from + * the features' original indexes to their new ones after pruning. The {@link #getCutoff()} + * method then returns the new index of the first pruned feature (or, equivalently, the number + * of unpruned features). All features with a new index greater than or equal to this index are + * considered pruned in the case of global pruning. In the case of per-class pruning, the cutoff + * represents the first feature whose count fell below the threshold for every class. + * Thus, in this case, features below the cutoff may still be pruned in any given class; just + * not all of them. + * + * @param policy The type of pruning to perform. + * @return A map from features' original indexes to their new ones, or null if + * policy indicates no pruning. + **/ + public int[] prune(PruningPolicy policy) { + if (policy.isNone()) { + pruneCutoff = -1; + return null; + } + + if (featureCounts == null && perClassFeatureCounts == null) + throw new UnsupportedOperationException( + "LBJava ERROR: Lexicon.prune: Can't prune if there's no feature " + "counts."); + + // Set thresholds in the policy. + if (policy.isPercentage()) { + if (featureCounts != null) { // if global counting + long t = Math.round(Math.ceil(featureCounts.max() * policy.getPercentage())); + policy.setThresholds(new int[] {(int) t}); + } else { // if per class counting + int[] thresholds = new int[perClassFeatureCounts.size()]; + int size = perClassFeatureCounts.size(); + double p = policy.getPercentage(); + for (int i = 0; i < size; ++i) + thresholds[i] = (int) Math.round(Math.ceil(perClassFeatureCounts.max(i) * p)); + policy.setThresholds(thresholds); + } + } + // there's no clause for policy.isAbsolute() here since the appropriate + // threshold must already be established in that case. 
+ else if (!policy.isAbsolute()) + throw new UnsupportedOperationException("LBJava ERROR: Lexicon.prune: Pruning policy '" + + policy + "' is not supported."); + + // Swap features around, remembering how it was done in swapMap. + pruneCutoff = size(); + int[] swapMap = new int[pruneCutoff]; + + // If features at the end of the space are pruned, there's no need to swap + // anything; just decrement pruneCutoff. + while (pruneCutoff > 0 && isPruned(pruneCutoff - 1, policy)) { + --pruneCutoff; + swapMap[pruneCutoff] = pruneCutoff; + } + + // Now we know the feature just below the prune cutoff does not need to be + // pruned (otherwise it would have been handled by the loop above), so we + // start the loop at pruneCutoff - 2 and do swaps for any feature that + // needs to be pruned. + if (pruneCutoff > 0) + swapMap[pruneCutoff - 1] = pruneCutoff - 1; + + for (int i = pruneCutoff - 2; i >= 0; --i) { + if (isPruned(i, policy)) { + pruneCutoff--; + + Feature pruned = lexiconInv.get(i); + Feature f = lexiconInv.get(pruneCutoff); + if (lexicon != null) + lexicon.put(pruned, lexicon.put(f, new Integer(i))); + lexiconInv.set(i, f); + lexiconInv.set(pruneCutoff, pruned); + + if (featureCounts != null) + featureCounts.set(i, featureCounts.set(pruneCutoff, featureCounts.get(i))); + else { + for (int j = 0; j < perClassFeatureCounts.size(); ++j) + perClassFeatureCounts.set( + j, + i, + perClassFeatureCounts.set(j, pruneCutoff, + perClassFeatureCounts.get(j, i))); + } + + swapMap[i] = swapMap[pruneCutoff]; + swapMap[pruneCutoff] = i; + } else + swapMap[i] = i; + } + + // Invert swapMap. + // swapMap[i] currently stores the original index of the feature whose new + // index is i. but we want the inverse: swapMap[i] should store the new + // index of the feature whose original index was i. we also don't want to + // allocate another array as long as swapMap, even if it's only around + // temporarily. 
so we do this: + + for (int i = 0; i < swapMap.length;) { + int newIndex = 0, j = i; + + do { + int original = swapMap[j]; + swapMap[j] = -newIndex; + newIndex = j; + j = original; + } while (j != i); + + swapMap[i] = newIndex; + for (i++; i < swapMap.length && swapMap[i] <= 0; ++i) + swapMap[i] = -swapMap[i]; + } + + return swapMap; } - else if (perClassFeatureCounts != null) { - int columns = perClassFeatureCounts.size() + 1; - data = new double[rows][columns]; - - for (int i = 0; i < rows; ++i) { - data[i][0] = i; - for (int j = 0; j < columns - 1; ++j) - data[i][j + 1] = perClassFeatureCounts.get(j, i); - rowLabels[i] = - p ? lexiconInv.get(i).toString() - : lexiconInv.get(i).toStringNoPackage(); - } - - columnLabels = new String[columns]; - columnLabels[0] = "Index"; - for (int i = 1; i < columns; ++i) columnLabels[i] = "Label " + (i - 1); - sigDigits = new int[columns]; + + + /** + * Permanently discards any features that have been pruned via + * {@link #prune(Lexicon.PruningPolicy)} as well as all feature counts. + **/ + public void discardPrunedFeatures() { + if (pruneCutoff == -1) + return; + featureCounts = null; + perClassFeatureCounts = null; + for (int i = lexiconInv.size() - 1; i >= pruneCutoff; --i) { + Feature f = lexiconInv.remove(i); + if (lexicon != null) + lexicon.remove(f); + } + lexiconInv = new FVector(lexiconInv); + pruneCutoff = -1; } - else { - data = new double[rows][1]; - for (int i = 0; i < rows; ++i) { - data[i][0] = i; - rowLabels[i] = - p ? lexiconInv.get(i).toString() - : lexiconInv.get(i).toStringNoPackage(); - } - - columnLabels = new String[]{ "Index" }; - sigDigits = new int[1]; + + /** + * Discard features at the provided indices. This operation is performed + * last to first so we can do it in place. This method will sort the input + * array. + * @param dumpthese the indexes of the features to dump. 
+ */ + public void discardPrunedFeatures(int [] dumpthese) { + Arrays.sort(dumpthese); + lexiconInv.remove(dumpthese); + + // this compresses the FVector + lexiconInv = new FVector(lexiconInv); + if (lexicon != null) { + + // reconstitute the lexicon. + lexicon.clear(); + for (int i = 0; i < lexiconInv.size();i++) { + lexicon.put(lexiconInv.get(i), new Integer(i)); + } + + // sanity check, make sure the indices in the lexicon map matches the index in the feature vector + for (int i = 0; i < lexiconInv.size();i++) { + if (i != ((Integer)lexicon.get(lexiconInv.get(i))).intValue()) { + throw new RuntimeException("After optimization pruning, the index in the lexicon did " + + "not match the inverted index."); + } + } + } } - TableFormat.printTableFormat(System.out, columnLabels, rowLabels, data, - sigDigits, dashRows); - } + /** + * Returns a deep clone of this lexicon implemented as a HashMap. + **/ + public Object clone() { + Lexicon clone = null; + try { + clone = (Lexicon) super.clone(); + } catch (Exception e) { + System.err.println("Error cloning Lexicon: " + e); + e.printStackTrace(); + System.exit(1); + } + + if (lexicon != null) { + clone.lexicon = new HashMap(); + clone.lexicon.putAll(lexicon); + } + clone.lexiconInv = (FVector) lexiconInv.clone(); + if (featureCounts != null) + clone.featureCounts = (IVector) featureCounts.clone(); + if (perClassFeatureCounts != null) + clone.perClassFeatureCounts = (IVector2D) perClassFeatureCounts.clone(); + if (lexiconChildren != null) + clone.lexiconChildren = (ChildLexicon) lexiconChildren.clone(); + + return clone; + } - // main(String[]) - public static void main(String[] args) { - String filename = null; - boolean p = true; - try { - filename = args[0]; - if (args.length == 2) p = Boolean.parseBoolean(args[1]); - if (args.length > 2) throw new Exception(); + /** Returns whether the given Lexicon object is equal to this one. 
*/ + public boolean equals(Object o) { + if (!o.getClass().equals(getClass())) + return false; + Lexicon l = (Lexicon) o; + return pruneCutoff == l.pruneCutoff + && (lexicon == null ? l.lexicon == null : lexicon.equals(l.lexicon)) + && (featureCounts == null ? l.featureCounts == null : featureCounts + .equals(l.featureCounts)) + && (perClassFeatureCounts == null ? l.perClassFeatureCounts == null + : perClassFeatureCounts.equals(l.perClassFeatureCounts)) + && (lexiconChildren == null ? l.lexiconChildren == null : lexiconChildren + .equals(l.lexiconChildren)); } - catch (Exception e) { - System.out.println( - "usage: java edu.illinois.cs.cogcomp.lbjava.learn.Lexicon []"); - System.exit(1); + + + /** Returns a hash code for this lexicon. */ + public int hashCode() { + return lexiconInv.hashCode(); } - Lexicon lexicon = readLexicon(filename); - lexicon.printCountTable(p); - if (lexicon.lexiconChildren != null) { - System.out.println("\nChildren:"); - lexicon.lexiconChildren.printCountTable(p); + + /** + * Writes a binary representation of the lexicon. + * + * @param out The output stream. 
+ **/ + public void write(ExceptionlessOutputStream out) { + out.writeString(getClass().getName()); + if (lexiconChildren == null) + out.writeString(null); + else + lexiconChildren.write(out); + + final FVector inverse = lexiconInv; + int[] indexes = new int[inverse.size()]; + for (int i = 0; i < indexes.length; ++i) + indexes[i] = i; + Sort.sort(indexes, new Sort.IntComparator() { + public int compare(int i1, int i2) { + return inverse.get(i1).compareTo(inverse.get(i2)); + } + }); + + String previousClassName = null; + String previousPackage = null; + String previousClassifier = null; + String previousSIdentifier = null; + ByteString previousBSIdentifier = null; + out.writeInt(indexes.length); + out.writeInt(pruneCutoff); + for (int i = 0; i < indexes.length; ++i) { + Feature f = inverse.get(indexes[i]); + previousClassName = + f.lexWrite(out, this, previousClassName, previousPackage, previousClassifier, + previousSIdentifier, previousBSIdentifier); + previousPackage = f.getPackage(); + previousClassifier = f.getGeneratingClassifier(); + if (f.hasStringIdentifier()) + previousSIdentifier = f.getStringIdentifier(); + else if (f.hasByteStringIdentifier()) + previousBSIdentifier = f.getByteStringIdentifier(); + + out.writeInt(indexes[i]); + } + if (featureCounts == null) + out.writeInt(0); + else + featureCounts.write(out); + if (perClassFeatureCounts == null) + out.writeInt(0); + else + perClassFeatureCounts.write(out); } - } - - - /** - * Immutable type representing the feature counting policy of a lexicon. - * When LBJava's self imposed restriction to use Java 1.4 is lifted, this - * class will be replaced by an enum. - * - *

The three feature counting policies are described below. - * - *

- *
- *
None
- *
Features occurrences are not counted.
- *
Global
- *
- * The lexicon stores one integer count per feature, and every - * occurrence of the feature adds to this count regardless of the - * example it appears in. - *
- *
Per Class
- *
- * The lexicon stores one integer count for each (feature, prediction - * class) pair. When a given feature appears in example, this - * occurrence adds to the count associated with the example's label, - * assuming that examples have a single discrete label. - *
- *
- *
- * - * @author Nick Rizzolo - **/ - public static class CountPolicy - { - /** Represents no counting. */ - public static final CountPolicy none = new CountPolicy(0); - /** Represents global counting. */ - public static final CountPolicy global = new CountPolicy(1); - /** Represents per class counting. */ - public static final CountPolicy perClass = new CountPolicy(2); - - /** The names of the different counting policies as strings. */ - private static final String[] names = { "none", "global", "per class" }; - - - /** Can be used to index the {@link #names} array. */ - private int index; - - - /** Initializes the object with an index. */ - private CountPolicy(int i) { index = i; } - - - /** Retrieves the name of the policy represented by this object. */ - public String toString() { return names[index]; } - } - - - /** - * Represents the feature counting policy of a lexicon. Objects of this - * type are used to identify and describe a desired pruning policy. In - * particular, the description of a pruning policy includes feature count - * thresholds which sometimes need to be computed in terms of data. Space - * is allocated within objects of this type for storing these thresholds - * whenever they are computed. - * - *

The three pruning policies are described below. - * - *

- *
- *
None
- *
No pruning is performed.
- *
Absolute
- *
- * Features whose counts within a given dataset fall below an absolute - * threshold are pruned from that dataset. - *
- *
Percentage
- *
- * Features whose counts within a given dataset are lower than a given - * percentage of the most common feature's count are pruned from that - * dataset. - *
- *
- *
- * - * @author Nick Rizzolo - **/ - public static class PruningPolicy - { - /** Represents no pruning. */ - public static final int NONE = 0; - /** Represents pruning with an absolute threshold. */ - public static final int ABSOLUTE = 1; - /** Represents pruning with a percentage threshold. */ - public static final int PERCENTAGE = 2; - - /** The names of the different counting policies as strings. */ - private static final String[] names = - { "none", "absolute", "percentage" }; - - - /** Can be used to index the {@link #names} array. */ - private int index; + + /** - * The percentage associated with the "Percentage" policy described - * above. + * Reads the binary representation of a lexicon from the + * specified stream, overwriting the data in this object. + * + * @param in The input stream. **/ - private double percentage; + public void read(ExceptionlessInputStream in) { + read(in, true); + } + + /** - * Feature count thresholds which may either be specified by the policy - * explicitly or computed in terms of data. + * Reads the binary representation of a lexicon + * from the specified stream, overwriting the data in this object. This method also gives the + * option to ignore any feature counts stored after the feature mappings by setting the second + * argument to false. + * + * @param in The input stream. + * @param readCounts Whether or not to read the feature counts. 
**/ - private int[] thresholds; + public void read(ExceptionlessInputStream in, boolean readCounts) { + lexiconChildren = (ChildLexicon) Lexicon.readLexicon(in, readCounts); + + Class previousClass = null; + String previousPackage = null; + String previousClassifier = null; + String previousSIdentifier = null; + ByteString previousBSIdentifier = null; + int N = in.readInt(); + pruneCutoff = in.readInt(); + lexicon = null; + lexiconInv = new FVector(N); + for (int i = 0; i < N; ++i) { + Feature f = + Feature.lexReadFeature(in, this, previousClass, previousPackage, + previousClassifier, previousSIdentifier, previousBSIdentifier); + int index = in.readInt(); + lexiconInv.set(index, f); + previousClass = f.getClass(); + previousPackage = f.getPackage(); + previousClassifier = f.getGeneratingClassifier(); + if (f.hasStringIdentifier()) + previousSIdentifier = f.getStringIdentifier(); + else if (f.hasByteStringIdentifier()) + previousBSIdentifier = f.getByteStringIdentifier(); + } + + if (readCounts) { + featureCounts = new IVector(); + featureCounts.read(in); + if (featureCounts.size() == 0) + featureCounts = null; + perClassFeatureCounts = new IVector2D(); + perClassFeatureCounts.read(in); + if (perClassFeatureCounts.size() == 0) + perClassFeatureCounts = null; + } else { + featureCounts = null; + perClassFeatureCounts = null; + } + if (lexiconChildren != null) + lexiconChildren.setParent(this); + + // Tom Redman + // This class is not thread-safe, and this patch does NOT fix it, it just circumvents + // the issue for the NER package. In effect, the lexicon will be inited as soon as the + // data is loaded, circumventing a race condition as multiple threads potentially attempt + // to initialize the lexicon hash table. Adding features to the lexicon in the lookup method + // is not thread safe, so any usages of this class that attempt that will eventially fail + // in massively parallel implementations. + lazyMapCreation(); // not so lazy at all. 
+ } - /** Creates a new pruning policy in which no features will be pruned. */ - public PruningPolicy() { index = NONE; } /** - * Creates a new "Percentage" policy with the given percentage. - * - * @param p The percentage. + * Various other methods in this class call this method to ensure that {@link #lexicon} is + * populated before performing operations on it. The only reason it wouldn't be is if it had + * just been read off disk. **/ - public PruningPolicy(double p) { - index = PERCENTAGE; - percentage = p; + protected void lazyMapCreation() { + if (lexicon == null) { + lexicon = new HashMap(); + int N = lexiconInv.size(); + for (int i = 0; i < N; ++i) + lexicon.put(lexiconInv.get(i), new Integer(i)); + } } + /** - * Creates a new "Absolute" policy with the given threshold. - * - * @param t The threshold. + * Reads the value of {@link #pruneCutoff} + * from the specified stream, discarding everything else. + * + * @param in The input stream. **/ - public PruningPolicy(int t) { - index = ABSOLUTE; - thresholds = new int[]{ t }; + public static int readPrunedSize(ExceptionlessInputStream in) { + in.readInt(); + return in.readInt(); } - /** true iff the policy is no pruning. */ - public boolean isNone() { return index == NONE; } - /** true iff the policy is absolute thresholding. */ - public boolean isAbsolute() { return index == ABSOLUTE; } - /** true iff the policy is percentage thresholding. */ - public boolean isPercentage() { return index == PERCENTAGE; } + /** Returns a text representation of this lexicon (for debugging). */ + public String toString() { + StringBuffer result = new StringBuffer(); + + for (int i = 0; i < lexiconInv.size(); ++i) { + result.append(", "); + result.append(i); + result.append(": "); + result.append(lexiconInv.get(i).toString()); + } + + if (lexiconInv.size() > 0) + return result.substring(2); + return result.toString(); + } /** - * Use this method to establish feature count thresholds in the - * "Percentage" policy. 
- * - * @param t The new feature count thresholds. + * Produces on STDOUT a table of feature counts + * including a line indicating the position of {@link #pruneCutoff}. It's probably not a good + * idea to call this method unless you know your lexicon is small. + * + * @param p Whether or not to include package names in the output. **/ - public void setThresholds(int[] t) { - if (index != PERCENTAGE) - throw new UnsupportedOperationException( - "LBJava ERROR: Lexicon.PruningPolicy.setThresholds should not be " - + "called unless the policy is 'Percentage'."); - thresholds = (int[]) t.clone(); + public void printCountTable(boolean p) { + int rows = lexiconInv.size(); + String[] rowLabels = new String[rows]; + String[] columnLabels = null; + double[][] data = null; + int[] sigDigits = null; + int[] dashRows = {0, pruneCutoff}; + + if (featureCounts != null) { + data = new double[rows][2]; + + for (int i = 0; i < rows; ++i) { + data[i][0] = i; + data[i][1] = featureCounts.get(i); + rowLabels[i] = + p ? lexiconInv.get(i).toString() : lexiconInv.get(i).toStringNoPackage(); + } + + columnLabels = new String[] {"Index", "Count"}; + sigDigits = new int[2]; + } else if (perClassFeatureCounts != null) { + int columns = perClassFeatureCounts.size() + 1; + data = new double[rows][columns]; + + for (int i = 0; i < rows; ++i) { + data[i][0] = i; + for (int j = 0; j < columns - 1; ++j) + data[i][j + 1] = perClassFeatureCounts.get(j, i); + rowLabels[i] = + p ? lexiconInv.get(i).toString() : lexiconInv.get(i).toStringNoPackage(); + } + + columnLabels = new String[columns]; + columnLabels[0] = "Index"; + for (int i = 1; i < columns; ++i) + columnLabels[i] = "Label " + (i - 1); + sigDigits = new int[columns]; + } else { + data = new double[rows][1]; + for (int i = 0; i < rows; ++i) { + data[i][0] = i; + rowLabels[i] = + p ? 
lexiconInv.get(i).toString() : lexiconInv.get(i).toStringNoPackage(); + } + + columnLabels = new String[] {"Index"}; + sigDigits = new int[1]; + } + + TableFormat + .printTableFormat(System.out, columnLabels, rowLabels, data, sigDigits, dashRows); + } + + + // main(String[]) + public static void main(String[] args) { + String filename = null; + boolean p = true; + + try { + filename = args[0]; + if (args.length == 2) + p = Boolean.parseBoolean(args[1]); + if (args.length > 2) + throw new Exception(); + } catch (Exception e) { + System.out + .println("usage: java edu.illinois.cs.cogcomp.lbjava.learn.Lexicon []"); + System.exit(1); + } + + Lexicon lexicon = readLexicon(filename); + lexicon.printCountTable(p); + if (lexicon.lexiconChildren != null) { + System.out.println("\nChildren:"); + lexicon.lexiconChildren.printCountTable(p); + } } /** - * Returns the value of the ith threshold in - * {@link #thresholds} when in "Percentage" mode, but ignores the - * parameter i and returns the first element of - * {@link #thresholds} when in "Absolute" mode. - * - * @param i An index. - * @return A feature count threshold. + * Immutable type representing the feature counting policy of a + * lexicon. When LBJava's self imposed restriction to use Java 1.4 is lifted, this class will be + * replaced by an enum. + * + *

+ * The three feature counting policies are described below. + * + *

+ *
+ *
None
+ *
Features occurrences are not counted.
+ *
Global
+ *
+ * The lexicon stores one integer count per feature, and every occurrence of the feature adds to + * this count regardless of the example it appears in.
+ *
Per Class
+ *
+ * The lexicon stores one integer count for each (feature, prediction class) pair. When a given + * feature appears in example, this occurrence adds to the count associated with the example's + * label, assuming that examples have a single discrete label.
+ *
+ *
+ * + * @author Nick Rizzolo **/ - public int getThreshold(int i) { - if (index == NONE) - throw new UnsupportedOperationException( - "LBJava ERROR: Lexicon.PruningPolicy.getThreshold should never be " - + "called if the pruning policy is 'None'."); - if (index == ABSOLUTE) return thresholds[0]; - return thresholds[i]; - } + public static class CountPolicy { + /** Represents no counting. */ + public static final CountPolicy none = new CountPolicy(0); + /** Represents global counting. */ + public static final CountPolicy global = new CountPolicy(1); + /** Represents per class counting. */ + public static final CountPolicy perClass = new CountPolicy(2); + + /** The names of the different counting policies as strings. */ + private static final String[] names = {"none", "global", "per class"}; + + + /** Can be used to index the {@link #names} array. */ + private int index; + + + /** Initializes the object with an index. */ + private CountPolicy(int i) { + index = i; + } - /** Returns the value of {@link #percentage}. */ - public double getPercentage() { - if (index != PERCENTAGE) - throw new UnsupportedOperationException( - "LBJava ERROR: PruningPolicy: Can't get percentage when pruning " - + "policy isn't 'Percentage'."); - return percentage; + /** Retrieves the name of the policy represented by this object. */ + public String toString() { + return names[index]; + } } - /** Retrieves the name of the policy represented by this object. */ - public String toString() { - String result = names[index]; - if (index == PERCENTAGE) result += "(" + percentage + ")"; - if (index != NONE && thresholds != null) { - result += ": ["; - for (int i = 0; i < thresholds.length; ++i) - result += thresholds[i] + (i + 1 < thresholds.length ? ", " : ""); - result += "]"; - } - return result; + /** + * Represents the feature counting policy of a lexicon. Objects of + * this type are used to identify and describe a desired pruning policy. 
In particular, the + * description of a pruning policy includes feature count thresholds which sometimes need to be + * computed in terms of data. Space is allocated within objects of this type for storing these + * thresholds whenever they are computed. + * + *

+ * The three pruning policies are described below. + * + *

+ *
+ *
None
+ *
No pruning is performed.
+ *
Absolute
+ *
+ * Features whose counts within a given dataset fall below an absolute threshold are pruned from + * that dataset.
+ *
Percentage
+ *
+ * Features whose counts within a given dataset are lower than a given percentage of the most + * common feature's count are pruned from that dataset.
+ *
+ *
+ * + * @author Nick Rizzolo + **/ + public static class PruningPolicy { + /** Represents no pruning. */ + public static final int NONE = 0; + /** Represents pruning with an absolute threshold. */ + public static final int ABSOLUTE = 1; + /** Represents pruning with a percentage threshold. */ + public static final int PERCENTAGE = 2; + + /** The names of the different counting policies as strings. */ + private static final String[] names = {"none", "absolute", "percentage"}; + + + /** Can be used to index the {@link #names} array. */ + private int index; + /** + * The percentage associated with the "Percentage" policy described above. + **/ + private double percentage; + /** + * Feature count thresholds which may either be specified by the policy explicitly or + * computed in terms of data. + **/ + private int[] thresholds; + + + /** Creates a new pruning policy in which no features will be pruned. */ + public PruningPolicy() { + index = NONE; + } + + /** + * Creates a new "Percentage" policy with the given percentage. + * + * @param p The percentage. + **/ + public PruningPolicy(double p) { + index = PERCENTAGE; + percentage = p; + } + + /** + * Creates a new "Absolute" policy with the given threshold. + * + * @param t The threshold. + **/ + public PruningPolicy(int t) { + index = ABSOLUTE; + thresholds = new int[] {t}; + } + + + /** true iff the policy is no pruning. */ + public boolean isNone() { + return index == NONE; + } + + /** true iff the policy is absolute thresholding. */ + public boolean isAbsolute() { + return index == ABSOLUTE; + } + + /** true iff the policy is percentage thresholding. */ + public boolean isPercentage() { + return index == PERCENTAGE; + } + + + /** + * Use this method to establish feature count thresholds in the "Percentage" policy. + * + * @param t The new feature count thresholds. 
+ **/ + public void setThresholds(int[] t) { + if (index != PERCENTAGE) + throw new UnsupportedOperationException( + "LBJava ERROR: Lexicon.PruningPolicy.setThresholds should not be " + + "called unless the policy is 'Percentage'."); + thresholds = (int[]) t.clone(); + } + + + /** + * Returns the value of the ith threshold in {@link #thresholds} + * when in "Percentage" mode, but ignores the parameter i and returns the first + * element of {@link #thresholds} when in "Absolute" mode. + * + * @param i An index. + * @return A feature count threshold. + **/ + public int getThreshold(int i) { + if (index == NONE) + throw new UnsupportedOperationException( + "LBJava ERROR: Lexicon.PruningPolicy.getThreshold should never be " + + "called if the pruning policy is 'None'."); + if (index == ABSOLUTE) + return thresholds[0]; + return thresholds[i]; + } + + + /** Returns the value of {@link #percentage}. */ + public double getPercentage() { + if (index != PERCENTAGE) + throw new UnsupportedOperationException( + "LBJava ERROR: PruningPolicy: Can't get percentage when pruning " + + "policy isn't 'Percentage'."); + return percentage; + } + + + /** Retrieves the name of the policy represented by this object. */ + public String toString() { + String result = names[index]; + if (index == PERCENTAGE) + result += "(" + percentage + ")"; + if (index != NONE && thresholds != null) { + result += ": ["; + for (int i = 0; i < thresholds.length; ++i) + result += thresholds[i] + (i + 1 < thresholds.length ? 
", " : ""); + result += "]"; + } + return result; + } } - } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/LinearThresholdUnit.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/LinearThresholdUnit.java index 75c8cd39..e0abd3ae 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/LinearThresholdUnit.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/LinearThresholdUnit.java @@ -1,835 +1,900 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.learn; import java.util.Arrays; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream; import edu.illinois.cs.cogcomp.lbjava.classify.Classifier; import edu.illinois.cs.cogcomp.lbjava.classify.DiscretePrimitiveStringFeature; import edu.illinois.cs.cogcomp.lbjava.classify.Feature; import edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector; import edu.illinois.cs.cogcomp.lbjava.classify.ScoreSet; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessInputStream; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessOutputStream; +import edu.illinois.cs.cogcomp.lbjava.learn.featurepruning.LinearThresholdUnitOptimizer; import edu.illinois.cs.cogcomp.lbjava.util.FVector; /** - * A LinearThresholdUnit is a {@link Learner} for binary - * classification in which a 
score is computed as a linear function a - * weight vector and the input example, and the decision is made by - * comparing the score to some threshold quantity. Deriving a linear - * threshold algorithm from this class gives the programmer more flexible - * access to the score it computes as well as its promotion and demotion - * methods (if it's on-line). - * - *

On-line, mistake driven algorithms derived from this class need only - * override the {@link #promote(int[],double[],double)}, and - * {@link #demote(int[],double[],double)} - * methods, assuming the score returned by the {@link #score(Object)} method - * need only be compared with {@link #threshold} to make a prediction. - * Otherwise, the {@link #classify(Object)} method also needs to be - * overridden. If the algorithm is not mistake driven, the - * {@link #learn(Object)} method needs to be overridden as well. - * - *

It is assumed that {@link Learner#labeler} is a single discrete - * classifier that produces the same feature for every example object and - * that the values that feature may take are available through the - * {@link Classifier#allowableValues()} method. The first value returned - * from {@link Classifier#allowableValues()} is treated as "negative", and it - * is assumed there are exactly 2 allowable values. Assertions will produce - * error messages if these assumptions do not hold. - * - *

Fitting a "thick separator" instead of just a hyperplane is also - * supported through this class. - * - *

This algorithm's user-configurable parameters are stored in member - * fields of this class. They may be set via either a constructor that names - * each parameter explicitly or a constructor that takes an instance of - * {@link edu.illinois.cs.cogcomp.lbjava.learn.LinearThresholdUnit.Parameters Parameters} as input. - * The documentation in each member field in this class indicates the default - * value of the associated parameter when using the former type of - * constructor. The documentation of the associated member field in the - * {@link edu.illinois.cs.cogcomp.lbjava.learn.LinearThresholdUnit.Parameters Parameters} class - * indicates the default value of the parameter when using the latter type of - * constructor. - * - * @author Nick Rizzolo + * A LinearThresholdUnit is a {@link Learner} for binary classification in which a + * score is computed as a linear function a weight vector and the input example, and the + * decision is made by comparing the score to some threshold quantity. Deriving a linear threshold + * algorithm from this class gives the programmer more flexible access to the score it computes as + * well as its promotion and demotion methods (if it's on-line). + * + *

+ * On-line, mistake driven algorithms derived from this class need only override the + * {@link #promote(int[],double[],double)}, and {@link #demote(int[],double[],double)} methods, + * assuming the score returned by the {@link #score(Object)} method need only be compared with + * {@link #threshold} to make a prediction. Otherwise, the {@link #classify(Object)} method also + * needs to be overridden. If the algorithm is not mistake driven, the {@link #learn(Object)} method + * needs to be overridden as well. + * + *

+ * It is assumed that {@link Learner#labeler} is a single discrete classifier that produces the same + * feature for every example object and that the values that feature may take are available through + * the {@link Classifier#allowableValues()} method. The first value returned from + * {@link Classifier#allowableValues()} is treated as "negative", and it is assumed there are + * exactly 2 allowable values. Assertions will produce error messages if these assumptions do not + * hold. + * + *

+ * Fitting a "thick separator" instead of just a hyperplane is also supported through this class. + * + *

+ * This algorithm's user-configurable parameters are stored in member fields of this class. They may + * be set via either a constructor that names each parameter explicitly or a constructor that takes + * an instance of {@link edu.illinois.cs.cogcomp.lbjava.learn.LinearThresholdUnit.Parameters + * Parameters} as input. The documentation in each member field in this class indicates the default + * value of the associated parameter when using the former type of constructor. The documentation of + * the associated member field in the + * {@link edu.illinois.cs.cogcomp.lbjava.learn.LinearThresholdUnit.Parameters Parameters} class + * indicates the default value of the parameter when using the latter type of constructor. + * + * @author Nick Rizzolo **/ -public abstract class LinearThresholdUnit extends Learner -{ - /** Default for {@link #initialWeight}. */ - public static final double defaultInitialWeight = 0; - /** Default for {@link #threshold}. */ - public static final double defaultThreshold = 0; - /** Default for {@link #positiveThickness}. */ - public static final double defaultThickness = 0; - /** Default value for {@link #learningRate}. */ - public static final double defaultLearningRate = 0.1; - /** Default for {@link #weightVector}. */ - public static final SparseWeightVector defaultWeightVector = new SparseWeightVector(); - - /** - * The rate at which weights are updated; default - * {@link #defaultLearningRate}. - **/ - protected double learningRate; - /** The LTU's weight vector; default is an empty vector. */ - protected SparseWeightVector weightVector; - /** - * The weight associated with a feature when first added to the vector; - * default {@link #defaultInitialWeight}. - **/ - protected double initialWeight; - /** - * The score is compared against this value to make predictions; default - * {@link LinearThresholdUnit#defaultThreshold}. 
- **/ - protected double threshold; - /** - * The bias is stored here rather than as an element of the weight vector. - **/ - protected double bias; - /** - * The thickness of the hyperplane on the positive side; default - * {@link #defaultThickness}. - **/ - protected double positiveThickness; - /** - * The thickness of the hyperplane on the negative side; default equal to - * {@link #positiveThickness}. - **/ - protected double negativeThickness; - /** The label producing classifier's allowable values. */ - protected String[] allowableValues; - - /** - * Default constructor. The learning rate and threshold take default - * values, while the name of the classifier gets the empty string. - **/ - public LinearThresholdUnit() { this(""); } - - /** - * Initializing constructor. Sets the learning rate to the specified value, - * and the threshold and thickness take the default, while the name of the - * classifier gets the empty string. - * - * @param r The desired learning rate. - **/ - public LinearThresholdUnit(double r) { this("", r); } - - - /** - * Sets the learning rate and threshold to the specified values, while the - * name of the classifier gets the empty string. - - * - * @param r The desired learning rate value. - * @param t The desired threshold value. - **/ - public LinearThresholdUnit(double r, double t) { this("", r, t); } - - /** - * Use this constructor to fit a thick separator, where both the positive - * and negative sides of the hyperplane will be given the specified - * thickness, while the name of the classifier gets the empty string. - * - * @param r The desired learning rate value. - * @param t The desired threshold value. - * @param pt The desired thickness. 
- **/ - public LinearThresholdUnit(double r, double t, double pt) { - this("", r, t, pt); - } - - /** - * Use this constructor to fit a thick separator, where the positive and - * negative sides of the hyperplane will be given the specified separate - * thicknesses, while the name of the classifier gets the empty string. - * - * @param r The desired learning rate value. - * @param t The desired threshold value. - * @param pt The desired positive thickness. - * @param nt The desired negative thickness. - **/ - public LinearThresholdUnit(double r, double t, double pt, double nt) { - this("", r, t, pt, nt); - } - - /** - * Initializing constructor. Sets the threshold, positive thickness, and - * negative thickness to their default values. - * - * @param n The name of the classifier. - **/ - protected LinearThresholdUnit(String n) { this(n, defaultLearningRate); } - - - /** - * Default constructor. Sets the threshold, positive thickness, and - * negative thickness to their default values. - * - * @param n The name of the classifier. - * @param r The desired learning rate. - **/ - protected LinearThresholdUnit(String n, double r) { - this(n, r, defaultThreshold); - } - - /** - * Initializing constructor. Sets the threshold to the specified value, - * while the positive and negative thicknesses get their defaults. - * - * @param n The name of the classifier. - * @param r The desired learning rate. - * @param t The desired value for the threshold. - **/ - protected LinearThresholdUnit(String n, double r, double t) { - this(n, r, t, defaultThickness); - } - - /** - * Initializing constructor. Sets the threshold and positive thickness to - * the specified values, and the negative thickness is set to the same - * value as the positive thickness. - * - * @param n The name of the classifier. - * @param r The desired learning rate. - * @param t The desired value for the threshold. - * @param pt The desired thickness. 
- **/ - protected LinearThresholdUnit(String n, double r, double t, double pt) { - this(n, r, t, pt, pt); - } - - /** - * Initializing constructor. Sets the threshold, positive thickness, and - * negative thickness to the specified values. - * - * @param n The name of the classifier. - * @param r The desired learning rate. - * @param t The desired value for the threshold. - * @param pt The desired positive thickness. - * @param nt The desired negative thickness. - **/ - protected LinearThresholdUnit(String n, double r, double t, double pt, - double nt) { - this(n, r, t, pt, nt, (SparseWeightVector) defaultWeightVector.clone()); - } - - /** - * Initializing constructor. Sets the threshold, positive thickness, and - * negative thickness to the specified values. - * - * @param n The name of the classifier. - * @param r The desired learning rate. - * @param t The desired value for the threshold. - * @param pt The desired positive thickness. - * @param nt The desired negative thickness. - * @param v An initial weight vector. - **/ - protected LinearThresholdUnit(String n, double r, double t, double pt, - double nt, SparseWeightVector v) { - super(n); - Parameters p = new Parameters(); - p.weightVector = v; - p.threshold = t; - p.learningRate = r; - p.positiveThickness = pt; - p.negativeThickness = nt; - setParameters(p); - } - - /** - * Initializing constructor. Sets all member variables to their associated - * settings in the {@link LinearThresholdUnit.Parameters} object. - * The name of the classifier is the empty string. - * - * @param p The settings of all parameters. - **/ - protected LinearThresholdUnit(Parameters p) { this("", p); } - - /** - * Initializing constructor. Sets all member variables to their associated - * settings in the {@link LinearThresholdUnit.Parameters} object. - * - * @param n The name of the classifier. - * @param p The settings of all parameters. 
- **/ - protected LinearThresholdUnit(String n, Parameters p) { - super(n); - setParameters(p); - } - - public SparseWeightVector getWeightVector() { return weightVector; } - - public double getBias() { return bias; } - - public String[] getAllowableValues() { return allowableValues; } - - /** - * Sets the values of parameters that control the behavior of this learning - * algorithm. - * - * @param p The parameters. - **/ - public void setParameters(Parameters p) { - learningRate = p.learningRate; - weightVector = p.weightVector; - initialWeight = p.initialWeight; - threshold = p.threshold; - bias = p.initialWeight; - positiveThickness = p.thickness + p.positiveThickness; - negativeThickness = p.thickness + p.negativeThickness; - } - - /** - * Retrieves the parameters that are set in this learner. - * - * @return An object containing all the values of the parameters that - * control the behavior of this learning algorithm. - **/ - public Learner.Parameters getParameters() { - Parameters p = new Parameters(super.getParameters()); - p.learningRate = learningRate; - p.weightVector = weightVector.emptyClone(); - p.initialWeight = initialWeight; - p.threshold = threshold; - p.positiveThickness = positiveThickness; - p.negativeThickness = negativeThickness; - return p; - } - - - /** - * Sets the labels list. - * - * @param l A new label producing classifier. - **/ - public void setLabeler(Classifier l) { - if (!(l == null || l.allowableValues().length == 2)) { - System.err.println( - "Error: " + name - + ": An LTU must be given a single binary label classifier."); - new Exception().printStackTrace(); - System.exit(1); - } - - super.setLabeler(l); - allowableValues = l == null ? 
null : l.allowableValues(); - labelLexicon.clear(); - labelLexicon.lookup( - new DiscretePrimitiveStringFeature( - l.containingPackage, l.name, "", allowableValues[0], (short) 0, - (short) 2), - true); - labelLexicon.lookup( - new DiscretePrimitiveStringFeature( - l.containingPackage, l.name, "", allowableValues[1], (short) 1, - (short) 2), - true); - predictions = new FVector(2); - createPrediction(0); - createPrediction(1); - } - - - /** - * Returns the current value of the {@link #initialWeight} variable. - * - * @return The value of the {@link #initialWeight} variable. - **/ - public double getInitialWeight() { return initialWeight; } - - - /** - * Sets the {@link #initialWeight} member variable to the specified value. - * - * @param w The new value for {@link #initialWeight}. - **/ - public void setInitialWeight(double w) { initialWeight = w; } - - - /** - * Returns the current value of the {@link #threshold} variable. - * - * @return The value of the {@link #threshold} variable. - **/ - public double getThreshold() { return threshold; } - - - /** - * Sets the {@link #threshold} member variable to the specified value. - * - * @param t The new value for {@link #threshold}. - **/ - public void setThreshold(double t) { threshold = t; } - - - /** - * Returns the current value of the {@link #positiveThickness} variable. - * - * @return The value of the {@link #positiveThickness} variable. - **/ - public double getPositiveThickness() { return positiveThickness; } - - - /** - * Sets the {@link #positiveThickness} member variable to the specified - * value. - * - * @param t The new value for {@link #positiveThickness}. - **/ - public void setPositiveThickness(double t) { - positiveThickness = t; - } - - - /** - * Returns the current value of the {@link #negativeThickness} variable. - * - * @return The value of the {@link #negativeThickness} variable. 
- **/ - public double getNegativeThickness() { return negativeThickness; } - - - /** - * Sets the {@link #negativeThickness} member variable to the specified - * value. - * - * @param t The new value for {@link #negativeThickness}. - **/ - public void setNegativeThickness(double t) { negativeThickness = t; } - - - /** - * Sets the {@link #positiveThickness} and {@link #negativeThickness} - * member variables to the specified value. - * - * @param t The new thickness value. - **/ - public void setThickness(double t) { - positiveThickness = negativeThickness = t; - } - - - /** - * Returns the array of allowable values that a feature returned by this - * classifier may take. - * - * @return If a labeler has not yet been established for this LTU, byte - * strings equivalent to { "*", "*" } are returned, - * which indicates to the compiler that classifiers derived from - * this learner will return features that take one of two values - * that are specified in the source code. Otherwise, the allowable - * values of the labeler are returned. - **/ - public String[] allowableValues() { - if (allowableValues == null) allowableValues = new String[]{ "*", "*" }; - return allowableValues; - } - - - /** - * The default training algorithm for a linear threshold unit consists of - * evaluating the example object with the {@link #score(Object)} method and - * {@link #threshold}, checking the result of evaluation against the label, - * and, if they are different, promoting when the label is positive or - * demoting when the label is negative. - * - *

This method does not call {@link #classify(Object)}; it calls - * {@link #score(Object)} directly. - * - * @param exampleFeatures The example's array of feature indices - * @param exampleValues The example's array of feature values - * @param exampleLabels The example's label(s) - * @param labelValues The labels' values - **/ - public void learn(int[] exampleFeatures, double[] exampleValues, - int[] exampleLabels, double[] labelValues) { - assert exampleLabels.length == 1 - : "Example must have a single label."; - assert exampleLabels[0] == 0 || exampleLabels[0] == 1 - : "Example has unallowed label value."; - - boolean label = (exampleLabels[0] == 1); - - double s = score(exampleFeatures, exampleValues); - - if (shouldPromote(label, s, threshold, positiveThickness)) - promote(exampleFeatures, exampleValues, - computeLearningRate(exampleFeatures, exampleValues, s, label)); - if (shouldDemote(label, s, threshold, negativeThickness)) - demote(exampleFeatures, exampleValues, - computeLearningRate(exampleFeatures, exampleValues, s, label)); - } - - - /** - * Computes the value of the {@link #learningRate} variable if needed - * and returns the value. By default, the current value of - * {@link #learningRate} - * is returned. 
- * - * @param exampleFeatures The example's array of feature indices - * @param exampleValues The example's array of feature values - * @param s The score of the example object - * @param label The label of the example object - * @return The computed value of the {@link #learningRate} variable - **/ - public double computeLearningRate(int[] exampleFeatures, - double[] exampleValues, double s, - boolean label) { - return learningRate; - } - - - /** - * Determines if the weights should be promoted - * - * @param label The label of the example object - * @param s The score of the example object - * @param threshold The LTU threshold - * @param positiveThickness The thickness of the hyperplane on - the positive side - * @return True if the weights should be promoted, false otherwise. - **/ - public boolean shouldPromote(boolean label, double s, double threshold, - double positiveThickness) { - return (label && s < threshold + positiveThickness); - } - - /** - * Determines if the weights should be demoted - * - * @param label The label of the example object - * @param s The score of the example object - * @param threshold The LTU threshold - * @param negativeThickness The thickness of the hyperplane on - the negative side - * @return True if the weights should be demoted, false otherwise. - **/ - public boolean shouldDemote(boolean label, double s, double threshold, - double negativeThickness) { - return (!label && s >= threshold - negativeThickness); - } - - - /** - * Initializes the weight vector array to the size of the specified number - * of features, setting each weight equal to {@link #initialWeight}. - **/ - public void initialize(int numExamples, int numFeatures) { - double[] weights = new double[numFeatures]; - Arrays.fill(weights, initialWeight); - weightVector = new SparseWeightVector(weights); - } - - - /** - * An LTU returns two scores; one for the negative classification and one - * for the positive classification. 
By default, the score for the positive - * classification is the result of {@link #score(Object)} minus the - * {@link #threshold}, and the score for the negative classification is the - * opposite of the positive classification's score. - * - * @param exampleFeatures The example's array of feature indices - * @param exampleValues The example's array of feature values - * @return Two scores as described above. - **/ - public ScoreSet scores(int[] exampleFeatures, double[] exampleValues) { - double s = score(exampleFeatures, exampleValues) - threshold; - ScoreSet result = new ScoreSet(); - result.put(allowableValues[0], -s); - result.put(allowableValues[1], s); - return result; - } - - - /** - * Returns the classification of the given example as a single feature - * instead of a {@link FeatureVector}. - * - * @param f The features array. - * @param v The values array. - * @return The classification of the example as a feature. - **/ - public Feature featureValue(int[] f, double[] v) { - int index = score(f, v) >= threshold ? 1 : 0; - return predictions.get(index); - } - - - /** - * The default evaluation method simply computes the score for the example - * and returns a {@link DiscretePrimitiveStringFeature} set to either the - * second value from the label classifier's array of allowable values if - * the score is greater than or equal to {@link #threshold} or the first - * otherwise. - * - * @param exampleFeatures The example's array of feature indices - * @param exampleValues The example's array of feature values - * @return The computed feature (in a vector). - **/ - public String discreteValue(int[] exampleFeatures, double[] exampleValues) { - int index = score(exampleFeatures, exampleValues) >= threshold ? 
1 : 0; - return allowableValues[index]; - } - - - /** - * The default evaluation method simply computes the score for the example - * and returns a {@link DiscretePrimitiveStringFeature} set to either the - * second value from the label classifier's array of allowable values if - * the score is greater than or equal to {@link #threshold} or the first - * otherwise. - * - * @param exampleFeatures The example's array of feature indices - * @param exampleValues The example's array of feature values - * @return The computed feature (in a vector). - **/ - public FeatureVector classify(int[] exampleFeatures, double[] exampleValues) - { - return new FeatureVector(featureValue(exampleFeatures, exampleValues)); - } - - - /** - * Computes the score for the specified example vector which will be - * thresholded to make the binary classification. - * - * @param example The example object. - * @return The score for the given example vector. - **/ - public double score(Object example) { - Object[] exampleArray = getExampleArray(example, false); - return score((int[]) exampleArray[0], (double[]) exampleArray[1]); - } - - - /** - * Computes the score for the specified example vector which will be - * thresholded to make the binary classification. - * - * @param exampleFeatures The example's array of feature indices - * @param exampleValues The example's array of feature values - * @return The score for the given example vector. - **/ - public double score(int[] exampleFeatures, double[] exampleValues) { - return weightVector.dot(exampleFeatures, exampleValues, initialWeight) - + bias; - } - - - /** - * Resets the weight vector to associate the default weight with all - * features. 
- **/ - public void forget() { - super.forget(); - weightVector = weightVector.emptyClone(); - bias = initialWeight; - setLabeler(labeler); - } - - - /** - * If the LinearThresholdUnit is mistake driven, this method - * should be overridden and used to update the internal representation when - * a mistake is made on a positive example. - * - * @param exampleFeatures The example's array of feature indices - * @param exampleValues The example's array of feature values - * @param rate The learning rate at which the weights are updated. - **/ - public abstract void promote(int[] exampleFeatures, double[] exampleValues, - double rate); - - - /** - * If the LinearThresholdUnit is mistake driven, this method - * should be overridden and used to update the internal representation when - * a mistake is made on a negative example. - * - * @param exampleFeatures The example's array of feature indices - * @param exampleValues The example's array of feature values - * @param rate The learning rate at which the weights are updated. - **/ - public abstract void demote(int[] exampleFeatures, double[] exampleValues, - double rate); - - - /** - * Writes the learned function's internal representation in binary form. - * - * @param out The output stream. - **/ - public void write(ExceptionlessOutputStream out) { - super.write(out); - - if (allowableValues == null) out.writeInt(0); - else { - out.writeInt(allowableValues.length); - for (int i = 0; i < allowableValues.length; ++i) - out.writeString(allowableValues[i]); - } - - out.writeDouble(initialWeight); - out.writeDouble(threshold); - out.writeDouble(learningRate); - out.writeDouble(positiveThickness); - out.writeDouble(negativeThickness); - out.writeDouble(bias); - weightVector.write(out); - } - - - /** - * Reads the binary representation of a learner with this object's run-time - * type, overwriting any and all learned or manually specified parameters - * as well as the label lexicon but without modifying the feature lexicon. 
- * - * @param in The input stream. - **/ - public void read(ExceptionlessInputStream in) { - super.read(in); - - int N = in.readInt(); - if (N == 0) allowableValues = null; - else { - allowableValues = new String[N]; - for (int i = 0; i < N; ++i) - allowableValues[i] = in.readString(); - } - - initialWeight = in.readDouble(); - threshold = in.readDouble(); - learningRate = in.readDouble(); - positiveThickness = in.readDouble(); - negativeThickness = in.readDouble(); - bias = in.readDouble(); - weightVector = SparseWeightVector.readWeightVector(in); - } - - - /** Returns a deep clone of this learning algorithm. */ - public Object clone() { - LinearThresholdUnit clone = (LinearThresholdUnit) super.clone(); - if (weightVector != null) - clone.weightVector = (SparseWeightVector) weightVector.clone(); - return clone; - } - - - /** - * Simply a container for all of {@link LinearThresholdUnit}'s configurable - * parameters. Using instances of this class should make code more - * readable and constructors less complicated. - * - * @author Nick Rizzolo - **/ - public static class Parameters extends Learner.Parameters - { - /** - * The rate at which weights are updated; default - * {@link LinearThresholdUnit#defaultLearningRate}. - **/ - public double learningRate; +@SuppressWarnings("serial") +public abstract class LinearThresholdUnit extends Learner { + /** Default for {@link #initialWeight}. */ + public static final double defaultInitialWeight = 0; + /** Default for {@link #threshold}. */ + public static final double defaultThreshold = 0; + /** Default for {@link #positiveThickness}. */ + public static final double defaultThickness = 0; + /** Default value for {@link #learningRate}. */ + public static final double defaultLearningRate = 0.1; + /** Default for {@link #weightVector}. */ + public static final SparseWeightVector defaultWeightVector = new SparseWeightVector(); + /** any weight less than this is considered irrelevant. This is for prunning. 
*/ + public static final double defaultFeaturePruningThreshold = 0.000001; + + /** + * The rate at which weights are updated; default {@link #defaultLearningRate}. + **/ + protected double learningRate; /** The LTU's weight vector; default is an empty vector. */ - public SparseWeightVector weightVector; + protected SparseWeightVector weightVector; + /** + * The weight associated with a feature when first added to the vector; default + * {@link #defaultInitialWeight}. + **/ + protected double initialWeight; /** - * The weight associated with a feature when first added to the vector; - * default {@link LinearThresholdUnit#defaultInitialWeight}. + * The score is compared against this value to make predictions; default + * {@link LinearThresholdUnit#defaultThreshold}. **/ - public double initialWeight; + protected double threshold; /** - * The score is compared against this value to make predictions; default - * {@link LinearThresholdUnit#defaultThreshold}. + * The bias is stored here rather than as an element of the weight vector. **/ - public double threshold; + protected double bias; /** - * This thickness will be added to both {@link #positiveThickness} and - * {@link #negativeThickness}; default - * {@link LinearThresholdUnit#defaultThickness}. + * The thickness of the hyperplane on the positive side; default {@link #defaultThickness}. **/ - public double thickness; - /** The thickness of the hyperplane on the positive side; default 0. */ - public double positiveThickness; - /** The thickness of the hyperplane on the negative side; default 0. */ - public double negativeThickness; + protected double positiveThickness; + /** + * The thickness of the hyperplane on the negative side; default equal to + * {@link #positiveThickness}. + **/ + protected double negativeThickness; + /** The label producing classifier's allowable values. */ + protected String[] allowableValues; + /** feature pruning threshold caps magnitude of useful features. 
*/ + public double featurePruningThreshold; + /** + * Default constructor. The learning rate and threshold take default values, while the name of + * the classifier gets the empty string. + **/ + public LinearThresholdUnit() { + this(""); + } - /** Sets all the default values. */ - public Parameters() { - learningRate = defaultLearningRate; - weightVector = (SparseWeightVector) defaultWeightVector.clone(); - initialWeight = defaultInitialWeight; - threshold = defaultThreshold; - thickness = defaultThickness; + /** + * Initializing constructor. Sets the learning rate to the specified value, and the threshold + * and thickness take the default, while the name of the classifier gets the empty string. + * + * @param r The desired learning rate. + **/ + public LinearThresholdUnit(double r) { + this("", r); } /** - * Sets the parameters from the parent's parameters object, giving - * defaults to all parameters declared in this object. + * Sets the learning rate and threshold to the specified values, while the name of the + * classifier gets the empty string. + * + * + * @param r The desired learning rate value. + * @param t The desired threshold value. **/ - public Parameters(Learner.Parameters p) { this(); } + public LinearThresholdUnit(double r, double t) { + this("", r, t); + } + /** + * Use this constructor to fit a thick separator, where both the positive and negative sides of + * the hyperplane will be given the specified thickness, while the name of the classifier gets + * the empty string. + * + * @param r The desired learning rate value. + * @param t The desired threshold value. + * @param pt The desired thickness. + **/ + public LinearThresholdUnit(double r, double t, double pt) { + this("", r, t, pt); + } - /** Copy constructor. 
*/ - public Parameters(Parameters p) { - learningRate = p.learningRate; - weightVector = p.weightVector; - initialWeight = p.initialWeight; - threshold = p.threshold; - thickness = p.thickness; - positiveThickness = p.positiveThickness; - negativeThickness = p.negativeThickness; + /** + * Use this constructor to fit a thick separator, where the positive and negative sides of the + * hyperplane will be given the specified separate thicknesses, while the name of the classifier + * gets the empty string. + * + * @param r The desired learning rate value. + * @param t The desired threshold value. + * @param pt The desired positive thickness. + * @param nt The desired negative thickness. + **/ + public LinearThresholdUnit(double r, double t, double pt, double nt) { + this("", r, t, pt, nt); } + /** + * Use this constructor to fit a thick separator, where the positive and negative sides of the + * hyperplane will be given the specified separate thicknesses, while the name of the classifier + * gets the empty string. + * + * @param r The desired learning rate value. + * @param t The desired threshold value. + * @param pt The desired positive thickness. + * @param nt The desired negative thickness. + * @param fpt The feature pruning threshold. + */ + public LinearThresholdUnit(double r, double t, double pt, double nt, double fpt) { + this("", r, t, pt, nt, fpt); + } /** - * Calls the appropriate Learner.setParameters(Parameters) - * method for this Parameters object. - * - * @param l The learner whose parameters will be set. + * Initializing constructor. Sets the threshold, positive thickness, and negative thickness to + * their default values. + * + * @param n The name of the classifier. 
**/ - public void setParameters(Learner l) { - ((LinearThresholdUnit) l).setParameters(this); + protected LinearThresholdUnit(String n) { + this(n, defaultLearningRate); } /** - * Creates a string representation of these parameters in which only - * those parameters that differ from their default values are mentioned. + * Default constructor. Sets the threshold, positive thickness, and negative thickness to their + * default values. + * + * @param n The name of the classifier. + * @param r The desired learning rate. **/ - public String nonDefaultString() { - String result = super.nonDefaultString(); + protected LinearThresholdUnit(String n, double r) { + this(n, r, defaultThreshold); + } - if (learningRate != LinearThresholdUnit.defaultLearningRate) - result += ", learningRate = " + learningRate; - if (initialWeight != LinearThresholdUnit.defaultInitialWeight) - result += ", initialWeight = " + initialWeight; - if (threshold != LinearThresholdUnit.defaultThreshold) - result += ", threshold = " + threshold; - if (thickness != LinearThresholdUnit.defaultThickness) - result += ", thickness = " + thickness; - if (positiveThickness != 0) - result += ", positiveThickness = " + positiveThickness; - if (negativeThickness != 0) - result += ", negativeThickness = " + negativeThickness; + /** + * Initializing constructor. Sets the threshold to the specified value, while the positive and + * negative thicknesses get their defaults. + * + * @param n The name of the classifier. + * @param r The desired learning rate. + * @param t The desired value for the threshold. + **/ + protected LinearThresholdUnit(String n, double r, double t) { + this(n, r, t, defaultThickness); + } - if (result.startsWith(", ")) result = result.substring(2); - return result; + /** + * Initializing constructor. Sets the threshold and positive thickness to the specified values, + * and the negative thickness is set to the same value as the positive thickness. + * + * @param n The name of the classifier. 
+ * @param r The desired learning rate. + * @param t The desired value for the threshold. + * @param pt The desired thickness. + **/ + protected LinearThresholdUnit(String n, double r, double t, double pt) { + this(n, r, t, pt, pt); + } + + /** + * Initializing constructor. Sets the threshold, positive thickness, and negative thickness to + * the specified values. + * + * @param n The name of the classifier. + * @param r The desired learning rate. + * @param t The desired value for the threshold. + * @param pt The desired positive thickness. + * @param nt The desired negative thickness. + **/ + protected LinearThresholdUnit(String n, double r, double t, double pt, double nt) { + this(n, r, t, pt, nt, defaultFeaturePruningThreshold); + } + + /** + * Takes the rate, threshold, positive thickness, and negative thickness and vector. + * + * @param n The name of the classifier. + * @param r The desired learning rate. + * @param t The desired value for the threshold. + * @param pt The desired positive thickness. + * @param nt The desired negative thickness. + * @param v An initial weight vector. + **/ + protected LinearThresholdUnit(String n, double r, double t, double pt, double nt, SparseWeightVector v) { + this(n, r, t, pt, nt, defaultFeaturePruningThreshold, v); + } + + /** + * Initializing constructor. Sets the threshold, positive thickness, and negative thickness to + * the specified values. + * + * @param n The name of the classifier. + * @param r The desired learning rate. + * @param t The desired value for the threshold. + * @param pt The desired positive thickness. + * @param nt The desired negative thickness. + * @param fpt The feature pruning threshold. + **/ + protected LinearThresholdUnit(String n, double r, double t, double pt, double nt, double fpt) { + this(n, r, t, pt, nt, fpt, (SparseWeightVector) defaultWeightVector.clone()); + } + + /** + * Initializing constructor. 
Sets the threshold, positive thickness, and negative thickness to + * the specified values. + * + * @param n The name of the classifier. + * @param r The desired learning rate. + * @param t The desired value for the threshold. + * @param pt The desired positive thickness. + * @param nt The desired negative thickness. + * @param fpt The feature pruning threshold. + * @param v An initial weight vector. + **/ + protected LinearThresholdUnit(String n, double r, double t, double pt, double nt, double fpt, + SparseWeightVector v) { + super(n); + Parameters p = new Parameters(); + p.weightVector = v; + p.threshold = t; + p.learningRate = r; + p.positiveThickness = pt; + p.negativeThickness = nt; + p.featurePruningThreshold = fpt; + setParameters(p); + } + + /** + * Initializing constructor. Sets all member variables to their associated settings in the + * {@link LinearThresholdUnit.Parameters} object. The name of the classifier is the empty + * string. + * + * @param p The settings of all parameters. + **/ + protected LinearThresholdUnit(Parameters p) { + this("", p); + } + + /** + * Initializing constructor. Sets all member variables to their associated settings in the + * {@link LinearThresholdUnit.Parameters} object. + * + * @param n The name of the classifier. + * @param p The settings of all parameters. + **/ + protected LinearThresholdUnit(String n, Parameters p) { + super(n); + setParameters(p); + } + + public SparseWeightVector getWeightVector() { + return weightVector; + } + + public double getBias() { + return bias; + } + + public String[] getAllowableValues() { + return allowableValues; + } + + /** + * Sets the values of parameters that control the behavior of this learning algorithm. + * + * @param p The parameters. 
+ **/ + public void setParameters(Parameters p) { + learningRate = p.learningRate; + weightVector = p.weightVector; + initialWeight = p.initialWeight; + threshold = p.threshold; + bias = p.initialWeight; + positiveThickness = p.thickness + p.positiveThickness; + negativeThickness = p.thickness + p.negativeThickness; + featurePruningThreshold = p.featurePruningThreshold; + } + + /** + * Retrieves the parameters that are set in this learner. + * + * @return An object containing all the values of the parameters that control the behavior of + * this learning algorithm. + **/ + public Learner.Parameters getParameters() { + Parameters p = new Parameters(super.getParameters()); + p.learningRate = learningRate; + p.weightVector = weightVector.emptyClone(); + p.initialWeight = initialWeight; + p.threshold = threshold; + p.positiveThickness = positiveThickness; + p.negativeThickness = negativeThickness; + p.featurePruningThreshold = featurePruningThreshold; + return p; + } + + + /** + * Sets the labels list. + * + * @param l A new label producing classifier. + **/ + public void setLabeler(Classifier l) { + if (!(l == null || l.allowableValues().length == 2)) { + System.err.println("Error: " + name + + ": An LTU must be given a single binary label classifier."); + new Exception().printStackTrace(); + System.exit(1); + } + + super.setLabeler(l); + allowableValues = l == null ? null : l.allowableValues(); + labelLexicon.clear(); + labelLexicon.lookup(new DiscretePrimitiveStringFeature(l.containingPackage, l.name, "", + allowableValues[0], (short) 0, (short) 2), true); + labelLexicon.lookup(new DiscretePrimitiveStringFeature(l.containingPackage, l.name, "", + allowableValues[1], (short) 1, (short) 2), true); + predictions = new FVector(2); + createPrediction(0); + createPrediction(1); + } + + + /** + * Returns the current value of the {@link #initialWeight} variable. + * + * @return The value of the {@link #initialWeight} variable. 
+ **/ + public double getInitialWeight() { + return initialWeight; + } + + + /** + * Sets the {@link #initialWeight} member variable to the specified value. + * + * @param w The new value for {@link #initialWeight}. + **/ + public void setInitialWeight(double w) { + initialWeight = w; + } + + + /** + * Returns the current value of the {@link #threshold} variable. + * + * @return The value of the {@link #threshold} variable. + **/ + public double getThreshold() { + return threshold; + } + + + /** + * Sets the {@link #threshold} member variable to the specified value. + * + * @param t The new value for {@link #threshold}. + **/ + public void setThreshold(double t) { + threshold = t; + } + + + /** + * Returns the current value of the {@link #positiveThickness} variable. + * + * @return The value of the {@link #positiveThickness} variable. + **/ + public double getPositiveThickness() { + return positiveThickness; + } + + + /** + * Sets the {@link #positiveThickness} member variable to the specified value. + * + * @param t The new value for {@link #positiveThickness}. + **/ + public void setPositiveThickness(double t) { + positiveThickness = t; + } + + + /** + * Returns the current value of the {@link #negativeThickness} variable. + * + * @return The value of the {@link #negativeThickness} variable. + **/ + public double getNegativeThickness() { + return negativeThickness; } - } -} + + /** + * Sets the {@link #negativeThickness} member variable to the specified value. + * + * @param t The new value for {@link #negativeThickness}. + **/ + public void setNegativeThickness(double t) { + negativeThickness = t; + } + + + /** + * Sets the {@link #positiveThickness} and {@link #negativeThickness} member variables to the + * specified value. + * + * @param t The new thickness value. + **/ + public void setThickness(double t) { + positiveThickness = negativeThickness = t; + } + + + /** + * Returns the array of allowable values that a feature returned by this classifier may take. 
+ * + * @return If a labeler has not yet been established for this LTU, byte strings equivalent to + * { "*", "*" } are returned, which indicates to the compiler that + * classifiers derived from this learner will return features that take one of two + * values that are specified in the source code. Otherwise, the allowable values of the + * labeler are returned. + **/ + public String[] allowableValues() { + if (allowableValues == null) + allowableValues = new String[] {"*", "*"}; + return allowableValues; + } + + + /** + * The default training algorithm for a linear threshold unit consists of evaluating the example + * object with the {@link #score(Object)} method and {@link #threshold}, checking the result of + * evaluation against the label, and, if they are different, promoting when the label is + * positive or demoting when the label is negative. + * + *

+ * This method does not call {@link #classify(Object)}; it calls {@link #score(Object)} + * directly. + * + * @param exampleFeatures The example's array of feature indices + * @param exampleValues The example's array of feature values + * @param exampleLabels The example's label(s) + * @param labelValues The labels' values + **/ + public void learn(int[] exampleFeatures, double[] exampleValues, int[] exampleLabels, + double[] labelValues) { + assert exampleLabels.length == 1 : "Example must have a single label."; + assert exampleLabels[0] == 0 || exampleLabels[0] == 1 : "Example has unallowed label value."; + + boolean label = (exampleLabels[0] == 1); + + double s = score(exampleFeatures, exampleValues); + + if (shouldPromote(label, s, threshold, positiveThickness)) + promote(exampleFeatures, exampleValues, + computeLearningRate(exampleFeatures, exampleValues, s, label)); + if (shouldDemote(label, s, threshold, negativeThickness)) + demote(exampleFeatures, exampleValues, + computeLearningRate(exampleFeatures, exampleValues, s, label)); + } + + + /** + * Computes the value of the {@link #learningRate} variable if needed and returns the value. By + * default, the current value of {@link #learningRate} is returned. 
+ * + * @param exampleFeatures The example's array of feature indices + * @param exampleValues The example's array of feature values + * @param s The score of the example object + * @param label The label of the example object + * @return The computed value of the {@link #learningRate} variable + **/ + public double computeLearningRate(int[] exampleFeatures, double[] exampleValues, double s, + boolean label) { + return learningRate; + } + + + /** + * Determines if the weights should be promoted + * + * @param label The label of the example object + * @param s The score of the example object + * @param threshold The LTU threshold + * @param positiveThickness The thickness of the hyperplane on the positive side + * @return True if the weights should be promoted, false otherwise. + **/ + public boolean shouldPromote(boolean label, double s, double threshold, double positiveThickness) { + return (label && s < threshold + positiveThickness); + } + + /** + * Determines if the weights should be demoted + * + * @param label The label of the example object + * @param s The score of the example object + * @param threshold The LTU threshold + * @param negativeThickness The thickness of the hyperplane on the negative side + * @return True if the weights should be demoted, false otherwise. + **/ + public boolean shouldDemote(boolean label, double s, double threshold, double negativeThickness) { + return (!label && s >= threshold - negativeThickness); + } + + + /** + * Initializes the weight vector array to the size of the specified number of features, setting + * each weight equal to {@link #initialWeight}. + **/ + public void initialize(int numExamples, int numFeatures) { + double[] weights = new double[numFeatures]; + Arrays.fill(weights, initialWeight); + weightVector = new SparseWeightVector(weights); + } + + + /** + * When training is complete, optimize the feature set by discarding low value + * weights. 
+ */ + public void doneTraining() { + super.doneTraining(); + LinearThresholdUnitOptimizer ltuo = new LinearThresholdUnitOptimizer(this); + ltuo.optimize(); + } + /** + * An LTU returns two scores; one for the negative classification and one for the positive + * classification. By default, the score for the positive classification is the result of + * {@link #score(Object)} minus the {@link #threshold}, and the score for the negative + * classification is the opposite of the positive classification's score. + * + * @param exampleFeatures The example's array of feature indices + * @param exampleValues The example's array of feature values + * @return Two scores as described above. + **/ + public ScoreSet scores(int[] exampleFeatures, double[] exampleValues) { + double s = score(exampleFeatures, exampleValues) - threshold; + ScoreSet result = new ScoreSet(); + result.put(allowableValues[0], -s); + result.put(allowableValues[1], s); + return result; + } + + + /** + * Returns the classification of the given example as a single feature instead of a + * {@link FeatureVector}. + * + * @param f The features array. + * @param v The values array. + * @return The classification of the example as a feature. + **/ + public Feature featureValue(int[] f, double[] v) { + int index = score(f, v) >= threshold ? 1 : 0; + return predictions.get(index); + } + + + /** + * The default evaluation method simply computes the score for the example and returns a + * {@link DiscretePrimitiveStringFeature} set to either the second value from the label + * classifier's array of allowable values if the score is greater than or equal to + * {@link #threshold} or the first otherwise. + * + * @param exampleFeatures The example's array of feature indices + * @param exampleValues The example's array of feature values + * @return The computed feature (in a vector). 
+ **/ + public String discreteValue(int[] exampleFeatures, double[] exampleValues) { + int index = score(exampleFeatures, exampleValues) >= threshold ? 1 : 0; + return allowableValues[index]; + } + + + /** + * The default evaluation method simply computes the score for the example and returns a + * {@link DiscretePrimitiveStringFeature} set to either the second value from the label + * classifier's array of allowable values if the score is greater than or equal to + * {@link #threshold} or the first otherwise. + * + * @param exampleFeatures The example's array of feature indices + * @param exampleValues The example's array of feature values + * @return The computed feature (in a vector). + **/ + public FeatureVector classify(int[] exampleFeatures, double[] exampleValues) { + return new FeatureVector(featureValue(exampleFeatures, exampleValues)); + } + + + /** + * Computes the score for the specified example vector which will be thresholded to make the + * binary classification. + * + * @param example The example object. + * @return The score for the given example vector. + **/ + public double score(Object example) { + Object[] exampleArray = getExampleArray(example, false); + return score((int[]) exampleArray[0], (double[]) exampleArray[1]); + } + + + /** + * Computes the score for the specified example vector which will be thresholded to make the + * binary classification. + * + * @param exampleFeatures The example's array of feature indices + * @param exampleValues The example's array of feature values + * @return The score for the given example vector. + **/ + public double score(int[] exampleFeatures, double[] exampleValues) { + return weightVector.dot(exampleFeatures, exampleValues, initialWeight) + bias; + } + + + /** + * Resets the weight vector to associate the default weight with all features. 
+ **/ + public void forget() { + super.forget(); + weightVector = weightVector.emptyClone(); + bias = initialWeight; + setLabeler(labeler); + } + + + /** + * If the LinearThresholdUnit is mistake driven, this method should be overridden + * and used to update the internal representation when a mistake is made on a positive example. + * + * @param exampleFeatures The example's array of feature indices + * @param exampleValues The example's array of feature values + * @param rate The learning rate at which the weights are updated. + **/ + public abstract void promote(int[] exampleFeatures, double[] exampleValues, double rate); + + + /** + * If the LinearThresholdUnit is mistake driven, this method should be overridden + * and used to update the internal representation when a mistake is made on a negative example. + * + * @param exampleFeatures The example's array of feature indices + * @param exampleValues The example's array of feature values + * @param rate The learning rate at which the weights are updated. + **/ + public abstract void demote(int[] exampleFeatures, double[] exampleValues, double rate); + + + /** + * Writes the learned function's internal representation in binary form. + * + * @param out The output stream. + **/ + public void write(ExceptionlessOutputStream out) { + super.write(out); + + if (allowableValues == null) + out.writeInt(0); + else { + out.writeInt(allowableValues.length); + for (int i = 0; i < allowableValues.length; ++i) + out.writeString(allowableValues[i]); + } + + out.writeDouble(initialWeight); + out.writeDouble(threshold); + out.writeDouble(learningRate); + out.writeDouble(positiveThickness); + out.writeDouble(negativeThickness); + out.writeDouble(bias); + weightVector.write(out); + } + + + /** + * Reads the binary representation of a learner with this object's run-time type, overwriting + * any and all learned or manually specified parameters as well as the label lexicon but without + * modifying the feature lexicon. 
+ * + * @param in The input stream. + **/ + public void read(ExceptionlessInputStream in) { + super.read(in); + + int N = in.readInt(); + if (N == 0) + allowableValues = null; + else { + allowableValues = new String[N]; + for (int i = 0; i < N; ++i) + allowableValues[i] = in.readString(); + } + + initialWeight = in.readDouble(); + threshold = in.readDouble(); + learningRate = in.readDouble(); + positiveThickness = in.readDouble(); + negativeThickness = in.readDouble(); + bias = in.readDouble(); + weightVector = SparseWeightVector.readWeightVector(in); + } + + + /** Returns a deep clone of this learning algorithm. */ + public Object clone() { + LinearThresholdUnit clone = (LinearThresholdUnit) super.clone(); + if (weightVector != null) + clone.weightVector = (SparseWeightVector) weightVector.clone(); + return clone; + } + + + /** + * Simply a container for all of {@link LinearThresholdUnit}'s configurable parameters. Using + * instances of this class should make code more readable and constructors less complicated. + * + * @author Nick Rizzolo + **/ + public static class Parameters extends Learner.Parameters { + /** + * The rate at which weights are updated; default + * {@link LinearThresholdUnit#defaultLearningRate}. + **/ + public double learningRate; + /** The LTU's weight vector; default is an empty vector. */ + public SparseWeightVector weightVector; + /** + * The weight associated with a feature when first added to the vector; default + * {@link LinearThresholdUnit#defaultInitialWeight}. + **/ + public double initialWeight; + /** + * The score is compared against this value to make predictions; default + * {@link LinearThresholdUnit#defaultThreshold}. + **/ + public double threshold; + /** + * This thickness will be added to both {@link #positiveThickness} and + * {@link #negativeThickness}; default {@link LinearThresholdUnit#defaultThickness}. + **/ + public double thickness; + /** The thickness of the hyperplane on the positive side; default 0. 
*/ + public double positiveThickness; + /** The thickness of the hyperplane on the negative side; default 0. */ + public double negativeThickness; + /** feature pruning threshold caps magnitude of useful features. */ + public double featurePruningThreshold; + + /** Sets all the default values. */ + public Parameters() { + learningRate = defaultLearningRate; + weightVector = (SparseWeightVector) defaultWeightVector.clone(); + initialWeight = defaultInitialWeight; + threshold = defaultThreshold; + thickness = defaultThickness; + featurePruningThreshold = defaultFeaturePruningThreshold; + } + + + /** + * Sets the parameters from the parent's parameters object, giving defaults to all + * parameters declared in this object. + **/ + public Parameters(Learner.Parameters p) { + this(); + } + + + /** Copy constructor. */ + public Parameters(Parameters p) { + learningRate = p.learningRate; + weightVector = p.weightVector; + initialWeight = p.initialWeight; + threshold = p.threshold; + thickness = p.thickness; + positiveThickness = p.positiveThickness; + negativeThickness = p.negativeThickness; + featurePruningThreshold = p.featurePruningThreshold; + } + + + /** + * Calls the appropriate Learner.setParameters(Parameters) method for this + * Parameters object. + * + * @param l The learner whose parameters will be set. + **/ + public void setParameters(Learner l) { + ((LinearThresholdUnit) l).setParameters(this); + } + + + /** + * Creates a string representation of these parameters in which only those parameters that + * differ from their default values are mentioned. 
+ **/ + public String nonDefaultString() { + String result = super.nonDefaultString(); + if (learningRate != LinearThresholdUnit.defaultLearningRate) + result += ", learningRate = " + learningRate; + if (initialWeight != LinearThresholdUnit.defaultInitialWeight) + result += ", initialWeight = " + initialWeight; + if (threshold != LinearThresholdUnit.defaultThreshold) + result += ", threshold = " + threshold; + if (thickness != LinearThresholdUnit.defaultThickness) + result += ", thickness = " + thickness; + if (positiveThickness != 0) + result += ", positiveThickness = " + positiveThickness; + if (negativeThickness != 0) + result += ", negativeThickness = " + negativeThickness; + if (featurePruningThreshold != LinearThresholdUnit.defaultFeaturePruningThreshold) + result += ", featurePruningThreshold = " + featurePruningThreshold; + if (result.startsWith(", ")) + result = result.substring(2); + return result; + } + } + + + /** + * Given the index of the weights to prune, discard them, then shrink the weight vector down + * to save memory. + * @param uselessfeatures the features being pruned. + * @param numberFeatures the total number of features before pruning. + */ + public void pruneWeights(int[] uselessfeatures, int numberFeatures) { + this.getWeightVector().pruneWeights(uselessfeatures, numberFeatures); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Log.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Log.java index 25a0cef3..ac2afe98 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Log.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Log.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. 
See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.learn; @@ -15,40 +12,40 @@ /** - * Simply turns each score s in the {@link ScoreSet} returned by the - * specified {@link Normalizer} into log(s). - * - * @author Nick Rizzolo + * Simply turns each score s in the {@link ScoreSet} returned by the specified + * {@link Normalizer} into log(s). + * + * @author Nick Rizzolo **/ -public class Log extends Normalizer -{ - /** This normalizer runs before applying the log function. */ - protected Normalizer first; - - - /** This constructor provided for use by the LBJava compiler only. */ - public Log() { } - - /** - * Initializing constructor. - * - * @param n This normalizer runs before applying the log function. - **/ - public Log(Normalizer n) { first = n; } - - - /** - * Normalizes the given ScoreSet; its scores are modified in - * place before it is returned. - * - * @param scores The set of scores to normalize. - * @return The normalized set of scores. - **/ - public ScoreSet normalize(ScoreSet scores) { - Score[] array = first.normalize(scores).toArray(); - for (int i = 0; i < array.length; ++i) - array[i].score = Math.log(array[i].score); - return scores; - } +public class Log extends Normalizer { + /** This normalizer runs before applying the log function. */ + protected Normalizer first; + + + /** This constructor provided for use by the LBJava compiler only. */ + public Log() {} + + /** + * Initializing constructor. + * + * @param n This normalizer runs before applying the log function. + **/ + public Log(Normalizer n) { + first = n; + } + + + /** + * Normalizes the given ScoreSet; its scores are modified in place before it is + * returned. 
+ * + * @param scores The set of scores to normalize. + * @return The normalized set of scores. + **/ + public ScoreSet normalize(ScoreSet scores) { + Score[] array = first.normalize(scores).toArray(); + for (int i = 0; i < array.length; ++i) + array[i].score = Math.log(array[i].score); + return scores; + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/MultiLabelLearner.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/MultiLabelLearner.java index 5441a7e3..97263d09 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/MultiLabelLearner.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/MultiLabelLearner.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.learn; @@ -14,142 +11,152 @@ /** - * A simple implementation of a learner that learns from examples with - * multiple labels and is capable of predicting multiple labels on new - * examples. A separate {@link LinearThresholdUnit} is learned independently - * to predict whether each label is appropriate for a given example. Any - * {@link LinearThresholdUnit} may be used, so long as it implements its - * clone() method and a public constructor that takes no - * arguments. 
During testing, the {@link #classify(Object)} method returns a - * separate feature for each {@link LinearThresholdUnit} whose score on the - * example object exceeds the threshold. - * - * @author Nick Rizzolo + * A simple implementation of a learner that learns from examples with multiple labels and is + * capable of predicting multiple labels on new examples. A separate {@link LinearThresholdUnit} is + * learned independently to predict whether each label is appropriate for a given example. Any + * {@link LinearThresholdUnit} may be used, so long as it implements its clone() method + * and a public constructor that takes no arguments. During testing, the {@link #classify(Object)} + * method returns a separate feature for each {@link LinearThresholdUnit} whose score on the example + * object exceeds the threshold. + * + * @author Nick Rizzolo **/ -public class MultiLabelLearner extends SparseNetworkLearner -{ - /** - * Instantiates this multi-label learner with the default learning - * algorithm: {@link SparsePerceptron}. - **/ - public MultiLabelLearner() { this(""); } - - /** - * Instantiates this multi-label learner using the specified algorithm to - * learn each class separately as a binary classifier. This constructor - * will normally only be called by the compiler. - * - * @param ltu The linear threshold unit used to learn binary classifiers. - **/ - public MultiLabelLearner(LinearThresholdUnit ltu) { this("", ltu); } - - /** - * Initializing constructor. Sets all member variables to their associated - * settings in the {@link MultiLabelLearner.Parameters} object. - * - * @param p The settings of all parameters. - **/ - public MultiLabelLearner(Parameters p) { this("", p); } - - /** - * Instantiates this multi-label learner with the default learning - * algorithm: {@link SparsePerceptron}. - * - * @param n The name of the classifier. 
- **/ - public MultiLabelLearner(String n) { super(n); } - - /** - * Instantiates this multi-label learner using the specified algorithm to - * learn each class separately as a binary classifier. - * - * @param n The name of the classifier. - * @param ltu The linear threshold unit used to learn binary classifiers. - **/ - public MultiLabelLearner(String n, LinearThresholdUnit ltu) { - super(n, ltu); - } - - /** - * Initializing constructor. Sets all member variables to their associated - * settings in the {@link MultiLabelLearner.Parameters} object. - * - * @param n The name of the classifier. - * @param p The settings of all parameters. - **/ - public MultiLabelLearner(String n, Parameters p) { - super(n, p); - } - - - /** - * Retrieves the parameters that are set in this learner. - * - * @return An object containing all the values of the parameters that - * control the behavior of this learning algorithm. - **/ - public Learner.Parameters getParameters() { return new Parameters(); } - - - /** This learner's output type is "discrete%". */ - public String getOutputType() { return "discrete%"; } - - - /** - * Returns a separate feature for each {@link LinearThresholdUnit} whose - * score on the example object exceeds the threshold. - * - * @param exampleFeatures The example's feature indices. - * @param exampleValues The feature values. - * @return A vector containing the features described above. - **/ - public FeatureVector classify(int[] exampleFeatures, double[] exampleValues) - { - FeatureVector result = new FeatureVector(); - - for (int i = 0; i < network.size(); ++i) { - LinearThresholdUnit ltu = (LinearThresholdUnit) network.get(i); - double score = ltu.score(exampleFeatures, exampleValues); - if (score >= 0) result.addFeature(predictions.get(i)); +public class MultiLabelLearner extends SparseNetworkLearner { + /** + * Instantiates this multi-label learner with the default learning algorithm: + * {@link SparsePerceptron}. 
+ **/ + public MultiLabelLearner() { + this(""); + } + + /** + * Instantiates this multi-label learner using the specified algorithm to learn each class + * separately as a binary classifier. This constructor will normally only be called by the + * compiler. + * + * @param ltu The linear threshold unit used to learn binary classifiers. + **/ + public MultiLabelLearner(LinearThresholdUnit ltu) { + this("", ltu); + } + + /** + * Initializing constructor. Sets all member variables to their associated settings in the + * {@link MultiLabelLearner.Parameters} object. + * + * @param p The settings of all parameters. + **/ + public MultiLabelLearner(Parameters p) { + this("", p); + } + + /** + * Instantiates this multi-label learner with the default learning algorithm: + * {@link SparsePerceptron}. + * + * @param n The name of the classifier. + **/ + public MultiLabelLearner(String n) { + super(n); } - return result; - } + /** + * Instantiates this multi-label learner using the specified algorithm to learn each class + * separately as a binary classifier. + * + * @param n The name of the classifier. + * @param ltu The linear threshold unit used to learn binary classifiers. + **/ + public MultiLabelLearner(String n, LinearThresholdUnit ltu) { + super(n, ltu); + } + /** + * Initializing constructor. Sets all member variables to their associated settings in the + * {@link MultiLabelLearner.Parameters} object. + * + * @param n The name of the classifier. + * @param p The settings of all parameters. + **/ + public MultiLabelLearner(String n, Parameters p) { + super(n, p); + } - /** - * Simply a container for all of {@link MultiLabelLearner}'s configurable - * parameters. Using instances of this class should make code more - * readable and constructors less complicated. - * - * @author Nick Rizzolo - **/ - public static class Parameters extends SparseNetworkLearner.Parameters - { - /** Sets all the default values. 
*/ - public Parameters() { } + + /** + * Retrieves the parameters that are set in this learner. + * + * @return An object containing all the values of the parameters that control the behavior of + * this learning algorithm. + **/ + public Learner.Parameters getParameters() { + return new Parameters(); + } + + + /** This learner's output type is "discrete%". */ + public String getOutputType() { + return "discrete%"; + } /** - * Sets the parameters from the parent's parameters object, giving - * defaults to all parameters declared in this object. + * Returns a separate feature for each {@link LinearThresholdUnit} whose score on the example + * object exceeds the threshold. + * + * @param exampleFeatures The example's feature indices. + * @param exampleValues The feature values. + * @return A vector containing the features described above. **/ - public Parameters(SparseNetworkLearner.Parameters p) { super(p); } + public FeatureVector classify(int[] exampleFeatures, double[] exampleValues) { + FeatureVector result = new FeatureVector(); + for (int i = 0; i < network.size(); ++i) { + LinearThresholdUnit ltu = (LinearThresholdUnit) network.get(i); + double score = ltu.score(exampleFeatures, exampleValues); + if (score >= 0) + result.addFeature(predictions.get(i)); + } - /** Copy constructor. */ - public Parameters(Parameters p) { super(p); } + return result; + } /** - * Calls the appropriate Learner.setParameters(Parameters) - * method for this Parameters object. - * - * @param l The learner whose parameters will be set. + * Simply a container for all of {@link MultiLabelLearner}'s configurable parameters. Using + * instances of this class should make code more readable and constructors less complicated. + * + * @author Nick Rizzolo **/ - public void setParameters(Learner l) { - ((SparseNetworkLearner) l).setParameters(this); + public static class Parameters extends SparseNetworkLearner.Parameters { + /** Sets all the default values. 
*/ + public Parameters() {} + + + /** + * Sets the parameters from the parent's parameters object, giving defaults to all + * parameters declared in this object. + **/ + public Parameters(SparseNetworkLearner.Parameters p) { + super(p); + } + + + /** Copy constructor. */ + public Parameters(Parameters p) { + super(p); + } + + + /** + * Calls the appropriate Learner.setParameters(Parameters) method for this + * Parameters object. + * + * @param l The learner whose parameters will be set. + **/ + public void setParameters(Learner l) { + ((SparseNetworkLearner) l).setParameters(this); + } } - } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/MuxLearner.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/MuxLearner.java index 42d5915b..6df0cb3f 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/MuxLearner.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/MuxLearner.java @@ -1,570 +1,554 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.learn; import java.io.PrintStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.OVector; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.Sort; import edu.illinois.cs.cogcomp.lbjava.classify.Classifier; import edu.illinois.cs.cogcomp.lbjava.classify.DiscretePrimitiveStringFeature; import edu.illinois.cs.cogcomp.lbjava.classify.Feature; import edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector; import edu.illinois.cs.cogcomp.lbjava.classify.ScoreSet; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessInputStream; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessOutputStream; +import edu.illinois.cs.cogcomp.lbjava.learn.MuxLearner; import edu.illinois.cs.cogcomp.lbjava.util.FVector; -import edu.illinois.cs.cogcomp.lbjava.util.OVector; -import edu.illinois.cs.cogcomp.lbjava.util.Sort; - /** - * A MuxLearner uses one of many Learners indexed - * by the first feature in an example to produce a classification. During - * training, the features produced by the first child classifier of this - * classifier's composite generator feature extractor are taken to determine - * which Learners will learn from the training object. For any - * given example, there must be one Feature produced by the - * labeler for each Feature produced by the first child - * classifier. If this classifier's feature extractor is not a composite - * generator, the first feature it produces is the only one taken. - * - *

It is assumed that the Learner being multiplexed expects - * a single label feature on each training example, and that the feature(s) - * used to do the multiplexing are DiscreteFeature(s). - * Furthermore, if this classifier's feature extractor is a composite - * generator, it must produce the same number of features as this - * classifier's labeler, and they must correspond to each other in the order - * produced. - * - *

This algorithm's user-configurable parameters are stored in member - * fields of this class. They may be set via either a constructor that names - * each parameter explicitly or a constructor that takes an instance of - * {@link learn.MuxLearner.Parameters Parameters} as input. The - * documentation in each member field in this class indicates the default - * value of the associated parameter when using the former type of - * constructor. The documentation of the associated member field in the - * {@link learn.MuxLearner.Parameters Parameters} class indicates the - * default value of the parameter when using the latter type of constructor. - * - * @author Nick Rizzolo + * A MuxLearner uses one of many Learners indexed by the first feature in + * an example to produce a classification. During training, the features produced by the first child + * classifier of this classifier's composite generator feature extractor are taken to determine + * which Learners will learn from the training object. For any given example, there + * must be one Feature produced by the labeler for each Feature produced + * by the first child classifier. If this classifier's feature extractor is not a composite + * generator, the first feature it produces is the only one taken. + * + *

+ * It is assumed that the Learner being multiplexed expects a single label feature on + * each training example, and that the feature(s) used to do the multiplexing are + * DiscreteFeature(s). Furthermore, if this classifier's feature extractor is a + * composite generator, it must produce the same number of features as this classifier's labeler, + * and they must correspond to each other in the order produced. + * + *

+ * This algorithm's user-configurable parameters are stored in member fields of this class. They may + * be set via either a constructor that names each parameter explicitly or a constructor that takes + * an instance of {@link MuxLearner.Parameters Parameters} as input. The documentation in each + * member field in this class indicates the default value of the associated parameter when using the + * former type of constructor. The documentation of the associated member field in the + * {@link MuxLearner.Parameters Parameters} class indicates the default value of the parameter when + * using the latter type of constructor. + * + * @author Nick Rizzolo **/ -public class MuxLearner extends Learner -{ - /** Default for {@link #baseLearner}. */ - public static final Learner defaultBaseLearner = - new SparseNetworkLearner(new SparseAveragedPerceptron()); - /** Default for {@link #defaultPrediction}. */ - public static final String defaultDefaultPrediction = null; - - - /** - * Instances of this learning algorithm will be multiplexed; default - * null. - **/ - protected Learner baseLearner; - /** A map from feature values to learners. */ - protected OVector network; - /** - * This string is returned during testing when the multiplexed - * Learner doesn't exist; default - * {@link #defaultDefaultPrediction}. - **/ - protected String defaultPrediction; - /** A feature whose value is {@link #defaultPrediction}. */ - protected Feature defaultFeature; - - - /** For the LBJava compiler; not for use by the LBJava user. */ - public MuxLearner() { } - - /** - * Instantiates this multiplexed learner using the specified base learning - * algorithm. This constructor will normally only be called by the - * compiler. - * - * @param base Instances of this learner will be multiplexed. - **/ - public MuxLearner(Learner base) { this("", base); } - - /** - * Instantiates this multiplexed learner using the specified base learning - * algorithm. 
- * - * @param base Instances of this learner will be multiplexed. - * @param d This prediction will be returned during testing when the - * multiplexed Learner does not exist. - **/ - public MuxLearner(Learner base, String d) { this("", base, d); } - - /** - * Initializing constructor. Sets all member variables to their associated - * settings in the {@link MuxLearner.Parameters} object. - * - * @param p The settings of all parameters. - **/ - public MuxLearner(Parameters p) { this("", p); } - - /** For the LBJava compiler; not for use by the LBJava user. */ - public MuxLearner(String n) { super(n); } - - /** - * Instantiates this multiplexed learner using the specified base learning - * algorithm. - * - * @param n The name of the classifier. - * @param base Instances of this learner will be multiplexed. - **/ - public MuxLearner(String n, Learner base) { - this(n, base, defaultDefaultPrediction); - } - - /** - * Instantiates this multiplexed learner using the specified base learning - * algorithm. - * - * @param n The name of the classifier. - * @param base Instances of this learner will be multiplexed. - * @param d This prediction will be returned during testing when the - * multiplexed Learner does not exist. - **/ - public MuxLearner(String n, Learner base, String d) { - super(n); - Parameters p = new Parameters(); - p.baseLearner = base; - p.defaultPrediction = d; - setParameters(p); - network = new OVector(); - } - - /** - * Initializing constructor. Sets all member variables to their associated - * settings in the {@link MuxLearner.Parameters} object. - * - * @param n The name of the classifier. - * @param p The settings of all parameters. - **/ - public MuxLearner(String n, Parameters p) { - super(n); - setParameters(p); - network = new OVector(); - } - - - /** - * Sets the values of parameters that control the behavior of this learning - * algorithm. - * - * @param p The parameters. 
- **/ - public void setParameters(Parameters p) { - setBase(p.baseLearner); - defaultPrediction = p.defaultPrediction; - setDefaultFeature(); - } - - - /** - * Sets the value of {@link #defaultFeature} according to the current value - * of {@link #defaultPrediction}. - **/ - protected void setDefaultFeature() { - defaultFeature = - new DiscretePrimitiveStringFeature( - containingPackage, name, "default", defaultPrediction, - valueIndexOf(defaultPrediction), - (short) allowableValues().length); - } - - - /** - * Retrieves the parameters that are set in this learner. - * - * @return An object containing all the values of the parameters that - * control the behavior of this learning algorithm. - **/ - public Learner.Parameters getParameters() { - Parameters p = new Parameters(super.getParameters()); - p.baseLearner = baseLearner; - p.defaultPrediction = defaultPrediction; - return p; - } - - - /** - * Sets {@link #baseLearner}. This method will not have any effect - * on the learners that already exist in the network. However, new - * learners created after this method is executed will be of the same type - * as the object specified. - * - * @param base The new base learning algorithm. - **/ - public void setBase(Learner base) { - baseLearner = base; - baseLearner.containingPackage = containingPackage; - baseLearner.name = name + "::base"; - } - - - /** - * Sets the labeler. - * - * @param l A labeling classifier. - **/ - public void setLabeler(Classifier l) { - super.setLabeler(l); - setBase(baseLearner); - } - - - /** - * Sets the label lexicon. - * - * @param l A feature lexicon. - **/ - public void setLabelLexicon(Lexicon l) { - super.setLabelLexicon(l); - if (network != null) { - int N = network.size(); - for (int i = 0; i < N; ++i) { - Learner learner = (Learner) network.get(i); - if (learner != null) learner.setLabelLexicon(l); - } +public class MuxLearner extends Learner { + /** Default for {@link #baseLearner}. 
*/ + public static final Learner defaultBaseLearner = new SparseNetworkLearner( + new SparseAveragedPerceptron()); + /** Default for {@link #defaultPrediction}. */ + public static final String defaultDefaultPrediction = null; + + + /** + * Instances of this learning algorithm will be multiplexed; default null. + **/ + protected Learner baseLearner; + /** A map from feature values to learners. */ + protected OVector network; + /** + * This string is returned during testing when the multiplexed Learner doesn't + * exist; default {@link #defaultDefaultPrediction}. + **/ + protected String defaultPrediction; + /** A feature whose value is {@link #defaultPrediction}. */ + protected Feature defaultFeature; + + + /** For the LBJava compiler; not for use by the LBJava user. */ + public MuxLearner() {} + + /** + * Instantiates this multiplexed learner using the specified base learning algorithm. This + * constructor will normally only be called by the compiler. + * + * @param base Instances of this learner will be multiplexed. + **/ + public MuxLearner(Learner base) { + this("", base); } - } - - - /** - * The training example is multiplexed to the appropriate - * Learner(s). - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @param exampleLabels The example's label(s). - * @param labelValues The labels' values. 
- **/ - public void learn(int[] exampleFeatures, double[] exampleValues, - int[] exampleLabels, double[] labelValues) { - assert exampleFeatures.length > exampleLabels.length - : "MuxLearner ERROR: The example vector must have more features " - + "than labels, since it is assumed that there is a correspondence " - + "between the labels and the first features in the vector."; - - int F = exampleFeatures.length; - int L = exampleLabels.length; - int[] example = new int[F - L]; - double[] values = new double[F - L]; - int[] selections = new int[L]; - - int i = 0, j = 0; - for ( ; i < L; i++) { - selections[i] = exampleFeatures[i]; + + /** + * Instantiates this multiplexed learner using the specified base learning algorithm. + * + * @param base Instances of this learner will be multiplexed. + * @param d This prediction will be returned during testing when the multiplexed + * Learner does not exist. + **/ + public MuxLearner(Learner base, String d) { + this("", base, d); } - for ( ; i < F; i++, j++) { - example[j] = exampleFeatures[i]; - values[j] = exampleValues[i]; + + /** + * Initializing constructor. Sets all member variables to their associated settings in the + * {@link MuxLearner.Parameters} object. + * + * @param p The settings of all parameters. + **/ + public MuxLearner(Parameters p) { + this("", p); } - for (i = 0; i < L; i++) { - Learner l = (Learner) network.get(selections[i]); + /** For the LBJava compiler; not for use by the LBJava user. */ + public MuxLearner(String n) { + super(n); + } - if (l == null) { - l = (Learner) baseLearner.clone(); - l.setLabelLexicon(labelLexicon); - network.set(selections[i], l); - } + /** + * Instantiates this multiplexed learner using the specified base learning algorithm. + * + * @param n The name of the classifier. + * @param base Instances of this learner will be multiplexed. 
+ **/ + public MuxLearner(String n, Learner base) { + this(n, base, defaultDefaultPrediction); + } - int[] labels = new int[1]; - labels[0] = exampleLabels[i]; - double[] labelVal = new double[1]; - labelVal[0] = labelValues[i]; + /** + * Instantiates this multiplexed learner using the specified base learning algorithm. + * + * @param n The name of the classifier. + * @param base Instances of this learner will be multiplexed. + * @param d This prediction will be returned during testing when the multiplexed + * Learner does not exist. + **/ + public MuxLearner(String n, Learner base, String d) { + super(n); + Parameters p = new Parameters(); + p.baseLearner = base; + p.defaultPrediction = d; + setParameters(p); + network = new OVector(); + } - l.learn(example, values, labels, labelVal); + /** + * Initializing constructor. Sets all member variables to their associated settings in the + * {@link MuxLearner.Parameters} object. + * + * @param n The name of the classifier. + * @param p The settings of all parameters. + **/ + public MuxLearner(String n, Parameters p) { + super(n); + setParameters(p); + network = new OVector(); } - } - - - /** Clears the network. */ - public void forget() { - super.forget(); - network = new OVector(); - } - - - /** - * Produces a set of scores indicating the degree to which each possible - * discrete classification value is associated with the given example - * object. These scores are just the scores produced by the multiplexed - * Learner's scores(Object) method. - * - * @see Learner#scores(Object) - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. 
- **/ - public ScoreSet scores(int[] exampleFeatures, double[] exampleValues) { - int[] example = new int[exampleFeatures.length - 1]; - double[] values = new double[exampleFeatures.length - 1]; - System.arraycopy(exampleFeatures, 1, example, 0, example.length); - System.arraycopy(exampleValues, 1, values, 0, values.length); - - int selection = exampleFeatures[0]; - Learner l = (Learner) network.get(selection); - if (l == null) - return - new ScoreSet(new String[]{ defaultPrediction }, new double[]{ 1 }); - return l.scores(example, values); - } - - - /** - * Returns the value of the discrete feature that would be returned by this - * classifier. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @return A single value with the winning linear threshold unit's - * associated value. - **/ - public String discreteValue(int[] exampleFeatures, double[] exampleValues) { - return featureValue(exampleFeatures, exampleValues).getStringValue(); - } - - - /** - * Returns the value of the real feature that would be returned by this - * classifier. - * - * @param f The features array. - * @param v The values array. - * @return The value of the feature produced for the input object. - **/ - public double realValue(int[] f, double[] v) { - return featureValue(f, v).getStrength(); - } - - - /** - * Returns the classification of the given example as a single feature - * instead of a {@link FeatureVector}. - * - * @param f The features array. - * @param v The values array. - * @return The classification of the example as a feature. 
- **/ - public Feature featureValue(int[] f, double[] v) { - int[] example = new int[f.length - 1]; - double[] values = new double[f.length - 1]; - System.arraycopy(f, 1, example, 0, example.length); - System.arraycopy(v, 1, values, 0, values.length); - - int selection = f[0]; - Learner l = (Learner) network.get(selection); - if (l == null) return defaultFeature; - return l.featureValue(example, values); - } - - - /** - * This method performs the multiplexing and returns the output of the - * selected Learner. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @return The output of the selected Learner. - **/ - public FeatureVector classify(int[] exampleFeatures, double[] exampleValues) - { - return new FeatureVector(featureValue(exampleFeatures, exampleValues)); - } - - - /** - * Writes the algorithm's internal representation as text. - * - * @param out The output stream. - **/ - public void write(PrintStream out) { - int N = network.size(); - final FVector entries = new FVector(N); - final OVector learners = new OVector(N); - for (int i = 0; i < N; ++i) { - Learner learner = (Learner) network.get(i); - if (network.get(i) != null) { - entries.add(lexicon.lookupKey(i)); - learners.add(learner); - } + + + /** + * Sets the values of parameters that control the behavior of this learning algorithm. + * + * @param p The parameters. 
+ **/ + public void setParameters(Parameters p) { + setBase(p.baseLearner); + defaultPrediction = p.defaultPrediction; + setDefaultFeature(); } - N = entries.size(); - int[] indexes = new int[N]; - for (int i = 0; i < N; ++i) indexes[i] = i; - Sort.sort(indexes, - new Sort.IntComparator() { - public int compare(int i1, int i2) { - return entries.get(i1).compareTo(entries.get(i2)); - } - }); - - for (int i = 0; i < N; ++i) { - out.println("select: " + entries.get(indexes[i]).getStringValue()); - Learner learner = (Learner) learners.get(indexes[i]); - learner.setLexicon(lexicon); - learner.write(out); - learner.setLexicon(null); + + /** + * Sets the value of {@link #defaultFeature} according to the current value of + * {@link #defaultPrediction}. + **/ + protected void setDefaultFeature() { + defaultFeature = + new DiscretePrimitiveStringFeature(containingPackage, name, "default", + defaultPrediction, valueIndexOf(defaultPrediction), + (short) allowableValues().length); } - out.println("End of MuxLearner"); - } - - - /** - * Writes the learned function's internal representation in binary form. - * - * @param out The output stream. - **/ - public void write(ExceptionlessOutputStream out) { - super.write(out); - out.writeString(defaultPrediction); - baseLearner.write(out); - int N = network.size(); - out.writeInt(N); - - int M = 0; - for (int i = 0; i < N; ++i) if (network.get(i) != null) ++M; - out.writeInt(M); - - for (int i = 0; i < N; ++i) { - Learner learner = (Learner) network.get(i); - if (learner != null) { - out.writeInt(i); - learner.write(out); - } + + /** + * Retrieves the parameters that are set in this learner. + * + * @return An object containing all the values of the parameters that control the behavior of + * this learning algorithm. 
+ **/ + public Learner.Parameters getParameters() { + Parameters p = new Parameters(super.getParameters()); + p.baseLearner = baseLearner; + p.defaultPrediction = defaultPrediction; + return p; } - } - - - /** - * Reads the binary representation of a learner with this object's run-time - * type, overwriting any and all learned or manually specified parameters - * as well as the label lexicon but without modifying the feature lexicon. - * - * @param in The input stream. - **/ - public void read(ExceptionlessInputStream in) { - super.read(in); - defaultPrediction = in.readString(); - setDefaultFeature(); - baseLearner = Learner.readLearner(in); - int N = in.readInt(); - network = new OVector(N); - int M = in.readInt(); - for (int i = 0; i < M; ++i) - network.set(in.readInt(), Learner.readLearner(in)); - } - - - /** Returns a deep clone of this learning algorithm. */ - public Object clone() { - MuxLearner clone = null; - try { clone = (MuxLearner) super.clone(); } - catch (Exception e) { - System.err.println("Error cloning MuxLearner: " + e); - e.printStackTrace(); - System.exit(1); + + + /** + * Sets {@link #baseLearner}. This method will not have any effect on the learners that + * already exist in the network. However, new learners created after this method is executed + * will be of the same type as the object specified. + * + * @param base The new base learning algorithm. + **/ + public void setBase(Learner base) { + baseLearner = base; + baseLearner.containingPackage = containingPackage; + baseLearner.name = name + "::base"; } - clone.baseLearner = (Learner) baseLearner.clone(); - int N = network.size(); - clone.network = new OVector(N); - for (int i = 0; i < N; ++i) { - Learner learner = (Learner) network.get(i); - if (learner != null) clone.network.set(i, learner.clone()); + + /** + * Sets the labeler. + * + * @param l A labeling classifier. 
+ **/ + public void setLabeler(Classifier l) { + super.setLabeler(l); + setBase(baseLearner); } - return clone; - } + + /** + * Sets the label lexicon. + * + * @param l A feature lexicon. + **/ + public void setLabelLexicon(Lexicon l) { + super.setLabelLexicon(l); + if (network != null) { + int N = network.size(); + for (int i = 0; i < N; ++i) { + Learner learner = (Learner) network.get(i); + if (learner != null) + learner.setLabelLexicon(l); + } + } + } - /** - * Simply a container for all of {@link MuxLearner}'s configurable - * parameters. Using instances of this class should make code more - * readable and constructors less complicated. - * - * @author Nick Rizzolo - **/ - public static class Parameters extends Learner.Parameters - { /** - * Instances of this learning algorithm will be multiplexed; default - * null. + * The training example is multiplexed to the appropriate Learner(s). + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @param exampleLabels The example's label(s). + * @param labelValues The labels' values. 
**/ - public Learner baseLearner; + public void learn(int[] exampleFeatures, double[] exampleValues, int[] exampleLabels, + double[] labelValues) { + assert exampleFeatures.length > exampleLabels.length : "MuxLearner ERROR: The example vector must have more features " + + "than labels, since it is assumed that there is a correspondence " + + "between the labels and the first features in the vector."; + + int F = exampleFeatures.length; + int L = exampleLabels.length; + int[] example = new int[F - L]; + double[] values = new double[F - L]; + int[] selections = new int[L]; + + int i = 0, j = 0; + for (; i < L; i++) { + selections[i] = exampleFeatures[i]; + } + for (; i < F; i++, j++) { + example[j] = exampleFeatures[i]; + values[j] = exampleValues[i]; + } + + for (i = 0; i < L; i++) { + Learner l = (Learner) network.get(selections[i]); + + if (l == null) { + l = (Learner) baseLearner.clone(); + l.setLabelLexicon(labelLexicon); + network.set(selections[i], l); + } + + int[] labels = new int[1]; + labels[0] = exampleLabels[i]; + double[] labelVal = new double[1]; + labelVal[0] = labelValues[i]; + + l.learn(example, values, labels, labelVal); + } + } + + + /** Clears the network. */ + public void forget() { + super.forget(); + network = new OVector(); + } + + /** - * This string is returned during testing when the multiplexed - * Learner doesn't exist; default - * {@link MuxLearner#defaultDefaultPrediction}. + * Produces a set of scores indicating the degree to which each possible discrete classification + * value is associated with the given example object. These scores are just the scores produced + * by the multiplexed Learner's scores(Object) method. + * + * @see Learner#scores(Object) + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. 
**/ - public String defaultPrediction; + public ScoreSet scores(int[] exampleFeatures, double[] exampleValues) { + int[] example = new int[exampleFeatures.length - 1]; + double[] values = new double[exampleFeatures.length - 1]; + System.arraycopy(exampleFeatures, 1, example, 0, example.length); + System.arraycopy(exampleValues, 1, values, 0, values.length); + + int selection = exampleFeatures[0]; + Learner l = (Learner) network.get(selection); + if (l == null) + return new ScoreSet(new String[] {defaultPrediction}, new double[] {1}); + return l.scores(example, values); + } - /** Sets all the default values. */ - public Parameters() { - baseLearner = (Learner) defaultBaseLearner.clone(); - defaultPrediction = defaultDefaultPrediction; + /** + * Returns the value of the discrete feature that would be returned by this classifier. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @return A single value with the winning linear threshold unit's associated value. + **/ + public String discreteValue(int[] exampleFeatures, double[] exampleValues) { + return featureValue(exampleFeatures, exampleValues).getStringValue(); } /** - * Sets the parameters from the parent's parameters object, giving - * defaults to all parameters declared in this object. + * Returns the value of the real feature that would be returned by this classifier. + * + * @param f The features array. + * @param v The values array. + * @return The value of the feature produced for the input object. **/ - public Parameters(Learner.Parameters p) { - super(p); - baseLearner = (Learner) defaultBaseLearner.clone(); - defaultPrediction = defaultDefaultPrediction; + public double realValue(int[] f, double[] v) { + return featureValue(f, v).getStrength(); } - /** Copy constructor. 
*/ - public Parameters(Parameters p) { - super(p); - baseLearner = p.baseLearner; - defaultPrediction = p.defaultPrediction; + /** + * Returns the classification of the given example as a single feature instead of a + * {@link FeatureVector}. + * + * @param f The features array. + * @param v The values array. + * @return The classification of the example as a feature. + **/ + public Feature featureValue(int[] f, double[] v) { + int[] example = new int[f.length - 1]; + double[] values = new double[f.length - 1]; + System.arraycopy(f, 1, example, 0, example.length); + System.arraycopy(v, 1, values, 0, values.length); + + int selection = f[0]; + Learner l = (Learner) network.get(selection); + if (l == null) + return defaultFeature; + return l.featureValue(example, values); } /** - * Calls the appropriate Learner.setParameters(Parameters) - * method for this Parameters object. - * - * @param l The learner whose parameters will be set. + * This method performs the multiplexing and returns the output of the selected + * Learner. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @return The output of the selected Learner. **/ - public void setParameters(Learner l) { - ((MuxLearner) l).setParameters(this); + public FeatureVector classify(int[] exampleFeatures, double[] exampleValues) { + return new FeatureVector(featureValue(exampleFeatures, exampleValues)); } /** - * Creates a string representation of these parameters in which only - * those parameters that differ from their default values are mentioned. + * Writes the algorithm's internal representation as text. + * + * @param out The output stream. 
**/ - public String nonDefaultString() { - String result = super.nonDefaultString(); - String name = baseLearner.getClass().getName(); - name = name.substring(name.lastIndexOf('.') + 1); + public void write(PrintStream out) { + int N = network.size(); + final FVector entries = new FVector(N); + final OVector learners = new OVector(N); + for (int i = 0; i < N; ++i) { + Learner learner = (Learner) network.get(i); + if (network.get(i) != null) { + entries.add(lexicon.lookupKey(i)); + learners.add(learner); + } + } + + N = entries.size(); + int[] indexes = new int[N]; + for (int i = 0; i < N; ++i) + indexes[i] = i; + Sort.sort(indexes, new Sort.IntComparator() { + public int compare(int i1, int i2) { + return entries.get(i1).compareTo(entries.get(i2)); + } + }); + + for (int i = 0; i < N; ++i) { + out.println("select: " + entries.get(indexes[i]).getStringValue()); + Learner learner = (Learner) learners.get(indexes[i]); + learner.setLexicon(lexicon); + learner.write(out); + learner.setLexicon(null); + } + + out.println("End of MuxLearner"); + } - if (!defaultPrediction.equals(MuxLearner.defaultDefaultPrediction)) - result += "defaultPrediction = " + defaultPrediction + ", "; - result += name + ": " + baseLearner.getParameters().nonDefaultString(); - return result; + /** + * Writes the learned function's internal representation in binary form. + * + * @param out The output stream. 
+ **/ + public void write(ExceptionlessOutputStream out) { + super.write(out); + out.writeString(defaultPrediction); + baseLearner.write(out); + int N = network.size(); + out.writeInt(N); + + int M = 0; + for (int i = 0; i < N; ++i) + if (network.get(i) != null) + ++M; + out.writeInt(M); + + for (int i = 0; i < N; ++i) { + Learner learner = (Learner) network.get(i); + if (learner != null) { + out.writeInt(i); + learner.write(out); + } + } } - } -} + + /** + * Reads the binary representation of a learner with this object's run-time type, overwriting + * any and all learned or manually specified parameters as well as the label lexicon but without + * modifying the feature lexicon. + * + * @param in The input stream. + **/ + public void read(ExceptionlessInputStream in) { + super.read(in); + defaultPrediction = in.readString(); + setDefaultFeature(); + baseLearner = Learner.readLearner(in); + int N = in.readInt(); + network = new OVector(N); + int M = in.readInt(); + for (int i = 0; i < M; ++i) + network.set(in.readInt(), Learner.readLearner(in)); + } + + + /** Returns a deep clone of this learning algorithm. */ + public Object clone() { + MuxLearner clone = null; + try { + clone = (MuxLearner) super.clone(); + } catch (Exception e) { + System.err.println("Error cloning MuxLearner: " + e); + e.printStackTrace(); + System.exit(1); + } + + clone.baseLearner = (Learner) baseLearner.clone(); + int N = network.size(); + clone.network = new OVector(N); + for (int i = 0; i < N; ++i) { + Learner learner = (Learner) network.get(i); + if (learner != null) + clone.network.set(i, learner.clone()); + } + + return clone; + } + + + /** + * Simply a container for all of {@link MuxLearner}'s configurable parameters. Using instances + * of this class should make code more readable and constructors less complicated. 
+ * + * @author Nick Rizzolo + **/ + public static class Parameters extends Learner.Parameters { + /** + * Instances of this learning algorithm will be multiplexed; default null. + **/ + public Learner baseLearner; + /** + * This string is returned during testing when the multiplexed Learner doesn't + * exist; default {@link MuxLearner#defaultDefaultPrediction}. + **/ + public String defaultPrediction; + + + /** Sets all the default values. */ + public Parameters() { + baseLearner = (Learner) defaultBaseLearner.clone(); + defaultPrediction = defaultDefaultPrediction; + } + + + /** + * Sets the parameters from the parent's parameters object, giving defaults to all + * parameters declared in this object. + **/ + public Parameters(Learner.Parameters p) { + super(p); + baseLearner = (Learner) defaultBaseLearner.clone(); + defaultPrediction = defaultDefaultPrediction; + } + + + /** Copy constructor. */ + public Parameters(Parameters p) { + super(p); + baseLearner = p.baseLearner; + defaultPrediction = p.defaultPrediction; + } + + + /** + * Calls the appropriate Learner.setParameters(Parameters) method for this + * Parameters object. + * + * @param l The learner whose parameters will be set. + **/ + public void setParameters(Learner l) { + ((MuxLearner) l).setParameters(this); + } + + + /** + * Creates a string representation of these parameters in which only those parameters that + * differ from their default values are mentioned. 
+ **/ + public String nonDefaultString() { + String result = super.nonDefaultString(); + String name = baseLearner.getClass().getName(); + name = name.substring(name.lastIndexOf('.') + 1); + + if (!defaultPrediction.equals(MuxLearner.defaultDefaultPrediction)) + result += "defaultPrediction = " + defaultPrediction + ", "; + result += name + ": " + baseLearner.getParameters().nonDefaultString(); + + return result; + } + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/NaiveBayes.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/NaiveBayes.java index 2cacbaee..c1245ccc 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/NaiveBayes.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/NaiveBayes.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.learn; @@ -17,862 +14,853 @@ import java.util.Comparator; import java.util.Map; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.OVector; import edu.illinois.cs.cogcomp.lbjava.classify.Classifier; import edu.illinois.cs.cogcomp.lbjava.classify.Feature; import edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector; import edu.illinois.cs.cogcomp.lbjava.classify.RealFeature; import edu.illinois.cs.cogcomp.lbjava.classify.Score; import edu.illinois.cs.cogcomp.lbjava.classify.ScoreSet; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessInputStream; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessOutputStream; -import edu.illinois.cs.cogcomp.lbjava.util.OVector; - /** - * Naive Bayes is a multi-class learner that uses prediction value counts and - * feature counts given a particular prediction value to select the most - * likely prediction value. More precisely, a score sv for - * a given prediction value v is computed such that - * esv is proportional to - * - *

- * P(v) Prodf P(f|v) - *
- * - * where Prod is a multiplication quantifier over f, and - * f stands for a feature. The value corresponding to the highest - * score is selected as the prediction. Feature values that were never - * observed given a particular prediction value during training are smoothed - * with a configurable constant that defaults to e-15. - * - *

This {@link Learner} learns a discrete classifier from - * other discrete classifiers. Features coming from - * real classifiers are ignored. It is also assumed that a - * single discrete label feature will be produced in association with each - * example object. A feature taking one of the values observed in that label - * feature will be produced by the learned classifier. - * - *

This algorithm's user-configurable parameters are stored in member - * fields of this class. They may be set via either a constructor that names - * each parameter explicitly or a constructor that takes an instance of - * {@link NaiveBayesTest.NaiveBayes.Parameters Parameters} as input. The - * documentation in each member field in this class indicates the default - * value of the associated parameter when using the former type of - * constructor. The documentation of the associated member field in the - * {@link NaiveBayesTest.NaiveBayes.Parameters Parameters} class indicates the - * default value of the parameter when using the latter type of constructor. - * - * @see NaiveBayesVector - * @author Nick Rizzolo + * Naive Bayes is a multi-class learner that uses prediction value counts and feature counts given a + * particular prediction value to select the most likely prediction value. More precisely, a score + * sv for a given prediction value v is computed such that + * esv is proportional to + * + *

P(v) Prodf P(f|v)
+ * + * where Prod is a multiplication quantifier over f, and f stands for a + * feature. The value corresponding to the highest score is selected as the prediction. Feature + * values that were never observed given a particular prediction value during training are smoothed + * with a configurable constant that defaults to e-15. + * + *

+ * This {@link Learner} learns a discrete classifier from other discrete + * classifiers. Features coming from real classifiers are ignored. It is also + * assumed that a single discrete label feature will be produced in association with each example + * object. A feature taking one of the values observed in that label feature will be produced by the + * learned classifier. + * + *

+ * This algorithm's user-configurable parameters are stored in member fields of this class. They may + * be set via either a constructor that names each parameter explicitly or a constructor that takes + * an instance of {@link NaiveBayes.Parameters Parameters} as input. The documentation in each + * member field in this class indicates the default value of the associated parameter when using the + * former type of constructor. The documentation of the associated member field in the + * {@link NaiveBayes.Parameters Parameters} class indicates the default value of the parameter when + * using the latter type of constructor. + * + * @see NaiveBayesVector + * @author Nick Rizzolo **/ -public class NaiveBayes extends Learner -{ - /** - * The default conditional feature probability is - * edefaultSmoothing. - **/ - public static final int defaultSmoothing = -15; - - - /** - * The exponential of this number is used as the conditional probability of - * a feature that was never observed during training; default - * {@link #defaultSmoothing}. - **/ - protected double smoothing; - /** One {@link NaiveBayesVector} for each observed prediction value. */ - protected OVector network; - - - /** Default constructor. */ - public NaiveBayes() { this(""); } - - /** - * Initializes the smoothing constant. - * - * @param smooth The exponential of this number is used as the conditional - * probability of a feature that was never observed during - * training. - **/ - public NaiveBayes(double smooth) { this("", smooth); } - - /** - * Initializing constructor. Sets all member variables to their associated - * settings in the {@link NaiveBayes.Parameters} object. - * - * @param p The settings of all parameters. - **/ - public NaiveBayes(Parameters p) { this("", p); } - - /** - * Initializes the name of the classifier. - * - * @param n The classifier's name. - **/ - public NaiveBayes(String n) { this(n, defaultSmoothing); } - - /** - * Initializes the name and smoothing constant. 
- * - * @param name The classifier's name. - * @param smooth The exponential of this number is used as the conditional - * probability of a feature that was never observed during - * training. - **/ - public NaiveBayes(String name, double smooth) { - super(name); - network = new OVector(); - smoothing = smooth; - } - - /** - * Initializing constructor. Sets all member variables to their associated - * settings in the {@link NaiveBayes.Parameters} object. - * - * @param n The name of the classifier. - * @param p The settings of all parameters. - **/ - public NaiveBayes(String n, Parameters p) { - super(n); - network = new OVector(); - setParameters(p); - } - - - /** - * Sets the values of parameters that control the behavior of this learning - * algorithm. - * - * @param p The parameters. - **/ - public void setParameters(Parameters p) { - smoothing = p.smoothing; - } - - - /** - * Retrieves the parameters that are set in this learner. - * - * @return An object containing all the values of the parameters that - * control the behavior of this learning algorithm. - **/ - public Learner.Parameters getParameters() { - Parameters p = new Parameters(super.getParameters()); - p.smoothing = smoothing; - return p; - } - - - /** - * Sets the smoothing parameter to the specified value. - * - * @param s The new value for the smoothing parameter. - **/ - public void setSmoothing(double s) { smoothing = s; } - - - /** - * Sets the labeler. - * - * @param l A labeling classifier. - **/ - public void setLabeler(Classifier l) { - if (!l.getOutputType().equals("discrete")) { - System.err.println( - "LBJava WARNING: NaiveBayes will only work with a label classifier " - + "that returns discrete."); - System.err.println( - " The given label classifier, " + l.getClass().getName() - + ", returns " + l.getOutputType() + "."); - } - - super.setLabeler(l); - } - - - /** - * Trains the learning algorithm given an object as an example. 
- * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @param exampleLabels The example's label(s). - * @param labelValues The labels' values. - **/ - public void learn(int[] exampleFeatures, double[] exampleValues, - int[] exampleLabels, double[] labelValues) { - int label = exampleLabels[0]; - int N = network.size(); - - NaiveBayesVector labelVector = null; - if (label >= N) { - while (N++ < label) - network.add(new NaiveBayesVector()); - labelVector = new NaiveBayesVector(); - network.add(labelVector); - } - else labelVector = (NaiveBayesVector) network.get(label); - - labelVector.scaledAdd(exampleFeatures, exampleValues, 1.0); - } - - - /** Clears the network. */ - public void forget() { - super.forget(); - network = new OVector(); - } - - - /** - * The scores in the returned {@link ScoreSet} are the posterior - * probabilities of each possible label given the example. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @return A set of scores indicating the degree to which each possible - * discrete classification value is associated with the given - * example object. - **/ - public ScoreSet scores(int[] exampleFeatures, double[] exampleValues) { - ScoreSet s = new ScoreSet(); - - for (int l = 0; l < network.size(); l++) { - NaiveBayesVector vector = (NaiveBayesVector) network.get(l); - double score = vector.dot(exampleFeatures, exampleValues); - s.put(labelLexicon.lookupKey(l).getStringValue(), score); - } - - Score[] original = s.toArray(); - ScoreSet result = new ScoreSet(); +public class NaiveBayes extends Learner { + /** + * The default conditional feature probability is edefaultSmoothing + * . + **/ + public static final int defaultSmoothing = -15; - // This code would clearly run quicker if you computed each exp(score) - // ahead of time, and divided them each by their sum. 
However, each score - // is likely to be a very negative number, so exp(score) may not be - // numerically stable. Subtracting two scores, however, hopefully leaves - // you with a "less negative" number, so exp applied to the subtraction - // hopefully behaves better. - for (int i = 0; i < original.length; ++i) { - double score = 1; + /** + * The exponential of this number is used as the conditional probability of a feature that was + * never observed during training; default {@link #defaultSmoothing}. + **/ + protected double smoothing; + /** One {@link NaiveBayesVector} for each observed prediction value. */ + protected OVector network; - for (int j = 0; j < original.length; ++j) { - if (i == j) continue; - score += Math.exp(original[j].score - original[i].score); - } - result.put(original[i].value, 1 / score); + /** Default constructor. */ + public NaiveBayes() { + this(""); } - return result; - } - - - /** - * Returns the classification of the given example as a single feature - * instead of a {@link FeatureVector}. - * - * @param f The features array. - * @param v The values array. - * @return The classification of the example as a feature. - **/ - public Feature featureValue(int[] f, double[] v) { - double bestScore = -Double.MAX_VALUE; - int bestLabel = -1; - - for (int l = 0; l < network.size(); l++) { - NaiveBayesVector vector = (NaiveBayesVector) network.get(l); - double score = vector.dot(f, v); - - if (score > bestScore) { - bestLabel = l; - bestScore = score; - } + /** + * Initializes the smoothing constant. + * + * @param smooth The exponential of this number is used as the conditional probability of a + * feature that was never observed during training. + **/ + public NaiveBayes(double smooth) { + this("", smooth); } - if (bestLabel == -1) return null; - return predictions.get(bestLabel); - } - - - /** - * Prediction value counts and feature counts given a particular prediction - * value are used to select the most likely prediction value. 
- * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @return The most likely discrete value. - **/ - public String discreteValue(int[] exampleFeatures, double[] exampleValues) { - return featureValue(exampleFeatures, exampleValues).getStringValue(); - } - - - /** - * Prediction value counts and feature counts given a particular prediction - * value are used to select the most likely prediction value. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @return A single discrete feature, set to the most likely value. - **/ - public FeatureVector classify(int[] exampleFeatures, double[] exampleValues) - { - return new FeatureVector(featureValue(exampleFeatures, exampleValues)); - } - - - /** - * Writes the algorithm's internal representation as text. - * - * @param out The output stream. - **/ - public void write(PrintStream out) { - int N = network.size(); - for (int i = 0; i < N; ++i) { - out.println("label: " + labelLexicon.lookupKey(i).getStringValue()); - ((NaiveBayesVector) network.get(i)).write(out); + /** + * Initializing constructor. Sets all member variables to their associated settings in the + * {@link NaiveBayes.Parameters} object. + * + * @param p The settings of all parameters. + **/ + public NaiveBayes(Parameters p) { + this("", p); } - out.println("End of NaiveBayes"); - } - - - /** - * Writes the learned function's internal representation in binary form. - * - * @param out The output stream. 
- **/ - public void write(ExceptionlessOutputStream out) { - super.write(out); - out.writeDouble(smoothing); - int N = network.size(); - out.writeInt(N); - for (int i = 0; i < N; ++i) - ((NaiveBayesVector) network.get(i)).write(out); - } - - - /** - * Reads the binary representation of a learner with this object's run-time - * type, overwriting any and all learned or manually specified parameters - * as well as the label lexicon but without modifying the feature lexicon. - * - * @param in The input stream. - **/ - public void read(ExceptionlessInputStream in) { - super.read(in); - smoothing = in.readDouble(); - int N = in.readInt(); - network = new OVector(N); - - for (int i = 0; i < N; ++i) { - NaiveBayesVector nbv = new NaiveBayesVector(); - nbv.read(in); - network.add(nbv); - } - } - - - /** Returns a deep clone of this learning algorithm. */ - public Object clone() { - NaiveBayes clone = (NaiveBayes) super.clone(); - int N = network.size(); - clone.network = new OVector(N); - for (int i = 0; i < N; ++i) - clone.network.add(((NaiveBayesVector) network.get(i)).clone()); - return clone; - } - - - /** - * Simply a container for all of {@link NaiveBayes}'s configurable - * parameters. Using instances of this class should make code more - * readable and constructors less complicated. - * - * @author Nick Rizzolo - **/ - public static class Parameters extends Learner.Parameters - { /** - * The exponential of this number is used as the conditional probability - * of a feature that was never observed during training; default - * {@link NaiveBayes#defaultSmoothing}. + * Initializes the name of the classifier. + * + * @param n The classifier's name. **/ - public double smoothing; - - - /** Sets all the default values. */ - public Parameters() { - smoothing = defaultSmoothing; + public NaiveBayes(String n) { + this(n, defaultSmoothing); } - /** - * Sets the parameters from the parent's parameters object, giving - * defaults to all parameters declared in this object. 
+ * Initializes the name and smoothing constant. + * + * @param name The classifier's name. + * @param smooth The exponential of this number is used as the conditional probability of a + * feature that was never observed during training. **/ - public Parameters(Learner.Parameters p) { - super(p); - smoothing = defaultSmoothing; + public NaiveBayes(String name, double smooth) { + super(name); + network = new OVector(); + smoothing = smooth; } - - /** Copy constructor. */ - public Parameters(Parameters p) { - super(p); - smoothing = p.smoothing; + /** + * Initializing constructor. Sets all member variables to their associated settings in the + * {@link NaiveBayes.Parameters} object. + * + * @param n The name of the classifier. + * @param p The settings of all parameters. + **/ + public NaiveBayes(String n, Parameters p) { + super(n); + network = new OVector(); + setParameters(p); } /** - * Calls the appropriate Learner.setParameters(Parameters) - * method for this Parameters object. - * - * @param l The learner whose parameters will be set. + * Sets the values of parameters that control the behavior of this learning algorithm. + * + * @param p The parameters. **/ - public void setParameters(Learner l) { - ((NaiveBayes) l).setParameters(this); + public void setParameters(Parameters p) { + smoothing = p.smoothing; } /** - * Creates a string representation of these parameters in which only - * those parameters that differ from their default values are mentioned. + * Retrieves the parameters that are set in this learner. + * + * @return An object containing all the values of the parameters that control the behavior of + * this learning algorithm. 
**/ - public String nonDefaultString() { - String result = super.nonDefaultString(); - - if (smoothing != NaiveBayes.defaultSmoothing) - result += ", smoothing = " + smoothing; - - if (result.startsWith(", ")) result = result.substring(2); - return result; - } - } - - - /** - * A Count object stores two doubles, one which - * holds a accumulated count value and the other intended to hold the - * natural logarithm of the count. The object also contains a - * boolean flag that is set when the log needs to be updated. - * - * @author Nick Rizzolo - **/ - protected static class Count implements Cloneable, Serializable - { - /** The accumulated value. */ - protected double count; - /** The natural logartihm of {@link #count} is sometimes stored here. */ - protected transient double logCount; - /** A flag that is set iff {@link #logCount} is not up to date. */ - protected transient boolean updateLog; - - - /** Sets the count to 0. */ - public Count() { - count = 0; - logCount = 0; - updateLog = true; + public Learner.Parameters getParameters() { + Parameters p = new Parameters(super.getParameters()); + p.smoothing = smoothing; + return p; } - /** Returns the integer count. */ - public double getCount() { return count; } - - /** - * Increments the count, but does not update the log. - * - * @param inc The amount the count should be incremented by. + * Sets the smoothing parameter to the specified value. + * + * @param s The new value for the smoothing parameter. **/ - public void increment(double inc) { - count += inc; - updateLog = true; + public void setSmoothing(double s) { + smoothing = s; } - /** Returns the log after updating it. */ - public double getLog() { - if (updateLog) { - logCount = Math.log(count); - updateLog = false; - } + /** + * Sets the labeler. + * + * @param l A labeling classifier. 
+ **/ + public void setLabeler(Classifier l) { + if (!l.getOutputType().equals("discrete")) { + System.err.println("LBJava WARNING: NaiveBayes will only work with a label classifier " + + "that returns discrete."); + System.err.println(" The given label classifier, " + l.getClass().getName() + + ", returns " + l.getOutputType() + "."); + } - return logCount; + super.setLabeler(l); } /** - * The string representation of a Count object is simply the - * integer count. + * Trains the learning algorithm given an object as an example. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @param exampleLabels The example's label(s). + * @param labelValues The labels' values. **/ - public String toString() { return "" + count; } + public void learn(int[] exampleFeatures, double[] exampleValues, int[] exampleLabels, + double[] labelValues) { + int label = exampleLabels[0]; + int N = network.size(); + NaiveBayesVector labelVector = null; + if (label >= N) { + while (N++ < label) + network.add(new NaiveBayesVector()); + labelVector = new NaiveBayesVector(); + network.add(labelVector); + } else + labelVector = (NaiveBayesVector) network.get(label); - /** - * Writes the count's internal representation in binary form. - * - * @param out The output stream. - **/ - public void write(ExceptionlessOutputStream out) { - out.writeDouble(count); + labelVector.scaledAdd(exampleFeatures, exampleValues, 1.0); } - /** - * Reads the binary representation of a count into this object, - * overwriting any data that may already be here. - * - * @param in The input stream. - **/ - public void read(ExceptionlessInputStream in) { - count = in.readDouble(); - updateLog = true; + /** Clears the network. */ + public void forget() { + super.forget(); + network = new OVector(); } /** - * This method returns a shallow clone. - * - * @return A shallow clone. 
+ * The scores in the returned {@link ScoreSet} are the posterior probabilities of each possible + * label given the example. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @return A set of scores indicating the degree to which each possible discrete classification + * value is associated with the given example object. **/ - public Object clone() { - Object clone = null; + public ScoreSet scores(int[] exampleFeatures, double[] exampleValues) { + ScoreSet s = new ScoreSet(); - try { clone = super.clone(); } - catch (Exception e) { - System.err.println("Error cloning " + getClass().getName() + ":"); - e.printStackTrace(); - System.exit(1); - } - - return clone; - } + for (int l = 0; l < network.size(); l++) { + NaiveBayesVector vector = (NaiveBayesVector) network.get(l); + double score = vector.dot(exampleFeatures, exampleValues); + s.put(labelLexicon.lookupKey(l).getStringValue(), score); + } + Score[] original = s.toArray(); + ScoreSet result = new ScoreSet(); - /** - * Special handling during deserialization to ensure that - * {@link #updateLog} is set to true. - * - * @param in The stream to deserialize from. - **/ - private void readObject(java.io.ObjectInputStream in) - throws IOException, ClassNotFoundException { - in.defaultReadObject(); - updateLog = true; - } - } - - - /** - * Keeps track of all the counts associated with a given label. - * Features are associated with {@link NaiveBayes.Count}s. Those not - * appearing in this vector are assumed to have a count of 0. The - * invocation of either of the scaledAdd methods increments - * the prior count for the label. - * - *

{@link RealFeature}s' strengths are ignored by this vector; they are - * assumed to be equal to 1, as if the feature were an active Boolean - * feature. - * - * @author Nick Rizzolo - **/ - protected class NaiveBayesVector extends SparseWeightVector - { - /** The counts in the vector indexed by their {@link Lexicon} key. */ - protected OVector counts; - /** - * The prior count is the number of times either scaledAdd - * method has been called. - **/ - protected Count priorCount; + // This code would clearly run quicker if you computed each exp(score) + // ahead of time, and divided them each by their sum. However, each score + // is likely to be a very negative number, so exp(score) may not be + // numerically stable. Subtracting two scores, however, hopefully leaves + // you with a "less negative" number, so exp applied to the subtraction + // hopefully behaves better. + for (int i = 0; i < original.length; ++i) { + double score = 1; - /** Simply instantiates {@link NaiveBayes.NaiveBayesVector#counts}. */ - public NaiveBayesVector() { this(new OVector(defaultCapacity)); } + for (int j = 0; j < original.length; ++j) { + if (i == j) + continue; + score += Math.exp(original[j].score - original[i].score); + } - /** - * Simply initializes {@link #counts}. - * - * @param w An array of counts. - **/ - public NaiveBayesVector(Count[] w) { this(new OVector(w)); } + result.put(original[i].value, 1 / score); + } - /** - * Simply initializes {@link #counts}. - * - * @param w A vector of counts. - **/ - public NaiveBayesVector(OVector w) { - counts = w; - priorCount = new Count(); + return result; } /** - * Returns the prior count of the prediction value associated with this - * vector. + * Returns the classification of the given example as a single feature instead of a + * {@link FeatureVector}. + * + * @param f The features array. + * @param v The values array. + * @return The classification of the example as a feature. 
**/ - public Count getPrior() { return priorCount; } + public Feature featureValue(int[] f, double[] v) { + double bestScore = -Double.MAX_VALUE; + int bestLabel = -1; + for (int l = 0; l < network.size(); l++) { + NaiveBayesVector vector = (NaiveBayesVector) network.get(l); + double score = vector.dot(f, v); - /** - * Takes the dot product of this vector with the given vector, using the - * hard coded smoothing weight. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @return The computed dot product. - **/ - public double dot(int[] exampleFeatures, double[] exampleValues) { - return - dot(exampleFeatures, exampleValues, priorCount.getLog() + smoothing); + if (score > bestScore) { + bestLabel = l; + bestScore = score; + } + } + + if (bestLabel == -1) + return null; + return predictions.get(bestLabel); } /** - * Takes the dot product of this vector with the given vector, - * using the specified default weight when encountering a feature that is - * not yet present in this vector. Here, weights are taken as - * log(feature count / prior count). The output of this method is - * related to the empirical probability of the example e as - * follows:

- * - * exp(dot(e)) / (sum of all labels' prior counts)) =
- * P(e's label && e) - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @param defaultW The default weight. - * @return The computed dot product. + * Prediction value counts and feature counts given a particular prediction value are used to + * select the most likely prediction value. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @return The most likely discrete value. **/ - public double dot(int[] exampleFeatures, double[] exampleValues, - double defaultW) { - double sum = (1 - exampleFeatures.length) * priorCount.getLog(); - for (int i = 0; i < exampleFeatures.length; i++) - sum += getWeight(exampleFeatures[i], defaultW); - return sum; + public String discreteValue(int[] exampleFeatures, double[] exampleValues) { + return featureValue(exampleFeatures, exampleValues).getStringValue(); } /** - * Returns the count of the given feature. - * - * @param featureIndex The feature index. - * @return The count of the feature. + * Prediction value counts and feature counts given a particular prediction value are used to + * select the most likely prediction value. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @return A single discrete feature, set to the most likely value. **/ - public double getCount(int featureIndex) { - while (counts.size() <= featureIndex) counts.add(new Count()); - return ((Count) counts.get(featureIndex)).getCount(); + public FeatureVector classify(int[] exampleFeatures, double[] exampleValues) { + return new FeatureVector(featureValue(exampleFeatures, exampleValues)); } /** - * Returns the weight of the given feature - * - * @param featureIndex The feature index. - * @param defaultW The default count. - * @return The weight of the feature. 
+ * Writes the algorithm's internal representation as text. + * + * @param out The output stream. **/ - public double getWeight(int featureIndex, double defaultW) { - while (counts.size() <= featureIndex) counts.add(new Count()); - Count c = (Count) counts.get(featureIndex); - if (c.getCount() == 0) return defaultW; - return c.getLog(); + public void write(PrintStream out) { + int N = network.size(); + for (int i = 0; i < N; ++i) { + out.println("label: " + labelLexicon.lookupKey(i).getStringValue()); + ((NaiveBayesVector) network.get(i)).write(out); + } + + out.println("End of NaiveBayes"); } /** - * This method is overridden to do nothing; use - * {@link #incrementCount(int,double)} instead. - * - * @param f Unused. - * @param w Unused. + * Writes the learned function's internal representation in binary form. + * + * @param out The output stream. **/ - public void setWeight(int f, double w) { } + public void write(ExceptionlessOutputStream out) { + super.write(out); + out.writeDouble(smoothing); + int N = network.size(); + out.writeInt(N); + for (int i = 0; i < N; ++i) + ((NaiveBayesVector) network.get(i)).write(out); + } /** - * Increments the count of the given feature. - * - * @param featureIndex The index of the feature to update. - * @param factor The factor by which to increment. + * Reads the binary representation of a learner with this object's run-time type, overwriting + * any and all learned or manually specified parameters as well as the label lexicon but without + * modifying the feature lexicon. + * + * @param in The input stream. 
**/ - public void incrementCount(int featureIndex, double factor) { - if (featureIndex < counts.size()) - ((Count) counts.get(featureIndex)).increment(factor); - else { - while (counts.size() < featureIndex) counts.add(new Count()); - Count c = new Count(); - c.increment(factor); - counts.add(c); - } + public void read(ExceptionlessInputStream in) { + super.read(in); + smoothing = in.readDouble(); + int N = in.readInt(); + network = new OVector(N); + + for (int i = 0; i < N; ++i) { + NaiveBayesVector nbv = new NaiveBayesVector(); + nbv.read(in); + network.add(nbv); + } } - /** - * This method is similar to the implementation in - * {@link SparseWeightVector} except that - * {@link NaiveBayes.NaiveBayesVector#incrementCount(int,double)} - * is called instead of - * {@link SparseWeightVector#setWeight(int,double)}. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @param factor The scaling factor. - **/ - public void scaledAdd(int[] exampleFeatures, double[] exampleValues, - double factor) { - priorCount.increment(factor); - for (int i = 0; i < exampleFeatures.length; i++) - incrementCount(exampleFeatures[i], factor); + /** Returns a deep clone of this learning algorithm. */ + public Object clone() { + NaiveBayes clone = (NaiveBayes) super.clone(); + int N = network.size(); + clone.network = new OVector(N); + for (int i = 0; i < N; ++i) + clone.network.add(((NaiveBayesVector) network.get(i)).clone()); + return clone; } /** - * This method is similar to the implementation in - * {@link SparseWeightVector} except that the defaultW - * argument is ignored and - * {@link NaiveBayes.NaiveBayesVector#incrementCount(int,double)} - * is called instead of - * {@link SparseWeightVector#setWeight(int,double)}. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @param factor The scaling factor. 
- * @param defaultW Unused. + * Simply a container for all of {@link NaiveBayes}'s configurable parameters. Using instances + * of this class should make code more readable and constructors less complicated. + * + * @author Nick Rizzolo **/ - public void scaledAdd(int[] exampleFeatures, double[] exampleValues, - double factor, double defaultW) { - scaledAdd(exampleFeatures, exampleValues, factor); - } + public static class Parameters extends Learner.Parameters { + /** + * The exponential of this number is used as the conditional probability of a feature that + * was never observed during training; default {@link NaiveBayes#defaultSmoothing}. + **/ + public double smoothing; - /** - * Outputs the contents of this vector into the specified - * PrintStream. The string representation is the same as in - * the super class, except the "Begin" annotation line also - * contains the value of {@link #priorCount} in parentheses. In - * addition, this method has access to the lexicon, so the output of this - * method is equivalent to that of {@link #write(PrintStream,Lexicon)}. - * - * @param out The stream to write to. - **/ - public void write(PrintStream out) { - write(out, lexicon); - } + /** Sets all the default values. */ + public Parameters() { + smoothing = defaultSmoothing; + } - /** - * Outputs the contents of this vector into the specified - * PrintStream. The string representation is the same as in - * the super class, except the "Begin" annotation line also - * contains the value of {@link #priorCount} in parentheses. - * - * @param out The stream to write to. - * @param lex The feature lexicon. 
- **/ - public void write(PrintStream out, Lexicon lex) { - out.println("Begin NaiveBayesVector (" + priorCount + ")"); - - Map map = lex.getMap(); - Map.Entry[] entries = - (Map.Entry[]) map.entrySet().toArray(new Map.Entry[map.size()]); - Arrays.sort(entries, - new Comparator() { - public int compare(Object o1, Object o2) { - Map.Entry e1 = (Map.Entry) o1; - Map.Entry e2 = (Map.Entry) o2; - return ((Feature) e1.getKey()).compareTo(e2.getKey()); - } - }); - - int i, biggest = 0; - for (i = 0; i < entries.length; ++i) { - String key = entries[i].getKey().toString(); - biggest = Math.max(biggest, key.length()); - } - - if (biggest % 2 == 0) biggest += 2; - else ++biggest; - - for (i = 0; i < entries.length; ++i) { - String key = entries[i].getKey().toString(); - int index = ((Integer) entries[i].getValue()).intValue(); - out.print(key); - for (int j = 0; key.length() + j < biggest; ++j) out.print(" "); - out.println(getCount(index)); - } - - out.println("End NaiveBayesVector"); - } + /** + * Sets the parameters from the parent's parameters object, giving defaults to all + * parameters declared in this object. + **/ + public Parameters(Learner.Parameters p) { + super(p); + smoothing = defaultSmoothing; + } - /** - * Writes the weight vector's internal representation in binary form. - * Note: this method does not call - * {@link SparseWeightVector#write(ExceptionlessOutputStream)} and does - * not output its class name or the contents of - * {@link SparseWeightVector#weights} since there shouldn't be any. - * - * @param out The output stream. - **/ - public void write(ExceptionlessOutputStream out) { - priorCount.write(out); - out.writeInt(counts.size()); - for (int i = 0; i < counts.size(); ++i) - ((Count) counts.get(i)).write(out); - } + /** Copy constructor. 
*/ + public Parameters(Parameters p) { + super(p); + smoothing = p.smoothing; + } - /** - * Reads the representation of a weight vector with this object's - * run-time type from the given stream, overwriting the data in this - * object. - * - *

This method is appropriate for reading weight vectors as written - * by {@link #write(ExceptionlessOutputStream)}. - * - * @param in The input stream. - **/ - public void read(ExceptionlessInputStream in) { - priorCount = new Count(); - priorCount.read(in); - int N = in.readInt(); - counts = new OVector(N); - for (int i = 0; i < N; ++i) { - Count c = new Count(); - c.read(in); - counts.add(c); - } + /** + * Calls the appropriate Learner.setParameters(Parameters) method for this + * Parameters object. + * + * @param l The learner whose parameters will be set. + **/ + public void setParameters(Learner l) { + ((NaiveBayes) l).setParameters(this); + } + + + /** + * Creates a string representation of these parameters in which only those parameters that + * differ from their default values are mentioned. + **/ + public String nonDefaultString() { + String result = super.nonDefaultString(); + + if (smoothing != NaiveBayes.defaultSmoothing) + result += ", smoothing = " + smoothing; + + if (result.startsWith(", ")) + result = result.substring(2); + return result; + } } /** - * Returns a copy of this NaiveBayesVector. - * - * @return A copy of this NaiveBayesVector. + * A Count object stores two doubles, one which holds a accumulated + * count value and the other intended to hold the natural logarithm of the count. The object + * also contains a boolean flag that is set when the log needs to be updated. + * + * @author Nick Rizzolo **/ - public Object clone() { - NaiveBayesVector clone = (NaiveBayesVector) super.clone(); - Count[] array = new Count[counts.size()]; - for (int i = 0; i < counts.size(); ++i) - array[i] = (Count) ((Count) counts.get(i)).clone(); - clone.counts = new OVector(array); - return clone; + protected static class Count implements Cloneable, Serializable { + /** The accumulated value. */ + protected double count; + /** The natural logartihm of {@link #count} is sometimes stored here. 
*/ + protected transient double logCount; + /** A flag that is set iff {@link #logCount} is not up to date. */ + protected transient boolean updateLog; + + + /** Sets the count to 0. */ + public Count() { + count = 0; + logCount = 0; + updateLog = true; + } + + + /** Returns the integer count. */ + public double getCount() { + return count; + } + + + /** + * Increments the count, but does not update the log. + * + * @param inc The amount the count should be incremented by. + **/ + public void increment(double inc) { + count += inc; + updateLog = true; + } + + + /** Returns the log after updating it. */ + public double getLog() { + if (updateLog) { + logCount = Math.log(count); + updateLog = false; + } + + return logCount; + } + + + /** + * The string representation of a Count object is simply the integer count. + **/ + public String toString() { + return "" + count; + } + + + /** + * Writes the count's internal representation in binary form. + * + * @param out The output stream. + **/ + public void write(ExceptionlessOutputStream out) { + out.writeDouble(count); + } + + + /** + * Reads the binary representation of a count into this object, overwriting any data that + * may already be here. + * + * @param in The input stream. + **/ + public void read(ExceptionlessInputStream in) { + count = in.readDouble(); + updateLog = true; + } + + + /** + * This method returns a shallow clone. + * + * @return A shallow clone. + **/ + public Object clone() { + Object clone = null; + + try { + clone = super.clone(); + } catch (Exception e) { + System.err.println("Error cloning " + getClass().getName() + ":"); + e.printStackTrace(); + System.exit(1); + } + + return clone; + } + + + /** + * Special handling during deserialization to ensure that {@link #updateLog} is set to + * true. + * + * @param in The stream to deserialize from. 
+ **/ + private void readObject(java.io.ObjectInputStream in) throws IOException, + ClassNotFoundException { + in.defaultReadObject(); + updateLog = true; + } } /** - * Returns a new, empty weight vector with the same parameter settings as - * this one. - * - * @return An empty weight vector. + * Keeps track of all the counts associated with a given label. Features are associated with + * {@link NaiveBayes.Count}s. Those not appearing in this vector are assumed to have a count of + * 0. The invocation of either of the scaledAdd methods increments the prior count + * for the label. + * + *

+ * {@link RealFeature}s' strengths are ignored by this vector; they are assumed to be equal to + * 1, as if the feature were an active Boolean feature. + * + * @author Nick Rizzolo **/ - public SparseWeightVector emptyClone() { - return new NaiveBayesVector(); + protected class NaiveBayesVector extends SparseWeightVector { + /** The counts in the vector indexed by their {@link Lexicon} key. */ + protected OVector counts; + /** + * The prior count is the number of times either scaledAdd method has been + * called. + **/ + protected Count priorCount; + + + /** Simply instantiates {@link NaiveBayes.NaiveBayesVector#counts}. */ + public NaiveBayesVector() { + this(new OVector(defaultCapacity)); + } + + /** + * Simply initializes {@link #counts}. + * + * @param w An array of counts. + **/ + public NaiveBayesVector(Count[] w) { + this(new OVector(w)); + } + + /** + * Simply initializes {@link #counts}. + * + * @param w A vector of counts. + **/ + public NaiveBayesVector(OVector w) { + counts = w; + priorCount = new Count(); + } + + + /** + * Returns the prior count of the prediction value associated with this vector. + **/ + public Count getPrior() { + return priorCount; + } + + + /** + * Takes the dot product of this vector with the given vector, using the hard coded + * smoothing weight. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @return The computed dot product. + **/ + public double dot(int[] exampleFeatures, double[] exampleValues) { + return dot(exampleFeatures, exampleValues, priorCount.getLog() + smoothing); + } + + + /** + * Takes the dot product of this vector with the given vector, using the specified default + * weight when encountering a feature that is not yet present in this vector. Here, weights + * are taken as log(feature count / prior count). The output of this method is + * related to the empirical probability of the example e as follows:
+ *
+ * + * exp(dot(e)) / (sum of all labels' prior counts)) =
+ * P(e's label && e) + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @param defaultW The default weight. + * @return The computed dot product. + **/ + public double dot(int[] exampleFeatures, double[] exampleValues, double defaultW) { + double sum = (1 - exampleFeatures.length) * priorCount.getLog(); + for (int i = 0; i < exampleFeatures.length; i++) + sum += getWeight(exampleFeatures[i], defaultW); + return sum; + } + + + /** + * Returns the count of the given feature. + * + * @param featureIndex The feature index. + * @return The count of the feature. + **/ + public double getCount(int featureIndex) { + while (counts.size() <= featureIndex) + counts.add(new Count()); + return ((Count) counts.get(featureIndex)).getCount(); + } + + + /** + * Returns the weight of the given feature + * + * @param featureIndex The feature index. + * @param defaultW The default count. + * @return The weight of the feature. + **/ + public double getWeight(int featureIndex, double defaultW) { + while (counts.size() <= featureIndex) + counts.add(new Count()); + Count c = (Count) counts.get(featureIndex); + if (c.getCount() == 0) + return defaultW; + return c.getLog(); + } + + + /** + * This method is overridden to do nothing; use {@link #incrementCount(int,double)} instead. + * + * @param f Unused. + * @param w Unused. + **/ + public void setWeight(int f, double w) {} + + + /** + * Increments the count of the given feature. + * + * @param featureIndex The index of the feature to update. + * @param factor The factor by which to increment. 
+ **/ + public void incrementCount(int featureIndex, double factor) { + if (featureIndex < counts.size()) + ((Count) counts.get(featureIndex)).increment(factor); + else { + while (counts.size() < featureIndex) + counts.add(new Count()); + Count c = new Count(); + c.increment(factor); + counts.add(c); + } + } + + + /** + * This method is similar to the implementation in {@link SparseWeightVector} except that + * {@link NaiveBayes.NaiveBayesVector#incrementCount(int,double)} is called instead of + * {@link SparseWeightVector#setWeight(int,double)}. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @param factor The scaling factor. + **/ + public void scaledAdd(int[] exampleFeatures, double[] exampleValues, double factor) { + priorCount.increment(factor); + for (int i = 0; i < exampleFeatures.length; i++) + incrementCount(exampleFeatures[i], factor); + } + + + /** + * This method is similar to the implementation in {@link SparseWeightVector} except that + * the defaultW argument is ignored and + * {@link NaiveBayes.NaiveBayesVector#incrementCount(int,double)} is called instead of + * {@link SparseWeightVector#setWeight(int,double)}. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @param factor The scaling factor. + * @param defaultW Unused. + **/ + public void scaledAdd(int[] exampleFeatures, double[] exampleValues, double factor, + double defaultW) { + scaledAdd(exampleFeatures, exampleValues, factor); + } + + + /** + * Outputs the contents of this vector into the specified PrintStream. The + * string representation is the same as in the super class, except the "Begin" + * annotation line also contains the value of {@link #priorCount} in parentheses. 
In + * addition, this method has access to the lexicon, so the output of this method is + * equivalent to that of {@link #write(PrintStream,Lexicon)}. + * + * @param out The stream to write to. + **/ + public void write(PrintStream out) { + write(out, lexicon); + } + + + /** + * Outputs the contents of this vector into the specified PrintStream. The + * string representation is the same as in the super class, except the "Begin" + * annotation line also contains the value of {@link #priorCount} in parentheses. + * + * @param out The stream to write to. + * @param lex The feature lexicon. + **/ + public void write(PrintStream out, Lexicon lex) { + out.println("Begin NaiveBayesVector (" + priorCount + ")"); + + Map map = lex.getMap(); + Map.Entry[] entries = (Map.Entry[]) map.entrySet().toArray(new Map.Entry[map.size()]); + Arrays.sort(entries, new Comparator() { + public int compare(Object o1, Object o2) { + Map.Entry e1 = (Map.Entry) o1; + Map.Entry e2 = (Map.Entry) o2; + return ((Feature) e1.getKey()).compareTo(e2.getKey()); + } + }); + + int i, biggest = 0; + for (i = 0; i < entries.length; ++i) { + String key = entries[i].getKey().toString(); + biggest = Math.max(biggest, key.length()); + } + + if (biggest % 2 == 0) + biggest += 2; + else + ++biggest; + + for (i = 0; i < entries.length; ++i) { + String key = entries[i].getKey().toString(); + int index = ((Integer) entries[i].getValue()).intValue(); + out.print(key); + for (int j = 0; key.length() + j < biggest; ++j) + out.print(" "); + out.println(getCount(index)); + } + + out.println("End NaiveBayesVector"); + } + + + /** + * Writes the weight vector's internal representation in binary form. Note: this + * method does not call {@link SparseWeightVector#write(ExceptionlessOutputStream)} and does + * not output its class name or the contents of {@link SparseWeightVector#weights} since + * there shouldn't be any. + * + * @param out The output stream. 
+ **/ + public void write(ExceptionlessOutputStream out) { + priorCount.write(out); + out.writeInt(counts.size()); + for (int i = 0; i < counts.size(); ++i) + ((Count) counts.get(i)).write(out); + } + + + /** + * Reads the representation of a weight vector with this object's run-time type from the + * given stream, overwriting the data in this object. + * + *

+ * This method is appropriate for reading weight vectors as written by + * {@link #write(ExceptionlessOutputStream)}. + * + * @param in The input stream. + **/ + public void read(ExceptionlessInputStream in) { + priorCount = new Count(); + priorCount.read(in); + int N = in.readInt(); + counts = new OVector(N); + for (int i = 0; i < N; ++i) { + Count c = new Count(); + c.read(in); + counts.add(c); + } + } + + + /** + * Returns a copy of this NaiveBayesVector. + * + * @return A copy of this NaiveBayesVector. + **/ + public Object clone() { + NaiveBayesVector clone = (NaiveBayesVector) super.clone(); + Count[] array = new Count[counts.size()]; + for (int i = 0; i < counts.size(); ++i) + array[i] = (Count) ((Count) counts.get(i)).clone(); + clone.counts = new OVector(array); + return clone; + } + + + /** + * Returns a new, empty weight vector with the same parameter settings as this one. + * + * @return An empty weight vector. + **/ + public SparseWeightVector emptyClone() { + return new NaiveBayesVector(); + } } - } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/NeuralNetLearner.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/NeuralNetLearner.java new file mode 100644 index 00000000..e25c5b5d --- /dev/null +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/NeuralNetLearner.java @@ -0,0 +1,447 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.learn; + +import java.io.PrintStream; +import java.util.Arrays; +import java.util.Random; + +import edu.illinois.cs.cogcomp.lbjava.classify.Feature; +import edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector; +import edu.illinois.cs.cogcomp.lbjava.classify.RealPrimitiveStringFeature; +import edu.illinois.cs.cogcomp.lbjava.classify.ScoreSet; +import edu.illinois.cs.cogcomp.lbjava.neuralnet.Layer; +import edu.illinois.cs.cogcomp.lbjava.neuralnet.SimpleNNTrainer; + +/** + * This class will leverage the Neural Net implementation {@see edu.illinois.cs.cogcomp.lbjava.neuralnet.NeuralNetwork} + * to allow creation and use of a backprop neural net implementation including momentum, bias, and back propogation + * for learning. There is a threaded learner that works quite well ONLY where there are a large number of weights + * between layers. + *

+ * This class is really just a wrapper for a SimpleNNTrainer which does all the work of training. + * @author redman + */ +public class NeuralNetLearner extends Learner { + + /** computed */ + private static final long serialVersionUID = -3369861028861092661L; + + /** the parameters for learning and stuff. */ + private Parameters parameters = new Parameters(); + + /** This is the object that will train the neural net up. It uses it own + * interal mechanism and data representation for efficiency. */ + private SimpleNNTrainer trainer = null; + + /** + * our props include not only number of rounds (epochs), also a learning rate and momentum. + * @author redman + */ + public static class Parameters extends Learner.Parameters { + /** default */ + private static final long serialVersionUID = 1L; + + /** the learning rate. */ + public float learningRate = 0.5f; + + /** the momentum value. */ + public float momentum = 0.5f; + + /** the momentum value. */ + public int seed = -1; + + /** the number of inputs */ + public int inputCount = 0; + + /** the number of outputs */ + public int outputCount = 1; + + /** the number of outputs from the single hidden layer */ + public int hiddenCount = 100; + + /** the layers of the neural network. */ + private Layer[] layers; + + /** + * Copy properties from the provided properties. + * @param p the props to copy. + */ + public Parameters(Parameters p) { + this.learningRate = p.learningRate; + this.momentum = p.momentum; + this.seed = p.seed; + this.inputCount = p.inputCount; + this.outputCount = p.outputCount; + this.hiddenCount = p.hiddenCount; + } + /** + * Copy properties from the provided properties. + * @param p the props to copy. + */ + public Parameters() { + this.learningRate = 0.5f; + this.momentum = 0.5f; + this.seed = -1; + this.inputCount = 0; + this.hiddenCount = 100; + this.outputCount = 1; + } + + } + + /** used to store inputs so we don't realloc these arrays over and over. 
This is an optimization + * only possible because we know this guys is not multithreaded. */ + private float inputs[] = null; + + /** used to store inputs so we don't realloc these arrays over and over. This is an optimization + * only possible because we know this guys is not multithreaded. */ + private float outputs[] = null; + + /** number of neurons in each layer, including input and output layers.*/ + private int[] layerSizes = null; + + /** + * Init the neural network learner by providing array with number of neurons in each layer, including + * the input layer. The caller will need to determin the number of inputs, the number of outputs and the number + * of hidden layers, and the neurons in that layer. The first index in teh layerSizes indicates the number of inputs, + * the middle layers sizes are determined by the middle integer sizes, and the number of outputs is the last number + * of neurons. + * @param layerSizes the number of neurons in each layer. + */ + public NeuralNetLearner () { + super("Howdy"); + this.layerSizes = new int[3]; + } + + /** + * given arguments for initialization parameters. + * @param p the parameters. + */ + public NeuralNetLearner(Parameters p) { + super("Howdy"); + this.parameters = p; + } + + /** + * The learning rate takes the default value. + * @param n The name of the classifier. + */ + public NeuralNetLearner(String n) { + super(n); + } + + /** + * Init the neural network learner by providing array with number of neurons in each layer, including + * the input layer. The caller will need to determin the number of inputs, the number of outputs and the number + * of hidden layers, and the neurons in that layer. The first index in teh layerSizes indicates the number of inputs, + * the middle layers sizes are determined by the middle integer sizes, and the number of outputs is the last number + * of neurons. + * @param layerSizes the number of neurons in each layer. 
+ */ + public NeuralNetLearner (int[] layerSizes, Parameters p, boolean training) { + super("Howdy"); + parameters = p; + parameters.layers = new Layer[layerSizes.length-1]; + this.layerSizes = layerSizes; + this.forget(); + } + + /** + * Resets the weight vector to all zeros. + */ + public void forget() { + super.forget(); + if (this.getInputCount() != -1) { + this.layerSizes = new int[3]; + this.layerSizes[0] = this.getInputCount(); + this.layerSizes[1] = this.getHiddenCount(); + this.layerSizes[2] = this.getOutputCount(); + parameters.layers = new Layer[layerSizes.length-1]; + Layer[] l = this.parameters.layers; + Random r = new Random (1234); + for (int i = 0; i < layerSizes.length-1; i++) { + l[i] = new Layer(layerSizes[i], layerSizes[i+1], r); + } + inputs = new float[l[0].getNumberInputs()]; + outputs = new float[l[l.length-1].getNumberOutputs()]; + trainer = new SimpleNNTrainer(parameters.layers, parameters.learningRate, parameters.momentum); + } + } + + /** + * Returns a string describing the output feature type of this classifier. + * @return "real" + **/ + public String getOutputType() { + return "real"; + } + /** + * Writes the learned function's internal representation in binary form. + * @param out The output stream. + + public void write(ExceptionlessOutputStream out) { + super.write(out); + out.writeFloat(this.parameters.learningRate); + out.writeFloat(this.parameters.momentum); + out.writeInt(this.parameters.rounds); + if (this.layerSizes == null) + out.writeInt(0); + else { + out.writeInt(this.layerSizes.length); + for (int neurons : this.layerSizes) + out.writeInt(neurons); + for (Layer l : this.parameters.layers) { + l.write(out); + } + } + } + + /** + * Reads the binary representation of a learner with this object's run-time type, overwriting + * any and all learned or manually specified parameters as well as the label lexicon but without + * modifying the feature lexicon. + * @param in The input stream. 
+ + public void read(ExceptionlessInputStream in) { + super.read(in); + this.parameters.learningRate = in.readFloat(); + this.parameters.momentum = in.readFloat(); + this.parameters.rounds = in.readInt(); + int layers = in.readInt(); + if (layers != 0) { + int[] szs = new int[layers]; + for (int i = 0 ; i < szs.length; i++) + szs[i] = in.readInt(); + this.layerSizes = szs; + Random r = new Random (1234); + for (int i = 0; i < layerSizes.length-1; i++) { + this.parameters.layers[i] = new Layer(layerSizes[i], layerSizes[i+1], r); + } + trainer = new SimpleNNTrainer(parameters.layers, parameters.learningRate, parameters.momentum); + for (Layer l : this.parameters.layers) { + l.read(in); + } + } + } + + + /** + * Populate the input and output vectors with the values for only those + * features that are represented. + */ + final private void populateNNVector(int[] exampleFeatures, double[] exampleValues, int[] exampleLabels, + double[] labelValues) { + Arrays.fill(inputs,0.0f); + Arrays.fill(outputs,0.0f); + for (int i = 0; i < exampleFeatures.length; i++) + inputs[exampleFeatures[i]] = (float)exampleValues[i]; + if (exampleLabels != null) + for (int i = 0; i < exampleLabels.length; i++) + outputs[exampleLabels[i]] = (float)labelValues[i]; + + } + + /** + * Trains the learning algorithm given an object as an example. + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @param exampleLabels The example's label(s). + * @param labelValues The labels' values. + **/ + public void learn(int[] exampleFeatures, double[] exampleValues, int[] exampleLabels, + double[] labelValues) { + this.populateNNVector(exampleFeatures, exampleValues, exampleLabels, labelValues); + this.trainer.train(inputs, outputs); + } + + /** + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. 
+ * @return null + **/ + public ScoreSet scores(int[] exampleFeatures, double[] exampleValues) { + return null; + } + + /** + * Returns the classification of the given example as a single feature instead of a + * {@link FeatureVector}. + * @param f The features array. + * @param v The values array. + * @return The classification of the example as a feature. + **/ + public Feature featureValue(int[] f, double[] v) { + this.populateNNVector(f, v, null, null); + + // this returns the activation energies for ALL layers, we only wan the output layer + float[][] results = this.trainer.activate(inputs); + + // the last vector contains the score, this is the output of the last layer. + return new RealPrimitiveStringFeature(containingPackage, name, "", results [results.length-1][0]); + } + + /** + * Simply computes the dot product of the weight vector and the example + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @return The computed real value. + **/ + public double realValue(int[] exampleFeatures, double[] exampleValues) { + this.populateNNVector(exampleFeatures, exampleValues, null, null); + return (double) this.trainer.activate(inputs)[0][0]; + } + + /** + * Simply computes the dot product of the weight vector and the feature vector extracted from + * the example object. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @return The computed feature (in a vector). + **/ + public FeatureVector classify(int[] exampleFeatures, double[] exampleValues) { + return new FeatureVector(featureValue(exampleFeatures, exampleValues)); + } + + /** + * Writes the algorithm's internal representation as text. In the first line of output, the name + * of the classifier is printed, followed by {@link #learningRate} and {@link #bias}. + * @param out The output stream. 
+ */ + public void write(PrintStream out) { + out.println(name + ": " + this.parameters.learningRate + ", " + this.parameters.momentum + ", " + this.parameters.rounds); + for (Layer l : this.parameters.layers) { + l.write(out); + } + } + + /** + * Returns a deep clone of this learning algorithm. + * TODO + */ + public Object clone() { + NeuralNetLearner clone = null; + try { + clone = (NeuralNetLearner) super.clone(); + } catch (Exception e) { + System.err.println("Error cloning StochasticGradientDescent: " + e); + System.exit(1); + } + return clone; + } + + /** + * @return the seed to seed all random number gen. + */ + public int getSeed() { + return this.parameters.seed; + } + + /** + * @param seed the seed to set + */ + public void setSeed(int seed) { + this.parameters.seed = seed; + } + + /** + * @return the number of total inputs + */ + public int getInputCount() { + return this.parameters.inputCount; + } + + /** + * @param inputCount the inputCount to set + */ + public void setInputCount(int inputCount) { + this.parameters.inputCount = inputCount; + } + + /** + * @return the outputCount + */ + public int getOutputCount() { + return this.parameters.outputCount; + } + + /** + * @param outputCount the outputCount to set + */ + public void setOutputCount(int outputCount) { + this.parameters.outputCount = outputCount; + } + + /** + * @return the hiddenCount + */ + public int getHiddenCount() { + return this.parameters.hiddenCount; + } + + /** + * @param hiddenCount the hiddenCount to set + */ + public void setHiddenCount(int hiddenCount) { + this.parameters.hiddenCount = hiddenCount; + } + + /** + * @return the learning rate used to throttle the rate at wich the weight parameters change. + */ + public float getLearningRate() { + return parameters.learningRate; + } + + /** + * set the learning rate at which the weight parameters change. + * @param learningRate the learning rate at which the weight parameters change. 
+ */ + public void setLearningRate(float learningRate) { + this.parameters.learningRate = learningRate; + } + + public float getMomentum() { + return parameters.momentum; + } + + /** + * set the value used to prevent convergence against local minimum. + * @param momentum used to prevent convergence against local minimum. + */ + public void setMomentum(float momentum) { + this.parameters.momentum = momentum; + } + + /** + * Get the number of epochs. + * @return number of epochs to train. + */ + public int getEpochs() { + return parameters.rounds; + } + + /** + * set the number of training iterations. More should yield better results, until overfit. + * @param learningRate set the number of training iterations. + */ + public void setEpochs(int epochs) { + this.parameters.rounds = epochs; + } + + /** + * Retrieves the parameters that are set in this learner. + * @return An object containing all the values of the parameters that control the behavior of + * this learning algorithm. + **/ + public Learner.Parameters getParameters() { + return parameters; + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Normalizer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Normalizer.java index 554f654a..756f3116 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Normalizer.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Normalizer.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.learn; @@ -14,21 +11,18 @@ /** - * A normalizer is a function of a ScoreSet producing normalized - * scores. It is left to the implementing subclass to define the term - * "normalized". - * - * @author Nick Rizzolo + * A normalizer is a function of a ScoreSet producing normalized scores. It is left to + * the implementing subclass to define the term "normalized". + * + * @author Nick Rizzolo **/ -public abstract class Normalizer -{ - /** - * Normalizes the given ScoreSet; its scores are modified in - * place before it is returned. - * - * @param scores The set of scores to normalize. - * @return The normalized set of scores. - **/ - abstract public ScoreSet normalize(ScoreSet scores); +public abstract class Normalizer { + /** + * Normalizes the given ScoreSet; its scores are modified in place before it is + * returned. + * + * @param scores The set of scores to normalize. + * @return The normalized set of scores. + **/ + abstract public ScoreSet normalize(ScoreSet scores); } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/PassiveAggressive.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/PassiveAggressive.java index a89f9fda..11e35927 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/PassiveAggressive.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/PassiveAggressive.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. 
See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.learn; @@ -17,309 +14,290 @@ /** - * The Passive Aggressive learning algorithm implementation. This algorithm - * operates very similarly to {@link SparsePerceptron} with a thick - * separator, except the learning rate is a function of each training - * example's margin. {@link LinearThresholdUnit#learningRate} is - * defined for each example as the following value. - * - *

- *

- * (1 - y(w*x)) / ||x||2 - *
- * - *

In the expression above, w is the weight vector, y - * represents the label of the example vector x, * stands for - * inner product. If this expression turns out to be non-positive - * (i.e., if y(w*x) >= 1), no update is made for that example. - * - *

It is assumed that {@link Learner#labeler} is a single discrete - * classifier that produces the same feature for every example object and - * that the values that feature may take are available through the - * {@link Classifier#allowableValues()} method. The second value returned - * from {@link Classifier#allowableValues()} is treated as "positive", and it - * is assumed there are exactly 2 allowable values. Assertions will produce - * error messages if these assumptions do not hold. - * - *

This algorithm's user-configurable parameters are stored in member - * fields of this class. They may be set via either a constructor that names - * each parameter explicitly or a constructor that takes an instance of - * {@link learn.PassiveAggressive.Parameters Parameters} as input. - * The documentation in each member field in this class indicates the default - * value of the associated parameter when using the former type of - * constructor. The documentation of the associated member field in the - * {@link learn.PassiveAggressive.Parameters Parameters} class - * indicates the default value of the parameter when using the latter type of - * constructor. Note that this learner will not actually use any - * user-supplied value for - * {@link learn.LinearThresholdUnit.Parameters#learningRate} as this is - * computed automatically. - * - * @author Michael Paul + * The Passive Aggressive learning algorithm implementation. This algorithm operates very similarly + * to {@link SparsePerceptron} with a thick separator, except the learning rate is a function of + * each training example's margin. {@link LinearThresholdUnit#learningRate} is defined for each + * example as the following value. + * + *

+ *

(1 - y(w*x)) / ||x||2
+ * + *

+ * In the expression above, w is the weight vector, y represents the label of the + * example vector x, * stands for inner product. If this expression turns out to be + * non-positive (i.e., if y(w*x) >= 1), no update is made for that example. + * + *

+ * It is assumed that {@link Learner#labeler} is a single discrete classifier that produces the same + * feature for every example object and that the values that feature may take are available through + * the {@link Classifier#allowableValues()} method. The second value returned from + * {@link Classifier#allowableValues()} is treated as "positive", and it is assumed there are + * exactly 2 allowable values. Assertions will produce error messages if these assumptions do not + * hold. + * + *

+ * This algorithm's user-configurable parameters are stored in member fields of this class. They may + * be set via either a constructor that names each parameter explicitly or a constructor that takes + * an instance of {@link PassiveAggressive.Parameters Parameters} as input. The documentation in + * each member field in this class indicates the default value of the associated parameter when + * using the former type of constructor. The documentation of the associated member field in the + * {@link PassiveAggressive.Parameters Parameters} class indicates the default value of the + * parameter when using the latter type of constructor. Note that this learner will not actually use + * any user-supplied value for {@link LinearThresholdUnit.Parameters#learningRate} as this is + * computed automatically. + * + * @author Michael Paul **/ -public class PassiveAggressive extends LinearThresholdUnit -{ - /** - * The learning rate and threshold take default values, while the name of - * the classifier gets the empty string. - **/ - public PassiveAggressive() { this(""); } - - - /** - * Sets the learning rate and threshold to the specified values, while the - * name of the classifier gets the empty string. - * - * @param t The desired threshold value. - **/ - public PassiveAggressive(double t) { - this("", t); - } - - /** - * Use this constructor to fit a thick separator, where both the positive - * and negative sides of the hyperplane will be given the specified - * thickness, while the name of the classifier gets the empty string. - * - * @param t The desired threshold value. - * @param pt The desired thickness. - **/ - public PassiveAggressive(double t, double pt) { - this("", t, pt); - } - - /** - * Use this constructor to fit a thick separator, where the positive and - * negative sides of the hyperplane will be given the specified separate - * thicknesses, while the name of the classifier gets the empty string. - * - * @param t The desired threshold value. 
- * @param pt The desired positive thickness. - * @param nt The desired negative thickness. - **/ - public PassiveAggressive(double t, double pt, double nt) { - this("", t, pt, nt); - } - - /** - * Use this constructor to specify an alternative subclass of - * {@link SparseWeightVector}, while the name of the classifier gets the - * empty string. - * - * @param t The desired threshold value. - * @param pt The desired positive thickness. - * @param nt The desired negative thickness. - * @param v An empty sparse weight vector. - **/ - public PassiveAggressive(double t, double pt, double nt, - SparseWeightVector v) { - this("", t, pt, nt, v); - } - - /** - * Initializing constructor. Sets all member variables to their associated - * settings in the {@link SparsePerceptron.Parameters} object. - * - * @param p The settings of all parameters. - **/ - public PassiveAggressive(Parameters p) { this("", p); } - - - /** - * Sets the learning rate to the specified value, and the threshold takes - * the default. - * - * @param n The name of the classifier. - **/ - public PassiveAggressive(String n) { - this(n, LinearThresholdUnit.defaultThreshold); - } - - /** - * Sets the learning rate and threshold to the specified values. - * - * @param n The name of the classifier. - * @param t The desired threshold value. - **/ - public PassiveAggressive(String n, double t) { - this(n, t, LinearThresholdUnit.defaultThickness); - } - - /** - * Use this constructor to fit a thick separator, where both the positive - * and negative sides of the hyperplane will be given the specified - * thickness. - * - * @param n The name of the classifier. - * @param t The desired threshold value. - * @param pt The desired thickness. - **/ - public PassiveAggressive(String n, double t, double pt) { - this(n, t, pt, pt); - } - - /** - * Use this constructor to fit a thick separator, where the positive and - * negative sides of the hyperplane will be given the specified separate - * thicknesses. 
- * - * @param n The name of the classifier. - * @param t The desired threshold value. - * @param pt The desired positive thickness. - * @param nt The desired negative thickness. - **/ - public PassiveAggressive(String n, double t, double pt, double nt) { - this(n, t, pt, nt, - (SparseWeightVector) - LinearThresholdUnit.defaultWeightVector.clone()); - } - - /** - * Use this constructor to specify an alternative subclass of - * {@link SparseWeightVector}. - * - * @param n The name of the classifier. - * @param t The desired threshold value. - * @param pt The desired positive thickness. - * @param nt The desired negative thickness. - * @param v An empty sparse weight vector. - **/ - public PassiveAggressive(String n, double t, double pt, double nt, - SparseWeightVector v) { - super(n, LinearThresholdUnit.defaultLearningRate, t, pt, nt, v); - } - - /** - * Initializing constructor. Sets all member variables to their associated - * settings in the {@link SparsePerceptron.Parameters} object. - * - * @param n The name of the classifier. - * @param p The settings of all parameters. - **/ - public PassiveAggressive(String n, Parameters p) { - super(n, p); - } - - - /** - * Retrieves the parameters that are set in this learner. - * - * @return An object containing all the values of the parameters that - * control the behavior of this learning algorithm. - **/ - public Learner.Parameters getParameters() { - return - new Parameters((LinearThresholdUnit.Parameters) super.getParameters()); - } - - - /** - * Scales the feature vector produced by the extractor by the learning rate - * and adds it to the weight vector. - * - * @param exampleFeatures The example's array of feature indices - * @param exampleValues The example's array of feature values - * @param rate The learning rate at which the weights are updated. 
- **/ - public void promote(int[] exampleFeatures, double[] exampleValues, - double rate) { - weightVector.scaledAdd(exampleFeatures, exampleValues, rate, - initialWeight); - bias += rate; - } - - - /** - * Scales the feature vector produced by the extractor by the learning rate - * and subtracts it from the weight vector. - * - * @param exampleFeatures The example's array of feature indices - * @param exampleValues The example's array of feature values - * @param rate The learning rate at which the weights are updated. - **/ - public void demote(int[] exampleFeatures, double[] exampleValues, - double rate) { - weightVector.scaledAdd(exampleFeatures, exampleValues, -rate, - initialWeight); - bias -= rate; - } - - - /** - * Computes the value of the learning rate for this example. - * - * @param exampleFeatures The example's array of feature indices - * @param exampleValues The example's array of feature values - * @param s The score for the example object - * @param label The label - **/ - public double computeLearningRate(int[] exampleFeatures, - double[] exampleValues, double s, - boolean label) { - double labelVal = label ? 1: -1; - - double rate = (1 - labelVal * s) - / (FeatureVector.L2NormSquared(exampleValues) + 1); - - if (rate < 0) rate = 0; - return rate; - } - - - /** - * Writes the algorithm's internal representation as text. In the first - * line of output, the name of the classifier is printed, followed by - * {@link #learningRate}, - * {@link LinearThresholdUnit#initialWeight}, - * {@link LinearThresholdUnit#threshold}, - * {@link LinearThresholdUnit#positiveThickness}, - * {@link LinearThresholdUnit#negativeThickness}, - * and finally {@link LinearThresholdUnit#bias}. - * - * @param out The output stream. 
- **/ - public void write(PrintStream out) { - out.println(name + ": " + learningRate + ", " - + initialWeight + ", " + threshold + ", " + positiveThickness - + ", " + negativeThickness + ", " + bias); - if (lexicon.size() == 0) weightVector.write(out); - else weightVector.write(out, lexicon); - } - - - /** - * Simply a container for all of {@link PassiveAggressive}'s configurable - * parameters. Using instances of this class should make code more - * readable and constructors less complicated. - * - * @author Nick Rizzolo - **/ - public static class Parameters extends LinearThresholdUnit.Parameters - { - /** Sets all the default values. */ - public Parameters() { } +public class PassiveAggressive extends LinearThresholdUnit { + /** + * The learning rate and threshold take default values, while the name of the classifier gets + * the empty string. + **/ + public PassiveAggressive() { + this(""); + } /** - * Sets the parameters from the parent's parameters object, giving - * defaults to all parameters declared in this object. + * Sets the learning rate and threshold to the specified values, while the name of the + * classifier gets the empty string. + * + * @param t The desired threshold value. **/ - public Parameters(LinearThresholdUnit.Parameters p) { super(p); } + public PassiveAggressive(double t) { + this("", t); + } + /** + * Use this constructor to fit a thick separator, where both the positive and negative sides of + * the hyperplane will be given the specified thickness, while the name of the classifier gets + * the empty string. + * + * @param t The desired threshold value. + * @param pt The desired thickness. + **/ + public PassiveAggressive(double t, double pt) { + this("", t, pt); + } - /** Copy constructor. 
*/ - public Parameters(Parameters p) { super(p); } + /** + * Use this constructor to fit a thick separator, where the positive and negative sides of the + * hyperplane will be given the specified separate thicknesses, while the name of the classifier + * gets the empty string. + * + * @param t The desired threshold value. + * @param pt The desired positive thickness. + * @param nt The desired negative thickness. + **/ + public PassiveAggressive(double t, double pt, double nt) { + this("", t, pt, nt); + } + /** + * Use this constructor to specify an alternative subclass of {@link SparseWeightVector}, while + * the name of the classifier gets the empty string. + * + * @param t The desired threshold value. + * @param pt The desired positive thickness. + * @param nt The desired negative thickness. + * @param v An empty sparse weight vector. + **/ + public PassiveAggressive(double t, double pt, double nt, SparseWeightVector v) { + this("", t, pt, nt, v); + } /** - * Calls the appropriate Learner.setParameters(Parameters) - * method for this Parameters object. - * - * @param l The learner whose parameters will be set. + * Initializing constructor. Sets all member variables to their associated settings in the + * {@link SparsePerceptron.Parameters} object. + * + * @param p The settings of all parameters. **/ - public void setParameters(Learner l) { - ((LinearThresholdUnit) l).setParameters(this); + public PassiveAggressive(Parameters p) { + this("", p); } - } -} + + /** + * Sets the learning rate to the specified value, and the threshold takes the default. + * + * @param n The name of the classifier. + **/ + public PassiveAggressive(String n) { + this(n, LinearThresholdUnit.defaultThreshold); + } + + /** + * Sets the learning rate and threshold to the specified values. + * + * @param n The name of the classifier. + * @param t The desired threshold value. 
+ **/ + public PassiveAggressive(String n, double t) { + this(n, t, LinearThresholdUnit.defaultThickness); + } + + /** + * Use this constructor to fit a thick separator, where both the positive and negative sides of + * the hyperplane will be given the specified thickness. + * + * @param n The name of the classifier. + * @param t The desired threshold value. + * @param pt The desired thickness. + **/ + public PassiveAggressive(String n, double t, double pt) { + this(n, t, pt, pt); + } + + /** + * Use this constructor to fit a thick separator, where the positive and negative sides of the + * hyperplane will be given the specified separate thicknesses. + * + * @param n The name of the classifier. + * @param t The desired threshold value. + * @param pt The desired positive thickness. + * @param nt The desired negative thickness. + **/ + public PassiveAggressive(String n, double t, double pt, double nt) { + this(n, t, pt, nt, (SparseWeightVector) LinearThresholdUnit.defaultWeightVector.clone()); + } + + /** + * Use this constructor to specify an alternative subclass of {@link SparseWeightVector}. + * + * @param n The name of the classifier. + * @param t The desired threshold value. + * @param pt The desired positive thickness. + * @param nt The desired negative thickness. + * @param v An empty sparse weight vector. + **/ + public PassiveAggressive(String n, double t, double pt, double nt, SparseWeightVector v) { + super(n, LinearThresholdUnit.defaultLearningRate, t, pt, nt, v); + } + + /** + * Initializing constructor. Sets all member variables to their associated settings in the + * {@link SparsePerceptron.Parameters} object. + * + * @param n The name of the classifier. + * @param p The settings of all parameters. + **/ + public PassiveAggressive(String n, Parameters p) { + super(n, p); + } + + + /** + * Retrieves the parameters that are set in this learner. 
+ * + * @return An object containing all the values of the parameters that control the behavior of + * this learning algorithm. + **/ + public Learner.Parameters getParameters() { + return new Parameters((LinearThresholdUnit.Parameters) super.getParameters()); + } + + + /** + * Scales the feature vector produced by the extractor by the learning rate and adds it to the + * weight vector. + * + * @param exampleFeatures The example's array of feature indices + * @param exampleValues The example's array of feature values + * @param rate The learning rate at which the weights are updated. + **/ + public void promote(int[] exampleFeatures, double[] exampleValues, double rate) { + weightVector.scaledAdd(exampleFeatures, exampleValues, rate, initialWeight); + bias += rate; + } + + + /** + * Scales the feature vector produced by the extractor by the learning rate and subtracts it + * from the weight vector. + * + * @param exampleFeatures The example's array of feature indices + * @param exampleValues The example's array of feature values + * @param rate The learning rate at which the weights are updated. + **/ + public void demote(int[] exampleFeatures, double[] exampleValues, double rate) { + weightVector.scaledAdd(exampleFeatures, exampleValues, -rate, initialWeight); + bias -= rate; + } + + + /** + * Computes the value of the learning rate for this example. + * + * @param exampleFeatures The example's array of feature indices + * @param exampleValues The example's array of feature values + * @param s The score for the example object + * @param label The label + **/ + public double computeLearningRate(int[] exampleFeatures, double[] exampleValues, double s, + boolean label) { + double labelVal = label ? 1 : -1; + + double rate = (1 - labelVal * s) / (FeatureVector.L2NormSquared(exampleValues) + 1); + + if (rate < 0) + rate = 0; + return rate; + } + + + /** + * Writes the algorithm's internal representation as text. 
In the first line of output, the name + * of the classifier is printed, followed by {@link #learningRate}, + * {@link LinearThresholdUnit#initialWeight}, {@link LinearThresholdUnit#threshold}, + * {@link LinearThresholdUnit#positiveThickness}, {@link LinearThresholdUnit#negativeThickness}, + * and finally {@link LinearThresholdUnit#bias}. + * + * @param out The output stream. + **/ + public void write(PrintStream out) { + out.println(name + ": " + learningRate + ", " + initialWeight + ", " + threshold + ", " + + positiveThickness + ", " + negativeThickness + ", " + bias); + if (lexicon.size() == 0) + weightVector.write(out); + else + weightVector.write(out, lexicon); + } + + + /** + * Simply a container for all of {@link PassiveAggressive}'s configurable parameters. Using + * instances of this class should make code more readable and constructors less complicated. + * + * @author Nick Rizzolo + **/ + public static class Parameters extends LinearThresholdUnit.Parameters { + /** Sets all the default values. */ + public Parameters() {} + + + /** + * Sets the parameters from the parent's parameters object, giving defaults to all + * parameters declared in this object. + **/ + public Parameters(LinearThresholdUnit.Parameters p) { + super(p); + } + + + /** Copy constructor. */ + public Parameters(Parameters p) { + super(p); + } + + + /** + * Calls the appropriate Learner.setParameters(Parameters) method for this + * Parameters object. + * + * @param l The learner whose parameters will be set. 
+ **/ + public void setParameters(Learner l) { + ((LinearThresholdUnit) l).setParameters(this); + } + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/RandomWeightVector.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/RandomWeightVector.java index 0f586fee..42c94152 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/RandomWeightVector.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/RandomWeightVector.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.learn; @@ -13,166 +10,159 @@ import java.io.PrintStream; import java.util.Random; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessInputStream; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessOutputStream; - +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream; /** - * This weight vector operates similarly to its parent in the class - * hierarchy, but it halucinates (and sets) random values for weights - * corresponding to features it has never been asked about before. Thus, if - * a dot product is calculated on an empty RandomWeightVector, - * all the features in the feature vector will now have random weights - * associated with them in weight vector. 
If a - * {@link #scaledAdd(int[],double[])} is performed, any features in the - * feature vector lacking a corresponding weight in the weight vector will - * have a random one assigned before the addition is performed. This is - * usually not an issue for most algorithms, since dot products are usually - * performed before deciding how to add, which means all the weights for that - * feature vector will already be set when the addition is performed. Thus, - * it will simply appear to the algorithm that this vector had independent, - * identically distributed random values for all its dimensions when first - * created. - * - *

The random numbers generated by this class are Gaussian with mean 0 - * and with a user-configurable standard deviation. - * - * @author Nick Rizzolo + * This weight vector operates similarly to its parent in the class hierarchy, but it halucinates + * (and sets) random values for weights corresponding to features it has never been asked about + * before. Thus, if a dot product is calculated on an empty RandomWeightVector, all the + * features in the feature vector will now have random weights associated with them in weight + * vector. If a {@link #scaledAdd(int[],double[])} is performed, any features in the feature vector + * lacking a corresponding weight in the weight vector will have a random one assigned before the + * addition is performed. This is usually not an issue for most algorithms, since dot products are + * usually performed before deciding how to add, which means all the weights for that feature vector + * will already be set when the addition is performed. Thus, it will simply appear to the algorithm + * that this vector had independent, identically distributed random values for all its dimensions + * when first created. + * + *

+ * The random numbers generated by this class are Gaussian with mean 0 and with a user-configurable + * standard deviation. + * + * @author Nick Rizzolo **/ -public class RandomWeightVector extends SparseWeightVector -{ - /** Keeps track of how many objects of this class have been constructed. */ - private static int instanceCount = 0; - /** Default value for {@link #stddev}. */ - protected static final double defaultStddev = 100; - - - /** - * The random numbers that are generated by this class are Gaussian with - * mean 0 and standard deviation defined by this variable. - **/ - protected double stddev; - /** Remembers the instance number of this instance. */ - protected int instanceNumber; - /** The random number generator for this instance. */ - protected Random random; - - - /** Sets a default standard deviation. */ - public RandomWeightVector() { this(defaultStddev); } - - /** - * Sets the specified standard deviation. - * - * @param s The standard deviation. - **/ - public RandomWeightVector(double s) { - stddev = s; - instanceNumber = instanceCount++; - random = new Random(instanceNumber); - } - - - /** - * Returns the double precision value for the given feature, or - * sets a random one and returns it if one did not already exist. - * - * @param featureIndex The feature index - * @param defaultW Unused. - * @return The double precision value for the given feature. - **/ - public double getWeight(int featureIndex, double defaultW) { - while (weights.size() <= featureIndex) - weights.add(random.nextGaussian() * stddev); - return weights.get(featureIndex); - } - - - /** - * Empties the weight map and resets the random number generator. This - * means that the same "random" values will be filled in for the weights if - * the same calls to {@link #dot(int[],double[],double)} and - * {@link #scaledAdd(int[],double[],double,double)} are made in the same - * order. 
- **/ - public void clear() { - super.clear(); - random = new Random(instanceNumber); - } - - - /** - * Outputs the contents of this vector into the specified - * PrintStream. The string representation is the same as in - * the super class, except the "Begin" annotation line also - * contains the value of {@link #stddev} in parentheses. - * - * @param out The stream to write to. - **/ - public void write(PrintStream out) { - out.println("Begin RandomWeightVector (" + stddev + ")"); - toStringJustWeights(out); - out.println("End RandomWeightVector"); - } - - - /** - * Outputs the contents of this vector into the specified - * PrintStream. The string representation is the same as in - * the super class, except the "Begin" annotation line also - * contains the value of {@link #stddev} in parentheses. - * - * @param out The stream to write to. - * @param lex The feature lexicon. - **/ - public void write(PrintStream out, Lexicon lex) { - out.println("Begin RandomWeightVector (" + stddev + ")"); - toStringJustWeights(out, 0, lex); - out.println("End RandomWeightVector"); - } - - - /** - * Writes the weight vector's internal representation in binary form. - * - * @param out The output stream. - **/ - public void write(ExceptionlessOutputStream out) { - super.write(out); - out.writeDouble(stddev); - out.writeInt(instanceNumber); - // Not perfect; to preserve the current semantics of this class (which are - // also less than ideal), we should serialze the random object into the - // stream so it can continue where it left off when read back in. - } - - - /** - * Reads the representation of a weight vector with this object's run-time - * type from the given stream, overwriting the data in this object. - * - *

This method is appropriate for reading weight vectors as written by - * {@link #write(ExceptionlessOutputStream)}. - * - * @param in The input stream. - **/ - public void read(ExceptionlessInputStream in) { - super.read(in); - stddev = in.readDouble(); - instanceNumber = in.readInt(); - random = new Random(instanceNumber); - // Not perfect; see the comment in #write(ExceptionlessOutputStream) - } - - - /** - * Returns a new, empty weight vector with the same parameter settings as - * this one. - * - * @return An empty weight vector. - **/ - public SparseWeightVector emptyClone() { - return new RandomWeightVector(stddev); - } +public class RandomWeightVector extends SparseWeightVector { + /** Keeps track of how many objects of this class have been constructed. */ + private static int instanceCount = 0; + /** Default value for {@link #stddev}. */ + protected static final double defaultStddev = 100; + + + /** + * The random numbers that are generated by this class are Gaussian with mean 0 and standard + * deviation defined by this variable. + **/ + protected double stddev; + /** Remembers the instance number of this instance. */ + protected int instanceNumber; + /** The random number generator for this instance. */ + protected Random random; + + + /** Sets a default standard deviation. */ + public RandomWeightVector() { + this(defaultStddev); + } + + /** + * Sets the specified standard deviation. + * + * @param s The standard deviation. + **/ + public RandomWeightVector(double s) { + stddev = s; + instanceNumber = instanceCount++; + random = new Random(instanceNumber); + } + + + /** + * Returns the double precision value for the given feature, or sets a random one and returns it + * if one did not already exist. + * + * @param featureIndex The feature index + * @param defaultW Unused. + * @return The double precision value for the given feature. 
+ **/ + public double getWeight(int featureIndex, double defaultW) { + while (weights.size() <= featureIndex) + weights.add(random.nextGaussian() * stddev); + return weights.get(featureIndex); + } + + + /** + * Empties the weight map and resets the random number generator. This means that the same + * "random" values will be filled in for the weights if the same calls to + * {@link #dot(int[],double[],double)} and {@link #scaledAdd(int[],double[],double,double)} are + * made in the same order. + **/ + public void clear() { + super.clear(); + random = new Random(instanceNumber); + } + + + /** + * Outputs the contents of this vector into the specified PrintStream. The string + * representation is the same as in the super class, except the "Begin" annotation + * line also contains the value of {@link #stddev} in parentheses. + * + * @param out The stream to write to. + **/ + public void write(PrintStream out) { + out.println("Begin RandomWeightVector (" + stddev + ")"); + toStringJustWeights(out); + out.println("End RandomWeightVector"); + } + + + /** + * Outputs the contents of this vector into the specified PrintStream. The string + * representation is the same as in the super class, except the "Begin" annotation + * line also contains the value of {@link #stddev} in parentheses. + * + * @param out The stream to write to. + * @param lex The feature lexicon. + **/ + public void write(PrintStream out, Lexicon lex) { + out.println("Begin RandomWeightVector (" + stddev + ")"); + toStringJustWeights(out, 0, lex); + out.println("End RandomWeightVector"); + } + + + /** + * Writes the weight vector's internal representation in binary form. + * + * @param out The output stream. 
+ **/ + public void write(ExceptionlessOutputStream out) { + super.write(out); + out.writeDouble(stddev); + out.writeInt(instanceNumber); + // Not perfect; to preserve the current semantics of this class (which are + // also less than ideal), we should serialze the random object into the + // stream so it can continue where it left off when read back in. + } + + + /** + * Reads the representation of a weight vector with this object's run-time type from the given + * stream, overwriting the data in this object. + * + *

+ * This method is appropriate for reading weight vectors as written by + * {@link #write(ExceptionlessOutputStream)}. + * + * @param in The input stream. + **/ + public void read(ExceptionlessInputStream in) { + super.read(in); + stddev = in.readDouble(); + instanceNumber = in.readInt(); + random = new Random(instanceNumber); + // Not perfect; see the comment in #write(ExceptionlessOutputStream) + } + + + /** + * Returns a new, empty weight vector with the same parameter settings as this one. + * + * @return An empty weight vector. + **/ + public SparseWeightVector emptyClone() { + return new RandomWeightVector(stddev); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Sigmoid.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Sigmoid.java index 1610087f..aa1318b3 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Sigmoid.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Sigmoid.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.learn; @@ -15,46 +12,49 @@ /** - * The sigmoid normalization function replaces each score - * xi with - * 1 / (1 + exp(-alpha xi)), where alpha - * is a user-specified constant. - * - * @author Nick Rizzolo + * The sigmoid normalization function replaces each score xi with + * 1 / (1 + exp(-alpha xi)), where alpha is a user-specified + * constant. 
+ * + * @author Nick Rizzolo **/ -public class Sigmoid extends Normalizer -{ - /** The user-specified constant described above. */ - protected double alpha; - - - /** Default constructor; sets {@link #alpha} to 1. */ - public Sigmoid() { this(1); } - - /** - * Initializing constructor. - * - * @param a The setting for {@link #alpha}. - **/ - public Sigmoid(double a) { alpha = a; } - - - /** Retrieves the value of {@link #alpha}. */ - public double getAlpha() { return alpha; } - - - /** - * Normalizes the given ScoreSet; its scores are modified in - * place before it is returned. - * - * @param scores The set of scores to normalize. - * @return The normalized set of scores. - **/ - public ScoreSet normalize(ScoreSet scores) { - Score[] array = scores.toArray(); - for (int i = 0; i < array.length; ++i) - array[i].score = 1 / (1 + Math.exp(-alpha * array[i].score)); - return scores; - } +public class Sigmoid extends Normalizer { + /** The user-specified constant described above. */ + protected double alpha; + + + /** Default constructor; sets {@link #alpha} to 1. */ + public Sigmoid() { + this(1); + } + + /** + * Initializing constructor. + * + * @param a The setting for {@link #alpha}. + **/ + public Sigmoid(double a) { + alpha = a; + } + + + /** Retrieves the value of {@link #alpha}. */ + public double getAlpha() { + return alpha; + } + + + /** + * Normalizes the given ScoreSet; its scores are modified in place before it is + * returned. + * + * @param scores The set of scores to normalize. + * @return The normalized set of scores. 
+ **/ + public ScoreSet normalize(ScoreSet scores) { + Score[] array = scores.toArray(); + for (int i = 0; i < array.length; ++i) + array[i].score = 1 / (1 + Math.exp(-alpha * array[i].score)); + return scores; + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Softmax.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Softmax.java index 817e6331..4277e93b 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Softmax.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/Softmax.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.learn; @@ -15,54 +12,57 @@ /** - * The softmax normalization function replaces each score with the fraction - * of its exponential out of the sum of all scores' exponentials. In other - * words, each score si is replaced by - * exp(alpha si) / sumj exp(alpha - * sj), where alpha is a user-specified - * constant. - * - * @author Nick Rizzolo + * The softmax normalization function replaces each score with the fraction of its exponential out + * of the sum of all scores' exponentials. In other words, each score si is + * replaced by exp(alpha si) / sumj exp(alpha + * sj), where alpha is a user-specified constant. + * + * @author Nick Rizzolo **/ -public class Softmax extends Normalizer -{ - /** The user-specified constant described above. 
*/ - protected double alpha; +public class Softmax extends Normalizer { + /** The user-specified constant described above. */ + protected double alpha; - /** Default constructor; sets {@link #alpha} to 1. */ - public Softmax() { this(1); } + /** Default constructor; sets {@link #alpha} to 1. */ + public Softmax() { + this(1); + } - /** - * Initializing constructor. - * - * @param a The setting for {@link #alpha}. - **/ - public Softmax(double a) { alpha = a; } + /** + * Initializing constructor. + * + * @param a The setting for {@link #alpha}. + **/ + public Softmax(double a) { + alpha = a; + } - /** Retrieves the value of {@link #alpha}. */ - public double getAlpha() { return alpha; } + /** Retrieves the value of {@link #alpha}. */ + public double getAlpha() { + return alpha; + } - /** - * Normalizes the given ScoreSet; its scores are modified in - * place before it is returned. - * - * @param scores The set of scores to normalize. - * @return The normalized set of scores. - **/ - public ScoreSet normalize(ScoreSet scores) { - Score[] array = scores.toArray(); - double sum = 0; + /** + * Normalizes the given ScoreSet; its scores are modified in place before it is + * returned. + * + * @param scores The set of scores to normalize. + * @return The normalized set of scores. 
+ **/ + public ScoreSet normalize(ScoreSet scores) { + Score[] array = scores.toArray(); + double sum = 0; - for (int i = 0; i < array.length; ++i) { - array[i].score = Math.exp(alpha * array[i].score); - sum += array[i].score; - } + for (int i = 0; i < array.length; ++i) { + array[i].score = Math.exp(alpha * array[i].score); + sum += array[i].score; + } - for (int i = 0; i < array.length; ++i) array[i].score /= sum; - return scores; - } + for (int i = 0; i < array.length; ++i) + array[i].score /= sum; + return scores; + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseAveragedPerceptron.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseAveragedPerceptron.java index 29598c31..4e76223a 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseAveragedPerceptron.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseAveragedPerceptron.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.learn; @@ -15,735 +12,772 @@ import java.util.Comparator; import java.util.Map; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.DVector; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream; import edu.illinois.cs.cogcomp.lbjava.classify.Classifier; import edu.illinois.cs.cogcomp.lbjava.classify.Feature; -import edu.illinois.cs.cogcomp.lbjava.util.DVector; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessInputStream; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessOutputStream; - /** - * An approximation to voted Perceptron, in which a weighted average of the - * weight vectors arrived at during training becomes the weight vector used - * to make predictions after training. - * - *

During training, after each example ei is processed, - * the weight vector wi becomes the active weight vector - * used to make predictions on future training examples. If a mistake was - * made on ei, wi will be different than - * wi - 1. Otherwise, it will remain unchanged. - * - *

After training, each distinct weight vector arrived at during training - * is associated with an integer weight equal to the number of examples whose - * training made that weight vector active. A new weight vector - * w* is computed by taking the average of all these weight - * vectors weighted as described. w* is used to make all - * predictions returned to the user through methods such as - * {@link Classifier#classify(Object)} or - * {@link Classifier#discreteValue(Object)}. - * - *

The above description is a useful way to think about the operation of - * this {@link Learner}. However, the user should note that this - * implementation never explicitly stores w*. Instead, it - * is computed efficiently on demand. Thus, interspersed online training and - * evaluation is efficient and operates as expected. - * - *

It is assumed that {@link Learner#labeler} is a single discrete - * classifier that produces the same feature for every example object and - * that the values that feature may take are available through the - * {@link Classifier#allowableValues()} method. The second value returned - * from {@link Classifier#allowableValues()} is treated as "positive", and it - * is assumed there are exactly 2 allowable values. Assertions will produce - * error messages if these assumptions do not hold. - * - * @author Nick Rizzolo + * An approximation to voted Perceptron, in which a weighted average of the weight vectors arrived + * at during training becomes the weight vector used to make predictions after training. + * + *

+ * During training, after each example ei is processed, the weight vector + * wi becomes the active weight vector used to make predictions on future training + * examples. If a mistake was made on ei, wi will be different + * than wi - 1. Otherwise, it will remain unchanged. + * + *

+ * After training, each distinct weight vector arrived at during training is associated with an + * integer weight equal to the number of examples whose training made that weight vector active. A + * new weight vector w* is computed by taking the average of all these weight + * vectors weighted as described. w* is used to make all predictions returned to + * the user through methods such as {@link Classifier#classify(Object)} or + * {@link Classifier#discreteValue(Object)}. + * + *

+ * The above description is a useful way to think about the operation of this {@link Learner}. + * However, the user should note that this implementation never explicitly stores + * w*. Instead, it is computed efficiently on demand. Thus, interspersed online + * training and evaluation is efficient and operates as expected. + * + *

+ * It is assumed that {@link Learner#labeler} is a single discrete classifier that produces the same + * feature for every example object and that the values that feature may take are available through + * the {@link Classifier#allowableValues()} method. The second value returned from + * {@link Classifier#allowableValues()} is treated as "positive", and it is assumed there are + * exactly 2 allowable values. Assertions will produce error messages if these assumptions do not + * hold. + * + * @author Nick Rizzolo **/ -public class SparseAveragedPerceptron extends SparsePerceptron -{ - /** Default for {@link LinearThresholdUnit#weightVector}. */ - public static final AveragedWeightVector defaultWeightVector = - new AveragedWeightVector(); - - /** - * Holds the same reference as {@link LinearThresholdUnit#weightVector} - * casted to {@link SparseAveragedPerceptron.AveragedWeightVector}. - **/ - protected AveragedWeightVector awv; - /** Keeps the extra information necessary to compute the averaged bias. */ - protected double averagedBias; - - - /** - * The learning rate and threshold take default values, while the name of - * the classifier gets the empty string. - **/ - public SparseAveragedPerceptron() { this(""); } - - /** - * Sets the learning rate to the specified value, and the threshold takes - * the default, while the name of the classifier gets the empty string. - * - * @param r The desired learning rate value. - **/ - public SparseAveragedPerceptron(double r) { this("", r); } - - /** - * Sets the learning rate and threshold to the specified values, while the - * name of the classifier gets the empty string. - * - * @param r The desired learning rate value. - * @param t The desired threshold value. 
- **/ - public SparseAveragedPerceptron(double r, double t) { this("", r, t); } - - /** - * Use this constructor to fit a thick separator, where both the positive - * and negative sides of the hyperplane will be given the specified - * thickness, while the name of the classifier gets the empty string. - * - * @param r The desired learning rate value. - * @param t The desired threshold value. - * @param pt The desired thickness. - **/ - public SparseAveragedPerceptron(double r, double t, double pt) { - this("", r, t, pt); - } - - /** - * Use this constructor to fit a thick separator, where the positive and - * negative sides of the hyperplane will be given the specified separate - * thicknesses, while the name of the classifier gets the empty string. - * - * @param r The desired learning rate value. - * @param t The desired threshold value. - * @param pt The desired positive thickness. - * @param nt The desired negative thickness. - **/ - public SparseAveragedPerceptron(double r, double t, double pt, double nt) { - this("", r, t, pt, nt); - } - - /** - * Initializing constructor. Sets all member variables to their associated - * settings in the {@link SparseAveragedPerceptron.Parameters} object. - * - * @param p The settings of all parameters. - **/ - public SparseAveragedPerceptron(SparseAveragedPerceptron.Parameters p) { - this("", p); - } - - - /** - * The learning rate and threshold take default values. - * - * @param n The name of the classifier. - **/ - public SparseAveragedPerceptron(String n) { this(n, defaultLearningRate); } - - /** - * Sets the learning rate to the specified value, and the threshold takes - * the default. - * - * @param n The name of the classifier. - * @param r The desired learning rate value. - **/ - public SparseAveragedPerceptron(String n, double r) { - this(n, r, defaultThreshold); - } - - /** - * Sets the learning rate and threshold to the specified values. - * - * @param n The name of the classifier. 
- * @param r The desired learning rate value. - * @param t The desired threshold value. - **/ - public SparseAveragedPerceptron(String n, double r, double t) { - this(n, r, t, defaultThickness); - } - - /** - * Use this constructor to fit a thick separator, where both the positive - * and negative sides of the hyperplane will be given the specified - * thickness. - * - * @param n The name of the classifier. - * @param r The desired learning rate value. - * @param t The desired threshold value. - * @param pt The desired thickness. - **/ - public SparseAveragedPerceptron(String n, double r, double t, double pt) { - this(n, r, t, pt, pt); - } - - /** - * Use this constructor to fit a thick separator, where the positive and - * negative sides of the hyperplane will be given the specified separate - * thicknesses. - * - * @param n The name of the classifier. - * @param r The desired learning rate value. - * @param t The desired threshold value. - * @param pt The desired positive thickness. - * @param nt The desired negative thickness. - **/ - public SparseAveragedPerceptron(String n, double r, double t, double pt, - double nt) { - super(n); - Parameters p = new Parameters(); - p.learningRate = r; - p.threshold = t; - p.positiveThickness = pt; - p.negativeThickness = nt; - setParameters(p); - } - - /** - * Initializing constructor. Sets all member variables to their associated - * settings in the {@link SparseAveragedPerceptron.Parameters} object. - * - * @param n The name of the classifier. - * @param p The settings of all parameters. - **/ - public SparseAveragedPerceptron(String n, - SparseAveragedPerceptron.Parameters p) { - super(n); - setParameters(p); - } - - - /** - * Retrieves the parameters that are set in this learner. - * - * @return An object containing all the values of the parameters that - * control the behavior of this learning algorithm. 
- **/ - public Learner.Parameters getParameters() { - Parameters p = - new Parameters((SparsePerceptron.Parameters) super.getParameters()); - return p; - } - - - /** - * Sets the values of parameters that control the behavior of this learning - * algorithm. - * - * @param p The parameters. - **/ - public void setParameters(Parameters p) { - super.setParameters(p); - awv = (AveragedWeightVector) weightVector; - } - - - /** - * The score of the specified object is equal to w * x + bias - * where * is dot product, w is the weight - * vector, and x is the feature vector produced by the - * extractor. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @return The result of the dot product plus the bias. - **/ - public double score(int[] exampleFeatures, double[] exampleValues) { - double result = awv.dot(exampleFeatures, exampleValues, initialWeight); - int examples = awv.getExamples(); - - if (examples > 0) - result += (examples * bias - averagedBias) / (double) examples; - return result; - } - - - /** - * Scales the feature vector produced by the extractor by the learning rate - * and adds it to the weight vector. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - **/ - public void promote(int[] exampleFeatures, double[] exampleValues, - double rate) { - bias += rate; - - int examples = awv.getExamples(); - averagedBias += examples * rate; - awv.scaledAdd(exampleFeatures, exampleValues, rate, initialWeight); - } - - - /** - * Scales the feature vector produced by the extractor by the learning rate - * and subtracts it from the weight vector. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. 
- **/ - public void demote(int[] exampleFeatures, double[] exampleValues, - double rate) { - bias -= rate; - - int examples = awv.getExamples(); - averagedBias -= examples * rate; - awv.scaledAdd(exampleFeatures, exampleValues, -rate, initialWeight); - } - - - /** - * This method works just like - * {@link LinearThresholdUnit#learn(int[],double[],int[],double[])}, except - * it notifies its weight vector when it got an example correct in addition - * to updating it when it makes a mistake. - * - * @param exampleFeatures The example's array of feature indices - * @param exampleValues The example's array of feature values - * @param exampleLabels The example's label(s) - * @param labelValues The labels' values - **/ - public void learn(int[] exampleFeatures, double[] exampleValues, - int[] exampleLabels, double[] labelValues) { - assert exampleLabels.length == 1 - : "Example must have a single label."; - assert exampleLabels[0] == 0 || exampleLabels[0] == 1 - : "Example has unallowed label value."; - - boolean label = (exampleLabels[0] == 1); - - double s = - awv.simpleDot(exampleFeatures, exampleValues, initialWeight) + bias; - if (label && s < threshold + positiveThickness) - promote(exampleFeatures, exampleValues, getLearningRate()); - else if (!label && s >= threshold - negativeThickness) - demote(exampleFeatures, exampleValues, getLearningRate()); - else awv.correctExample(); - } - - - /** - * Initializes the weight vector array to the size of - * the supplied number of features, with each cell taking - * the default value of {@link #initialWeight}. - * - * @param numExamples The number of examples - * @param numFeatures The number of features - **/ - public void initialize(int numExamples, int numFeatures) { - double[] weights = new double[numFeatures]; - Arrays.fill(weights, initialWeight); - weightVector = awv = new AveragedWeightVector(weights); - } - - - /** Resets the weight vector to all zeros. 
*/ - public void forget() { - super.forget(); - awv = (AveragedWeightVector) weightVector; - averagedBias = 0; - } - - - /** - * Writes the algorithm's internal representation as text. In the first - * line of output, the name of the classifier is printed, followed by - * {@link SparsePerceptron#learningRate}, - * {@link LinearThresholdUnit#initialWeight}, - * {@link LinearThresholdUnit#threshold}, - * {@link LinearThresholdUnit#positiveThickness}, - * {@link LinearThresholdUnit#negativeThickness}, - * {@link LinearThresholdUnit#bias}, and finally {@link #averagedBias}. - * - * @param out The output stream. - **/ - public void write(PrintStream out) { - out.println(name + ": " + learningRate + ", " + initialWeight + ", " - + threshold + ", " + positiveThickness + ", " - + negativeThickness + ", " + bias + ", " + averagedBias); - if (lexicon == null || lexicon.size() == 0) awv.write(out); - else awv.write(out, lexicon); - } - - - /** - * Writes the learned function's internal representation in binary form. - * - * @param out The output stream. - **/ - public void write(ExceptionlessOutputStream out) { - super.write(out); - out.writeDouble(averagedBias); - } - - - /** - * Reads the binary representation of a learner with this object's run-time - * type, overwriting any and all learned or manually specified parameters - * as well as the label lexicon but without modifying the feature lexicon. - * - * @param in The input stream. - **/ - public void read(ExceptionlessInputStream in) { - super.read(in); - awv = (AveragedWeightVector) weightVector; - averagedBias = in.readDouble(); - } - - - /** - * Simply a container for all of {@link SparseAveragedPerceptron}'s - * configurable parameters. Using instances of this class should make code - * more readable and constructors less complicated. 
Note that if the - * object referenced by {@link LinearThresholdUnit.Parameters#weightVector} - * is replaced via an instance of this class, it must be replaced with an - * {@link SparseAveragedPerceptron.AveragedWeightVector}. - * - * @author Nick Rizzolo - **/ - public static class Parameters extends SparsePerceptron.Parameters - { - /** Sets all the default values. */ - public Parameters() { - weightVector = (AveragedWeightVector) defaultWeightVector.clone(); - } - +public class SparseAveragedPerceptron extends SparsePerceptron { + /** Default for {@link LinearThresholdUnit#weightVector}. */ + public static final AveragedWeightVector defaultWeightVector = new AveragedWeightVector(); /** - * Sets the parameters from the parent's parameters object, giving - * defaults to all parameters declared in this object. + * Holds the same reference as {@link LinearThresholdUnit#weightVector} casted to + * {@link SparseAveragedPerceptron.AveragedWeightVector}. **/ - public Parameters(SparsePerceptron.Parameters p) { super(p); } + protected AveragedWeightVector awv; + + /** + * @return the awv the averaged weight vector + */ + public AveragedWeightVector getAveragedWeightVector() { + return awv; + } - /** Copy constructor. */ - public Parameters(Parameters p) { super(p); } + /** Keeps the extra information necessary to compute the averaged bias. */ + protected double averagedBias; /** - * Calls the appropriate Learner.setParameters(Parameters) - * method for this Parameters object. - * - * @param l The learner whose parameters will be set. + * The learning rate and threshold take default values, while the name of the classifier gets + * the empty string. **/ - public void setParameters(Learner l) { - ((SparseAveragedPerceptron) l).setParameters(this); + public SparseAveragedPerceptron() { + this(""); } - } - - - /** - * This implementation of a sparse weight vector associates two - * doubles with each {@link Feature}. 
The first plays the - * role of the usual weight vector, and the second accumulates multiples of - * examples on which mistakes were made to help implement the weighted - * average. - * - * @author Nick Rizzolo - **/ - public static class AveragedWeightVector extends SparseWeightVector - { + /** - * Together with {@link SparseWeightVector#weights}, this vector provides - * enough information to reconstruct the average of all weight vectors - * arrived at during the course of learning. + * Sets the learning rate to the specified value, and the threshold takes the default, while the + * name of the classifier gets the empty string. + * + * @param r The desired learning rate value. **/ - public DVector averagedWeights; - /** Counts the total number of training examples this vector has seen. */ - protected int examples; - - - /** Simply instantiates the weight vectors. */ - public AveragedWeightVector() { this(new DVector(defaultCapacity)); } + public SparseAveragedPerceptron(double r) { + this("", r); + } /** - * Simply initializes the weight vectors. - * - * @param w An array of weights. + * Sets the learning rate and threshold to the specified values, while the name of the + * classifier gets the empty string. + * + * @param r The desired learning rate value. + * @param t The desired threshold value. **/ - public AveragedWeightVector(double[] w) { this(new DVector(w)); } + public SparseAveragedPerceptron(double r, double t) { + this("", r, t); + } /** - * Simply initializes the weight vectors. - * - * @param w A vector of weights. + * Use this constructor to fit a thick separator, where both the positive and negative sides of + * the hyperplane will be given the specified thickness, while the name of the classifier gets + * the empty string. + * + * @param r The desired learning rate value. + * @param t The desired threshold value. + * @param pt The desired thickness. 
**/ - public AveragedWeightVector(DVector w) { - super((DVector) w.clone()); - averagedWeights = w; + public SparseAveragedPerceptron(double r, double t, double pt) { + this("", r, t, pt); } + /** + * Use this constructor to fit a thick separator, where the positive and negative sides of the + * hyperplane will be given the specified separate thicknesses, while the name of the classifier + * gets the empty string. + * + * @param r The desired learning rate value. + * @param t The desired threshold value. + * @param pt The desired positive thickness. + * @param nt The desired negative thickness. + **/ + public SparseAveragedPerceptron(double r, double t, double pt, double nt) { + this("", r, t, pt, nt); + } - /** Increments the {@link #examples} variable. */ - public void correctExample() { ++examples; } - /** Returns the {@link #examples} variable. */ - public int getExamples() { return examples; } + /** + * Initializing constructor. Sets all member variables to their associated settings in the + * {@link SparseAveragedPerceptron.Parameters} object. + * + * @param p The settings of all parameters. + **/ + public SparseAveragedPerceptron(SparseAveragedPerceptron.Parameters p) { + this("", p); + } /** - * Returns the averaged weight of the given feature. - * - * @param featureIndex The feature index. - * @param defaultW The default weight. - * @return The weight of the feature. + * The learning rate and threshold take default values. + * + * @param n The name of the classifier. **/ - public double getAveragedWeight(int featureIndex, double defaultW) { - if (examples == 0) return 0; - double aw = averagedWeights.get(featureIndex, defaultW); - double w = getWeight(featureIndex, defaultW); - return (examples*w - aw) / (double) examples; + public SparseAveragedPerceptron(String n) { + this(n, defaultLearningRate); } - /** - * Takes the dot product of this AveragedWeightVector with - * the argument vector, using the hard coded default weight. 
- * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @return The computed dot product. + * Sets the learning rate to the specified value, and the threshold takes the default. + * + * @param n The name of the classifier. + * @param r The desired learning rate value. **/ - public double dot(int[] exampleFeatures, double[] exampleValues) { - return dot(exampleFeatures, exampleValues, defaultWeight); + public SparseAveragedPerceptron(String n, double r) { + this(n, r, defaultThreshold); } + /** + * Sets the learning rate and threshold to the specified values. + * + * @param n The name of the classifier. + * @param r The desired learning rate value. + * @param t The desired threshold value. + **/ + public SparseAveragedPerceptron(String n, double r, double t) { + this(n, r, t, defaultThickness); + } /** - * Takes the dot product of this AveragedWeightVector with - * the argument vector, using the specified default weight when one is - * not yet present in this vector. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @param defaultW The default weight. - * @return The computed dot product. + * Use this constructor to fit a thick separator, where both the positive and negative sides of + * the hyperplane will be given the specified thickness. + * + * @param n The name of the classifier. + * @param r The desired learning rate value. + * @param t The desired threshold value. + * @param pt The desired thickness. 
**/ - public double dot(int[] exampleFeatures, double[] exampleValues, - double defaultW) { - double sum = 0; + public SparseAveragedPerceptron(String n, double r, double t, double pt) { + this(n, r, t, pt, pt); + } - for (int i = 0; i < exampleFeatures.length; i++) { - double w = getAveragedWeight(exampleFeatures[i], defaultW); - sum += w * exampleValues[i]; - } + /** + * Use this constructor to fit a thick separator, where the positive and negative sides of the + * hyperplane will be given the specified separate thicknesses. + * + * @param n The name of the classifier. + * @param r The desired learning rate value. + * @param t The desired threshold value. + * @param pt The desired positive thickness. + * @param nt The desired negative thickness. + **/ + public SparseAveragedPerceptron(String n, double r, double t, double pt, double nt) { + super(n); + Parameters p = new Parameters(); + p.learningRate = r; + p.threshold = t; + p.positiveThickness = pt; + p.negativeThickness = nt; + setParameters(p); + } - return sum; + /** + * Initializing constructor. Sets all member variables to their associated settings in the + * {@link SparseAveragedPerceptron.Parameters} object. + * + * @param n The name of the classifier. + * @param p The settings of all parameters. + **/ + public SparseAveragedPerceptron(String n, SparseAveragedPerceptron.Parameters p) { + super(n); + setParameters(p); } /** - * Takes the dot product of the regular, non-averaged, Perceptron weight - * vector with the given vector, using the hard coded default weight. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @return The computed dot product. + * Retrieves the parameters that are set in this learner. + * + * @return An object containing all the values of the parameters that control the behavior of + * this learning algorithm. 
**/ - public double simpleDot(int[] exampleFeatures, double[] exampleValues) { - return super.dot(exampleFeatures, exampleValues, defaultWeight); + public Learner.Parameters getParameters() { + Parameters p = new Parameters((SparsePerceptron.Parameters) super.getParameters()); + return p; } /** - * Takes the dot product of the regular, non-averaged, Perceptron weight - * vector with the given vector, using the specified default weight when - * a feature is not yet present in this vector. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @param defaultW An initial weight for new features. - * @return The computed dot product. + * Sets the values of parameters that control the behavior of this learning algorithm. + * + * @param p The parameters. **/ - public double simpleDot(int[] exampleFeatures, double[] exampleValues, - double defaultW) { - return super.dot(exampleFeatures, exampleValues, defaultW); + public void setParameters(Parameters p) { + super.setParameters(p); + awv = (AveragedWeightVector) weightVector; } /** - * Performs pairwise addition of the feature values in the given vector - * scaled by the given factor, modifying this weight vector, using the - * specified default weight when a feature from the given vector is not - * yet present in this vector. The default weight is used to initialize - * new feature weights. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @param factor The scaling factor. + * The score of the specified object is equal to w * x + bias where * + * is dot product, w is the weight vector, and x is the feature vector + * produced by the extractor. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @return The result of the dot product plus the bias. 
**/ - public void scaledAdd(int[] exampleFeatures, double[] exampleValues, - double factor) { - scaledAdd(exampleFeatures, exampleValues, factor, defaultWeight); + public double score(int[] exampleFeatures, double[] exampleValues) { + double result = awv.dot(exampleFeatures, exampleValues, initialWeight); + int examples = awv.getExamples(); + + if (examples > 0) + result += (examples * bias - averagedBias) / (double) examples; + return result; } /** - * Performs pairwise addition of the feature values in the given vector - * scaled by the given factor, modifying this weight vector, using the - * specified default weight when a feature from the given vector is not - * yet present in this vector. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @param factor The scaling factor. - * @param defaultW An initial weight for new features. + * Scales the feature vector produced by the extractor by the learning rate and adds it to the + * weight vector. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. 
**/ - public void scaledAdd(int[] exampleFeatures, double[] exampleValues, - double factor, double defaultW) { - for (int i = 0; i < exampleFeatures.length; i++) { - int featureIndex = exampleFeatures[i]; - double currentWeight = getWeight(featureIndex, defaultW); - double w = currentWeight + factor*exampleValues[i]; + public void promote(int[] exampleFeatures, double[] exampleValues, double rate) { + bias += rate; + + int examples = awv.getExamples(); + averagedBias += examples * rate; + awv.scaledAdd(exampleFeatures, exampleValues, rate, initialWeight); + } - double difference = w - currentWeight; - updateAveragedWeight(featureIndex, examples*difference); - setWeight(featureIndex, w); - } + /** + * Scales the feature vector produced by the extractor by the learning rate and subtracts it + * from the weight vector. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + **/ + public void demote(int[] exampleFeatures, double[] exampleValues, double rate) { + bias -= rate; - ++examples; + int examples = awv.getExamples(); + averagedBias -= examples * rate; + awv.scaledAdd(exampleFeatures, exampleValues, -rate, initialWeight); } /** - * Adds a new value to the current averaged weight indexed - * by the supplied feature index. - * - * @param featureIndex The feature index. - * @param w The value to add to the current weight. + * This method works just like {@link LinearThresholdUnit#learn(int[],double[],int[],double[])}, + * except it notifies its weight vector when it got an example correct in addition to updating + * it when it makes a mistake. 
+ * + * @param exampleFeatures The example's array of feature indices + * @param exampleValues The example's array of feature values + * @param exampleLabels The example's label(s) + * @param labelValues The labels' values **/ - protected void updateAveragedWeight(int featureIndex, double w) { - double newWeight = averagedWeights.get(featureIndex, defaultWeight) + w; - averagedWeights.set(featureIndex, newWeight, defaultWeight); + public void learn(int[] exampleFeatures, double[] exampleValues, int[] exampleLabels, + double[] labelValues) { + assert exampleLabels.length == 1 : "Example must have a single label."; + assert exampleLabels[0] == 0 || exampleLabels[0] == 1 : "Example has unallowed label value."; + + boolean label = (exampleLabels[0] == 1); + + double s = awv.simpleDot(exampleFeatures, exampleValues, initialWeight) + bias; + if (label && s < threshold + positiveThickness) + promote(exampleFeatures, exampleValues, getLearningRate()); + else if (!label && s >= threshold - negativeThickness) + demote(exampleFeatures, exampleValues, getLearningRate()); + else + awv.correctExample(); } /** - * Outputs the contents of this SparseWeightVector into the - * specified PrintStream. The string representation starts - * with a "Begin" annotation, ends with an - * "End" annotation, and without a Lexicon - * passed as a parameter, the weights are simply printed in the order of - * their integer indices. - * - * @param out The stream to write to. + * Initializes the weight vector array to the size of the supplied number of features, with each + * cell taking the default value of {@link #initialWeight}. 
+ * + * @param numExamples The number of examples + * @param numFeatures The number of features **/ - public void write(PrintStream out) { - out.println("Begin AveragedWeightVector"); - for (int i = 0; i < averagedWeights.size(); ++i) - out.println(getAveragedWeight(i, 0)); - out.println("End AveragedWeightVector"); + public void initialize(int numExamples, int numFeatures) { + double[] weights = new double[numFeatures]; + Arrays.fill(weights, initialWeight); + weightVector = awv = new AveragedWeightVector(weights); + } + + + /** Resets the weight vector to all zeros. */ + public void forget() { + super.forget(); + awv = (AveragedWeightVector) weightVector; + averagedBias = 0; } /** - * Outputs the contents of this SparseWeightVector into the - * specified PrintStream. The string representation starts - * with a "Begin" annotation, ends with an - * "End" annotation, and lists each feature with its - * corresponding weight on the same, separate line in between. - * - * @param out The stream to write to. - * @param lex The feature lexicon. + * Writes the algorithm's internal representation as text. In the first line of output, the name + * of the classifier is printed, followed by {@link SparsePerceptron#learningRate}, + * {@link LinearThresholdUnit#initialWeight}, {@link LinearThresholdUnit#threshold}, + * {@link LinearThresholdUnit#positiveThickness}, {@link LinearThresholdUnit#negativeThickness}, + * {@link LinearThresholdUnit#bias}, and finally {@link #averagedBias}. + * + * @param out The output stream. 
**/ - public void write(PrintStream out, Lexicon lex) { - out.println("Begin AveragedWeightVector"); - - Map map = lex.getMap(); - Map.Entry[] entries = - (Map.Entry[]) map.entrySet().toArray(new Map.Entry[map.size()]); - Arrays.sort(entries, - new Comparator() { - public int compare(Object o1, Object o2) { - Map.Entry e1 = (Map.Entry) o1; - Map.Entry e2 = (Map.Entry) o2; - int i1 = ((Integer) e1.getValue()).intValue(); - int i2 = ((Integer) e2.getValue()).intValue(); - if ((i1 < weights.size()) != (i2 < weights.size())) - return i1 - i2; - return ((Feature) e1.getKey()).compareTo(e2.getKey()); - } - }); - - int i, biggest = 0; - for (i = 0; i < entries.length; ++i) { - String key = - entries[i].getKey().toString() - + (((Integer) entries[i].getValue()).intValue() < weights.size() - ? "" : " (pruned)"); - biggest = Math.max(biggest, key.length()); - } - - if (biggest % 2 == 0) biggest += 2; - else ++biggest; - - for (i = 0; i < entries.length; ++i) { - String key = - entries[i].getKey().toString() - + (((Integer) entries[i].getValue()).intValue() < weights.size() - ? "" : " (pruned)"); - out.print(key); - for (int j = 0; key.length() + j < biggest; ++j) out.print(" "); - - int index = ((Integer) entries[i].getValue()).intValue(); - double weight = getAveragedWeight(index, 0); - out.println(weight); - } - - out.println("End AveragedWeightVector"); + public void write(PrintStream out) { + out.println(name + ": " + learningRate + ", " + initialWeight + ", " + threshold + ", " + + positiveThickness + ", " + negativeThickness + ", " + bias + ", " + averagedBias); + if (lexicon == null || lexicon.size() == 0) + awv.write(out); + else + awv.write(out, lexicon); } /** - * Writes the weight vector's internal representation in binary form. - * - * @param out The output stream. + * Writes the learned function's internal representation in binary form. + * + * @param out The output stream. 
**/ public void write(ExceptionlessOutputStream out) { - super.write(out); - out.writeInt(examples); - averagedWeights.write(out); + super.write(out); + out.writeDouble(averagedBias); } /** - * Reads the representation of a weight vector with this object's - * run-time type from the given stream, overwriting the data in this - * object. - * - *

This method is appropriate for reading weight vectors as written - * by {@link #write(ExceptionlessOutputStream)}. - * - * @param in The input stream. + * Reads the binary representation of a learner with this object's run-time type, overwriting + * any and all learned or manually specified parameters as well as the label lexicon but without + * modifying the feature lexicon. + * + * @param in The input stream. **/ public void read(ExceptionlessInputStream in) { - super.read(in); - examples = in.readInt(); - averagedWeights.read(in); + super.read(in); + awv = (AveragedWeightVector) weightVector; + averagedBias = in.readDouble(); } /** - * Returns a copy of this AveragedWeightVector. - * - * @return A copy of this AveragedWeightVector. + * Simply a container for all of {@link SparseAveragedPerceptron}'s configurable parameters. + * Using instances of this class should make code more readable and constructors less + * complicated. Note that if the object referenced by + * {@link LinearThresholdUnit.Parameters#weightVector} is replaced via an instance of this + * class, it must be replaced with an {@link SparseAveragedPerceptron.AveragedWeightVector}. + * + * @author Nick Rizzolo **/ - public Object clone() { - AveragedWeightVector clone = (AveragedWeightVector) super.clone(); - clone.averagedWeights = (DVector) averagedWeights.clone(); - return clone; + public static class Parameters extends SparsePerceptron.Parameters { + /** Sets all the default values. */ + public Parameters() { + weightVector = (AveragedWeightVector) defaultWeightVector.clone(); + } + + + /** + * Sets the parameters from the parent's parameters object, giving defaults to all + * parameters declared in this object. + **/ + public Parameters(SparsePerceptron.Parameters p) { + super(p); + } + + + /** Copy constructor. */ + public Parameters(Parameters p) { + super(p); + } + + + /** + * Calls the appropriate Learner.setParameters(Parameters) method for this + * Parameters object. 
+ * + * @param l The learner whose parameters will be set. + **/ + public void setParameters(Learner l) { + ((SparseAveragedPerceptron) l).setParameters(this); + } } /** - * Returns a new, empty weight vector with the same parameter settings as - * this one. - * - * @return An empty weight vector. + * This implementation of a sparse weight vector associates two doubles with each + * {@link Feature}. The first plays the role of the usual weight vector, and the second + * accumulates multiples of examples on which mistakes were made to help implement the weighted + * average. + * + * @author Nick Rizzolo **/ - public SparseWeightVector emptyClone() { - return new AveragedWeightVector(); + public static class AveragedWeightVector extends SparseWeightVector { + /** + * Together with {@link SparseWeightVector#weights}, this vector provides enough information + * to reconstruct the average of all weight vectors arrived at during the course of + * learning. + **/ + public DVector averagedWeights; + /** Counts the total number of training examples this vector has seen. */ + protected int examples; + + + /** Simply instantiates the weight vectors. */ + public AveragedWeightVector() { + this(new DVector(defaultCapacity)); + } + + /** + * Simply initializes the weight vectors. + * + * @param w An array of weights. + **/ + public AveragedWeightVector(double[] w) { + this(new DVector(w)); + } + + /** + * Simply initializes the weight vectors. + * + * @param w A vector of weights. + **/ + public AveragedWeightVector(DVector w) { + super((DVector) w.clone()); + averagedWeights = w; + } + + + /** Increments the {@link #examples} variable. */ + public void correctExample() { + ++examples; + } + + /** Returns the {@link #examples} variable. */ + public int getExamples() { + return examples; + } + + + /** + * Returns the averaged weight of the given feature. + * + * @param featureIndex The feature index. + * @param defaultW The default weight. + * @return The weight of the feature. 
+ **/ + public double getAveragedWeight(int featureIndex, double defaultW) { + if (examples == 0) + return 0; + double aw = averagedWeights.get(featureIndex, defaultW); + double w = getWeight(featureIndex, defaultW); + return (examples * w - aw) / (double) examples; + } + + + /** + * Takes the dot product of this AveragedWeightVector with the argument vector, + * using the hard coded default weight. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @return The computed dot product. + **/ + public double dot(int[] exampleFeatures, double[] exampleValues) { + return dot(exampleFeatures, exampleValues, defaultWeight); + } + + + /** + * Takes the dot product of this AveragedWeightVector with the argument vector, + * using the specified default weight when one is not yet present in this vector. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @param defaultW The default weight. + * @return The computed dot product. + **/ + public double dot(int[] exampleFeatures, double[] exampleValues, double defaultW) { + double sum = 0; + + for (int i = 0; i < exampleFeatures.length; i++) { + double w = getAveragedWeight(exampleFeatures[i], defaultW); + sum += w * exampleValues[i]; + } + + return sum; + } + + + /** + * Takes the dot product of the regular, non-averaged, Perceptron weight vector with the + * given vector, using the hard coded default weight. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @return The computed dot product. 
+ **/ + public double simpleDot(int[] exampleFeatures, double[] exampleValues) { + return super.dot(exampleFeatures, exampleValues, defaultWeight); + } + + + /** + * Takes the dot product of the regular, non-averaged, Perceptron weight vector with the + * given vector, using the specified default weight when a feature is not yet present in + * this vector. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @param defaultW An initial weight for new features. + * @return The computed dot product. + **/ + public double simpleDot(int[] exampleFeatures, double[] exampleValues, double defaultW) { + return super.dot(exampleFeatures, exampleValues, defaultW); + } + + + /** + * Performs pairwise addition of the feature values in the given vector scaled by the given + * factor, modifying this weight vector, using the specified default weight when a feature + * from the given vector is not yet present in this vector. The default weight is used to + * initialize new feature weights. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @param factor The scaling factor. + **/ + public void scaledAdd(int[] exampleFeatures, double[] exampleValues, double factor) { + scaledAdd(exampleFeatures, exampleValues, factor, defaultWeight); + } + + + /** + * Performs pairwise addition of the feature values in the given vector scaled by the given + * factor, modifying this weight vector, using the specified default weight when a feature + * from the given vector is not yet present in this vector. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @param factor The scaling factor. + * @param defaultW An initial weight for new features. 
+ **/ + public void scaledAdd(int[] exampleFeatures, double[] exampleValues, double factor, + double defaultW) { + for (int i = 0; i < exampleFeatures.length; i++) { + int featureIndex = exampleFeatures[i]; + double currentWeight = getWeight(featureIndex, defaultW); + double w = currentWeight + factor * exampleValues[i]; + + double difference = w - currentWeight; + updateAveragedWeight(featureIndex, examples * difference); + + setWeight(featureIndex, w); + } + + ++examples; + } + + + /** + * Adds a new value to the current averaged weight indexed by the supplied feature index. + * + * @param featureIndex The feature index. + * @param w The value to add to the current weight. + **/ + protected void updateAveragedWeight(int featureIndex, double w) { + double newWeight = averagedWeights.get(featureIndex, defaultWeight) + w; + averagedWeights.set(featureIndex, newWeight, defaultWeight); + } + + + /** + * Outputs the contents of this SparseWeightVector into the specified + * PrintStream. The string representation starts with a "Begin" + * annotation, ends with an "End" annotation, and without a + * Lexicon passed as a parameter, the weights are simply printed in the order + * of their integer indices. + * + * @param out The stream to write to. + **/ + public void write(PrintStream out) { + out.println("Begin AveragedWeightVector"); + for (int i = 0; i < averagedWeights.size(); ++i) + out.println(getAveragedWeight(i, 0)); + out.println("End AveragedWeightVector"); + } + + + /** + * Outputs the contents of this SparseWeightVector into the specified + * PrintStream. The string representation starts with a "Begin" + * annotation, ends with an "End" annotation, and lists each feature with its + * corresponding weight on the same, separate line in between. + * + * @param out The stream to write to. + * @param lex The feature lexicon. 
+ **/ + public void write(PrintStream out, Lexicon lex) { + out.println("Begin AveragedWeightVector"); + + Map map = lex.getMap(); + Map.Entry[] entries = (Map.Entry[]) map.entrySet().toArray(new Map.Entry[map.size()]); + Arrays.sort(entries, new Comparator() { + public int compare(Object o1, Object o2) { + Map.Entry e1 = (Map.Entry) o1; + Map.Entry e2 = (Map.Entry) o2; + int i1 = ((Integer) e1.getValue()).intValue(); + int i2 = ((Integer) e2.getValue()).intValue(); + if ((i1 < weights.size()) != (i2 < weights.size())) + return i1 - i2; + return ((Feature) e1.getKey()).compareTo(e2.getKey()); + } + }); + + int i, biggest = 0; + for (i = 0; i < entries.length; ++i) { + String key = + entries[i].getKey().toString() + + (((Integer) entries[i].getValue()).intValue() < weights.size() ? "" + : " (pruned)"); + biggest = Math.max(biggest, key.length()); + } + + if (biggest % 2 == 0) + biggest += 2; + else + ++biggest; + + for (i = 0; i < entries.length; ++i) { + String key = + entries[i].getKey().toString() + + (((Integer) entries[i].getValue()).intValue() < weights.size() ? "" + : " (pruned)"); + out.print(key); + for (int j = 0; key.length() + j < biggest; ++j) + out.print(" "); + + int index = ((Integer) entries[i].getValue()).intValue(); + double weight = getAveragedWeight(index, 0); + out.println(weight); + } + + out.println("End AveragedWeightVector"); + } + + + /** + * Writes the weight vector's internal representation in binary form. + * + * @param out The output stream. + **/ + public void write(ExceptionlessOutputStream out) { + super.write(out); + out.writeInt(examples); + averagedWeights.write(out); + } + + + /** + * Reads the representation of a weight vector with this object's run-time type from the + * given stream, overwriting the data in this object. + * + *

+ * This method is appropriate for reading weight vectors as written by + * {@link #write(ExceptionlessOutputStream)}. + * + * @param in The input stream. + **/ + public void read(ExceptionlessInputStream in) { + super.read(in); + examples = in.readInt(); + averagedWeights.read(in); + } + + + /** + * Returns a copy of this AveragedWeightVector. + * + * @return A copy of this AveragedWeightVector. + **/ + public Object clone() { + AveragedWeightVector clone = (AveragedWeightVector) super.clone(); + clone.averagedWeights = (DVector) averagedWeights.clone(); + return clone; + } + + + /** + * Returns a new, empty weight vector with the same parameter settings as this one. + * + * @return An empty weight vector. + **/ + public SparseWeightVector emptyClone() { + return new AveragedWeightVector(); + } + + /** + * If we prune worthless weights, we must also prune useless averages. + * @param uselessfeatures useless features. + * @param numfeatures since this weight vec does not know how many features there are, it must be passed in + */ + public void pruneWeights(int[] uselessfeatures, int numfeatures) { + if (uselessfeatures.length == 0) + return; + super.pruneWeights(uselessfeatures, numfeatures); + + // create a new smaller weight vector for the pruned weights. + int oldsize = this.averagedWeights.size(); + if (oldsize > numfeatures) { + throw new RuntimeException("There was an averaged weight vector with more weights("+oldsize+ + ") than the number of features("+numfeatures+")!"); + } + int newsize = numfeatures - uselessfeatures.length; + double [] newvec = new double[newsize]; + + // copy the weights from the old vector. + int uselessindex = 0; + int newvecindex = 0; + for (int oldindex = 0; oldindex < oldsize; oldindex++) { + if (uselessindex < uselessfeatures.length && uselessfeatures[uselessindex] == oldindex) { + // this is a useless feature, we will skip it. 
+ uselessindex++; + } else { + newvec[newvecindex] = averagedWeights.get(oldindex); + newvecindex++; + } + } + + // compress the array. + if (newvecindex != newsize) { + double[] tmp = new double[newvecindex]; + System.arraycopy(newvec, 0, tmp, 0, newvecindex);; + newvec = tmp; + } + this.averagedWeights = new DVector(newvec); + } } - } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseConfidenceWeighted.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseConfidenceWeighted.java index f0189990..3ce15216 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseConfidenceWeighted.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseConfidenceWeighted.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.learn; @@ -13,510 +10,498 @@ import java.io.PrintStream; import edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessInputStream; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessOutputStream; - +import edu.illinois.cs.cogcomp.lbjava.classify.Classifier; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream; /** - * This is an implementation of the approximate "variance algorithm" of - * Confidence Weighted Linear Classification, Dredze, et.al (ICML, - * 2008). This algorithm envisions each parameter stored in a linear - * threshold unit's weight vector as having been drawn independently from a - * normal distribution with an independent mean and variance representing our - * estimate and confidence in that parameter. Given a training example, this - * algorithm then tries to find new values for all these means and - * confidences such that both of the following hold: - * - *

    - *
  • the KL-divergence between the old and new distributions is - * minimized, and - *
  • the current example is classified correctly when a weight vector is - * drawn according to the current distributions with user-specified - * confidence. - *
- * - *

In this implementation, the user-specified confidence parameter is a - * real value representing the result of applying the inverse cumulative - * function of the normal distribution to a probability (ie, a real value - * greater than or equal to 0 and less than or equal to 1). The inverse of - * the normal cdf is a monotonically increasing function. - * - *

It is assumed that {@link Learner#labeler} is a single discrete - * classifier that produces the same feature for every example object and - * that the values that feature may take are available through the - * {@link classify.Classifier#allowableValues()} method. The second - * value returned from {@link Classifier#allowableValues()} is - * treated as "positive", and it is assumed there are exactly 2 allowable - * values. Assertions will produce error messages if these assumptions do - * not hold. - * - *

This algorithm's user-configurable parameters are stored in member - * fields of this class. They may be set via either a constructor that names - * each parameter explicitly or a constructor that takes an instance of - * {@link SparseConfidenceWeighted.Parameters Parameters} as - * input. The documentation in each member field in this class indicates the - * default value of the associated parameter when using the former type of - * constructor. The documentation of the associated member field in the - * {@link SparseConfidenceWeighted.Parameters Parameters} class - * indicates the default value of the parameter when using the latter type of - * constructor. - * - * @author Nick Rizzolo + * This is an implementation of the approximate "variance algorithm" of Confidence Weighted + * Linear Classification, Dredze, et.al (ICML, 2008). This algorithm envisions each parameter + * stored in a linear threshold unit's weight vector as having been drawn independently from a + * normal distribution with an independent mean and variance representing our estimate and + * confidence in that parameter. Given a training example, this algorithm then tries to find new + * values for all these means and confidences such that both of the following hold: + * + *

    + *
  • the KL-divergence between the old and new distributions is minimized, and + *
  • the current example is classified correctly when a weight vector is drawn according to the + * current distributions with user-specified confidence. + *
+ * + *

+ * In this implementation, the user-specified confidence parameter is a real value representing the + * result of applying the inverse cumulative function of the normal distribution to a probability + * (ie, a real value greater than or equal to 0 and less than or equal to 1). The inverse of the + * normal cdf is a monotonically increasing function. + * + *

+ * It is assumed that {@link Learner#labeler} is a single discrete classifier that produces the same + * feature for every example object and that the values that feature may take are available through + * the {@link Classifier#allowableValues()} method. The second value returned from + * {@link Classifier#allowableValues()} is treated as "positive", and it is assumed there are + * exactly 2 allowable values. Assertions will produce error messages if these assumptions do not + * hold. + * + *

+ * This algorithm's user-configurable parameters are stored in member fields of this class. They may + * be set via either a constructor that names each parameter explicitly or a constructor that takes + * an instance of {@link SparseConfidenceWeighted.Parameters Parameters} as input. The documentation + * in each member field in this class indicates the default value of the associated parameter when + * using the former type of constructor. The documentation of the associated member field in the + * {@link SparseConfidenceWeighted.Parameters Parameters} class indicates the default value of the + * parameter when using the latter type of constructor. + * + * @author Nick Rizzolo **/ -public class SparseConfidenceWeighted extends LinearThresholdUnit -{ - /** Default value for {@link #confidence}. */ - public static final double defaultConfidence = 2; - /** Default value for {@link #initialVariance}. */ - public static final double defaultInitialVariance = 1; - - - /** - * The confidence parameter as described above; default - * {@link #defaultConfidence}. - **/ - protected double confidence; - /** - * The strictly positive initial variance of the parameters; default - * {@link #defaultInitialVariance}. - **/ - protected double initialVariance; - /** The inverses of the current variances of the parameters. */ - protected SparseWeightVector variances; - /** The bias element of the {@link #variances} vector. */ - protected double variancesBias; - - - /** All parameters get default values. */ - public SparseConfidenceWeighted() { this(""); } - - /** - * Sets the {@link #confidence} parameter. - * - * @param c The desired confidence value. - **/ - public SparseConfidenceWeighted(double c) { this("", c); } - - /** - * Sets the {@link #confidence} and {@link #initialVariance} parameters. - * - * @param c The desired confidence value. - * @param v The desired initial variance. 
- **/ - public SparseConfidenceWeighted(double c, double v) { - this("", c, v); - } - - /** - * Sets the {@link #confidence}, {@link #initialVariance}, and - * {@link LinearThresholdUnit#weightVector} parameters. - * - * @param c The desired confidence value. - * @param v The desired initial variance. - * @param vm An empty sparse weight vector of means, perhaps of an - * alternative subclass of {@link SparseWeightVector}. - **/ - public SparseConfidenceWeighted(double c, double v, SparseWeightVector vm) { - this("", c, v, vm); - } - - /** - * Sets the {@link #confidence}, {@link #initialVariance}, - * {@link LinearThresholdUnit#weightVector}, and {@link #variances} - * parameters. Make sure that the references passed to the last two - * arguments refer to different objects. - * - * @param c The desired confidence value. - * @param v The desired initial variance. - * @param vm An empty sparse weight vector of means, perhaps of an - * alternative subclass of {@link SparseWeightVector}. - * @param vv An empty sparse weight vector of variances, perhaps of an - * alternative subclass of {@link SparseWeightVector}. - **/ - public SparseConfidenceWeighted(double c, double v, SparseWeightVector vm, - SparseWeightVector vv) { - this("", c, v, vm, vv); - } - - /** - * Initializing constructor. Sets all member variables to their associated - * settings in the {@link SparseConfidenceWeighted.Parameters} object. - * - * @param p The settings of all parameters. - **/ - public SparseConfidenceWeighted(Parameters p) { this("", p); } - - - /** - * All parameters get default values. - * - * @param n The name of the classifier. - **/ - public SparseConfidenceWeighted(String n) { this(n, defaultConfidence); } - - /** - * Sets the {@link #confidence} parameter. - * - * @param n The name of the classifier. - * @param c The desired confidence value. 
- **/ - public SparseConfidenceWeighted(String n, double c) { - this(n, c, defaultInitialVariance); - } - - /** - * Sets the {@link #confidence} and {@link #initialVariance} parameters. - * - * @param n The name of the classifier. - * @param c The desired confidence value. - * @param v The desired initial variance. - **/ - public SparseConfidenceWeighted(String n, double c, double v) { - this(n, c, v, (SparseWeightVector) defaultWeightVector.clone()); - } - - /** - * Sets the {@link #confidence}, {@link #initialVariance}, and - * {@link LinearThresholdUnit#weightVector} parameters. - * - * @param n The name of the classifier. - * @param c The desired confidence value. - * @param v The desired initial variance. - * @param vm An empty sparse weight vector of means, perhaps of an - * alternative subclass of {@link SparseWeightVector}. - **/ - public SparseConfidenceWeighted(String n, double c, double v, - SparseWeightVector vm) { - this(n, c, v, vm, (SparseWeightVector) defaultWeightVector.clone()); - } - - /** - * Sets the {@link #confidence}, {@link #initialVariance}, - * {@link LinearThresholdUnit#weightVector}, and {@link #variances} - * parameters. Make sure that the references passed to the last two - * arguments refer to different objects. - * - * @param n The name of the classifier. - * @param c The desired confidence value. - * @param v The desired initial variance. - * @param vm An empty sparse weight vector of means, perhaps of an - * alternative subclass of {@link SparseWeightVector}. - * @param vv An empty sparse weight vector of variances, perhaps of an - * alternative subclass of {@link SparseWeightVector}. - **/ - public SparseConfidenceWeighted(String n, double c, double v, - SparseWeightVector vm, - SparseWeightVector vv) { - super(n); - Parameters p = new Parameters(); - p.confidence = c; - p.initialVariance = v; - p.weightVector = vm; - p.variances = vv; - setParameters(p); - } - - /** - * Initializing constructor. 
Sets all member variables to their associated - * settings in the {@link SparseConfidenceWeighted.Parameters} object. - * - * @param n The name of the classifier. - * @param p The settings of all parameters. - **/ - public SparseConfidenceWeighted(String n, Parameters p) { - super(n); - setParameters(p); - } - - - /** - * Sets the values of parameters that control the behavior of this learning - * algorithm. - * - * @param p The parameters. - **/ - public void setParameters(Parameters p) { - super.setParameters(p); - confidence = p.confidence; - initialVariance = p.initialVariance; - variances = p.variances; - variancesBias = 1 / initialVariance; - } - - - /** - * Retrieves the parameters that are set in this learner. - * - * @return An object containing all the values of the parameters that - * control the behavior of this learning algorithm. - **/ - public Learner.Parameters getParameters() { - Parameters p = - new Parameters((LinearThresholdUnit.Parameters) super.getParameters()); - p.confidence = confidence; - p.initialVariance = initialVariance; - p.variances = variances.emptyClone(); - return p; - } - - - /** - * Returns the current value of the {@link #confidence} variable. - * - * @return The value of the {@link #confidence} variable. - **/ - public double getConfidence() { return confidence; } - - - /** - * Sets the {@link #confidence} member variable to the specified - * value. - * - * @param c The new value for {@link #confidence}. - **/ - public void setConfidence(double c) { confidence = c; } - - - /** - * Returns the current value of the {@link #initialVariance} variable. - * - * @return The value of the {@link #initialVariance} variable. - **/ - public double getInitialVariance() { return initialVariance; } - - - /** - * Sets the {@link #initialVariance} member variable to the specified - * value. - * - * @param v The new value for {@link #initialVariance}. 
- **/ - public void setInitialVariance(double v) { initialVariance = v; } - - - /** - * Updates the means and variances according to the new labeled example. - * - * @param exampleFeatures The example's array of feature indices - * @param exampleValues The example's array of feature values - * @param exampleLabels The example's label(s) - * @param labelValues The labels' values - **/ - public void learn(int[] exampleFeatures, double[] exampleValues, - int[] exampleLabels, double[] labelValues) { - assert exampleLabels.length == 1 - : "Example must have a single label."; - assert exampleLabels[0] == 0 || exampleLabels[0] == 1 - : "Example has unallowed label value."; - - double y = 2 * exampleLabels[0] - 1; - double m = y * (weightVector.dot(exampleFeatures, exampleValues) + bias); - - Object sigmaX[] = - variances.pairwiseMultiply(exampleFeatures, exampleValues, - initialVariance, true); - int sigmaXFeatures[] = (int[])sigmaX[0]; - double sigmaXValues[] = (double[])sigmaX[1]; - - double v = - FeatureVector.dot(exampleFeatures, exampleValues, sigmaXFeatures, - sigmaXValues) - + 1 / variancesBias; - - double t = 2 * confidence * m + 1; - double sqrtTerm = t * t - 8 * confidence * (m - confidence * v); - double alpha = (-t + Math.sqrt(sqrtTerm)) / (4 * confidence * v); - - if (alpha > 0) { - weightVector.scaledAdd(sigmaXFeatures, sigmaXValues, alpha * y); - bias += alpha * y / variancesBias; - variances.scaledAdd(exampleFeatures, exampleValues, - 2 * alpha * confidence); - variancesBias += 2 * alpha * confidence; - } - } - - - /** - * This method does nothing. The entire implementation is in - * {@link #learn(Object)}. - */ - public void demote(int[] exampleFeatures, double[] exampleValues, - double rate) { - } - - - /** - * This method does nothing. The entire implementation is in - * {@link #learn(Object)}. 
- */ - public void promote(int[] exampleFeatures, double[] exampleValues, - double rate) { - } - - - /** - * Reinitializes the learner to the state it started at before any learning - * was performed. - **/ - public void forget() { - super.forget(); - variances = variances.emptyClone(); - variancesBias = 1 / initialVariance; - } - - - /** - * Writes the algorithm's internal representation as text. In the first - * line of output, the name of the classifier is printed, followed by - * {@link #confidence} and {@link #initialVariance}. Next, the annotation - * Begin means on its own line is followed by the contents of - * {@link LinearThresholdUnit#weightVector} and the annotation End - * means on its own line. Finally, the annotation Begin - * variances on its own line is followed by the contents of - * {@link #variances} and the annotation End variances on its - * own line. - * - * @param out The output stream. - **/ - public void write(PrintStream out) { - out.println(name + ": " + confidence + ", " + initialVariance); - out.println("Means:"); - if (lexicon.size() == 0) weightVector.write(out); - else weightVector.write(out, lexicon); - out.println("\nVariances:"); - if (lexicon.size() == 0) variances.write(out); - else variances.write(out, lexicon); - } - - - /** - * Writes the learned function's internal representation in binary form. - * - * @param out The output stream. - **/ - public void write(ExceptionlessOutputStream out) { - super.write(out); - out.writeDouble(confidence); - out.writeDouble(initialVariance); - out.writeDouble(variancesBias); - variances.write(out); - } - - - /** - * Reads the binary representation of a learner with this object's run-time - * type, overwriting any and all learned or manually specified parameters - * as well as the label lexicon but without modifying the feature lexicon. - * - * @param in The input stream. 
- **/ - public void read(ExceptionlessInputStream in) { - super.read(in); - confidence = in.readDouble(); - initialVariance = in.readDouble(); - variancesBias = in.readDouble(); - variances = SparseWeightVector.readWeightVector(in); - } - - - /** Returns a deep clone of this learning algorithm. */ - public Object clone() { - SparseConfidenceWeighted clone = null; - - try { clone = (SparseConfidenceWeighted) super.clone(); } - catch (Exception e) { - System.err.println("Error cloning SparseConfidenceWeighted: " + e); - System.exit(1); - } +public class SparseConfidenceWeighted extends LinearThresholdUnit { + /** Default value for {@link #confidence}. */ + public static final double defaultConfidence = 2; + /** Default value for {@link #initialVariance}. */ + public static final double defaultInitialVariance = 1; + - if (variances != null) - clone.variances = (SparseWeightVector) variances.clone(); - return clone; - } - - - /** - * Simply a container for all of {@link SparseConfidenceWeighted}'s - * configurable parameters. Using instances of this class should make code - * more readable and constructors less complicated. - * - * @author Nick Rizzolo - **/ - public static class Parameters extends LinearThresholdUnit.Parameters - { /** - * The confidence parameter as described above; default - * {@link SparseConfidenceWeighted#defaultConfidence}. + * The confidence parameter as described above; default {@link #defaultConfidence}. **/ protected double confidence; /** - * The strictly positive initial variance of the parameters; default - * {@link SparseConfidenceWeighted#defaultInitialVariance}. + * The strictly positive initial variance of the parameters; default + * {@link #defaultInitialVariance}. **/ protected double initialVariance; + /** The inverses of the current variances of the parameters. */ + protected SparseWeightVector variances; + /** The bias element of the {@link #variances} vector. 
*/ + protected double variancesBias; + + + /** All parameters get default values. */ + public SparseConfidenceWeighted() { + this(""); + } + /** - * The current variances of the parameters; default - * {@link LinearThresholdUnit#defaultWeightVector}. + * Sets the {@link #confidence} parameter. + * + * @param c The desired confidence value. **/ - protected SparseWeightVector variances; + public SparseConfidenceWeighted(double c) { + this("", c); + } + /** + * Sets the {@link #confidence} and {@link #initialVariance} parameters. + * + * @param c The desired confidence value. + * @param v The desired initial variance. + **/ + public SparseConfidenceWeighted(double c, double v) { + this("", c, v); + } - /** Sets all the default values. */ - public Parameters() { - confidence = defaultConfidence; - initialVariance = defaultInitialVariance; - variances = (SparseWeightVector) defaultWeightVector.clone(); + /** + * Sets the {@link #confidence}, {@link #initialVariance}, and + * {@link LinearThresholdUnit#weightVector} parameters. + * + * @param c The desired confidence value. + * @param v The desired initial variance. + * @param vm An empty sparse weight vector of means, perhaps of an alternative subclass of + * {@link SparseWeightVector}. + **/ + public SparseConfidenceWeighted(double c, double v, SparseWeightVector vm) { + this("", c, v, vm); } + /** + * Sets the {@link #confidence}, {@link #initialVariance}, + * {@link LinearThresholdUnit#weightVector}, and {@link #variances} parameters. Make sure that + * the references passed to the last two arguments refer to different objects. + * + * @param c The desired confidence value. + * @param v The desired initial variance. + * @param vm An empty sparse weight vector of means, perhaps of an alternative subclass of + * {@link SparseWeightVector}. + * @param vv An empty sparse weight vector of variances, perhaps of an alternative subclass of + * {@link SparseWeightVector}. 
+ **/ + public SparseConfidenceWeighted(double c, double v, SparseWeightVector vm, SparseWeightVector vv) { + this("", c, v, vm, vv); + } /** - * Sets the parameters from the parent's parameters object, giving - * defaults to all parameters declared in this object. + * Initializing constructor. Sets all member variables to their associated settings in the + * {@link SparseConfidenceWeighted.Parameters} object. + * + * @param p The settings of all parameters. **/ - public Parameters(LinearThresholdUnit.Parameters p) { - super(p); - confidence = defaultConfidence; - initialVariance = defaultInitialVariance; - variances = (SparseWeightVector) defaultWeightVector.clone(); + public SparseConfidenceWeighted(Parameters p) { + this("", p); } - /** Copy constructor. */ - public Parameters(Parameters p) { - super(p); - confidence = p.confidence; - initialVariance = p.initialVariance; - variances = p.variances; + /** + * All parameters get default values. + * + * @param n The name of the classifier. + **/ + public SparseConfidenceWeighted(String n) { + this(n, defaultConfidence); } + /** + * Sets the {@link #confidence} parameter. + * + * @param n The name of the classifier. + * @param c The desired confidence value. + **/ + public SparseConfidenceWeighted(String n, double c) { + this(n, c, defaultInitialVariance); + } /** - * Calls the appropriate Learner.setParameters(Parameters) - * method for this Parameters object. - * - * @param l The learner whose parameters will be set. + * Sets the {@link #confidence} and {@link #initialVariance} parameters. + * + * @param n The name of the classifier. + * @param c The desired confidence value. + * @param v The desired initial variance. 
**/ - public void setParameters(Learner l) { - ((SparseConfidenceWeighted) l).setParameters(this); + public SparseConfidenceWeighted(String n, double c, double v) { + this(n, c, v, (SparseWeightVector) defaultWeightVector.clone()); } + /** + * Sets the {@link #confidence}, {@link #initialVariance}, and + * {@link LinearThresholdUnit#weightVector} parameters. + * + * @param n The name of the classifier. + * @param c The desired confidence value. + * @param v The desired initial variance. + * @param vm An empty sparse weight vector of means, perhaps of an alternative subclass of + * {@link SparseWeightVector}. + **/ + public SparseConfidenceWeighted(String n, double c, double v, SparseWeightVector vm) { + this(n, c, v, vm, (SparseWeightVector) defaultWeightVector.clone()); + } /** - * Creates a string representation of these parameters in which only - * those parameters that differ from their default values are mentioned. + * Sets the {@link #confidence}, {@link #initialVariance}, + * {@link LinearThresholdUnit#weightVector}, and {@link #variances} parameters. Make sure that + * the references passed to the last two arguments refer to different objects. + * + * @param n The name of the classifier. + * @param c The desired confidence value. + * @param v The desired initial variance. + * @param vm An empty sparse weight vector of means, perhaps of an alternative subclass of + * {@link SparseWeightVector}. + * @param vv An empty sparse weight vector of variances, perhaps of an alternative subclass of + * {@link SparseWeightVector}. **/ - public String nonDefaultString() { - String result = super.nonDefaultString(); + public SparseConfidenceWeighted(String n, double c, double v, SparseWeightVector vm, + SparseWeightVector vv) { + super(n); + Parameters p = new Parameters(); + p.confidence = c; + p.initialVariance = v; + p.weightVector = vm; + p.variances = vv; + setParameters(p); + } + + /** + * Initializing constructor. 
Sets all member variables to their associated settings in the + * {@link SparseConfidenceWeighted.Parameters} object. + * + * @param n The name of the classifier. + * @param p The settings of all parameters. + **/ + public SparseConfidenceWeighted(String n, Parameters p) { + super(n); + setParameters(p); + } - if (confidence != SparseConfidenceWeighted.defaultConfidence) - result += ", confidence = " + confidence; - if (initialVariance != SparseConfidenceWeighted.defaultInitialVariance) - result += ", initialVariance = " + initialVariance; - if (result.startsWith(", ")) result = result.substring(2); - return result; + /** + * Sets the values of parameters that control the behavior of this learning algorithm. + * + * @param p The parameters. + **/ + public void setParameters(Parameters p) { + super.setParameters(p); + confidence = p.confidence; + initialVariance = p.initialVariance; + variances = p.variances; + variancesBias = 1 / initialVariance; + } + + + /** + * Retrieves the parameters that are set in this learner. + * + * @return An object containing all the values of the parameters that control the behavior of + * this learning algorithm. + **/ + public Learner.Parameters getParameters() { + Parameters p = new Parameters((LinearThresholdUnit.Parameters) super.getParameters()); + p.confidence = confidence; + p.initialVariance = initialVariance; + p.variances = variances.emptyClone(); + return p; + } + + + /** + * Returns the current value of the {@link #confidence} variable. + * + * @return The value of the {@link #confidence} variable. + **/ + public double getConfidence() { + return confidence; + } + + + /** + * Sets the {@link #confidence} member variable to the specified value. + * + * @param c The new value for {@link #confidence}. + **/ + public void setConfidence(double c) { + confidence = c; + } + + + /** + * Returns the current value of the {@link #initialVariance} variable. + * + * @return The value of the {@link #initialVariance} variable. 
+ **/ + public double getInitialVariance() { + return initialVariance; } - } -} + + /** + * Sets the {@link #initialVariance} member variable to the specified value. + * + * @param v The new value for {@link #initialVariance}. + **/ + public void setInitialVariance(double v) { + initialVariance = v; + } + + + /** + * Updates the means and variances according to the new labeled example. + * + * @param exampleFeatures The example's array of feature indices + * @param exampleValues The example's array of feature values + * @param exampleLabels The example's label(s) + * @param labelValues The labels' values + **/ + public void learn(int[] exampleFeatures, double[] exampleValues, int[] exampleLabels, + double[] labelValues) { + assert exampleLabels.length == 1 : "Example must have a single label."; + assert exampleLabels[0] == 0 || exampleLabels[0] == 1 : "Example has unallowed label value."; + + double y = 2 * exampleLabels[0] - 1; + double m = y * (weightVector.dot(exampleFeatures, exampleValues) + bias); + + Object sigmaX[] = + variances.pairwiseMultiply(exampleFeatures, exampleValues, initialVariance, true); + int sigmaXFeatures[] = (int[]) sigmaX[0]; + double sigmaXValues[] = (double[]) sigmaX[1]; + + double v = + FeatureVector.dot(exampleFeatures, exampleValues, sigmaXFeatures, sigmaXValues) + 1 + / variancesBias; + + double t = 2 * confidence * m + 1; + double sqrtTerm = t * t - 8 * confidence * (m - confidence * v); + double alpha = (-t + Math.sqrt(sqrtTerm)) / (4 * confidence * v); + + if (alpha > 0) { + weightVector.scaledAdd(sigmaXFeatures, sigmaXValues, alpha * y); + bias += alpha * y / variancesBias; + variances.scaledAdd(exampleFeatures, exampleValues, 2 * alpha * confidence); + variancesBias += 2 * alpha * confidence; + } + } + + + /** + * This method does nothing. The entire implementation is in {@link #learn(Object)}. + */ + public void demote(int[] exampleFeatures, double[] exampleValues, double rate) {} + + + /** + * This method does nothing. 
The entire implementation is in {@link #learn(Object)}. + */ + public void promote(int[] exampleFeatures, double[] exampleValues, double rate) {} + + + /** + * Reinitializes the learner to the state it started at before any learning was performed. + **/ + public void forget() { + super.forget(); + variances = variances.emptyClone(); + variancesBias = 1 / initialVariance; + } + + + /** + * Writes the algorithm's internal representation as text. In the first line of output, the name + * of the classifier is printed, followed by {@link #confidence} and {@link #initialVariance}. + * Next, the annotation Begin means on its own line is followed by the contents of + * {@link LinearThresholdUnit#weightVector} and the annotation End + * means on its own line. Finally, the annotation Begin + * variances on its own line is followed by the contents of {@link #variances} and the + * annotation End variances on its own line. + * + * @param out The output stream. + **/ + public void write(PrintStream out) { + out.println(name + ": " + confidence + ", " + initialVariance); + out.println("Means:"); + if (lexicon.size() == 0) + weightVector.write(out); + else + weightVector.write(out, lexicon); + out.println("\nVariances:"); + if (lexicon.size() == 0) + variances.write(out); + else + variances.write(out, lexicon); + } + + + /** + * Writes the learned function's internal representation in binary form. + * + * @param out The output stream. + **/ + public void write(ExceptionlessOutputStream out) { + super.write(out); + out.writeDouble(confidence); + out.writeDouble(initialVariance); + out.writeDouble(variancesBias); + variances.write(out); + } + + + /** + * Reads the binary representation of a learner with this object's run-time type, overwriting + * any and all learned or manually specified parameters as well as the label lexicon but without + * modifying the feature lexicon. + * + * @param in The input stream. 
+ **/ + public void read(ExceptionlessInputStream in) { + super.read(in); + confidence = in.readDouble(); + initialVariance = in.readDouble(); + variancesBias = in.readDouble(); + variances = SparseWeightVector.readWeightVector(in); + } + + + /** Returns a deep clone of this learning algorithm. */ + public Object clone() { + SparseConfidenceWeighted clone = null; + + try { + clone = (SparseConfidenceWeighted) super.clone(); + } catch (Exception e) { + System.err.println("Error cloning SparseConfidenceWeighted: " + e); + System.exit(1); + } + + if (variances != null) + clone.variances = (SparseWeightVector) variances.clone(); + return clone; + } + + + /** + * Simply a container for all of {@link SparseConfidenceWeighted}'s configurable parameters. + * Using instances of this class should make code more readable and constructors less + * complicated. + * + * @author Nick Rizzolo + **/ + public static class Parameters extends LinearThresholdUnit.Parameters { + /** + * The confidence parameter as described above; default + * {@link SparseConfidenceWeighted#defaultConfidence}. + **/ + protected double confidence; + /** + * The strictly positive initial variance of the parameters; default + * {@link SparseConfidenceWeighted#defaultInitialVariance}. + **/ + protected double initialVariance; + /** + * The current variances of the parameters; default + * {@link LinearThresholdUnit#defaultWeightVector}. + **/ + protected SparseWeightVector variances; + + + /** Sets all the default values. */ + public Parameters() { + confidence = defaultConfidence; + initialVariance = defaultInitialVariance; + variances = (SparseWeightVector) defaultWeightVector.clone(); + } + + + /** + * Sets the parameters from the parent's parameters object, giving defaults to all + * parameters declared in this object. 
+ **/ + public Parameters(LinearThresholdUnit.Parameters p) { + super(p); + confidence = defaultConfidence; + initialVariance = defaultInitialVariance; + variances = (SparseWeightVector) defaultWeightVector.clone(); + } + + + /** Copy constructor. */ + public Parameters(Parameters p) { + super(p); + confidence = p.confidence; + initialVariance = p.initialVariance; + variances = p.variances; + } + + + /** + * Calls the appropriate Learner.setParameters(Parameters) method for this + * Parameters object. + * + * @param l The learner whose parameters will be set. + **/ + public void setParameters(Learner l) { + ((SparseConfidenceWeighted) l).setParameters(this); + } + + + /** + * Creates a string representation of these parameters in which only those parameters that + * differ from their default values are mentioned. + **/ + public String nonDefaultString() { + String result = super.nonDefaultString(); + + if (confidence != SparseConfidenceWeighted.defaultConfidence) + result += ", confidence = " + confidence; + if (initialVariance != SparseConfidenceWeighted.defaultInitialVariance) + result += ", initialVariance = " + initialVariance; + + if (result.startsWith(", ")) + result = result.substring(2); + return result; + } + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseMIRA.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseMIRA.java index dee17c16..5427b62d 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseMIRA.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseMIRA.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.learn; @@ -14,664 +11,642 @@ import java.util.Collection; import java.util.Iterator; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.OVector; import edu.illinois.cs.cogcomp.lbjava.classify.Classifier; import edu.illinois.cs.cogcomp.lbjava.classify.DiscretePrimitiveStringFeature; import edu.illinois.cs.cogcomp.lbjava.classify.Feature; import edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector; import edu.illinois.cs.cogcomp.lbjava.classify.ScoreSet; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessInputStream; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessOutputStream; -import edu.illinois.cs.cogcomp.lbjava.util.OVector; - /** - * An implementation of the Margin Infused Relaxed Algorithm of Crammer and - * Singer. This is a multi-class, online learner that maintains a separate - * weight vector for every prediction class, just as - * {@link SparseNetworkLearner} does. However, updates to these weight - * vectors given an example vector x with label y are dependent - * on each other as follows. For each weight vector wv - * corresponding to a prediction value v, a multiplier - * tv is selected and used to update wv - * as wv += tv x. tv - * must be less than or equal to zero for all v != y. - * ty must be less than or equal to one. MIRA selects - * these multipliers so that they sum to 0 and so that the vector norm of all - * updated weight vectors concatenated is as small as possible. - * - *

In this sparse implementation of the algorithm, weight vectors - * corresponding to labels and weights for features within those vectors are - * added as they are observed in the data. Whenever a feature is observed - * for the first time, its corresponding weight in any given weight vector is - * set to a random number, which is necessary to make this algorithm work. - * It must never be the case that all weight vectors are equal to each other, - * or updates will stop happening. To ensure that results are reproducible, - * the random number generator is seeded with the same seed every time. - * - *

In addition to the observed features, each weight vector also contains - * a bias. For this reason, we also halucinate an extra dimension on every - * example vector containing a feature whose strength is 1. - * - *

It is assumed that a single discrete label feature will be produced in - * association with each example object. A feature taking one of the values - * observed in that label feature will be produced by the learned classifier. - * - *

This algorithm's user-configurable parameters are stored in member - * fields of this class. They may be set via either a constructor that names - * each parameter explicitly or a constructor that takes an instance of - * {@link SparseMIRA.Parameters Parameters} as input. - * The documentation in each member field in this class indicates the default - * value of the associated parameter when using the former type of - * constructor. The documentation of the associated member field in the - * {@link SparseMIRA.Parameters Parameters} class - * indicates the default value of the parameter when using the latter type of - * constructor. - * - * @author Nick Rizzolo + * An implementation of the Margin Infused Relaxed Algorithm of Crammer and Singer. This is a + * multi-class, online learner that maintains a separate weight vector for every prediction class, + * just as {@link SparseNetworkLearner} does. However, updates to these weight vectors given an + * example vector x with label y are dependent on each other as follows. For each + * weight vector wv corresponding to a prediction value v, a multiplier + * tv is selected and used to update wv as wv + * += tv x. tv must be less than or equal to zero for all + * v != y. ty must be less than or equal to one. MIRA selects these + * multipliers so that they sum to 0 and so that the vector norm of all updated weight vectors + * concatenated is as small as possible. + * + *

+ * In this sparse implementation of the algorithm, weight vectors corresponding to labels and + * weights for features within those vectors are added as they are observed in the data. Whenever a + * feature is observed for the first time, its corresponding weight in any given weight vector is + * set to a random number, which is necessary to make this algorithm work. It must never be the case + * that all weight vectors are equal to each other, or updates will stop happening. To ensure that + * results are reproducible, the random number generator is seeded with the same seed every time. + * + *

+ * In addition to the observed features, each weight vector also contains a bias. For this reason, + * we also halucinate an extra dimension on every example vector containing a feature whose strength + * is 1. + * + *

+ * It is assumed that a single discrete label feature will be produced in association with each + * example object. A feature taking one of the values observed in that label feature will be + * produced by the learned classifier. + * + *

+ * This algorithm's user-configurable parameters are stored in member fields of this class. They may + * be set via either a constructor that names each parameter explicitly or a constructor that takes + * an instance of {@link SparseMIRA.Parameters Parameters} as input. The documentation in each + * member field in this class indicates the default value of the associated parameter when using the + * former type of constructor. The documentation of the associated member field in the + * {@link SparseMIRA.Parameters Parameters} class indicates the default value of the parameter when + * using the latter type of constructor. + * + * @author Nick Rizzolo **/ -public class SparseMIRA extends Learner -{ - /** - * Used to decide if two values are nearly equal to each other. - * @see #nearlyEqualTo(double,double) - **/ - public static final double TOLERANCE = 1e-9; - - - /** A map from labels to the weight vector corresponding to that label. */ - protected OVector network; - /** Whether or not this learner's labeler produces conjunctive features. */ - protected boolean conjunctiveLabels; - - - /** This algorithm has no parameters to set! */ - public SparseMIRA() { this(""); } - - /** - * Initializing constructor. This constructor appears here for - * completeness; the algorithm takes no parameters. - * - * @param p The settings of all parameters. - **/ - public SparseMIRA(Parameters p) { this("", p); } - - /** - * This algorithm has no parameters to set! - * - * @param n The name of the classifier. - **/ - public SparseMIRA(String n) { - super(n); - network = new OVector(); - } - - /** - * Initializing constructor. This constructor appears here for - * completeness; the algorithm takes no parameters. - * - * @param n The name of the classifier. - * @param p The settings of all parameters. - **/ - public SparseMIRA(String n, Parameters p) { this(n); } - - - /** - * Retrieves the parameters that are set in this learner. 
- * - * @return An object containing all the values of the parameters that - * control the behavior of this learning algorithm. - **/ - public Learner.Parameters getParameters() { return new Parameters(); } - - - /** - * Sets the labeler. - * - * @param l A labeling classifier. - **/ - public void setLabeler(Classifier l) { - if (getClass().getName().indexOf("SparseMIRA") != -1 - && !l.getOutputType().equals("discrete")) { - System.err.println( - "LBJava WARNING: SparseMIRA will only work with a label classifier " - + "that returns discrete."); - System.err.println( - " The given label classifier, " + l.getClass().getName() - + ", returns " + l.getOutputType() + "."); +public class SparseMIRA extends Learner { + /** + * Used to decide if two values are nearly equal to each other. + * + * @see #nearlyEqualTo(double,double) + **/ + public static final double TOLERANCE = 1e-9; + + + /** A map from labels to the weight vector corresponding to that label. */ + protected OVector network; + /** Whether or not this learner's labeler produces conjunctive features. */ + protected boolean conjunctiveLabels; + + + /** This algorithm has no parameters to set! */ + public SparseMIRA() { + this(""); } - super.setLabeler(l); - } - - - /** - * Finds the optimal multiplier settings before updating the weight - * vectors. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @param exampleLabels The example's label(s). - * @param labelValues The labels' values. - **/ - public void learn(int[] exampleFeatures, double[] exampleValues, - int[] exampleLabels, double[] labelValues) { - int label = exampleLabels[0]; - int N = network.size(); - - if (label >= N) { - conjunctiveLabels |= labelLexicon.lookupKey(label).isConjunctive(); - while (N++ <= label) - network.add(new BiasedRandomWeightVector()); + /** + * Initializing constructor. 
This constructor appears here for completeness; the algorithm takes + * no parameters. + * + * @param p The settings of all parameters. + **/ + public SparseMIRA(Parameters p) { + this("", p); } - if (N == 1) return; + /** + * This algorithm has no parameters to set! + * + * @param n The name of the classifier. + **/ + public SparseMIRA(String n) { + super(n); + network = new OVector(); + } + + /** + * Initializing constructor. This constructor appears here for completeness; the algorithm takes + * no parameters. + * + * @param n The name of the classifier. + * @param p The settings of all parameters. + **/ + public SparseMIRA(String n, Parameters p) { + this(n); + } - double norm2 = FeatureVector.L2NormSquared(exampleValues) + 1; - double[] scores = new double[N]; - boolean[] isLabel = new boolean[scores.length]; + /** + * Retrieves the parameters that are set in this learner. + * + * @return An object containing all the values of the parameters that control the behavior of + * this learning algorithm. + **/ + public Learner.Parameters getParameters() { + return new Parameters(); + } - BiasedRandomWeightVector[] w = - new BiasedRandomWeightVector[scores.length]; - double min = Double.MAX_VALUE, max = -Double.MAX_VALUE; - for (int i = 0; i < N; ++i) { - isLabel[i] = i == label; - w[i] = (BiasedRandomWeightVector) network.get(i); - scores[i] = w[i].dot(exampleFeatures, exampleValues) / norm2; - min = Math.min(min, scores[i]); - max = Math.max(max, scores[i]); + /** + * Sets the labeler. + * + * @param l A labeling classifier. 
+ **/ + public void setLabeler(Classifier l) { + if (getClass().getName().indexOf("SparseMIRA") != -1 + && !l.getOutputType().equals("discrete")) { + System.err.println("LBJava WARNING: SparseMIRA will only work with a label classifier " + + "that returns discrete."); + System.err.println(" The given label classifier, " + l.getClass().getName() + + ", returns " + l.getOutputType() + "."); + } + + super.setLabeler(l); } - min--; - max++; - while (!nearlyEqualTo(min, max)) { - double mid = (max + min) / 2; - if (sumMultipliers(mid, scores, isLabel) <= 0) min = mid; - else max = mid; + /** + * Finds the optimal multiplier settings before updating the weight vectors. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @param exampleLabels The example's label(s). + * @param labelValues The labels' values. + **/ + public void learn(int[] exampleFeatures, double[] exampleValues, int[] exampleLabels, + double[] labelValues) { + int label = exampleLabels[0]; + int N = network.size(); + + if (label >= N) { + conjunctiveLabels |= labelLexicon.lookupKey(label).isConjunctive(); + while (N++ <= label) + network.add(new BiasedRandomWeightVector()); + } + + if (N == 1) + return; + + double norm2 = FeatureVector.L2NormSquared(exampleValues) + 1; + + double[] scores = new double[N]; + boolean[] isLabel = new boolean[scores.length]; + + BiasedRandomWeightVector[] w = new BiasedRandomWeightVector[scores.length]; + double min = Double.MAX_VALUE, max = -Double.MAX_VALUE; + + for (int i = 0; i < N; ++i) { + isLabel[i] = i == label; + w[i] = (BiasedRandomWeightVector) network.get(i); + scores[i] = w[i].dot(exampleFeatures, exampleValues) / norm2; + min = Math.min(min, scores[i]); + max = Math.max(max, scores[i]); + } + + min--; + max++; + + while (!nearlyEqualTo(min, max)) { + double mid = (max + min) / 2; + if (sumMultipliers(mid, scores, isLabel) <= 0) + min = mid; + else + max = mid; + } + + for (int 
i = 0; i < N; ++i) { + double t = getMultiplier(min, scores[i], isLabel[i]); + if (!nearlyEqualTo(t, 0)) + w[i].scaledAdd(exampleFeatures, exampleValues, t); + } } - for (int i = 0; i < N; ++i) { - double t = getMultiplier(min, scores[i], isLabel[i]); - if (!nearlyEqualTo(t, 0)) - w[i].scaledAdd(exampleFeatures, exampleValues, t); + + /** + * Returns the multiplier for a given weight vector update. See Section 5.1 of Crammer and + * Singer (2003) for a description of where this computation comes from. + * + * @param theta See Crammer and Singer (2003). + * @param score The dot product of the weight vector with the example vector, divided by the + * norm of the example vector squared. + * @param isLabel true iff this weight vector corresponds to the example's label. + * @return The multiplier for this weight vector's update. + **/ + private static double getMultiplier(double theta, double score, boolean isLabel) { + return Math.min(theta - score, isLabel ? 1 : 0); } - } - - - /** - * Returns the multiplier for a given weight vector update. See Section - * 5.1 of Crammer and Singer (2003) for a description of where this - * computation comes from. - * - * @param theta See Crammer and Singer (2003). - * @param score The dot product of the weight vector with the example - * vector, divided by the norm of the example vector - * squared. - * @param isLabel true iff this weight vector corresponds to - * the example's label. - * @return The multiplier for this weight vector's update. - **/ - private static double getMultiplier(double theta, double score, - boolean isLabel) { - return Math.min(theta - score, isLabel ? 1 : 0); - } - - - /** - * Finds the sum of the multipliers for a given value of theta. See - * Section 5.1 of Crammer and Singer (2003) for an explanation of what - * theta is. - * - * @param theta There should exist a value for this parameter that - * causes this method to return zero. 
- * @param scores The dot products of the various weight vectors with the - * example vector, divided by the norm of the example - * vector squared. - * @param isLabel true at element i iff - * scores[i] is the dot product involving the - * weight vector corresponding to the example's label. - * @return The sum of the multipliers assuming the given value of - * theta. - **/ - private static double sumMultipliers(double theta, double[] scores, - boolean[] isLabel) { - double result = 0; - for (int i = 0; i < scores.length; ++i) - result += getMultiplier(theta, scores[i], isLabel[i]); - return result; - } - - - /** - * Determines if a is nearly equal to b based on - * the value of the {@link #TOLERANCE} member variable. - * - * @param a The first value. - * @param b The second value. - * @return True if they are nearly equal, false otherwise. - **/ - private static boolean nearlyEqualTo(double a, double b) { - return -TOLERANCE < a - b && a - b < TOLERANCE; - } - - - /** Clears the network. */ - public void forget() { - super.forget(); - network = new OVector(); - } - - - /** - * Produces a set of scores indicating the degree to which each possible - * discrete classification value is associated with the given example - * object. These scores are just the dot product of each weight vector - * with the example vector. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - **/ - public ScoreSet scores(int[] exampleFeatures, double[] exampleValues) { - ScoreSet result = new ScoreSet(); - int N = network.size(); - - for (int l = 0; l < N; l++) { - double score = - ((BiasedRandomWeightVector) network.get(l)) - .dot(exampleFeatures, exampleValues); - result.put(labelLexicon.lookupKey(l).getStringValue(), score); + + + /** + * Finds the sum of the multipliers for a given value of theta. See Section 5.1 of Crammer and + * Singer (2003) for an explanation of what theta is. 
+ * + * @param theta There should exist a value for this parameter that causes this method to return + * zero. + * @param scores The dot products of the various weight vectors with the example vector, divided + * by the norm of the example vector squared. + * @param isLabel true at element i iff scores[i] is the + * dot product involving the weight vector corresponding to the example's label. + * @return The sum of the multipliers assuming the given value of theta. + **/ + private static double sumMultipliers(double theta, double[] scores, boolean[] isLabel) { + double result = 0; + for (int i = 0; i < scores.length; ++i) + result += getMultiplier(theta, scores[i], isLabel[i]); + return result; } - return result; - } - - - /** - * Returns the classification of the given example as a single feature - * instead of a {@link FeatureVector}. - * - * @param f The features array. - * @param v The values array. - * @return The classification of the example as a feature. - **/ - public Feature featureValue(int[] f, double[] v) { - double bestScore = Double.NEGATIVE_INFINITY; - int bestLabel = -1; - int N = network.size(); - - for (int l = 0; l < N; l++) { - double score = ((BiasedRandomWeightVector) network.get(l)).dot(f, v); - - if (score > bestScore) { - bestLabel = l; - bestScore = score; - } + + /** + * Determines if a is nearly equal to b based on the value of the + * {@link #TOLERANCE} member variable. + * + * @param a The first value. + * @param b The second value. + * @return True if they are nearly equal, false otherwise. + **/ + private static boolean nearlyEqualTo(double a, double b) { + return -TOLERANCE < a - b && a - b < TOLERANCE; } - if (bestLabel == -1) return null; - return predictions.get(bestLabel); - } - - - /** - * This implementation uses a winner-take-all comparison of the individual - * weight vectors' dot products. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. 
- * @return The discrete value of the best prediction. - **/ - public String discreteValue(int[] exampleFeatures, double[] exampleValues) { - return featureValue(exampleFeatures, exampleValues).getStringValue(); - } - - - /** - * This implementation uses a winner-take-all comparison of the individual - * weight vectors' dot products. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @return A single feature with the winning weight vector's associated - * value. - **/ - public FeatureVector classify(int[] exampleFeatures, double[] exampleValues) - { - return new FeatureVector(featureValue(exampleFeatures, exampleValues)); - } - - - /** - * Using this method, the winner-take-all competition is narrowed to - * involve only those labels contained in the specified list. The list - * must contain only Strings. - * - * @param example The example object. - * @param candidates A list of the only labels the example may take. - * @return The prediction as a feature or null if the network - * did not contain any of the specified labels. - **/ - public Feature valueOf(Object example, Collection candidates) { - Object[] array = getExampleArray(example, false); - return valueOf((int[]) array[0], (double[]) array[1], candidates); - } - - - /** - * Using this method, the winner-take-all competition is narrowed to - * involve only those labels contained in the specified list. The list - * must contain only Strings. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @param candidates A list of the only labels the example may take. - * @return The prediction as a feature or null if the network - * did not contain any of the specified labels. 
- **/ - public Feature valueOf(int[] exampleFeatures, double[] exampleValues, - Collection candidates) { - double bestScore = Double.NEGATIVE_INFINITY; - int bestValue = -1; - Iterator I = candidates.iterator(); - - if (I.hasNext()) { - if (conjunctiveLabels) - return conjunctiveValueOf(exampleFeatures, exampleValues, I); - - while (I.hasNext()) { - double score = Double.NEGATIVE_INFINITY; - String label = (String) I.next(); - Feature f = - new DiscretePrimitiveStringFeature( - labeler.containingPackage, labeler.name, "", label, - labeler.valueIndexOf(label), - (short) labeler.allowableValues().length); - - int key = -1; - if (labelLexicon.contains(f)) { - key = labelLexicon.lookup(f); - score = ((BiasedRandomWeightVector) network.get(key)) - .dot(exampleFeatures, exampleValues); - } - if (score > bestScore) { - bestValue = key; - bestScore = score; - } - } + /** Clears the network. */ + public void forget() { + super.forget(); + network = new OVector(); } - else { - int N = network.size(); - for (int l = 0; l < N; l++) { - double score = - ((BiasedRandomWeightVector) network.get(l)) - .dot(exampleFeatures, exampleValues); - - if (score > bestScore) { - bestValue = l; - bestScore = score; + + + /** + * Produces a set of scores indicating the degree to which each possible discrete classification + * value is associated with the given example object. These scores are just the dot product of + * each weight vector with the example vector. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + **/ + public ScoreSet scores(int[] exampleFeatures, double[] exampleValues) { + ScoreSet result = new ScoreSet(); + int N = network.size(); + + for (int l = 0; l < N; l++) { + double score = + ((BiasedRandomWeightVector) network.get(l)).dot(exampleFeatures, exampleValues); + result.put(labelLexicon.lookupKey(l).getStringValue(), score); } - } + + return result; } - return bestValue == -1 ? 
null : predictions.get(bestValue); - } - - - /** - * This method is a surrogate for - * {@link #valueOf(int[],double[],Collection)} when the labeler is known to - * produce conjunctive features. It is necessary because when given a - * string label from the collection, we will not know how to construct the - * appropriate conjunctive feature key for lookup in the label lexicon. - * So, we must go through each feature in the label lexicon and use - * {@link classify.Feature#valueEquals(String)}. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @param I An iterator over the set of labels to choose - * from. - * @return The prediction as a feature or null if the network - * did not contain any of the specified labels. - **/ - protected Feature conjunctiveValueOf( - int[] exampleFeatures, double[] exampleValues, Iterator I) { - double bestScore = Double.NEGATIVE_INFINITY; - int bestValue = -1; - int N = network.size(); - - while (I.hasNext()) { - String label = (String) I.next(); - - for (int i = 0; i < N; ++i) { - LinearThresholdUnit ltu = (LinearThresholdUnit) network.get(i); - if (ltu == null || !predictions.get(i).valueEquals(label)) - continue; - double score = ltu.score(exampleFeatures, exampleValues); - if (score > bestScore) { - bestScore = score; - bestValue = i; + + /** + * Returns the classification of the given example as a single feature instead of a + * {@link FeatureVector}. + * + * @param f The features array. + * @param v The values array. + * @return The classification of the example as a feature. 
+ **/ + public Feature featureValue(int[] f, double[] v) { + double bestScore = Double.NEGATIVE_INFINITY; + int bestLabel = -1; + int N = network.size(); + + for (int l = 0; l < N; l++) { + double score = ((BiasedRandomWeightVector) network.get(l)).dot(f, v); + + if (score > bestScore) { + bestLabel = l; + bestScore = score; + } } - break; - } + + if (bestLabel == -1) + return null; + return predictions.get(bestLabel); + } + + + /** + * This implementation uses a winner-take-all comparison of the individual weight vectors' dot + * products. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @return The discrete value of the best prediction. + **/ + public String discreteValue(int[] exampleFeatures, double[] exampleValues) { + return featureValue(exampleFeatures, exampleValues).getStringValue(); + } + + + /** + * This implementation uses a winner-take-all comparison of the individual weight vectors' dot + * products. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @return A single feature with the winning weight vector's associated value. + **/ + public FeatureVector classify(int[] exampleFeatures, double[] exampleValues) { + return new FeatureVector(featureValue(exampleFeatures, exampleValues)); + } + + + /** + * Using this method, the winner-take-all competition is narrowed to involve only those labels + * contained in the specified list. The list must contain only Strings. + * + * @param example The example object. + * @param candidates A list of the only labels the example may take. + * @return The prediction as a feature or null if the network did not contain any + * of the specified labels. 
+ **/ + public Feature valueOf(Object example, Collection candidates) { + Object[] array = getExampleArray(example, false); + return valueOf((int[]) array[0], (double[]) array[1], candidates); } - return bestValue == -1 ? null : predictions.get(bestValue); - } - - - /** - * Returns scores for only those labels in the given collection. If the - * given collection is empty, scores for all labels will be returned. If - * there is no {@link BiasedRandomWeightVector} associated with a given - * label from the collection, that label's score in the returned - * {@link ScoreSet} will be set to Double.NEGATIVE_INFINITY. - * - *

The elements of candidates must all be - * Strings. - * - * @param example The example object. - * @param candidates A list of the only labels the example may take. - * @return Scores for only those labels in candidates. - **/ - public ScoreSet scores(Object example, Collection candidates) { - Object[] array = getExampleArray(example, false); - return scores((int[]) array[0], (double[]) array[1], candidates); - } - - - /** - * Returns scores for only those labels in the given collection. If the - * given collection is empty, scores for all labels will be returned. If - * there is no {@link BiasedRandomWeightVector} associated with a given - * label from the collection, that label's score in the returned - * {@link ScoreSet} will be set to Double.NEGATIVE_INFINITY. - * - *

The elements of candidates must all be - * Strings. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @param candidates A list of the only labels the example may take. - * @return Scores for only those labels in candidates. - **/ - public ScoreSet scores(int[] exampleFeatures, double[] exampleValues, - Collection candidates) { - ScoreSet result = new ScoreSet(); - Iterator I = candidates.iterator(); - - if (I.hasNext()) { - if (conjunctiveLabels) - return conjunctiveScores(exampleFeatures, exampleValues, I); - - while (I.hasNext()) { - double score = Double.NEGATIVE_INFINITY; - String label = (String) I.next(); - Feature f = - new DiscretePrimitiveStringFeature( - labeler.containingPackage, labeler.name, "", label, - labeler.valueIndexOf(label), - (short) labeler.allowableValues().length); - - if (labelLexicon.contains(f)) { - int key = labelLexicon.lookup(f); - score = ((BiasedRandomWeightVector) network.get(key)) - .dot(exampleFeatures, exampleValues); - result.put(label.toString(), score); + + /** + * Using this method, the winner-take-all competition is narrowed to involve only those labels + * contained in the specified list. The list must contain only Strings. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @param candidates A list of the only labels the example may take. + * @return The prediction as a feature or null if the network did not contain any + * of the specified labels. 
+ **/ + public Feature valueOf(int[] exampleFeatures, double[] exampleValues, Collection candidates) { + double bestScore = Double.NEGATIVE_INFINITY; + int bestValue = -1; + Iterator I = candidates.iterator(); + + if (I.hasNext()) { + if (conjunctiveLabels) + return conjunctiveValueOf(exampleFeatures, exampleValues, I); + + while (I.hasNext()) { + double score = Double.NEGATIVE_INFINITY; + String label = (String) I.next(); + Feature f = + new DiscretePrimitiveStringFeature(labeler.containingPackage, labeler.name, + "", label, labeler.valueIndexOf(label), + (short) labeler.allowableValues().length); + + int key = -1; + if (labelLexicon.contains(f)) { + key = labelLexicon.lookup(f); + score = + ((BiasedRandomWeightVector) network.get(key)).dot(exampleFeatures, + exampleValues); + } + + if (score > bestScore) { + bestValue = key; + bestScore = score; + } + } + } else { + int N = network.size(); + for (int l = 0; l < N; l++) { + double score = + ((BiasedRandomWeightVector) network.get(l)).dot(exampleFeatures, + exampleValues); + + if (score > bestScore) { + bestValue = l; + bestScore = score; + } + } } - } + + return bestValue == -1 ? null : predictions.get(bestValue); } - else { - int N = network.size(); - for (int l = 0; l < N; l++) { - double score = - ((BiasedRandomWeightVector) network.get(l)) - .dot(exampleFeatures, exampleValues); - result.put(labelLexicon.lookupKey(l).getStringValue(), score); - } + + + /** + * This method is a surrogate for {@link #valueOf(int[],double[],Collection)} when the labeler + * is known to produce conjunctive features. It is necessary because when given a string label + * from the collection, we will not know how to construct the appropriate conjunctive feature + * key for lookup in the label lexicon. So, we must go through each feature in the label lexicon + * and use {@link Feature#valueEquals(String)}. + * + * @param exampleFeatures The example's array of feature indices. 
+ * @param exampleValues The example's array of feature values. + * @param I An iterator over the set of labels to choose from. + * @return The prediction as a feature or null if the network did not contain any + * of the specified labels. + **/ + protected Feature conjunctiveValueOf(int[] exampleFeatures, double[] exampleValues, Iterator I) { + double bestScore = Double.NEGATIVE_INFINITY; + int bestValue = -1; + int N = network.size(); + + while (I.hasNext()) { + String label = (String) I.next(); + + for (int i = 0; i < N; ++i) { + LinearThresholdUnit ltu = (LinearThresholdUnit) network.get(i); + if (ltu == null || !predictions.get(i).valueEquals(label)) + continue; + double score = ltu.score(exampleFeatures, exampleValues); + if (score > bestScore) { + bestScore = score; + bestValue = i; + } + break; + } + } + + return bestValue == -1 ? null : predictions.get(bestValue); } - return result; - } - - - /** - * This method is a surrogate for - * {@link #scores(int[],double[],Collection)} when the labeler is known to - * produce conjunctive features. It is necessary because when given a - * string label from the collection, we will not know how to construct the - * appropriate conjunctive feature key for lookup in the label lexicon. - * So, we must go through each feature in the label lexicon and use - * {@link classify.Feature#valueEquals(String)}. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @param I An iterator over the set of labels to choose - * from. - * @return The label chosen by this classifier or null if the - * network did not contain any of the specified labels. 
- **/ - protected ScoreSet conjunctiveScores(int[] exampleFeatures, - double[] exampleValues, Iterator I) { - ScoreSet result = new ScoreSet(); - int N = network.size(); - - while (I.hasNext()) { - String label = (String) I.next(); - - for (int i = 0; i < N; ++i) { - LinearThresholdUnit ltu = (LinearThresholdUnit) network.get(i); - if (ltu == null || !labelLexicon.lookupKey(i).valueEquals(label)) - continue; - double score = ltu.score(exampleFeatures, exampleValues); - result.put(label.toString(), score); - break; - } + + /** + * Returns scores for only those labels in the given collection. If the given collection is + * empty, scores for all labels will be returned. If there is no + * {@link BiasedRandomWeightVector} associated with a given label from the collection, that + * label's score in the returned {@link ScoreSet} will be set to + * Double.NEGATIVE_INFINITY. + * + *

+ * The elements of candidates must all be Strings. + * + * @param example The example object. + * @param candidates A list of the only labels the example may take. + * @return Scores for only those labels in candidates. + **/ + public ScoreSet scores(Object example, Collection candidates) { + Object[] array = getExampleArray(example, false); + return scores((int[]) array[0], (double[]) array[1], candidates); } - return result; - } + /** + * Returns scores for only those labels in the given collection. If the given collection is + * empty, scores for all labels will be returned. If there is no + * {@link BiasedRandomWeightVector} associated with a given label from the collection, that + * label's score in the returned {@link ScoreSet} will be set to + * Double.NEGATIVE_INFINITY. + * + *

+ * The elements of candidates must all be Strings. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @param candidates A list of the only labels the example may take. + * @return Scores for only those labels in candidates. + **/ + public ScoreSet scores(int[] exampleFeatures, double[] exampleValues, Collection candidates) { + ScoreSet result = new ScoreSet(); + Iterator I = candidates.iterator(); + + if (I.hasNext()) { + if (conjunctiveLabels) + return conjunctiveScores(exampleFeatures, exampleValues, I); + + while (I.hasNext()) { + double score = Double.NEGATIVE_INFINITY; + String label = (String) I.next(); + Feature f = + new DiscretePrimitiveStringFeature(labeler.containingPackage, labeler.name, + "", label, labeler.valueIndexOf(label), + (short) labeler.allowableValues().length); + + if (labelLexicon.contains(f)) { + int key = labelLexicon.lookup(f); + score = + ((BiasedRandomWeightVector) network.get(key)).dot(exampleFeatures, + exampleValues); + result.put(label.toString(), score); + } + } + } else { + int N = network.size(); + for (int l = 0; l < N; l++) { + double score = + ((BiasedRandomWeightVector) network.get(l)).dot(exampleFeatures, + exampleValues); + result.put(labelLexicon.lookupKey(l).getStringValue(), score); + } + } - /** - * Writes the algorithm's internal representation as text. - * - * @param out The output stream. - **/ - public void write(PrintStream out) { - int N = network.size(); - for (int i = 0; i < N; ++i) { - out.println("label: " + predictions.get(i).getStringValue()); - ((BiasedRandomWeightVector) network.get(i)).write(out, lexicon); + return result; } - out.println("End of SparseMIRA"); - } - - - /** - * Writes the learned function's internal representation in binary form. - * - * @param out The output stream. 
- **/ - public void write(ExceptionlessOutputStream out) { - super.write(out); - int N = network.size(); - out.writeInt(N); - for (int i = 0; i < N; ++i) - ((BiasedRandomWeightVector) network.get(i)).write(out); - } - - - /** - * Reads the binary representation of a learner with this object's run-time - * type, overwriting any and all learned or manually specified parameters - * as well as the label lexicon but without modifying the feature lexicon. - * - * @param in The input stream. - **/ - public void read(ExceptionlessInputStream in) { - super.read(in); - int N = in.readInt(); - network = new OVector(N); - for (int i = 0; i < N; ++i) - network.add(SparseWeightVector.readWeightVector(in)); - } - - - /** Returns a deep clone of this learning algorithm. */ - public Object clone() { - SparseMIRA clone = null; - - try { clone = (SparseMIRA) super.clone(); } - catch (Exception e) { - System.err.println("Error cloning SparseMIRA: " + e); - e.printStackTrace(); - System.exit(1); + + /** + * This method is a surrogate for {@link #scores(int[],double[],Collection)} when the labeler is + * known to produce conjunctive features. It is necessary because when given a string label from + * the collection, we will not know how to construct the appropriate conjunctive feature key for + * lookup in the label lexicon. So, we must go through each feature in the label lexicon and use + * {@link Feature#valueEquals(String)}. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @param I An iterator over the set of labels to choose from. + * @return The label chosen by this classifier or null if the network did not + * contain any of the specified labels. 
+ **/ + protected ScoreSet conjunctiveScores(int[] exampleFeatures, double[] exampleValues, Iterator I) { + ScoreSet result = new ScoreSet(); + int N = network.size(); + + while (I.hasNext()) { + String label = (String) I.next(); + + for (int i = 0; i < N; ++i) { + LinearThresholdUnit ltu = (LinearThresholdUnit) network.get(i); + if (ltu == null || !labelLexicon.lookupKey(i).valueEquals(label)) + continue; + double score = ltu.score(exampleFeatures, exampleValues); + result.put(label.toString(), score); + break; + } + } + + return result; } - int N = network.size(); - clone.network = new OVector(N); - for (int i = 0; i < N; ++i) - clone.network.add(((BiasedRandomWeightVector) network.get(i)).clone()); - return clone; - } + /** + * Writes the algorithm's internal representation as text. + * + * @param out The output stream. + **/ + public void write(PrintStream out) { + int N = network.size(); + for (int i = 0; i < N; ++i) { + out.println("label: " + predictions.get(i).getStringValue()); + ((BiasedRandomWeightVector) network.get(i)).write(out, lexicon); + } + out.println("End of SparseMIRA"); + } - /** - * Simply a container for all of {@link SparseMIRA}'s - * configurable parameters. This class appears here for completeness; the - * algorithm has no parameters to set. - * - * @author Nick Rizzolo - **/ - public static class Parameters extends Learner.Parameters - { - /** Sets all the default values. */ - public Parameters() { } + + /** + * Writes the learned function's internal representation in binary form. + * + * @param out The output stream. + **/ + public void write(ExceptionlessOutputStream out) { + super.write(out); + int N = network.size(); + out.writeInt(N); + for (int i = 0; i < N; ++i) + ((BiasedRandomWeightVector) network.get(i)).write(out); + } /** - * Sets the parameters from the parent's parameters object, giving - * defaults to all parameters declared in this object. 
+ * Reads the binary representation of a learner with this object's run-time type, overwriting + * any and all learned or manually specified parameters as well as the label lexicon but without + * modifying the feature lexicon. + * + * @param in The input stream. **/ - public Parameters(Learner.Parameters p) { super(p); } + public void read(ExceptionlessInputStream in) { + super.read(in); + int N = in.readInt(); + network = new OVector(N); + for (int i = 0; i < N; ++i) + network.add(SparseWeightVector.readWeightVector(in)); + } - /** Copy constructor. */ - public Parameters(Parameters p) { super(p); } + /** Returns a deep clone of this learning algorithm. */ + public Object clone() { + SparseMIRA clone = null; + + try { + clone = (SparseMIRA) super.clone(); + } catch (Exception e) { + System.err.println("Error cloning SparseMIRA: " + e); + e.printStackTrace(); + System.exit(1); + } + + int N = network.size(); + clone.network = new OVector(N); + for (int i = 0; i < N; ++i) + clone.network.add(((BiasedRandomWeightVector) network.get(i)).clone()); + + return clone; + } /** - * Calls the appropriate Learner.setParameters(Parameters) - * method for this Parameters object. - * - * @param l The learner whose parameters will be set. + * Simply a container for all of {@link SparseMIRA}'s configurable parameters. This class + * appears here for completeness; the algorithm has no parameters to set. + * + * @author Nick Rizzolo **/ - public void setParameters(Learner l) { } - } -} + public static class Parameters extends Learner.Parameters { + /** Sets all the default values. */ + public Parameters() {} + + /** + * Sets the parameters from the parent's parameters object, giving defaults to all + * parameters declared in this object. + **/ + public Parameters(Learner.Parameters p) { + super(p); + } + + + /** Copy constructor. 
*/ + public Parameters(Parameters p) { + super(p); + } + + + /** + * Calls the appropriate Learner.setParameters(Parameters) method for this + * Parameters object. + * + * @param l The learner whose parameters will be set. + **/ + public void setParameters(Learner l) {} + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseNetworkLearner.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseNetworkLearner.java index 7a31c6a0..822fc1fd 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseNetworkLearner.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseNetworkLearner.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.learn; @@ -13,781 +10,764 @@ import java.io.PrintStream; import java.util.Collection; import java.util.Iterator; +import java.util.Map.Entry; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.OVector; import edu.illinois.cs.cogcomp.lbjava.classify.Classifier; import edu.illinois.cs.cogcomp.lbjava.classify.DiscretePrimitiveStringFeature; import edu.illinois.cs.cogcomp.lbjava.classify.Feature; import edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector; import edu.illinois.cs.cogcomp.lbjava.classify.ScoreSet; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessInputStream; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessOutputStream; -import edu.illinois.cs.cogcomp.lbjava.util.OVector; +import edu.illinois.cs.cogcomp.lbjava.learn.featurepruning.SparseNetworkOptimizer; /** - * A SparseNetworkLearner uses multiple - * {@link LinearThresholdUnit}s to make a multi-class classification. - * Any {@link LinearThresholdUnit} may be used, so long as it implements its - * clone() method and a public constructor that takes no - * arguments. - * - *

It is assumed that a single discrete label feature will be produced in - * association with each example object. A feature taking one of the values - * observed in that label feature will be produced by the learned classifier. - * - *

This algorithm's user-configurable parameters are stored in member - * fields of this class. They may be set via either a constructor that names - * each parameter explicitly or a constructor that takes an instance of - * {@link SparseNetworkLearner.Parameters Parameters} as input. - * The documentation in each member field in this class indicates the default - * value of the associated parameter when using the former type of - * constructor. The documentation of the associated member field in the - * {@link SparseNetworkLearner.Parameters Parameters} class - * indicates the default value of the parameter when using the latter type of - * constructor. - * - * @author Nick Rizzolo + * A SparseNetworkLearner uses multiple {@link LinearThresholdUnit}s to make a + * multi-class classification. Any {@link LinearThresholdUnit} may be used, so long as it implements + * its clone() method and a public constructor that takes no arguments. + * + *

+ * It is assumed that a single discrete label feature will be produced in association with each + * example object. A feature taking one of the values observed in that label feature will be + * produced by the learned classifier. + * + *

+ * This algorithm's user-configurable parameters are stored in member fields of this class. They may + * be set via either a constructor that names each parameter explicitly or a constructor that takes + * an instance of {@link SparseNetworkLearner.Parameters Parameters} as input. The documentation in + * each member field in this class indicates the default value of the associated parameter when + * using the former type of constructor. The documentation of the associated member field in the + * {@link SparseNetworkLearner.Parameters Parameters} class indicates the default value of the + * parameter when using the latter type of constructor. + * + * @author Nick Rizzolo **/ -public class SparseNetworkLearner extends Learner -{ - private static final long serialVersionUID = 1L; - - -/** Default for {@link #baseLTU}. */ - public static final LinearThresholdUnit defaultBaseLTU = new SparseAveragedPerceptron(); - - /** - * The underlying algorithm used to learn each class separately as a binary - * classifier; default {@link #defaultBaseLTU}. - **/ - protected LinearThresholdUnit baseLTU; - - /** - * A collection of the linear threshold units used to learn each label, - * indexed by the label. - **/ - protected OVector network; - - /** The total number of examples in the training data, or 0 if unknown. */ - protected int numExamples; - - /** - * The total number of distinct features in the training data, or 0 if - * unknown. - **/ - protected int numFeatures; - - /** Whether or not this learner's labeler produces conjunctive features. */ - protected boolean conjunctiveLabels; - - - /** - * Instantiates this multi-class learner with the default learning - * algorithm: {@link #defaultBaseLTU}. - **/ - public SparseNetworkLearner() { this(""); } - - /** - * Instantiates this multi-class learner using the specified algorithm to - * learn each class separately as a binary classifier. This constructor - * will normally only be called by the compiler. 
- * - * @param ltu The linear threshold unit used to learn binary classifiers. - **/ - public SparseNetworkLearner(LinearThresholdUnit ltu) { this("", ltu); } - - /** - * Initializing constructor. Sets all member variables to their associated - * settings in the {@link SparseNetworkLearner.Parameters} object. - * - * @param p The settings of all parameters. - **/ - public SparseNetworkLearner(Parameters p) { this("", p); } - - /** - * Instantiates this multi-class learner with the default learning - * algorithm: {@link #defaultBaseLTU}. - * - * @param n The name of the classifier. - **/ - public SparseNetworkLearner(String n) { this(n, new Parameters()); } - - /** - * Instantiates this multi-class learner using the specified algorithm to - * learn each class separately as a binary classifier. - * - * @param n The name of the classifier. - * @param ltu The linear threshold unit used to learn binary classifiers. - **/ - public SparseNetworkLearner(String n, LinearThresholdUnit ltu) { - super(n); - Parameters p = new Parameters(); - p.baseLTU = ltu; - setParameters(p); - network = new OVector(); - } - - /** - * Initializing constructor. Sets all member variables to their associated - * settings in the {@link SparseNetworkLearner.Parameters} object. - * - * @param n The name of the classifier. - * @param p The settings of all parameters. - **/ - public SparseNetworkLearner(String n, Parameters p) { - super(n); - setParameters(p); - network = new OVector(); - } - - public int getNumExamples() { return numExamples; } - - public int getNumFeatures() { return numFeatures; } - - public LinearThresholdUnit getBaseLTU() { return baseLTU; } - - public OVector getNetwork() { return network; } - - /** - * Sets the values of parameters that control the behavior of this learning - * algorithm. - * - * @param p The parameters. 
- **/ - public void setParameters(Parameters p) { - if (!p.baseLTU.getOutputType().equals("discrete")) { - System.err.println( - "LBJava WARNING: SparseNetworkLearner will only work with a " - + "LinearThresholdUnit that returns discrete."); - System.err.println( - " The given LTU, " + p.baseLTU.getClass().getName() - + ", returns " + p.baseLTU.getOutputType() + "."); - } - - setLTU(p.baseLTU); - } - - - /** - * Retrieves the parameters that are set in this learner. - * - * @return An object containing all the values of the parameters that - * control the behavior of this learning algorithm. - **/ - public Learner.Parameters getParameters() { - Parameters p = new Parameters(super.getParameters()); - p.baseLTU = baseLTU; - return p; - } - - - /** - * Sets the baseLTU variable. This method will not - * have any effect on the LTUs that already exist in the network. However, - * new LTUs created after this method is executed will be of the same type - * as the object specified. - * - * @param ltu The new LTU. - **/ - public void setLTU(LinearThresholdUnit ltu) { - baseLTU = ltu; - baseLTU.name = name + "$baseLTU"; - } - - - /** - * Sets the labeler. - * - * @param l A labeling classifier. - **/ - public void setLabeler(Classifier l) { - if (getClass().getName().contains("SparseNetworkLearner") - && !l.getOutputType().equals("discrete")) { - System.err.println( - "LBJava WARNING: SparseNetworkLearner will only work with a " - + "label classifier that returns discrete."); - System.err.println( - " The given label classifier, " + l.getClass().getName() - + ", returns " + l.getOutputType() + "."); - } - - super.setLabeler(l); - } - - - /** - * Sets the extractor. - * - * @param e A feature extracting classifier. 
- **/ - public void setExtractor(Classifier e) { - super.setExtractor(e); - baseLTU.setExtractor(e); - int N = network.size(); - - for (int i = 0; i < N; ++i) - ((LinearThresholdUnit) network.get(i)).setExtractor(e); - } - - /** - * Create a {@link LinearThresholdUnit} and add it to the network - * @param label The label associated with the LTU - */ - public void setNetworkLabel(int label) { - LinearThresholdUnit ltu = (LinearThresholdUnit) baseLTU.clone(); - ltu.initialize(numExamples, numFeatures); - network.set(label, ltu); - } - - - /** - * Each example is treated as a positive example for the linear threshold - * unit associated with the label's value that is active for the example - * and as a negative example for all other linear threshold units in the - * network. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @param exampleLabels The example's label(s). - * @param labelValues The labels' values. - **/ - public void learn(int[] exampleFeatures, double[] exampleValues, - int[] exampleLabels, double[] labelValues) { - int label = exampleLabels[0]; - int N = network.size(); - - if (label >= N || network.get(label) == null) { - conjunctiveLabels |= labelLexicon.lookupKey(label).isConjunctive(); - - LinearThresholdUnit ltu = (LinearThresholdUnit) baseLTU.clone(); - ltu.initialize(numExamples, numFeatures); - network.set(label, ltu); - N = label + 1; - } - - int[] l = new int[1]; - for (int i = 0; i < N; ++i) { - LinearThresholdUnit ltu = (LinearThresholdUnit) network.get(i); - if (ltu == null) continue; - - l[0] = (i == label) ? 1 : 0; - ltu.learn(exampleFeatures, exampleValues, l, labelValues); - } - } - - - /** Simply calls doneLearning() on every LTU in the network. 
*/ - public void doneLearning() { - super.doneLearning(); - int N = network.size(); - for (int i = 0; i < N; ++i) { - LinearThresholdUnit ltu = (LinearThresholdUnit) network.get(i); - if (ltu == null) continue; - ltu.doneLearning(); - } - } - - - /** Sets the number of examples and features. */ - public void initialize(int ne, int nf) { - numExamples = ne; - numFeatures = nf; - } - - - /** Simply calls {@link LinearThresholdUnit#doneWithRound()} on every - LTU in the network. */ - public void doneWithRound() { - super.doneWithRound(); - int N = network.size(); - for (int i = 0; i < N; ++i) { - LinearThresholdUnit ltu = (LinearThresholdUnit) network.get(i); - if (ltu == null) continue; - ltu.doneWithRound(); - } - } - - - /** Clears the network. */ - public void forget() { - super.forget(); - network = new OVector(); - } - - - /** - * Returns scores for only those labels in the given collection. If the - * given collection is empty, scores for all labels will be returned. If - * there is no {@link LinearThresholdUnit} associated with a given label - * from the collection, that label's score in the returned {@link ScoreSet} - * will be set to Double.NEGATIVE_INFINITY. - * - *

The elements of candidates must all be - * Strings. - * - * @param example The example object. - * @param candidates A list of the only labels the example may take. - * @return Scores for only those labels in candidates. - **/ - public ScoreSet scores(Object example, Collection candidates) { - Object[] exampleArray = getExampleArray(example, false); - return - scores((int[]) exampleArray[0], (double[]) exampleArray[1], candidates); - } - - - /** - * Returns scores for only those labels in the given collection. If the - * given collection is empty, scores for all labels will be returned. If - * there is no {@link LinearThresholdUnit} associated with a given label - * from the collection, that label's score in the returned {@link ScoreSet} - * will be set to Double.NEGATIVE_INFINITY. - * - *

The elements of candidates must all be - * Strings. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @param candidates A list of the only labels the example may take. - * @return Scores for only those labels in candidates. - **/ - public ScoreSet scores(int[] exampleFeatures, double[] exampleValues, - Collection candidates) { - ScoreSet result = new ScoreSet(); - Iterator I = candidates.iterator(); - - if (I.hasNext()) { - if (conjunctiveLabels) - return conjunctiveScores(exampleFeatures, exampleValues, I); - - while (I.hasNext()) { - String label = (String) I.next(); - Feature f = - new DiscretePrimitiveStringFeature( - labeler.containingPackage, labeler.name, "", label, - labeler.valueIndexOf(label), - (short) labeler.allowableValues().length); - - if (labelLexicon.contains(f)) { - int key = labelLexicon.lookup(f); - LinearThresholdUnit ltu = (LinearThresholdUnit) network.get(key); - if (ltu != null) - result.put(label, - ltu.score(exampleFeatures, exampleValues) - - ltu.getThreshold()); +public class SparseNetworkLearner extends Learner { + private static final long serialVersionUID = 1L; + + /** Default for {@link #baseLTU}. */ + public static final LinearThresholdUnit defaultBaseLTU = new SparseAveragedPerceptron(); + + /** + * The underlying algorithm used to learn each class separately as a binary classifier; default + * {@link #defaultBaseLTU}. + **/ + protected LinearThresholdUnit baseLTU; + + /** + * A collection of the linear threshold units used to learn each label, indexed by the label. + **/ + protected OVector network; + + /** The total number of examples in the training data, or 0 if unknown. */ + protected int numExamples; + + /** + * The total number of distinct features in the training data, or 0 if unknown. + **/ + protected int numFeatures; + + /** Whether or not this learner's labeler produces conjunctive features. 
*/ + protected boolean conjunctiveLabels; + + /** + * Instantiates this multi-class learner with the default learning algorithm: + * {@link #defaultBaseLTU}. + **/ + public SparseNetworkLearner() { + this(""); + } + + /** + * Instantiates this multi-class learner using the specified algorithm to learn each class + * separately as a binary classifier. This constructor will normally only be called by the + * compiler. + * + * @param ltu The linear threshold unit used to learn binary classifiers. + **/ + public SparseNetworkLearner(LinearThresholdUnit ltu) { + this("", ltu); + } + + /** + * Initializing constructor. Sets all member variables to their associated settings in the + * {@link SparseNetworkLearner.Parameters} object. + * + * @param p The settings of all parameters. + **/ + public SparseNetworkLearner(Parameters p) { + this("", p); + } + + /** + * Instantiates this multi-class learner with the default learning algorithm: + * {@link #defaultBaseLTU}. + * + * @param n The name of the classifier. + **/ + public SparseNetworkLearner(String n) { + this(n, new Parameters()); + } + + /** + * Instantiates this multi-class learner using the specified algorithm to learn each class + * separately as a binary classifier. + * + * @param n The name of the classifier. + * @param ltu The linear threshold unit used to learn binary classifiers. + **/ + public SparseNetworkLearner(String n, LinearThresholdUnit ltu) { + super(n); + Parameters p = new Parameters(); + p.baseLTU = ltu; + setParameters(p); + network = new OVector(); + } + + /** + * Initializing constructor. Sets all member variables to their associated settings in the + * {@link SparseNetworkLearner.Parameters} object. + * + * @param n The name of the classifier. + * @param p The settings of all parameters. 
+ **/ + public SparseNetworkLearner(String n, Parameters p) { + super(n); + setParameters(p); + network = new OVector(); + } + + public int getNumExamples() { + return numExamples; + } + + public int getNumFeatures() { + return numFeatures; + } + + public LinearThresholdUnit getBaseLTU() { + return baseLTU; + } + + public OVector getNetwork() { + return network; + } + + public boolean isUsingConjunctiveLabels() { + return conjunctiveLabels; + } + + public void setConjunctiveLabels(boolean setConjunctiveLabels) { + conjunctiveLabels = setConjunctiveLabels; + } + + /** + * returns the i-th LTU; the type of this depends on the type of the baseLTU (see above). + */ + public Object getLTU(int i) { + return network.get(i); + } + + /** + * Sets the values of parameters that control the behavior of this learning algorithm. + * + * @param p The parameters. + **/ + public void setParameters(Parameters p) { + if (!p.baseLTU.getOutputType().equals("discrete")) { + System.err.println("LBJava WARNING: SparseNetworkLearner will only work with a " + + "LinearThresholdUnit that returns discrete."); + System.err.println(" The given LTU, " + p.baseLTU.getClass().getName() + + ", returns " + p.baseLTU.getOutputType() + "."); } - } - } - else { - int N = network.size(); - for (int l = 0; l < N; ++l) { - LinearThresholdUnit ltu = (LinearThresholdUnit) network.get(l); - if (ltu == null) continue; - result.put(labelLexicon.lookupKey(l).getStringValue(), - ltu.score(exampleFeatures, exampleValues) - - ltu.getThreshold()); - } - } - - return result; - } - - - /** - * This method is a surrogate for - * {@link #scores(int[],double[],Collection)} when the labeler is known to - * produce conjunctive features. It is necessary because when given a - * string label from the collection, we will not know how to construct the - * appropriate conjunctive feature key for lookup in the label lexicon. 
- * So, we must go through each feature in the label lexicon and use - * {@link edu.illinois.cs.cogcomp.lbjava.classify.Feature#valueEquals(String)}. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @param I An iterator over the set of labels to choose - * from. - * @return The label chosen by this classifier or null if the - * network did not contain any of the specified labels. - **/ - protected ScoreSet conjunctiveScores(int[] exampleFeatures, - double[] exampleValues, Iterator I) { - ScoreSet result = new ScoreSet(); - int N = network.size(); - - while (I.hasNext()) { - String label = (String) I.next(); - - for (int i = 0; i < N; ++i) { - LinearThresholdUnit ltu = (LinearThresholdUnit) network.get(i); - if (ltu == null || !labelLexicon.lookupKey(i).valueEquals(label)) - continue; - double score = ltu.score(exampleFeatures, exampleValues); - result.put(label, score); - break; - } - } - - return result; - } - - - /** - * Produces a set of scores indicating the degree to which each possible - * discrete classification value is associated with the given example - * object. These scores are just the scores of each LTU's positive - * classification as produced by - * LinearThresholdUnit.scores(Object). - * - * @see LinearThresholdUnit#scores(Object) - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. 
- * @return The set of scores produced by the LTUs - **/ - public ScoreSet scores(int[] exampleFeatures, double[] exampleValues) { - ScoreSet result = new ScoreSet(); - int N = network.size(); - - for (int l = 0; l < N; l++) { - LinearThresholdUnit ltu = (LinearThresholdUnit) network.get(l); - if (ltu == null) continue; - - result.put(labelLexicon.lookupKey(l).getStringValue(), - ltu.score(exampleFeatures, exampleValues) - - ltu.getThreshold()); - } - - return result; - } - - - /** - * Returns the classification of the given example as a single feature - * instead of a {@link FeatureVector}. - * - * @param f The features array. - * @param v The values array. - * @return The classification of the example as a feature. - **/ - public Feature featureValue(int[] f, double[] v) { - double bestScore = Double.NEGATIVE_INFINITY; - int bestValue = -1; - int N = network.size(); - - for (int l = 0; l < N; l++) { - LinearThresholdUnit ltu = (LinearThresholdUnit) network.get(l); - if (ltu == null) continue; - double score = ltu.score(f, v); - - if (score > bestScore) { - bestValue = l; - bestScore = score; - } - } - - return bestValue == -1 ? null : predictions.get(bestValue); - } - - - /** - * This implementation uses a winner-take-all comparison of the outputs - * from the individual linear threshold units' score methods. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @return A single value with the winning linear threshold unit's - * associated value. - **/ - public String discreteValue(int[] exampleFeatures, double[] exampleValues) { - return featureValue(exampleFeatures, exampleValues).getStringValue(); - } - - - /** - * This implementation uses a winner-take-all comparison of the outputs - * from the individual linear threshold units' score methods. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. 
- * @return A single feature with the winning linear threshold unit's - * associated value. - **/ - public FeatureVector classify(int[] exampleFeatures, double[] exampleValues) - { - return new FeatureVector(featureValue(exampleFeatures, exampleValues)); - } - - - /** - * Using this method, the winner-take-all competition is narrowed to - * involve only those labels contained in the specified list. The list - * must contain only Strings. - * - * @param example The example object. - * @param candidates A list of the only labels the example may take. - * @return The prediction as a feature or null if the network - * did not contain any of the specified labels. - **/ - public Feature valueOf(Object example, Collection candidates) { - Object[] exampleArray = getExampleArray(example, false); - return - valueOf((int[]) exampleArray[0], (double[]) exampleArray[1], - candidates); - } - - - /** - * Using this method, the winner-take-all competition is narrowed to - * involve only those labels contained in the specified list. The list - * must contain only Strings. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @param candidates A list of the only labels the example may take. - * @return The prediction as a feature or null if the network - * did not contain any of the specified labels. 
- **/ - public Feature valueOf(int[] exampleFeatures, double[] exampleValues, - Collection candidates) { - double bestScore = Double.NEGATIVE_INFINITY; - int bestValue = -1; - Iterator cI = candidates.iterator(); - - if (cI.hasNext()) { - if (conjunctiveLabels) - return conjunctiveValueOf(exampleFeatures, exampleValues, cI); - - while (cI.hasNext()) { - double score = Double.NEGATIVE_INFINITY; - String label = (String) cI.next(); - - Feature f = - new DiscretePrimitiveStringFeature( - labeler.containingPackage, labeler.name, "", label, - labeler.valueIndexOf(label), - (short) labeler.allowableValues().length); - - int key = -1; - if (labelLexicon.contains(f)) { - key = labelLexicon.lookup(f); - LinearThresholdUnit ltu = (LinearThresholdUnit) network.get(key); - if (ltu != null) score = ltu.score(exampleFeatures, exampleValues); + + setLTU(p.baseLTU); + } + + /** + * Retrieves the parameters that are set in this learner. + * + * @return An object containing all the values of the parameters that control the behavior of + * this learning algorithm. + **/ + public Learner.Parameters getParameters() { + Parameters p = new Parameters(super.getParameters()); + p.baseLTU = baseLTU; + return p; + } + + /** + * Sets the baseLTU variable. This method will not have any effect on the + * LTUs that already exist in the network. However, new LTUs created after this method is + * executed will be of the same type as the object specified. + * + * @param ltu The new LTU. + **/ + public void setLTU(LinearThresholdUnit ltu) { + baseLTU = ltu; + baseLTU.name = name + "$baseLTU"; + } + + /** + * Sets the labeler. + * + * @param l A labeling classifier. 
+ **/ + public void setLabeler(Classifier l) { + if (getClass().getName().contains("SparseNetworkLearner") + && !l.getOutputType().equals("discrete")) { + System.err.println("LBJava WARNING: SparseNetworkLearner will only work with a " + + "label classifier that returns discrete."); + System.err.println(" The given label classifier, " + l.getClass().getName() + + ", returns " + l.getOutputType() + "."); } - if (score > bestScore) { - bestValue = key; - bestScore = score; + super.setLabeler(l); + } + + /** + * Sets the extractor. + * + * @param e A feature extracting classifier. + **/ + public void setExtractor(Classifier e) { + super.setExtractor(e); + baseLTU.setExtractor(e); + int N = network.size(); + + for (int i = 0; i < N; ++i) + ((LinearThresholdUnit) network.get(i)).setExtractor(e); + } + + /** + * Create a {@link LinearThresholdUnit} and add it to the network + * + * @param label The label associated with the LTU + */ + public void setNetworkLabel(int label) { + LinearThresholdUnit ltu = (LinearThresholdUnit) baseLTU.clone(); + ltu.initialize(numExamples, numFeatures); + network.set(label, ltu); + } + + /** + * Each example is treated as a positive example for the linear threshold unit associated with + * the label's value that is active for the example and as a negative example for all other + * linear threshold units in the network. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @param exampleLabels The example's label(s). + * @param labelValues The labels' values. 
+ **/ + public void learn(int[] exampleFeatures, double[] exampleValues, int[] exampleLabels, + double[] labelValues) { + int label = exampleLabels[0]; + int N = network.size(); + + if (label >= N || network.get(label) == null) { + conjunctiveLabels |= labelLexicon.lookupKey(label).isConjunctive(); + + LinearThresholdUnit ltu = (LinearThresholdUnit) baseLTU.clone(); + ltu.initialize(numExamples, numFeatures); + network.set(label, ltu); + N = label + 1; + } + + int[] l = new int[1]; + for (int i = 0; i < N; ++i) { + LinearThresholdUnit ltu = (LinearThresholdUnit) network.get(i); + if (ltu == null) + continue; + + l[0] = (i == label) ? 1 : 0; + ltu.learn(exampleFeatures, exampleValues, l, labelValues); + } + } + + /** + * When we complete learning, we will do an optimization. + */ + public void doneTraining() { + super.doneTraining(); + + // do the optimization + SparseNetworkOptimizer optimizer = new SparseNetworkOptimizer(this); + optimizer.optimize(); + } + + /** Simply calls doneLearning() on every LTU in the network. */ + public void doneLearning() { + super.doneLearning(); + int N = network.size(); + for (int i = 0; i < N; ++i) { + LinearThresholdUnit ltu = (LinearThresholdUnit) network.get(i); + if (ltu == null) + continue; + ltu.doneLearning(); + } + } + + /** Sets the number of examples and features. */ + public void initialize(int ne, int nf) { + numExamples = ne; + numFeatures = nf; + } + + /** + * Simply calls {@link LinearThresholdUnit#doneWithRound()} on every LTU in the network. + */ + public void doneWithRound() { + super.doneWithRound(); + int N = network.size(); + for (int i = 0; i < N; ++i) { + LinearThresholdUnit ltu = (LinearThresholdUnit) network.get(i); + if (ltu == null) + continue; + ltu.doneWithRound(); + } + } + + /** Clears the network. */ + public void forget() { + super.forget(); + network = new OVector(); + } + + /** + * Returns scores for only those labels in the given collection. 
If the given collection is + * empty, scores for all labels will be returned. If there is no {@link LinearThresholdUnit} + * associated with a given label from the collection, that label's score in the returned + * {@link ScoreSet} will be set to Double.NEGATIVE_INFINITY. + * + *

+ * The elements of candidates must all be Strings. + * + * @param example The example object. + * @param candidates A list of the only labels the example may take. + * @return Scores for only those labels in candidates. + **/ + public ScoreSet scores(Object example, Collection candidates) { + Object[] exampleArray = getExampleArray(example, false); + return scores((int[]) exampleArray[0], (double[]) exampleArray[1], candidates); + } + + /** + * Returns scores for only those labels in the given collection. If the given collection is + * empty, scores for all labels will be returned. If there is no {@link LinearThresholdUnit} + * associated with a given label from the collection, that label's score in the returned + * {@link ScoreSet} will be set to Double.NEGATIVE_INFINITY. + * + *

+ * The elements of candidates must all be Strings. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @param candidates A list of the only labels the example may take. + * @return Scores for only those labels in candidates. + **/ + public ScoreSet scores(int[] exampleFeatures, double[] exampleValues, Collection candidates) { + ScoreSet result = new ScoreSet(); + Iterator I = candidates.iterator(); + + if (I.hasNext()) { + if (conjunctiveLabels) + return conjunctiveScores(exampleFeatures, exampleValues, I); + + while (I.hasNext()) { + String label = (String) I.next(); + Feature f = + new DiscretePrimitiveStringFeature(labeler.containingPackage, labeler.name, + "", label, labeler.valueIndexOf(label), + (short) labeler.allowableValues().length); + + if (labelLexicon.contains(f)) { + int key = labelLexicon.lookup(f); + LinearThresholdUnit ltu = (LinearThresholdUnit) network.get(key); + if (ltu != null) + result.put(label, + ltu.score(exampleFeatures, exampleValues) - ltu.getThreshold()); + } + } + } else { + int N = network.size(); + for (int l = 0; l < N; ++l) { + LinearThresholdUnit ltu = (LinearThresholdUnit) network.get(l); + if (ltu == null) + continue; + result.put(labelLexicon.lookupKey(l).getStringValue(), + ltu.score(exampleFeatures, exampleValues) - ltu.getThreshold()); + } + } + + return result; + } + + /** + * This method is a surrogate for {@link #scores(int[],double[],Collection)} when the labeler is + * known to produce conjunctive features. It is necessary because when given a string label from + * the collection, we will not know how to construct the appropriate conjunctive feature key for + * lookup in the label lexicon. So, we must go through each feature in the label lexicon and use + * {@link edu.illinois.cs.cogcomp.lbjava.classify.Feature#valueEquals(String)}. + * + * @param exampleFeatures The example's array of feature indices. 
+ * @param exampleValues The example's array of feature values. + * @param I An iterator over the set of labels to choose from. + * @return The label chosen by this classifier or null if the network did not + * contain any of the specified labels. + **/ + protected ScoreSet conjunctiveScores(int[] exampleFeatures, double[] exampleValues, Iterator I) { + ScoreSet result = new ScoreSet(); + int N = network.size(); + + while (I.hasNext()) { + String label = (String) I.next(); + + for (int i = 0; i < N; ++i) { + LinearThresholdUnit ltu = (LinearThresholdUnit) network.get(i); + if (ltu == null || !labelLexicon.lookupKey(i).valueEquals(label)) + continue; + double score = ltu.score(exampleFeatures, exampleValues); + result.put(label, score); + break; + } } - } + + return result; } - else { - int N = network.size(); - for (int i = 0; i < N; ++i) { - LinearThresholdUnit ltu = (LinearThresholdUnit) network.get(i); - if (ltu == null) continue; - double score = ltu.score(exampleFeatures, exampleValues); + /** + * Produces a set of scores indicating the degree to which each possible discrete classification + * value is associated with the given example object. These scores are just the scores of each + * LTU's positive classification as produced by LinearThresholdUnit.scores(Object). + * + * @see LinearThresholdUnit#scores(Object) + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. 
+ * @return The set of scores produced by the LTUs + **/ + public ScoreSet scores(int[] exampleFeatures, double[] exampleValues) { + ScoreSet result = new ScoreSet(); + int N = network.size(); + + for (int l = 0; l < N; l++) { + LinearThresholdUnit ltu = (LinearThresholdUnit) network.get(l); + if (ltu == null) + continue; - if (score > bestScore) { - bestValue = i; - bestScore = score; + result.put(labelLexicon.lookupKey(l).getStringValue(), + ltu.score(exampleFeatures, exampleValues) - ltu.getThreshold()); } - } - } - - return predictions.get(bestValue); - } - - - /** - * This method is a surrogate for - * {@link #valueOf(int[],double[],Collection)} when the labeler is known to - * produce conjunctive features. It is necessary because when given a - * string label from the collection, we will not know how to construct the - * appropriate conjunctive feature key for lookup in the label lexicon. - * So, we must go through each feature in the label lexicon and use - * {@link edu.illinois.cs.cogcomp.lbjava.classify.Feature#valueEquals(String)}. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @param I An iterator over the set of labels to choose - * from. - * @return The label chosen by this classifier or null if the - * network did not contain any of the specified labels. 
- **/ - protected Feature conjunctiveValueOf( - int[] exampleFeatures, double[] exampleValues, Iterator I) { - double bestScore = Double.NEGATIVE_INFINITY; - int bestValue = -1; - int N = network.size(); - - while (I.hasNext()) { - String label = (String) I.next(); - - for (int i = 0; i < N; ++i) { - LinearThresholdUnit ltu = (LinearThresholdUnit) network.get(i); - if (ltu == null || !labelLexicon.lookupKey(i).valueEquals(label)) - continue; - double score = ltu.score(exampleFeatures, exampleValues); - if (score > bestScore) { - bestScore = score; - bestValue = i; + + return result; + } + + /** + * Returns the classification of the given example as a single feature instead of a + * {@link FeatureVector}. + * + * @param f The features array. + * @param v The values array. + * @return The classification of the example as a feature. + **/ + public Feature featureValue(int[] f, double[] v) { + double bestScore = Double.NEGATIVE_INFINITY; + int bestValue = -1; + int N = network.size(); + + for (int l = 0; l < N; l++) { + LinearThresholdUnit ltu = (LinearThresholdUnit) network.get(l); + if (ltu == null) + continue; + double score = ltu.score(f, v); + + if (score > bestScore) { + bestValue = l; + bestScore = score; + } } - break; - } - } - - return predictions.get(bestValue); - } - - - /** - * Writes the algorithm's internal representation as text. - * - * @param out The output stream. - **/ - public void write(PrintStream out) { - out.println(baseLTU.getClass().getName()); - baseLTU.write(out); - int N = network.size(); - - for (int i = 0; i < N; ++i) { - LinearThresholdUnit ltu = (LinearThresholdUnit) network.get(i); - if (ltu == null) continue; - out.println("label: " + labelLexicon.lookupKey(i).getStringValue()); - ltu.setLexicon(lexicon); - ltu.write(out); - ltu.setLexicon(null); - } - - out.println("End of SparseNetworkLearner"); - out.close(); - } - - - /** - * Writes the learned function's internal representation in binary form. 
- * - * @param out The output stream. - **/ - public void write(ExceptionlessOutputStream out) { - super.write(out); - baseLTU.write(out); - out.writeBoolean(conjunctiveLabels); - int N = network.size(); - out.writeInt(N); - - for (int i = 0; i < N; ++i) { - LinearThresholdUnit ltu = (LinearThresholdUnit) network.get(i); - if (ltu == null) out.writeString(null); - else ltu.write(out); - } - out.close(); - } - - - /** - * Reads the binary representation of a learner with this object's run-time - * type, overwriting any and all learned or manually specified parameters - * as well as the label lexicon but without modifying the feature lexicon. - * - * @param in The input stream. - **/ - public void read(ExceptionlessInputStream in) { - super.read(in); - baseLTU = (LinearThresholdUnit) Learner.readLearner(in); - conjunctiveLabels = in.readBoolean(); - int N = in.readInt(); - network = new OVector(N); - for (int i = 0; i < N; ++i) - network.add(Learner.readLearner(in)); - } - - - /** Returns a deep clone of this learning algorithm. */ - public Object clone() { - SparseNetworkLearner clone = null; - try { clone = (SparseNetworkLearner) super.clone(); } - catch (Exception e) { - System.err.println("Error cloning SparseNetworkLearner: " + e); - e.printStackTrace(); - System.exit(1); - } - - clone.baseLTU = (LinearThresholdUnit) baseLTU.clone(); - int N = network.size(); - clone.network = new OVector(N); - - for (int i = 0; i < N; ++i) { - LinearThresholdUnit ltu = (LinearThresholdUnit) network.get(i); - if (ltu == null) clone.network.add(null); - else clone.network.add(ltu.clone()); - } - - return clone; - } - - - /** - * Simply a container for all of {@link SparseNetworkLearner}'s - * configurable parameters. Using instances of this class should make code - * more readable and constructors less complicated. 
- * - * @author Nick Rizzolo - **/ - public static class Parameters extends Learner.Parameters - { - private static final long serialVersionUID = 1L; - - /** - * The underlying algorithm used to learn each class separately as a - * binary classifier; default - * {@link SparseNetworkLearner#defaultBaseLTU}. + + return bestValue == -1 ? null : predictions.get(bestValue); + } + + /** + * This implementation uses a winner-take-all comparison of the outputs from the individual + * linear threshold units' score methods. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @return A single value with the winning linear threshold unit's associated value. **/ - public LinearThresholdUnit baseLTU; + public String discreteValue(int[] exampleFeatures, double[] exampleValues) { + return featureValue(exampleFeatures, exampleValues).getStringValue(); + } + /** + * This implementation uses a winner-take-all comparison of the outputs from the individual + * linear threshold units' score methods. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @return A single feature with the winning linear threshold unit's associated value. + **/ + public FeatureVector classify(int[] exampleFeatures, double[] exampleValues) { + return new FeatureVector(featureValue(exampleFeatures, exampleValues)); + } - /** Sets all the default values. */ - public Parameters() { - baseLTU = (LinearThresholdUnit) defaultBaseLTU.clone(); + /** + * Using this method, the winner-take-all competition is narrowed to involve only those labels + * contained in the specified list. The list must contain only Strings. + * + * @param example The example object. + * @param candidates A list of the only labels the example may take. + * @return The prediction as a feature or null if the network did not contain any + * of the specified labels. 
+ **/ + public Feature valueOf(Object example, Collection candidates) { + Object[] exampleArray = getExampleArray(example, false); + return valueOf((int[]) exampleArray[0], (double[]) exampleArray[1], candidates); } + /** + * Using this method, the winner-take-all competition is narrowed to involve only those labels + * contained in the specified list. The list must contain only Strings. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @param candidates A list of the only labels the example may take. + * @return The prediction as a feature or null if the network did not contain any + * of the specified labels. + **/ + public Feature valueOf(int[] exampleFeatures, double[] exampleValues, Collection candidates) { + double bestScore = Double.NEGATIVE_INFINITY; + int bestValue = -1; + Iterator cI = candidates.iterator(); + + if (cI.hasNext()) { + if (conjunctiveLabels) + return conjunctiveValueOf(exampleFeatures, exampleValues, cI); + + while (cI.hasNext()) { + double score = Double.NEGATIVE_INFINITY; + String label = (String) cI.next(); + + Feature f = + new DiscretePrimitiveStringFeature(labeler.containingPackage, labeler.name, + "", label, labeler.valueIndexOf(label), + (short) labeler.allowableValues().length); + + int key = -1; + if (labelLexicon.contains(f)) { + key = labelLexicon.lookup(f); + LinearThresholdUnit ltu = (LinearThresholdUnit) network.get(key); + if (ltu != null) + score = ltu.score(exampleFeatures, exampleValues); + } + + if (score > bestScore) { + bestValue = key; + bestScore = score; + } + } + } else { + int N = network.size(); + for (int i = 0; i < N; ++i) { + LinearThresholdUnit ltu = (LinearThresholdUnit) network.get(i); + if (ltu == null) + continue; + + double score = ltu.score(exampleFeatures, exampleValues); + + if (score > bestScore) { + bestValue = i; + bestScore = score; + } + } + } + + return predictions.get(bestValue); + } /** - * Sets the 
parameters from the parent's parameters object, giving - * defaults to all parameters declared in this object. + * This method is a surrogate for {@link #valueOf(int[],double[],Collection)} when the labeler + * is known to produce conjunctive features. It is necessary because when given a string label + * from the collection, we will not know how to construct the appropriate conjunctive feature + * key for lookup in the label lexicon. So, we must go through each feature in the label lexicon + * and use {@link edu.illinois.cs.cogcomp.lbjava.classify.Feature#valueEquals(String)}. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @param I An iterator over the set of labels to choose from. + * @return The label chosen by this classifier or null if the network did not + * contain any of the specified labels. **/ - public Parameters(Learner.Parameters p) { - super(p); - baseLTU = (LinearThresholdUnit) defaultBaseLTU.clone(); + protected Feature conjunctiveValueOf(int[] exampleFeatures, double[] exampleValues, Iterator I) { + double bestScore = Double.NEGATIVE_INFINITY; + int bestValue = -1; + int N = network.size(); + + while (I.hasNext()) { + String label = (String) I.next(); + + for (int i = 0; i < N; ++i) { + LinearThresholdUnit ltu = (LinearThresholdUnit) network.get(i); + if (ltu == null || !labelLexicon.lookupKey(i).valueEquals(label)) + continue; + double score = ltu.score(exampleFeatures, exampleValues); + if (score > bestScore) { + bestScore = score; + bestValue = i; + } + break; + } + } + + return predictions.get(bestValue); } + /** + * Writes the algorithm's internal representation as text. + * + * @param out The output stream. 
+ **/ + public void write(PrintStream out) { + out.println(baseLTU.getClass().getName()); + baseLTU.write(out); + int N = network.size(); + + for (int i = 0; i < N; ++i) { + LinearThresholdUnit ltu = (LinearThresholdUnit) network.get(i); + if (ltu == null) + continue; + out.println("label: " + labelLexicon.lookupKey(i).getStringValue()); + ltu.setLexicon(lexicon); + ltu.write(out); + ltu.setLexicon(null); + } - /** Copy constructor. */ - public Parameters(Parameters p) { - super(p); - baseLTU = p.baseLTU; + out.println("End of SparseNetworkLearner"); + out.close(); } + /** + * Writes the learned function's internal representation in binary form. + * + * @param out The output stream. + **/ + public void write(ExceptionlessOutputStream out) { + super.write(out); + baseLTU.write(out); + out.writeBoolean(conjunctiveLabels); + int N = network.size(); + out.writeInt(N); + + for (int i = 0; i < N; ++i) { + LinearThresholdUnit ltu = (LinearThresholdUnit) network.get(i); + if (ltu == null) + out.writeString(null); + else + ltu.write(out); + } + out.close(); + } /** - * Calls the appropriate Learner.setParameters(Parameters) - * method for this Parameters object. - * - * @param l The learner whose parameters will be set. + * Reads the binary representation of a learner with this object's run-time type, overwriting + * any and all learned or manually specified parameters as well as the label lexicon but without + * modifying the feature lexicon. + * + * @param in The input stream. **/ - public void setParameters(Learner l) { - ((SparseNetworkLearner) l).setParameters(this); + public void read(ExceptionlessInputStream in) { + super.read(in); + baseLTU = (LinearThresholdUnit) Learner.readLearner(in); + conjunctiveLabels = in.readBoolean(); + int N = in.readInt(); + network = new OVector(N); + for (int i = 0; i < N; ++i) + network.add(Learner.readLearner(in)); } + /** Returns a deep clone of this learning algorithm. 
*/ + public Object clone() { + SparseNetworkLearner clone = null; + try { + clone = (SparseNetworkLearner) super.clone(); + } catch (Exception e) { + System.err.println("Error cloning SparseNetworkLearner: " + e); + e.printStackTrace(); + System.exit(1); + } + + clone.baseLTU = (LinearThresholdUnit) baseLTU.clone(); + int N = network.size(); + clone.network = new OVector(N); + + for (int i = 0; i < N; ++i) { + LinearThresholdUnit ltu = (LinearThresholdUnit) network.get(i); + if (ltu == null) + clone.network.add(null); + else + clone.network.add(ltu.clone()); + } + + return clone; + } /** - * Creates a string representation of these parameters in which only - * those parameters that differ from their default values are mentioned. + * Simply a container for all of {@link SparseNetworkLearner}'s configurable parameters. Using + * instances of this class should make code more readable and constructors less complicated. + * + * @author Nick Rizzolo **/ - public String nonDefaultString() { - String name = baseLTU.getClass().getName(); - name = name.substring(name.lastIndexOf('.') + 1); - return name + ": " + baseLTU.getParameters().nonDefaultString(); + public static class Parameters extends Learner.Parameters { + private static final long serialVersionUID = 1L; + + /** + * The underlying algorithm used to learn each class separately as a binary classifier; + * default {@link SparseNetworkLearner#defaultBaseLTU}. + **/ + public LinearThresholdUnit baseLTU; + + /** Sets all the default values. */ + public Parameters() { + baseLTU = (LinearThresholdUnit) defaultBaseLTU.clone(); + } + + /** + * Sets the parameters from the parent's parameters object, giving defaults to all + * parameters declared in this object. + **/ + public Parameters(Learner.Parameters p) { + super(p); + baseLTU = (LinearThresholdUnit) defaultBaseLTU.clone(); + } + + /** Copy constructor. 
*/ + public Parameters(Parameters p) { + super(p); + baseLTU = p.baseLTU; + } + + /** + * Calls the appropriate Learner.setParameters(Parameters) method for this + * Parameters object. + * + * @param l The learner whose parameters will be set. + **/ + public void setParameters(Learner l) { + ((SparseNetworkLearner) l).setParameters(this); + } + + /** + * Creates a string representation of these parameters in which only those parameters that + * differ from their default values are mentioned. + **/ + public String nonDefaultString() { + String name = baseLTU.getClass().getName(); + name = name.substring(name.lastIndexOf('.') + 1); + return name + ": " + baseLTU.getParameters().nonDefaultString(); + } } - } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparsePerceptron.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparsePerceptron.java index 5318514f..1837c7ce 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparsePerceptron.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparsePerceptron.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.learn; @@ -14,309 +11,303 @@ /** - * Simple sparse Perceptron implementation. 
It is assumed that - * {@link Learner#labeler} is a single discrete classifier that produces the - * same feature for every example object and that the values that feature may - * take are available through the - * {@link edu.illinois.cs.cogcomp.lbjava.classify.Classifier#allowableValues()} method. The second - * value returned from {@link edu.illinois.cs.cogcomp.lbjava.classify.Classifier#allowableValues()} is - * treated as "positive", and it is assumed there are exactly 2 allowable - * values. Assertions will produce error messages if these assumptions do - * not hold. - * - *

This algorithm's user-configurable parameters are stored in member - * fields of this class. They may be set via either a constructor that names - * each parameter explicitly or a constructor that takes an instance of - * {@link edu.illinois.cs.cogcomp.lbjava.learn.SparsePerceptron.Parameters Parameters} as input. - * The documentation in each member field in this class indicates the default - * value of the associated parameter when using the former type of - * constructor. The documentation of the associated member field in the - * {@link edu.illinois.cs.cogcomp.lbjava.learn.SparsePerceptron.Parameters Parameters} class - * indicates the default value of the parameter when using the latter type of - * constructor. - * - * @author Nick Rizzolo + * Simple sparse Perceptron implementation. It is assumed that {@link Learner#labeler} is a single + * discrete classifier that produces the same feature for every example object and that the values + * that feature may take are available through the + * {@link edu.illinois.cs.cogcomp.lbjava.classify.Classifier#allowableValues()} method. The second + * value returned from {@link edu.illinois.cs.cogcomp.lbjava.classify.Classifier#allowableValues()} + * is treated as "positive", and it is assumed there are exactly 2 allowable values. Assertions will + * produce error messages if these assumptions do not hold. + * + *

+ * This algorithm's user-configurable parameters are stored in member fields of this class. They may + * be set via either a constructor that names each parameter explicitly or a constructor that takes + * an instance of {@link edu.illinois.cs.cogcomp.lbjava.learn.SparsePerceptron.Parameters + * Parameters} as input. The documentation in each member field in this class indicates the default + * value of the associated parameter when using the former type of constructor. The documentation of + * the associated member field in the + * {@link edu.illinois.cs.cogcomp.lbjava.learn.SparsePerceptron.Parameters Parameters} class + * indicates the default value of the parameter when using the latter type of constructor. + * + * @author Nick Rizzolo **/ -public class SparsePerceptron extends LinearThresholdUnit -{ - /** - * The learning rate and threshold take default values, while the name of - * the classifier gets the empty string. - **/ - public SparsePerceptron() { super(); } - - /** - * Sets the learning rate to the specified value, and the threshold takes - * the default, while the name of the classifier gets the empty string. - * - * @param r The desired learning rate value. - **/ - public SparsePerceptron(double r) { super(r); } - - /** - * Sets the learning rate and threshold to the specified values, while the - * name of the classifier gets the empty string. - * - * @param r The desired learning rate value. - * @param t The desired threshold value. - **/ - public SparsePerceptron(double r, double t) { - super(r, t); - } - - /** - * Use this constructor to fit a thick separator, where both the positive - * and negative sides of the hyperplane will be given the specified - * thickness, while the name of the classifier gets the empty string. - * - * @param r The desired learning rate value. - * @param t The desired threshold value. - * @param pt The desired thickness. 
- **/ - public SparsePerceptron(double r, double t, double pt) { - super(r, t, pt); - } - - /** - * Use this constructor to fit a thick separator, where the positive and - * negative sides of the hyperplane will be given the specified separate - * thicknesses, while the name of the classifier gets the empty string. - * - * @param r The desired learning rate value. - * @param t The desired threshold value. - * @param pt The desired positive thickness. - * @param nt The desired negative thickness. - **/ - public SparsePerceptron(double r, double t, double pt, double nt) { - super(r, t, pt, nt); - } - - /** - * Use this constructor to specify an alternative subclass of - * {@link SparseWeightVector}, while the name of the classifier gets the - * empty string. - * - * @param r The desired learning rate value. - * @param t The desired threshold value. - * @param pt The desired positive thickness. - * @param nt The desired negative thickness. - * @param v An empty sparse weight vector. - **/ - public SparsePerceptron(double r, double t, double pt, double nt, - SparseWeightVector v) { - super("", r, t, pt, nt, v); - } - - /** - * Initializing constructor. Sets all member variables to their associated - * settings in the {@link SparsePerceptron.Parameters} object. - * - * @param p The settings of all parameters. - **/ - public SparsePerceptron(Parameters p) { super(p); } - - - /** - * The learning rate and threshold take default values. - * - * @param n The name of the classifier. - **/ - public SparsePerceptron(String n) { super(n); } - - /** - * Sets the learning rate to the specified value, and the threshold takes - * the default. - * - * @param n The name of the classifier. - * @param r The desired learning rate value. - **/ - public SparsePerceptron(String n, double r) { - super(n, r); - } - - /** - * Sets the learning rate and threshold to the specified values. - * - * @param n The name of the classifier. - * @param r The desired learning rate value. 
- * @param t The desired threshold value. - **/ - public SparsePerceptron(String n, double r, double t) { - super(n, r, t); - } - - /** - * Use this constructor to fit a thick separator, where both the positive - * and negative sides of the hyperplane will be given the specified - * thickness. - * - * @param n The name of the classifier. - * @param r The desired learning rate value. - * @param t The desired threshold value. - * @param pt The desired thickness. - **/ - public SparsePerceptron(String n, double r, double t, double pt) { - super(n, r, t, pt); - } - - /** - * Use this constructor to fit a thick separator, where the positive and - * negative sides of the hyperplane will be given the specified separate - * thicknesses. - * - * @param n The name of the classifier. - * @param r The desired learning rate value. - * @param t The desired threshold value. - * @param pt The desired positive thickness. - * @param nt The desired negative thickness. - **/ - public SparsePerceptron(String n, double r, double t, double pt, double nt) - { - super(n, r, t, pt, nt); - } - - /** - * Use this constructor to specify an alternative subclass of - * {@link SparseWeightVector}. - * - * @param n The name of the classifier. - * @param r The desired learning rate value. - * @param t The desired threshold value. - * @param pt The desired positive thickness. - * @param nt The desired negative thickness. - * @param v An empty sparse weight vector. - **/ - public SparsePerceptron(String n, double r, double t, double pt, double nt, - SparseWeightVector v) { - super(n, r, t, pt, nt, v); - } - - /** - * Initializing constructor. Sets all member variables to their associated - * settings in the {@link SparsePerceptron.Parameters} object. - * - * @param n The name of the classifier. - * @param p The settings of all parameters. - **/ - public SparsePerceptron(String n, Parameters p) { - super(n, p); - } - - - /** - * Retrieves the parameters that are set in this learner. 
- * - * @return An object containing all the values of the parameters that - * control the behavior of this learning algorithm. - **/ - public Learner.Parameters getParameters() { - Parameters p = - new Parameters((LinearThresholdUnit.Parameters) super.getParameters()); - return p; - } - - - /** - * Returns the current value of the {@link #learningRate} variable. - * - * @return The value of the {@link #learningRate} variable. - **/ - public double getLearningRate() { return learningRate; } - - - /** - * Sets the {@link #learningRate} member variable to the specified - * value. - * - * @param r The new value for {@link #learningRate}. - **/ - public void setLearningRate(double r) { learningRate = r; } - - - /** - * Scales the feature vector produced by the extractor by the learning rate - * and adds it to the weight vector. - * - * @param exampleFeatures The example's array of feature indices - * @param exampleValues The example's array of feature values - * @param rate The learning rate at which the weights are updated. - **/ - public void promote(int[] exampleFeatures, double[] exampleValues, - double rate) { - weightVector.scaledAdd(exampleFeatures, exampleValues, rate, - initialWeight); - bias += rate; - } - - - /** - * Scales the feature vector produced by the extractor by the learning rate - * and subtracts it from the weight vector. - * - * @param exampleFeatures The example's array of feature indices - * @param exampleValues The example's array of feature values - * @param rate The learning rate at which the weights are updated. - **/ - public void demote(int[] exampleFeatures, double[] exampleValues, - double rate) { - weightVector.scaledAdd(exampleFeatures, exampleValues, -rate, - initialWeight); - bias -= rate; - } - - /** - * Writes the algorithm's internal representation as text. 
In the first - * line of output, the name of the classifier is printed, followed by - * {@link #learningRate}, {@link LinearThresholdUnit#initialWeight}, - * {@link LinearThresholdUnit#threshold}, - * {@link LinearThresholdUnit#positiveThickness}, - * {@link LinearThresholdUnit#negativeThickness}, and finally - * {@link LinearThresholdUnit#bias}. - * - * @param out The output stream. - **/ - public void write(PrintStream out) { - out.println(name + ": " + learningRate + ", " + initialWeight + ", " - + threshold + ", " + positiveThickness + ", " - + negativeThickness + ", " + bias); - if (lexicon.size() == 0) weightVector.write(out); - else weightVector.write(out, lexicon); - } - - - /** - * Simply a container for all of {@link SparsePerceptron}'s configurable - * parameters. Using instances of this class should make code more - * readable and constructors less complicated. - * - * @author Nick Rizzolo - **/ - public static class Parameters extends LinearThresholdUnit.Parameters - { - /** Sets all the default values. */ - public Parameters() { } +public class SparsePerceptron extends LinearThresholdUnit { + /** + * The learning rate and threshold take default values, while the name of the classifier gets + * the empty string. + **/ + public SparsePerceptron() { + super(); + } + /** + * Sets the learning rate to the specified value, and the threshold takes the default, while the + * name of the classifier gets the empty string. + * + * @param r The desired learning rate value. + **/ + public SparsePerceptron(double r) { + super(r); + } /** - * Sets the parameters from the parent's parameters object, giving - * defaults to all parameters declared in this object. + * Sets the learning rate and threshold to the specified values, while the name of the + * classifier gets the empty string. + * + * @param r The desired learning rate value. + * @param t The desired threshold value. 
**/ - public Parameters(LinearThresholdUnit.Parameters p) { super(p); } + public SparsePerceptron(double r, double t) { + super(r, t); + } + /** + * Use this constructor to fit a thick separator, where both the positive and negative sides of + * the hyperplane will be given the specified thickness, while the name of the classifier gets + * the empty string. + * + * @param r The desired learning rate value. + * @param t The desired threshold value. + * @param pt The desired thickness. + **/ + public SparsePerceptron(double r, double t, double pt) { + super(r, t, pt); + } - /** Copy constructor. */ - public Parameters(Parameters p) { super(p); } + /** + * Use this constructor to fit a thick separator, where the positive and negative sides of the + * hyperplane will be given the specified separate thicknesses, while the name of the classifier + * gets the empty string. + * + * @param r The desired learning rate value. + * @param t The desired threshold value. + * @param pt The desired positive thickness. + * @param nt The desired negative thickness. + **/ + public SparsePerceptron(double r, double t, double pt, double nt) { + super(r, t, pt, nt); + } + /** + * Use this constructor to specify an alternative subclass of {@link SparseWeightVector}, while + * the name of the classifier gets the empty string. + * + * @param r The desired learning rate value. + * @param t The desired threshold value. + * @param pt The desired positive thickness. + * @param nt The desired negative thickness. + * @param v An empty sparse weight vector. + **/ + public SparsePerceptron(double r, double t, double pt, double nt, SparseWeightVector v) { + super("", r, t, pt, nt, v); + } /** - * Calls the appropriate Learner.setParameters(Parameters) - * method for this Parameters object. - * - * @param l The learner whose parameters will be set. + * Initializing constructor. Sets all member variables to their associated settings in the + * {@link SparsePerceptron.Parameters} object. 
+ * + * @param p The settings of all parameters. **/ - public void setParameters(Learner l) { - ((LinearThresholdUnit) l).setParameters(this); + public SparsePerceptron(Parameters p) { + super(p); + } + + + /** + * The learning rate and threshold take default values. + * + * @param n The name of the classifier. + **/ + public SparsePerceptron(String n) { + super(n); + } + + /** + * Sets the learning rate to the specified value, and the threshold takes the default. + * + * @param n The name of the classifier. + * @param r The desired learning rate value. + **/ + public SparsePerceptron(String n, double r) { + super(n, r); + } + + /** + * Sets the learning rate and threshold to the specified values. + * + * @param n The name of the classifier. + * @param r The desired learning rate value. + * @param t The desired threshold value. + **/ + public SparsePerceptron(String n, double r, double t) { + super(n, r, t); + } + + /** + * Use this constructor to fit a thick separator, where both the positive and negative sides of + * the hyperplane will be given the specified thickness. + * + * @param n The name of the classifier. + * @param r The desired learning rate value. + * @param t The desired threshold value. + * @param pt The desired thickness. + **/ + public SparsePerceptron(String n, double r, double t, double pt) { + super(n, r, t, pt); + } + + /** + * Use this constructor to fit a thick separator, where the positive and negative sides of the + * hyperplane will be given the specified separate thicknesses. + * + * @param n The name of the classifier. + * @param r The desired learning rate value. + * @param t The desired threshold value. + * @param pt The desired positive thickness. + * @param nt The desired negative thickness. + **/ + public SparsePerceptron(String n, double r, double t, double pt, double nt) { + super(n, r, t, pt, nt); + } + + /** + * Use this constructor to specify an alternative subclass of {@link SparseWeightVector}. 
+ * + * @param n The name of the classifier. + * @param r The desired learning rate value. + * @param t The desired threshold value. + * @param pt The desired positive thickness. + * @param nt The desired negative thickness. + * @param v An empty sparse weight vector. + **/ + public SparsePerceptron(String n, double r, double t, double pt, double nt, SparseWeightVector v) { + super(n, r, t, pt, nt, v); } - } -} + /** + * Initializing constructor. Sets all member variables to their associated settings in the + * {@link SparsePerceptron.Parameters} object. + * + * @param n The name of the classifier. + * @param p The settings of all parameters. + **/ + public SparsePerceptron(String n, Parameters p) { + super(n, p); + } + + + /** + * Retrieves the parameters that are set in this learner. + * + * @return An object containing all the values of the parameters that control the behavior of + * this learning algorithm. + **/ + public Learner.Parameters getParameters() { + Parameters p = new Parameters((LinearThresholdUnit.Parameters) super.getParameters()); + return p; + } + + + /** + * Returns the current value of the {@link #learningRate} variable. + * + * @return The value of the {@link #learningRate} variable. + **/ + public double getLearningRate() { + return learningRate; + } + + + /** + * Sets the {@link #learningRate} member variable to the specified value. + * + * @param r The new value for {@link #learningRate}. + **/ + public void setLearningRate(double r) { + learningRate = r; + } + + + /** + * Scales the feature vector produced by the extractor by the learning rate and adds it to the + * weight vector. + * + * @param exampleFeatures The example's array of feature indices + * @param exampleValues The example's array of feature values + * @param rate The learning rate at which the weights are updated. 
+ **/ + public void promote(int[] exampleFeatures, double[] exampleValues, double rate) { + weightVector.scaledAdd(exampleFeatures, exampleValues, rate, initialWeight); + bias += rate; + } + + + /** + * Scales the feature vector produced by the extractor by the learning rate and subtracts it + * from the weight vector. + * + * @param exampleFeatures The example's array of feature indices + * @param exampleValues The example's array of feature values + * @param rate The learning rate at which the weights are updated. + **/ + public void demote(int[] exampleFeatures, double[] exampleValues, double rate) { + weightVector.scaledAdd(exampleFeatures, exampleValues, -rate, initialWeight); + bias -= rate; + } + + /** + * Writes the algorithm's internal representation as text. In the first line of output, the name + * of the classifier is printed, followed by {@link #learningRate}, + * {@link LinearThresholdUnit#initialWeight}, {@link LinearThresholdUnit#threshold}, + * {@link LinearThresholdUnit#positiveThickness}, {@link LinearThresholdUnit#negativeThickness}, + * and finally {@link LinearThresholdUnit#bias}. + * + * @param out The output stream. + **/ + public void write(PrintStream out) { + out.println(name + ": " + learningRate + ", " + initialWeight + ", " + threshold + ", " + + positiveThickness + ", " + negativeThickness + ", " + bias); + if (lexicon.size() == 0) + weightVector.write(out); + else + weightVector.write(out, lexicon); + } + + + /** + * Simply a container for all of {@link SparsePerceptron}'s configurable parameters. Using + * instances of this class should make code more readable and constructors less complicated. + * + * @author Nick Rizzolo + **/ + public static class Parameters extends LinearThresholdUnit.Parameters { + /** Sets all the default values. */ + public Parameters() {} + + + /** + * Sets the parameters from the parent's parameters object, giving defaults to all + * parameters declared in this object. 
+ **/ + public Parameters(LinearThresholdUnit.Parameters p) { + super(p); + } + + + /** Copy constructor. */ + public Parameters(Parameters p) { + super(p); + } + + + /** + * Calls the appropriate Learner.setParameters(Parameters) method for this + * Parameters object. + * + * @param l The learner whose parameters will be set. + **/ + public void setParameters(Learner l) { + ((LinearThresholdUnit) l).setParameters(this); + } + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseWeightVector.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseWeightVector.java index 8f5ae3ef..0353daba 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseWeightVector.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseWeightVector.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.learn; @@ -17,477 +14,523 @@ import java.util.Comparator; import java.util.Map; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.DVector; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream; import edu.illinois.cs.cogcomp.lbjava.classify.Feature; import edu.illinois.cs.cogcomp.lbjava.util.ClassUtils; -import edu.illinois.cs.cogcomp.lbjava.util.DVector; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessInputStream; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessOutputStream; - /** - * This class is used as a weight vector in sparse learning algorithms. - * {@link Feature}s are associated with Doubles and/or with - * double[]s representing the weights of the features they - * produce. Features not appearing in the vector are assumed to have the + * This class is used as a weight vector in sparse learning algorithms. {@link Feature}s are + * associated with Doubles and/or with double[]s representing the weights + * of the features they produce. Features not appearing in the vector are assumed to have the * {@link #defaultWeight}. * * @author Nick Rizzolo **/ -public class SparseWeightVector implements Cloneable, Serializable -{ +public class SparseWeightVector implements Cloneable, Serializable { + /** default. */ + private static final long serialVersionUID = 1L; /** - * When a feature appears in an example but not in this vector, it is - * assumed to have this weight. - **/ - protected static final double defaultWeight = 0; - /** The initial capacity for {@link #weights} if not specified otherwise. 
*/ - protected static final int defaultCapacity = 1 << 10; + * When a feature appears in an example but not in this vector, it is assumed to have this + * weight. + **/ + protected static final double defaultWeight = 0; + /** The initial capacity for {@link #weights} if not specified otherwise. */ + protected static final int defaultCapacity = 1 << 10; - /** The weights in the vector indexed by their {@link Lexicon} key. */ - protected DVector weights; + /** The weights in the vector indexed by their {@link Lexicon} key. */ + protected DVector weights; /** Simply instantiates {@link #weights}. */ - public SparseWeightVector() { this(new DVector(defaultCapacity)); } - - /** - * Simply initializes {@link #weights}. - * - * @param w An array of weights. - **/ - public SparseWeightVector(double[] w) { this(new DVector(w)); } - - /** - * Simply initializes {@link #weights}. - * - * @param w A vector of weights. - **/ - public SparseWeightVector(DVector w) { weights = w; } - - /** - * Returns the weight of the given feature. - * - * @param featureIndex The feature index. - * @return The weight of the feature. - **/ - public double getWeight(int featureIndex) { - return getWeight(featureIndex, defaultWeight); - } - - /** - * Returns the weight of the given feature. - * - * @param featureIndex The feature index. - * @param defaultW The default weight. - * @return The weight of the feature. - **/ - public double getWeight(int featureIndex, double defaultW) { - return weights.get(featureIndex, defaultW); - } - - - /** - * Sets the weight of the given feature. - * - * @param featureIndex The feature index. - * @param w The new weight. - **/ - protected void setWeight(int featureIndex, double w) { - setWeight(featureIndex, w, defaultWeight); + public SparseWeightVector() { + this(new DVector(defaultCapacity)); + } + + /** + * Simply initializes {@link #weights}. + * + * @param w An array of weights. 
+ **/ + public SparseWeightVector(double[] w) { + this(new DVector(w)); + } + + /** + * Simply initializes {@link #weights}. + * + * @param w A vector of weights. + **/ + public SparseWeightVector(DVector w) { + weights = w; + } + + /** + * Returns the weight of the given feature. + * + * @param featureIndex The feature index. + * @return The weight of the feature. + **/ + public double getWeight(int featureIndex) { + return getWeight(featureIndex, defaultWeight); + } + + /** + * Returns the weight of the given feature. + * + * @param featureIndex The feature index. + * @param defaultW The default weight. + * @return The weight of the feature. + **/ + public double getWeight(int featureIndex, double defaultW) { + return weights.get(featureIndex, defaultW); + } + + + /** + * Sets the weight of the given feature. + * + * @param featureIndex The feature index. + * @param w The new weight. + **/ + public void setWeight(int featureIndex, double w) { + setWeight(featureIndex, w, defaultWeight); + } + + /** + * Sets the weight of the given feature. + * + * @param featureIndex The feature index. + * @param w The new weight. + * @param defaultW The default weight. + **/ + public void setWeight(int featureIndex, double w, double defaultW) { + weights.set(featureIndex, w, defaultW); + } + + /** + * For those cases where we need the raw weights (during model optimization). + * @return the unmolested weights. + */ + public DVector getRawWeights() { + return weights; } - /** - * Sets the weight of the given feature. - * - * @param featureIndex The feature index. - * @param w The new weight. - * @param defaultW The default weight. - **/ - protected void setWeight(int featureIndex, double w, double defaultW) { - weights.set(featureIndex, w, defaultW); - } - - - /** - * Takes the dot product of this SparseWeightVector with the - * argument vector, using the hard coded default weight. - * - * @param exampleFeatures The example's feature indices. 
- * @param exampleValues The example's feature values. - * @return The computed dot product. - **/ - public double dot(int[] exampleFeatures, double[] exampleValues) { - return dot(exampleFeatures, exampleValues, defaultWeight); - } - - /** - * Takes the dot product of this SparseWeightVector with the - * argument vector, using the specified default weight when one is not yet - * present in this vector. - * - * @param exampleFeatures The example's feature indices. - * @param exampleValues The example's feature values. - * @param defaultW The default weight. - * @return The computed dot product. - **/ - public double dot(int[] exampleFeatures, double[] exampleValues, - double defaultW) { - double sum = 0; - - for (int i = 0; i < exampleFeatures.length; i++) { - double w = getWeight(exampleFeatures[i], defaultW); - sum += w * exampleValues[i]; - } - - return sum; - } - - - /** - * Self-modifying vector addition. - * - * @param exampleFeatures The example's feature indices. - * @param exampleValues The example's feature values. - **/ - public void scaledAdd(int[] exampleFeatures, double[] exampleValues) { - scaledAdd(exampleFeatures, exampleValues, 1, defaultWeight); - } - - /** - * Self-modifying vector addition where the argument vector is first scaled - * by the given factor. The default weight is used to initialize new - * feature weights. - * - * @param exampleFeatures The example's feature indices. - * @param exampleValues The example's feature values. - * @param factor The scaling factor. - **/ - public void scaledAdd(int[] exampleFeatures, double[] exampleValues, - double factor) { - scaledAdd(exampleFeatures, exampleValues, factor, defaultWeight); - } - - /** - * Self-modifying vector addition where the argument vector is first scaled - * by the given factor. - * - * @param exampleFeatures The example's feature indices. - * @param exampleValues The example's feature values. - * @param factor The scaling factor. 
- * @param defaultW An initial weight for previously unseen - * features. - **/ - public void scaledAdd(int[] exampleFeatures, double[] exampleValues, - double factor, double defaultW) { - for (int i = 0; i < exampleFeatures.length; i++) { - int featureIndex = exampleFeatures[i]; - double w = getWeight(featureIndex, defaultW) + factor*exampleValues[i]; - setWeight(featureIndex, w, defaultW); - } - } - - - /** - * Self-modifying vector multiplication where the argument vector is first - * scaled by the given factor. The default weight is used to initialize - * new feature weights. - * - * @param exampleFeatures The example's feature indices. - * @param exampleValues The example's feature values. - * @param factor The scaling factor. - **/ - public void scaledMultiply(int[] exampleFeatures, double[] exampleValues, - double factor) { - scaledMultiply(exampleFeatures, exampleValues, factor, defaultWeight); - } - - /** - * Self-modifying vector multiplication where the argument vector is first - * scaled by the given factor. - * - * @param exampleFeatures The example's feature indices. - * @param exampleValues The example's feature values. - * @param factor The scaling factor. - * @param defaultW An initial weight for previously unseen - * features. - **/ - public void scaledMultiply(int[] exampleFeatures, double[] exampleValues, - double factor, double defaultW) { - for (int i = 0; i < exampleFeatures.length; i++) { - int featureIndex = exampleFeatures[i]; - double s = exampleValues[i]; - - double multiplier = factor; - if (s == 0) multiplier = 1; - else if (s != 1) multiplier = Math.pow(factor, s); - - double w = getWeight(featureIndex, defaultW) * multiplier; - setWeight(featureIndex, w, defaultW); - } - } - - - /** - * The strength of each feature in the argument vector is multiplied by the - * corresponding weight in this weight vector and the result is returned as - * an array of arrays. 
The first array contains the integer keys of the - * example's features, as indexed in the lexicon. The second array gives - * the double values corresponding to the product of the pairwise - * multiplication of the strengths of that feature. - * - * @param exampleFeatures The example's feature indices. - * @param exampleValues The example's feature values. - * @param defaultW An initial weight for previously unseen - * features. - * @param inverse When set to true the weight in this - * vector is inverted before the multiplication - * takes place. - * @return A new example vector representing the pairwise multiplication. - **/ - public Object[] pairwiseMultiply(int[] exampleFeatures, - double[] exampleValues, - double defaultW, boolean inverse) { - int resultFeatures[] = new int[exampleFeatures.length]; - double resultValues[] = new double[exampleFeatures.length]; - - for (int i = 0; i < exampleFeatures.length; i++) { - int featureIndex = exampleFeatures[i]; - double w = getWeight(featureIndex, defaultW); - if (inverse) w = 1 / w; - resultFeatures[i] = exampleFeatures[i]; - resultValues[i] = w * exampleValues[i]; - } - - return new Object[] {resultFeatures, resultValues}; - } - - - - /** Empties the weight map. */ - public void clear() { weights = new DVector(defaultCapacity); } - /** Returns the length of the weight vector. */ - public int size() { return weights.size(); } - - /** - * Outputs the contents of this SparseWeightVector into the - * specified PrintStream. The string representation starts - * with a "Begin" annotation, ends with an "End" - * annotation, and without a Lexicon passed as a parameter, - * the weights are simply printed in the order of their integer indices. - * - * @param out The stream to write to. 
- **/ - public void write(PrintStream out) { - out.println("Begin SparseWeightVector"); - toStringJustWeights(out); - out.println("End SparseWeightVector"); - } - - - /** - * Outputs the contents of this SparseWeightVector into the - * specified PrintStream. The string representation starts - * with a "Begin" annotation, ends with an "End" - * annotation, and lists each feature with its corresponding weight on the - * same, separate line in between. - * - * @param out The stream to write to. - * @param lex The feature lexicon. - **/ - public void write(PrintStream out, Lexicon lex) { - out.println("Begin SparseWeightVector"); - toStringJustWeights(out, 0, lex); - out.println("End SparseWeightVector"); - } - - - /** - * Outputs a textual representation of this SparseWeightVector - * to a stream just like {@link #write(PrintStream)}, but without the - * "Begin" and "End" annotations. Without a - * Lexicon passed as a parameter, the weights are simply - * printed in the order of their integer indices. - * - * @param out The stream to write to. - **/ - public void toStringJustWeights(PrintStream out) { - for (int i = 0; i < weights.size(); i++) - out.println(weights.get(i)); - } - - - /** - * Outputs a textual representation of this SparseWeightVector - * to a stream just like {@link #write(PrintStream)}, but without the - * "Begin" and "End" annotations. With a - * Lexicon passed as a parameter, the feature is printed along - * with each weight. - * - * @param out The stream to write to. - * @param min Sets the minimum width for the textual representation of all - * features. - * @param lex The feature lexicon. - **/ + /** + * Takes the dot product of this SparseWeightVector with the argument vector, using + * the hard coded default weight. + * + * @param exampleFeatures The example's feature indices. + * @param exampleValues The example's feature values. + * @return The computed dot product. 
+ **/ + public double dot(int[] exampleFeatures, double[] exampleValues) { + return dot(exampleFeatures, exampleValues, defaultWeight); + } + + /** + * Takes the dot product of this SparseWeightVector with the argument vector, using + * the specified default weight when one is not yet present in this vector. + * + * @param exampleFeatures The example's feature indices. + * @param exampleValues The example's feature values. + * @param defaultW The default weight. + * @return The computed dot product. + **/ + public double dot(int[] exampleFeatures, double[] exampleValues, double defaultW) { + double sum = 0; + + for (int i = 0; i < exampleFeatures.length; i++) { + double w = getWeight(exampleFeatures[i], defaultW); + sum += w * exampleValues[i]; + } + + return sum; + } + + + /** + * Self-modifying vector addition. + * + * @param exampleFeatures The example's feature indices. + * @param exampleValues The example's feature values. + **/ + public void scaledAdd(int[] exampleFeatures, double[] exampleValues) { + scaledAdd(exampleFeatures, exampleValues, 1, defaultWeight); + } + + /** + * Self-modifying vector addition where the argument vector is first scaled by the given factor. + * The default weight is used to initialize new feature weights. + * + * @param exampleFeatures The example's feature indices. + * @param exampleValues The example's feature values. + * @param factor The scaling factor. + **/ + public void scaledAdd(int[] exampleFeatures, double[] exampleValues, double factor) { + scaledAdd(exampleFeatures, exampleValues, factor, defaultWeight); + } + + /** + * Self-modifying vector addition where the argument vector is first scaled by the given factor. + * + * @param exampleFeatures The example's feature indices. + * @param exampleValues The example's feature values. + * @param factor The scaling factor. + * @param defaultW An initial weight for previously unseen features. 
+ **/ + public void scaledAdd(int[] exampleFeatures, double[] exampleValues, double factor, + double defaultW) { + for (int i = 0; i < exampleFeatures.length; i++) { + int featureIndex = exampleFeatures[i]; + double w = getWeight(featureIndex, defaultW) + factor * exampleValues[i]; + setWeight(featureIndex, w, defaultW); + } + } + + + /** + * Self-modifying vector multiplication where the argument vector is first scaled by the given + * factor. The default weight is used to initialize new feature weights. + * + * @param exampleFeatures The example's feature indices. + * @param exampleValues The example's feature values. + * @param factor The scaling factor. + **/ + public void scaledMultiply(int[] exampleFeatures, double[] exampleValues, double factor) { + scaledMultiply(exampleFeatures, exampleValues, factor, defaultWeight); + } + + /** + * Self-modifying vector multiplication where the argument vector is first scaled by the given + * factor. + * + * @param exampleFeatures The example's feature indices. + * @param exampleValues The example's feature values. + * @param factor The scaling factor. + * @param defaultW An initial weight for previously unseen features. + **/ + public void scaledMultiply(int[] exampleFeatures, double[] exampleValues, double factor, + double defaultW) { + for (int i = 0; i < exampleFeatures.length; i++) { + int featureIndex = exampleFeatures[i]; + double s = exampleValues[i]; + + double multiplier = factor; + if (s == 0) + multiplier = 1; + else if (s != 1) + multiplier = Math.pow(factor, s); + + double w = getWeight(featureIndex, defaultW) * multiplier; + setWeight(featureIndex, w, defaultW); + } + } + + + /** + * The strength of each feature in the argument vector is multiplied by the corresponding weight + * in this weight vector and the result is returned as an array of arrays. The first array + * contains the integer keys of the example's features, as indexed in the lexicon. 
The second + * array gives the double values corresponding to the product of the pairwise multiplication of + * the strengths of that feature. + * + * @param exampleFeatures The example's feature indices. + * @param exampleValues The example's feature values. + * @param defaultW An initial weight for previously unseen features. + * @param inverse When set to true the weight in this vector is inverted before the + * multiplication takes place. + * @return A new example vector representing the pairwise multiplication. + **/ + public Object[] pairwiseMultiply(int[] exampleFeatures, double[] exampleValues, + double defaultW, boolean inverse) { + int resultFeatures[] = new int[exampleFeatures.length]; + double resultValues[] = new double[exampleFeatures.length]; + + for (int i = 0; i < exampleFeatures.length; i++) { + int featureIndex = exampleFeatures[i]; + double w = getWeight(featureIndex, defaultW); + if (inverse) + w = 1 / w; + resultFeatures[i] = exampleFeatures[i]; + resultValues[i] = w * exampleValues[i]; + } + + return new Object[] {resultFeatures, resultValues}; + } + + + + /** Empties the weight map. */ + public void clear() { + weights = new DVector(defaultCapacity); + } + + /** Returns the length of the weight vector. */ + public int size() { + return weights.size(); + } + + /** + * Outputs the contents of this SparseWeightVector into the specified + * PrintStream. The string representation starts with a "Begin" + * annotation, ends with an "End" annotation, and without a Lexicon + * passed as a parameter, the weights are simply printed in the order of their integer indices. + * + * @param out The stream to write to. + **/ + public void write(PrintStream out) { + out.println("Begin SparseWeightVector"); + toStringJustWeights(out); + out.println("End SparseWeightVector"); + } + + + /** + * Outputs the contents of this SparseWeightVector into the specified + * PrintStream. 
The string representation starts with a "Begin" + * annotation, ends with an "End" annotation, and lists each feature with its + * corresponding weight on the same, separate line in between. + * + * @param out The stream to write to. + * @param lex The feature lexicon. + **/ + public void write(PrintStream out, Lexicon lex) { + out.println("Begin SparseWeightVector"); + toStringJustWeights(out, 0, lex); + out.println("End SparseWeightVector"); + } + + + /** + * Outputs a textual representation of this SparseWeightVector to a stream just + * like {@link #write(PrintStream)}, but without the "Begin" and "End" + * annotations. Without a Lexicon passed as a parameter, the weights are simply + * printed in the order of their integer indices. + * + * @param out The stream to write to. + **/ + public void toStringJustWeights(PrintStream out) { + for (int i = 0; i < weights.size(); i++) + out.println(weights.get(i)); + } + + + /** + * Outputs a textual representation of this SparseWeightVector to a stream just + * like {@link #write(PrintStream)}, but without the "Begin" and "End" + * annotations. With a Lexicon passed as a parameter, the feature is printed along + * with each weight. + * + * @param out The stream to write to. + * @param min Sets the minimum width for the textual representation of all features. + * @param lex The feature lexicon. 
+ **/ + @SuppressWarnings({ "rawtypes", "unchecked" }) public void toStringJustWeights(PrintStream out, int min, Lexicon lex) { - Map map = lex.getMap(); - Map.Entry[] entries = - (Map.Entry[]) map.entrySet().toArray(new Map.Entry[map.size()]); - Arrays.sort(entries, - new Comparator() { - public int compare(Object o1, Object o2) { - Map.Entry e1 = (Map.Entry) o1; - Map.Entry e2 = (Map.Entry) o2; - int i1 = ((Integer) e1.getValue()).intValue(); - int i2 = ((Integer) e2.getValue()).intValue(); - if ((i1 < weights.size()) != (i2 < weights.size())) - return i1 - i2; - return ((Feature) e1.getKey()).compareTo(e2.getKey()); - } - }); - - int i, biggest = min; - for (i = 0; i < entries.length; ++i) { - //for (i = 0; i < weights.size(); ++i) - String key = - entries[i].getKey().toString() - + (((Integer) entries[i].getValue()).intValue() < weights.size() - ? "" : " (pruned)"); - biggest = Math.max(biggest, key.length()); - } - - if (biggest % 2 == 0) biggest += 2; - else ++biggest; - - for (i = 0; i < entries.length; ++i) { - //for (i = 0; i < weights.size(); ++i) - String key = - entries[i].getKey().toString() - + (((Integer) entries[i].getValue()).intValue() < weights.size() - ? "" : " (pruned)"); - out.print(key); - for (int j = 0; key.length() + j < biggest; ++j) out.print(" "); - - int index = ((Integer) entries[i].getValue()).intValue(); - out.println(weights.get(index)); - } - } - - - /** - * Creates a string representation of this SparseWeightVector. - * This method merely returns the data computed by - * {@link #write(PrintStream)}. - * - * @return A textual representation of this vector. - **/ - public String toString() { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - PrintStream out = new PrintStream(baos); - write(out); - return baos.toString(); - } - - - /** - * Creates a string representation of this SparseWeightVector. - * This method merely returns the data computed by - * {@link #write(PrintStream,Lexicon)}. 
- * - * @param lex The feature lexicon. - * @return A textual representation of this vector. - **/ - public String toString(Lexicon lex) { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - PrintStream out = new PrintStream(baos); - write(out, lex); - return baos.toString(); - } - - - /** - * Writes the weight vector's internal representation in binary form. - * - * @param out The output stream. - **/ - public void write(ExceptionlessOutputStream out) { - out.writeString(getClass().getName()); - weights.write(out); - } - - - /** - * Reads the binary representation of a weight vector of any type from the - * given stream. The stream is expected to first return a string - * containing the fully qualified class name of the weight vector. If the - * short value -1 appears instead, this method returns - * null. - * - *

This method is appropriate for reading weight vectors as written by - * {@link #write(ExceptionlessOutputStream)}. - * - * @param in The input stream. - * @return The weight vector read from the stream. - **/ - public static SparseWeightVector readWeightVector( - ExceptionlessInputStream in) { - String name = in.readString(); - if (name == null) return null; + Map map = lex.getMap(); + Map.Entry[] entries = (Map.Entry[]) map.entrySet().toArray(new Map.Entry[map.size()]); + Arrays.sort(entries, new Comparator() { + public int compare(Object o1, Object o2) { + Map.Entry e1 = (Map.Entry) o1; + Map.Entry e2 = (Map.Entry) o2; + int i1 = ((Integer) e1.getValue()).intValue(); + int i2 = ((Integer) e2.getValue()).intValue(); + if ((i1 < weights.size()) != (i2 < weights.size())) + return i1 - i2; + return ((Feature) e1.getKey()).compareTo(e2.getKey()); + } + }); + + int i, biggest = min; + for (i = 0; i < entries.length; ++i) { + // for (i = 0; i < weights.size(); ++i) + String key = + entries[i].getKey().toString() + + (((Integer) entries[i].getValue()).intValue() < weights.size() ? "" + : " (pruned)"); + biggest = Math.max(biggest, key.length()); + } + + if (biggest % 2 == 0) + biggest += 2; + else + ++biggest; + + for (i = 0; i < entries.length; ++i) { + // for (i = 0; i < weights.size(); ++i) + String key = + entries[i].getKey().toString() + + (((Integer) entries[i].getValue()).intValue() < weights.size() ? "" + : " (pruned)"); + out.print(key); + for (int j = 0; key.length() + j < biggest; ++j) + out.print(" "); + + int index = ((Integer) entries[i].getValue()).intValue(); + out.println(weights.get(index)); + } + } + + + /** + * Creates a string representation of this SparseWeightVector. This method merely + * returns the data computed by {@link #write(PrintStream)}. + * + * @return A textual representation of this vector. 
+ **/ + public String toString() { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + PrintStream out = new PrintStream(baos); + write(out); + return baos.toString(); + } + + + /** + * Creates a string representation of this SparseWeightVector. This method merely + * returns the data computed by {@link #write(PrintStream,Lexicon)}. + * + * @param lex The feature lexicon. + * @return A textual representation of this vector. + **/ + public String toString(Lexicon lex) { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + PrintStream out = new PrintStream(baos); + write(out, lex); + return baos.toString(); + } + + + /** + * Writes the weight vector's internal representation in binary form. + * + * @param out The output stream. + **/ + public void write(ExceptionlessOutputStream out) { + out.writeString(getClass().getName()); + weights.write(out); + } + + + /** + * Reads the binary representation of a weight vector of any type from the given stream. The + * stream is expected to first return a string containing the fully qualified class name of the + * weight vector. If the short value -1 appears instead, this method returns + * null. + * + *

+ * This method is appropriate for reading weight vectors as written by + * {@link #write(ExceptionlessOutputStream)}. + * + * @param in The input stream. + * @return The weight vector read from the stream. + **/ + public static SparseWeightVector readWeightVector(ExceptionlessInputStream in) { + String name = in.readString(); + if (name == null) + return null; + @SuppressWarnings("rawtypes") Class c = ClassUtils.getClass(name); - SparseWeightVector result = null; - - try { result = (SparseWeightVector) c.newInstance(); } - catch (Exception e) { - System.err.println("Error instantiating weight vector '" + name + "':"); - e.printStackTrace(); - in.close(); - System.exit(1); + SparseWeightVector result = null; + + try { + result = (SparseWeightVector) c.newInstance(); + } catch (Exception e) { + System.err.println("Error instantiating weight vector '" + name + "':"); + e.printStackTrace(); + in.close(); + System.exit(1); + } + + result.read(in); + return result; + } + + + /** + * Reads the representation of a weight vector with this object's run-time type from the given + * stream, overwriting the data in this object. + * + *

+ * This method is appropriate for reading weight vectors as written by + * {@link #write(ExceptionlessOutputStream)}. + * + * @param in The input stream. + **/ + public void read(ExceptionlessInputStream in) { + weights.read(in); + } + + + /** + * Returns a copy of this SparseWeightVector in which the {@link #weights} variable + * has been cloned deeply. + * + * @return A copy of this SparseWeightVector. + **/ + public Object clone() { + SparseWeightVector clone = null; + + try { + clone = (SparseWeightVector) super.clone(); + } catch (Exception e) { + System.err.println("Error cloning " + getClass().getName() + ":"); + e.printStackTrace(); + System.exit(1); + } + + clone.weights = (DVector) weights.clone(); + return clone; + } + + + /** + * Returns a new, empty weight vector with the same parameter settings as this one. + * + * @return An empty weight vector. + **/ + public SparseWeightVector emptyClone() { + return new SparseWeightVector(); + } + + /** + * delete all irrelevant feature weights. + * @param uselessfeatures useless features. + * @param numfeatures since this weight vec does not know how many features there are, it must be passed in + */ + public void pruneWeights(int[] uselessfeatures, int numfeatures) { + if (uselessfeatures.length == 0) + return; + + // create a new smaller weight vector for the pruned weights. + int oldsize = weights.size(); + if (oldsize > numfeatures) { + throw new RuntimeException("There was a weight vector with more weights("+oldsize+ + ") than the number of features("+numfeatures+")!"); } - - result.read(in); - return result; - } - - - /** - * Reads the representation of a weight vector with this object's run-time - * type from the given stream, overwriting the data in this object. - * - *

This method is appropriate for reading weight vectors as written by - * {@link #write(ExceptionlessOutputStream)}. - * - * @param in The input stream. - **/ - public void read(ExceptionlessInputStream in) { weights.read(in); } - - - /** - * Returns a copy of this SparseWeightVector in which the - * {@link #weights} variable has been cloned deeply. - * - * @return A copy of this SparseWeightVector. - **/ - public Object clone() { - SparseWeightVector clone = null; - - try { clone = (SparseWeightVector) super.clone(); } - catch (Exception e) { - System.err.println("Error cloning " + getClass().getName() + ":"); - e.printStackTrace(); - System.exit(1); + int newsize = numfeatures - uselessfeatures.length; + double [] newvec = new double[newsize]; + + // copy the weights from the old vector. + int uselessindex = 0; + int newvecindex = 0; + for (int oldindex = 0; oldindex < oldsize; oldindex++) { + if (uselessindex < uselessfeatures.length && uselessfeatures[uselessindex] == oldindex) { + // this is a useless feature, we will skip it. + uselessindex++; + } else { + newvec[newvecindex] = weights.get(oldindex); + newvecindex++; + } } - - clone.weights = (DVector) weights.clone(); - return clone; - } - - - /** - * Returns a new, empty weight vector with the same parameter settings as - * this one. - * - * @return An empty weight vector. - **/ - public SparseWeightVector emptyClone() { - return new SparseWeightVector(); + + // compress the array. 
+ if (newvecindex != newsize) { + double[] tmp = new double[newvecindex]; + System.arraycopy(newvec, 0, tmp, 0, newvecindex);; + newvec = tmp; + } + this.weights = new DVector(newvec); } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseWinnow.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseWinnow.java index fbc22978..dccfea9f 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseWinnow.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseWinnow.java @@ -1,501 +1,492 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.learn; import java.io.PrintStream; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessInputStream; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessOutputStream; - +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream; /** - * Simple sparse Winnow implementation. It is assumed that - * {@link Learner#labeler} is a single discrete classifier whose returned - * feature values are available through the - * {@link edu.illinois.cs.cogcomp.lbjava.classify.Classifier#allowableValues()} method. 
The second - * value returned from {@link edu.illinois.cs.cogcomp.lbjava.classify.Classifier#allowableValues()} is - * treated as "positive", and it is assumed there are exactly 2 allowable - * values. Assertions will produce error messages if these assumptions do - * not hold. - * - *

This algorithm's user-configurable parameters are stored in member - * fields of this class. They may be set via either a constructor that names - * each parameter explicitly or a constructor that takes an instance of - * {@link edu.illinois.cs.cogcomp.lbjava.learn.SparseWinnow.Parameters Parameters} as input. The - * documentation in each member field in this class indicates the default - * value of the associated parameter when using the former type of - * constructor. The documentation of the associated member field in the - * {@link edu.illinois.cs.cogcomp.lbjava.learn.SparseWinnow.Parameters Parameters} class indicates the - * default value of the parameter when using the latter type of constructor. - * - * @author Nick Rizzolo + * Simple sparse Winnow implementation. It is assumed that {@link Learner#labeler} is a single + * discrete classifier whose returned feature values are available through the + * {@link edu.illinois.cs.cogcomp.lbjava.classify.Classifier#allowableValues()} method. The second + * value returned from {@link edu.illinois.cs.cogcomp.lbjava.classify.Classifier#allowableValues()} + * is treated as "positive", and it is assumed there are exactly 2 allowable values. Assertions will + * produce error messages if these assumptions do not hold. + * + *

+ * This algorithm's user-configurable parameters are stored in member fields of this class. They may + * be set via either a constructor that names each parameter explicitly or a constructor that takes + * an instance of {@link edu.illinois.cs.cogcomp.lbjava.learn.SparseWinnow.Parameters Parameters} as + * input. The documentation in each member field in this class indicates the default value of the + * associated parameter when using the former type of constructor. The documentation of the + * associated member field in the + * {@link edu.illinois.cs.cogcomp.lbjava.learn.SparseWinnow.Parameters Parameters} class indicates + * the default value of the parameter when using the latter type of constructor. + * + * @author Nick Rizzolo **/ -public class SparseWinnow extends LinearThresholdUnit -{ - /** Default for {@link #learningRate}. */ - public static final double defaultLearningRate = 2; - /** Default for {@link LinearThresholdUnit#threshold}. */ - public static final double defaultThreshold = 16; - /** Default for {@link LinearThresholdUnit#initialWeight}. */ - public static final double defaultInitialWeight = 1; - - - /** - * The rate at which weights are demoted; default equal to 1 / - * {@link #learningRate}. - **/ - protected double beta; - - - /** - * {@link #learningRate}, {@link #beta}, and - * {@link LinearThresholdUnit#threshold} take default values, while the - * name of the classifier gets the empty string. - **/ - public SparseWinnow() { this(""); } - - /** - * Sets {@link #learningRate} to the specified value, {@link #beta} to 1 / - * {@link #learningRate}, and the {@link LinearThresholdUnit#threshold} - * takes the default, while the name of the classifier gets the empty - * string. - * - * @param a The desired value of the promotion parameter. 
- **/ - public SparseWinnow(double a) { this("", a); } - - /** - * Sets {@link #learningRate} and {@link #beta} to the specified values, - * and the {@link LinearThresholdUnit#threshold} takes the default, while - * the name of the classifier gets the empty string. - * - * @param a The desired value of the promotion parameter. - * @param b The desired value of the demotion parameter. - **/ - public SparseWinnow(double a, double b) { this("", a, b); } - - /** - * Sets {@link #learningRate}, {@link #beta}, and - * {@link LinearThresholdUnit#threshold} to the specified values, while the - * name of the classifier gets the empty string. - * - * @param a The desired value of the promotion parameter. - * @param b The desired value of the demotion parameter. - * @param t The desired threshold value. - **/ - public SparseWinnow(double a, double b, double t) { - this("", a, b, t); - } - - /** - * Use this constructor to fit a thick separator, where both the positive - * and negative sides of the hyperplane will be given the specified - * thickness, while the name of the classifier gets the empty string. - * - * @param a The desired value of the promotion parameter. - * @param b The desired value of the demotion parameter. - * @param t The desired threshold value. - * @param pt The desired positive thickness. - **/ - public SparseWinnow(double a, double b, double t, double pt) { - this("", a, b, t, pt); - } - - /** - * Use this constructor to fit a thick separator, where the positive and - * negative sides of the hyperplane will be given the specified separate - * thicknesses, while the name of the classifier gets the empty string. - * - * @param a The desired value of the promotion parameter. - * @param b The desired value of the demotion parameter. - * @param t The desired threshold value. - * @param pt The desired positive thickness. - * @param nt The desired negative thickness. 
- **/ - public SparseWinnow(double a, double b, double t, double pt, double nt) { - this("", a, b, t, pt, nt); - } - - /** - * Use this constructor to specify an alternative subclass of - * {@link SparseWeightVector}, while the name of the classifier gets the - * empty string. - * - * @param a The desired value of the promotion parameter. - * @param b The desired value of the demotion parameter. - * @param t The desired threshold value. - * @param pt The desired positive thickness. - * @param nt The desired negative thickness. - * @param v An empty sparse weight vector. - **/ - public SparseWinnow(double a, double b, double t, double pt, double nt, - SparseWeightVector v) { - this("", a, b, t, pt, nt, v); - } - - /** - * Initializing constructor. Sets all member variables to their associated - * settings in the {@link SparseWinnow.Parameters} object. - * - * @param p The settings of all parameters. - **/ - public SparseWinnow(Parameters p) { this("", p); } - - - /** - * {@link #learningRate}, {@link #beta}, and - * {@link LinearThresholdUnit#threshold} take default values. - * - * @param n The name of the classifier. - **/ - public SparseWinnow(String n) { this(n, defaultLearningRate); } - - /** - * Sets {@link #learningRate} to the specified value, {@link #beta} to 1 / - * {@link #learningRate}, and the {@link LinearThresholdUnit#threshold} - * takes the default. - * - * @param n The name of the classifier. - * @param a The desired value of the promotion parameter. - **/ - public SparseWinnow(String n, double a) { this(n, a, 1 / a); } - - /** - * Sets {@link #learningRate} and {@link #beta} to the specified values, - * and the {@link LinearThresholdUnit#threshold} takes the default. - * - * @param n The name of the classifier. - * @param a The desired value of the promotion parameter. - * @param b The desired value of the demotion parameter. 
- **/ - public SparseWinnow(String n, double a, double b) { - this(n, a, b, defaultThreshold); - } - - /** - * Sets {@link #learningRate}, {@link #beta}, and - * {@link LinearThresholdUnit#threshold} to the specified values. - * - * @param n The name of the classifier. - * @param a The desired value of the promotion parameter. - * @param b The desired value of the demotion parameter. - * @param t The desired threshold value. - **/ - public SparseWinnow(String n, double a, double b, double t) { - this(n, a, b, t, LinearThresholdUnit.defaultThickness); - } - - /** - * Use this constructor to fit a thick separator, where both the positive - * and negative sides of the hyperplane will be given the specified - * thickness. - * - * @param n The name of the classifier. - * @param a The desired value of the promotion parameter. - * @param b The desired value of the demotion parameter. - * @param t The desired threshold value. - * @param pt The desired positive thickness. - **/ - public SparseWinnow(String n, double a, double b, double t, double pt) { - this(n, a, b, t, pt, pt); - } - - /** - * Use this constructor to fit a thick separator, where the positive and - * negative sides of the hyperplane will be given the specified separate - * thicknesses. - * - * @param n The name of the classifier. - * @param a The desired value of the promotion parameter. - * @param b The desired value of the demotion parameter. - * @param t The desired threshold value. - * @param pt The desired positive thickness. - * @param nt The desired negative thickness. - **/ - public SparseWinnow(String n, double a, double b, double t, double pt, - double nt) { - this(n, a, b, t, pt, nt, - (SparseWeightVector) - LinearThresholdUnit.defaultWeightVector.clone()); - } - - /** - * Use this constructor to specify an alternative subclass of - * {@link SparseWeightVector}. - * - * @param n The name of the classifier. - * @param a The desired value of the promotion parameter. 
- * @param b The desired value of the demotion parameter. - * @param t The desired threshold value. - * @param pt The desired positive thickness. - * @param nt The desired negative thickness. - * @param v An empty sparse weight vector. - **/ - public SparseWinnow(String n, double a, double b, double t, double pt, - double nt, SparseWeightVector v) { - super(n); - Parameters p = new Parameters(); - p.learningRate = a; - p.beta = b; - p.threshold = t; - p.positiveThickness = pt; - p.negativeThickness = nt; - p.weightVector = v; - setParameters(p); - } - - /** - * Initializing constructor. Sets all member variables to their associated - * settings in the {@link SparseWinnow.Parameters} object. - * - * @param n The name of the classifier. - * @param p The settings of all parameters. - **/ - public SparseWinnow(String n, Parameters p) { - super(n); - setParameters(p); - } - - - /** - * Sets the values of parameters that control the behavior of this learning - * algorithm. - * - * @param p The parameters. - **/ - public void setParameters(Parameters p) { - super.setParameters(p); - beta = p.beta; - } - - - /** - * Retrieves the parameters that are set in this learner. - * - * @return An object containing all the values of the parameters that - * control the behavior of this learning algorithm. - **/ - public Learner.Parameters getParameters() { - Parameters p = - new Parameters((LinearThresholdUnit.Parameters) super.getParameters()); - p.beta = beta; - return p; - } - - - /** - * Returns the current value of the {@link #learningRate} variable. - * - * @return The value of the {@link #learningRate} variable. - **/ - public double getLearningRate() { return learningRate; } - - - /** - * Sets the {@link #learningRate} member variable to the specified value. - * - * @param t The new value for {@link #learningRate}. - **/ - public void setLearningRate(double t) { learningRate = t; } - - - /** - * Returns the current value of the {@link #beta} variable. 
- * - * @return The value of the {@link #beta} variable. - **/ - public double getBeta() { return beta; } - - - /** - * Sets the {@link #beta} member variable to the specified value. - * - * @param t The new value for {@link #beta}. - **/ - public void setBeta(double t) { beta = t; } - - - /** - * Returns the learning rate, which is {@link #learningRate} (alpha) if it - * is a positive example, and {@link #beta} if it is a negative example. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @param s The score. - * @param label The example label. - * @return The appropriate learning rate. - **/ - public double computeLearningRate(int[] exampleFeatures, - double[] exampleValues, double s, - boolean label) { - if (label) return learningRate; - else return beta; - } - - - /** - * Promotion is simply w_i *= learningRatex_i. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @param rate The learning rate at which the weights are - * updated. - **/ - public void promote(int[] exampleFeatures, double[] exampleValues, - double rate) { - update(exampleFeatures, exampleValues, rate); - } - - - /** - * Demotion is simply w_i *= betax_i. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @param rate The learning rate at which the weights are - * updated. - **/ - public void demote(int[] exampleFeatures, double[] exampleValues, - double rate) { - update(exampleFeatures, exampleValues, rate); - } - - - /** - * This method performs an update w_i *= basex_i, - * initalizing weights in the weight vector as needed. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of values. - * @param base As described above. 
- **/ - public void update(int[] exampleFeatures, double[] exampleValues, - double base) { - weightVector.scaledMultiply(exampleFeatures, exampleValues, base, - initialWeight); - bias *= base; - } - - - /** - * Writes the algorithm's internal representation as text. In the first - * line of output, the name of the classifier is printed, followed by - * {@link #learningRate}, {@link #beta}, - * {@link LinearThresholdUnit#initialWeight}, - * {@link LinearThresholdUnit#threshold}, - * {@link LinearThresholdUnit#positiveThickness}, - * {@link LinearThresholdUnit#negativeThickness}, and finally - * {@link LinearThresholdUnit#bias}. - * - * @param out The output stream. - **/ - public void write(PrintStream out) { - out.println(name + ": " + learningRate + ", " + beta + ", " - + initialWeight + ", " + threshold + ", " + positiveThickness - + ", " + negativeThickness + ", " + bias); - if (lexicon.size() == 0) weightVector.write(out); - else weightVector.write(out, lexicon); - } - - - /** - * Writes the learned function's internal representation in binary form. - * - * @param out The output stream. - **/ - public void write(ExceptionlessOutputStream out) { - super.write(out); - out.writeDouble(beta); - } - - - /** - * Reads the binary representation of a learner with this object's run-time - * type, overwriting any and all learned or manually specified parameters - * as well as the label lexicon but without modifying the feature lexicon. - * - * @param in The input stream. - **/ - public void read(ExceptionlessInputStream in) { - super.read(in); - beta = in.readDouble(); - } - - - /** - * Simply a container for all of {@link SparseWinnow}'s configurable - * parameters. Using instances of this class should make code more - * readable and constructors less complicated. - * - * @author Nick Rizzolo - **/ - public static class Parameters extends LinearThresholdUnit.Parameters - { +public class SparseWinnow extends LinearThresholdUnit { + /** Default for {@link #learningRate}. 
*/ + public static final double defaultLearningRate = 2; + /** Default for {@link LinearThresholdUnit#threshold}. */ + public static final double defaultThreshold = 16; + /** Default for {@link LinearThresholdUnit#initialWeight}. */ + public static final double defaultInitialWeight = 1; + + /** - * The rate at which weights are demoted; default equal to 1 - * / {@link #learningRate}. + * The rate at which weights are demoted; default equal to 1 / + * {@link #learningRate}. **/ - public double beta; + protected double beta; - /** Sets all the default values. */ - public Parameters() { - learningRate = defaultLearningRate; - beta = 1 / defaultLearningRate; - threshold = defaultThreshold; - initialWeight = defaultInitialWeight; + /** + * {@link #learningRate}, {@link #beta}, and {@link LinearThresholdUnit#threshold} take default + * values, while the name of the classifier gets the empty string. + **/ + public SparseWinnow() { + this(""); } + /** + * Sets {@link #learningRate} to the specified value, {@link #beta} to 1 / {@link #learningRate} + * , and the {@link LinearThresholdUnit#threshold} takes the default, while the name of the + * classifier gets the empty string. + * + * @param a The desired value of the promotion parameter. + **/ + public SparseWinnow(double a) { + this("", a); + } /** - * Sets the parameters from the parent's parameters object, giving - * defaults to all parameters declared in this object. + * Sets {@link #learningRate} and {@link #beta} to the specified values, and the + * {@link LinearThresholdUnit#threshold} takes the default, while the name of the classifier + * gets the empty string. + * + * @param a The desired value of the promotion parameter. + * @param b The desired value of the demotion parameter. 
**/ - public Parameters(LinearThresholdUnit.Parameters p) { - super(p); - beta = 1 / learningRate; + public SparseWinnow(double a, double b) { + this("", a, b); } + /** + * Sets {@link #learningRate}, {@link #beta}, and {@link LinearThresholdUnit#threshold} to the + * specified values, while the name of the classifier gets the empty string. + * + * @param a The desired value of the promotion parameter. + * @param b The desired value of the demotion parameter. + * @param t The desired threshold value. + **/ + public SparseWinnow(double a, double b, double t) { + this("", a, b, t); + } + + /** + * Use this constructor to fit a thick separator, where both the positive and negative sides of + * the hyperplane will be given the specified thickness, while the name of the classifier gets + * the empty string. + * + * @param a The desired value of the promotion parameter. + * @param b The desired value of the demotion parameter. + * @param t The desired threshold value. + * @param pt The desired positive thickness. + **/ + public SparseWinnow(double a, double b, double t, double pt) { + this("", a, b, t, pt); + } - /** Copy constructor. */ - public Parameters(Parameters p) { - super(p); - beta = p.beta; + /** + * Use this constructor to fit a thick separator, where the positive and negative sides of the + * hyperplane will be given the specified separate thicknesses, while the name of the classifier + * gets the empty string. + * + * @param a The desired value of the promotion parameter. + * @param b The desired value of the demotion parameter. + * @param t The desired threshold value. + * @param pt The desired positive thickness. + * @param nt The desired negative thickness. + **/ + public SparseWinnow(double a, double b, double t, double pt, double nt) { + this("", a, b, t, pt, nt); } + /** + * Use this constructor to specify an alternative subclass of {@link SparseWeightVector}, while + * the name of the classifier gets the empty string. 
+ * + * @param a The desired value of the promotion parameter. + * @param b The desired value of the demotion parameter. + * @param t The desired threshold value. + * @param pt The desired positive thickness. + * @param nt The desired negative thickness. + * @param v An empty sparse weight vector. + **/ + public SparseWinnow(double a, double b, double t, double pt, double nt, SparseWeightVector v) { + this("", a, b, t, pt, nt, v); + } /** - * Calls the appropriate Learner.setParameters(Parameters) - * method for this Parameters object. - * - * @param l The learner whose parameters will be set. + * Initializing constructor. Sets all member variables to their associated settings in the + * {@link SparseWinnow.Parameters} object. + * + * @param p The settings of all parameters. **/ - public void setParameters(Learner l) { - ((SparseWinnow) l).setParameters(this); + public SparseWinnow(Parameters p) { + this("", p); } /** - * Creates a string representation of these parameters in which only - * those parameters that differ from their default values are mentioned. + * {@link #learningRate}, {@link #beta}, and {@link LinearThresholdUnit#threshold} take default + * values. + * + * @param n The name of the classifier. **/ - public String nonDefaultString() { - String result = super.nonDefaultString(); + public SparseWinnow(String n) { + this(n, defaultLearningRate); + } - if (beta != 1 / LinearThresholdUnit.defaultLearningRate) - result += ", beta = " + beta; + /** + * Sets {@link #learningRate} to the specified value, {@link #beta} to 1 / {@link #learningRate} + * , and the {@link LinearThresholdUnit#threshold} takes the default. + * + * @param n The name of the classifier. + * @param a The desired value of the promotion parameter. 
+ **/ + public SparseWinnow(String n, double a) { + this(n, a, 1 / a); + } - if (result.startsWith(", ")) result = result.substring(2); - return result; + /** + * Sets {@link #learningRate} and {@link #beta} to the specified values, and the + * {@link LinearThresholdUnit#threshold} takes the default. + * + * @param n The name of the classifier. + * @param a The desired value of the promotion parameter. + * @param b The desired value of the demotion parameter. + **/ + public SparseWinnow(String n, double a, double b) { + this(n, a, b, defaultThreshold); } - } -} + /** + * Sets {@link #learningRate}, {@link #beta}, and {@link LinearThresholdUnit#threshold} to the + * specified values. + * + * @param n The name of the classifier. + * @param a The desired value of the promotion parameter. + * @param b The desired value of the demotion parameter. + * @param t The desired threshold value. + **/ + public SparseWinnow(String n, double a, double b, double t) { + this(n, a, b, t, LinearThresholdUnit.defaultThickness); + } + + /** + * Use this constructor to fit a thick separator, where both the positive and negative sides of + * the hyperplane will be given the specified thickness. + * + * @param n The name of the classifier. + * @param a The desired value of the promotion parameter. + * @param b The desired value of the demotion parameter. + * @param t The desired threshold value. + * @param pt The desired positive thickness. + **/ + public SparseWinnow(String n, double a, double b, double t, double pt) { + this(n, a, b, t, pt, pt); + } + + /** + * Use this constructor to fit a thick separator, where the positive and negative sides of the + * hyperplane will be given the specified separate thicknesses. + * + * @param n The name of the classifier. + * @param a The desired value of the promotion parameter. + * @param b The desired value of the demotion parameter. + * @param t The desired threshold value. + * @param pt The desired positive thickness. 
+ * @param nt The desired negative thickness. + **/ + public SparseWinnow(String n, double a, double b, double t, double pt, double nt) { + this(n, a, b, t, pt, nt, (SparseWeightVector) LinearThresholdUnit.defaultWeightVector + .clone()); + } + + /** + * Use this constructor to specify an alternative subclass of {@link SparseWeightVector}. + * + * @param n The name of the classifier. + * @param a The desired value of the promotion parameter. + * @param b The desired value of the demotion parameter. + * @param t The desired threshold value. + * @param pt The desired positive thickness. + * @param nt The desired negative thickness. + * @param v An empty sparse weight vector. + **/ + public SparseWinnow(String n, double a, double b, double t, double pt, double nt, + SparseWeightVector v) { + super(n); + Parameters p = new Parameters(); + p.learningRate = a; + p.beta = b; + p.threshold = t; + p.positiveThickness = pt; + p.negativeThickness = nt; + p.weightVector = v; + setParameters(p); + } + + /** + * Initializing constructor. Sets all member variables to their associated settings in the + * {@link SparseWinnow.Parameters} object. + * + * @param n The name of the classifier. + * @param p The settings of all parameters. + **/ + public SparseWinnow(String n, Parameters p) { + super(n); + setParameters(p); + } + + + /** + * Sets the values of parameters that control the behavior of this learning algorithm. + * + * @param p The parameters. + **/ + public void setParameters(Parameters p) { + super.setParameters(p); + beta = p.beta; + } + + + /** + * Retrieves the parameters that are set in this learner. + * + * @return An object containing all the values of the parameters that control the behavior of + * this learning algorithm. 
+ **/ + public Learner.Parameters getParameters() { + Parameters p = new Parameters((LinearThresholdUnit.Parameters) super.getParameters()); + p.beta = beta; + return p; + } + + + /** + * Returns the current value of the {@link #learningRate} variable. + * + * @return The value of the {@link #learningRate} variable. + **/ + public double getLearningRate() { + return learningRate; + } + + + /** + * Sets the {@link #learningRate} member variable to the specified value. + * + * @param t The new value for {@link #learningRate}. + **/ + public void setLearningRate(double t) { + learningRate = t; + } + + + /** + * Returns the current value of the {@link #beta} variable. + * + * @return The value of the {@link #beta} variable. + **/ + public double getBeta() { + return beta; + } + + + /** + * Sets the {@link #beta} member variable to the specified value. + * + * @param t The new value for {@link #beta}. + **/ + public void setBeta(double t) { + beta = t; + } + + + /** + * Returns the learning rate, which is {@link #learningRate} (alpha) if it is a positive + * example, and {@link #beta} if it is a negative example. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @param s The score. + * @param label The example label. + * @return The appropriate learning rate. + **/ + public double computeLearningRate(int[] exampleFeatures, double[] exampleValues, double s, + boolean label) { + if (label) + return learningRate; + else + return beta; + } + + + /** + * Promotion is simply w_i *= learningRatex_i. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @param rate The learning rate at which the weights are updated. + **/ + public void promote(int[] exampleFeatures, double[] exampleValues, double rate) { + update(exampleFeatures, exampleValues, rate); + } + + + /** + * Demotion is simply w_i *= betax_i. 
+ * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @param rate The learning rate at which the weights are updated. + **/ + public void demote(int[] exampleFeatures, double[] exampleValues, double rate) { + update(exampleFeatures, exampleValues, rate); + } + + + /** + * This method performs an update w_i *= basex_i, initalizing weights in + * the weight vector as needed. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of values. + * @param base As described above. + **/ + public void update(int[] exampleFeatures, double[] exampleValues, double base) { + weightVector.scaledMultiply(exampleFeatures, exampleValues, base, initialWeight); + bias *= base; + } + + + /** + * Writes the algorithm's internal representation as text. In the first line of output, the name + * of the classifier is printed, followed by {@link #learningRate}, {@link #beta}, + * {@link LinearThresholdUnit#initialWeight}, {@link LinearThresholdUnit#threshold}, + * {@link LinearThresholdUnit#positiveThickness}, {@link LinearThresholdUnit#negativeThickness}, + * and finally {@link LinearThresholdUnit#bias}. + * + * @param out The output stream. + **/ + public void write(PrintStream out) { + out.println(name + ": " + learningRate + ", " + beta + ", " + initialWeight + ", " + + threshold + ", " + positiveThickness + ", " + negativeThickness + ", " + bias); + if (lexicon.size() == 0) + weightVector.write(out); + else + weightVector.write(out, lexicon); + } + + + /** + * Writes the learned function's internal representation in binary form. + * + * @param out The output stream. 
+ **/ + public void write(ExceptionlessOutputStream out) { + super.write(out); + out.writeDouble(beta); + } + + + /** + * Reads the binary representation of a learner with this object's run-time type, overwriting + * any and all learned or manually specified parameters as well as the label lexicon but without + * modifying the feature lexicon. + * + * @param in The input stream. + **/ + public void read(ExceptionlessInputStream in) { + super.read(in); + beta = in.readDouble(); + } + + + /** + * Simply a container for all of {@link SparseWinnow}'s configurable parameters. Using instances + * of this class should make code more readable and constructors less complicated. + * + * @author Nick Rizzolo + **/ + public static class Parameters extends LinearThresholdUnit.Parameters { + /** + * The rate at which weights are demoted; default equal to 1 + * / {@link #learningRate}. + **/ + public double beta; + + + /** Sets all the default values. */ + public Parameters() { + learningRate = defaultLearningRate; + beta = 1 / defaultLearningRate; + threshold = defaultThreshold; + initialWeight = defaultInitialWeight; + } + + + /** + * Sets the parameters from the parent's parameters object, giving defaults to all + * parameters declared in this object. + **/ + public Parameters(LinearThresholdUnit.Parameters p) { + super(p); + beta = 1 / learningRate; + } + + + /** Copy constructor. */ + public Parameters(Parameters p) { + super(p); + beta = p.beta; + } + + + /** + * Calls the appropriate Learner.setParameters(Parameters) method for this + * Parameters object. + * + * @param l The learner whose parameters will be set. + **/ + public void setParameters(Learner l) { + ((SparseWinnow) l).setParameters(this); + } + + + /** + * Creates a string representation of these parameters in which only those parameters that + * differ from their default values are mentioned. 
+ **/ + public String nonDefaultString() { + String result = super.nonDefaultString(); + + if (beta != 1 / LinearThresholdUnit.defaultLearningRate) + result += ", beta = " + beta; + + if (result.startsWith(", ")) + result = result.substring(2); + return result; + } + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/StochasticGradientDescent.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/StochasticGradientDescent.java index 500a6254..3c41b858 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/StochasticGradientDescent.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/StochasticGradientDescent.java @@ -1,386 +1,382 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.learn; import java.io.PrintStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream; import edu.illinois.cs.cogcomp.lbjava.classify.Feature; import edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector; import edu.illinois.cs.cogcomp.lbjava.classify.RealPrimitiveStringFeature; import edu.illinois.cs.cogcomp.lbjava.classify.ScoreSet; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessInputStream; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessOutputStream; - /** - * Gradient descent is a batch learning algorithm for function approximation - * in which the learner tries to follow the gradient of the error function to - * the solution of minimal error. This implementation is a stochastic - * approximation to gradient descent in which the approximated function is - * assumed to have linear form. - * - *

This algorithm's user-configurable parameters are stored in member - * fields of this class. They may be set via either a constructor that names - * each parameter explicitly or a constructor that takes an instance of - * {@link edu.illinois.cs.cogcomp.lbjava.learn.StochasticGradientDescent.Parameters Parameters} as - * input. The documentation in each member field in this class indicates the - * default value of the associated parameter when using the former type of - * constructor. The documentation of the associated member field in the - * {@link edu.illinois.cs.cogcomp.lbjava.learn.StochasticGradientDescent.Parameters Parameters} class - * indicates the default value of the parameter when using the latter type of - * constructor. - * - * @author Nick Rizzolo + * Gradient descent is a batch learning algorithm for function approximation in which the learner + * tries to follow the gradient of the error function to the solution of minimal error. This + * implementation is a stochastic approximation to gradient descent in which the approximated + * function is assumed to have linear form. + * + *

+ * This algorithm's user-configurable parameters are stored in member fields of this class. They may + * be set via either a constructor that names each parameter explicitly or a constructor that takes + * an instance of {@link edu.illinois.cs.cogcomp.lbjava.learn.StochasticGradientDescent.Parameters + * Parameters} as input. The documentation in each member field in this class indicates the default + * value of the associated parameter when using the former type of constructor. The documentation of + * the associated member field in the + * {@link edu.illinois.cs.cogcomp.lbjava.learn.StochasticGradientDescent.Parameters Parameters} + * class indicates the default value of the parameter when using the latter type of constructor. + * + * @author Nick Rizzolo **/ -public class StochasticGradientDescent extends Learner -{ - /** Default value for {@link #learningRate}. */ - public static final double defaultLearningRate = 0.1; - /** Default for {@link #weightVector}. */ - public static final SparseWeightVector defaultWeightVector = - new SparseWeightVector(); - - - /** The hypothesis vector; default {@link #defaultWeightVector}. */ - protected SparseWeightVector weightVector; - /** - * The bias is stored here rather than as an element of the weight vector. - **/ - protected double bias; - /** - * The rate at which weights are updated; default - * {@link #defaultLearningRate}. - **/ - protected double learningRate; - - - /** - * The learning rate takes the default value, while the name of the - * classifier gets the empty string. - **/ - public StochasticGradientDescent() { this(""); } - - /** - * Sets the learning rate to the specified value, while the name of the - * classifier gets the empty string. - * - * @param r The desired learning rate value. - **/ - public StochasticGradientDescent(double r) { this("", r); } - - /** - * Initializing constructor. 
Sets all member variables to their associated - * settings in the {@link StochasticGradientDescent.Parameters} object. - * - * @param p The settings of all parameters. - **/ - public StochasticGradientDescent(Parameters p) { this("", p); } - - /** - * The learning rate takes the default value. - * - * @param n The name of the classifier. - **/ - public StochasticGradientDescent(String n) { this(n, defaultLearningRate); } - - /** - * Use this constructor to specify an alternative subclass of - * {@link SparseWeightVector}. - * - * @param n The name of the classifier. - * @param r The desired learning rate value. - **/ - public StochasticGradientDescent(String n, double r) { - super(n); - Parameters p = new Parameters(); - p.learningRate = r; - setParameters(p); - } - - /** - * Initializing constructor. Sets all member variables to their associated - * settings in the {@link StochasticGradientDescent.Parameters} object. - * - * @param n The name of the classifier. - * @param p The settings of all parameters. - **/ - public StochasticGradientDescent(String n, Parameters p) { - super(n); - setParameters(p); - } - - - /** - * Sets the values of parameters that control the behavior of this learning - * algorithm. - * - * @param p The parameters. - **/ - public void setParameters(Parameters p) { - weightVector = p.weightVector; - learningRate = p.learningRate; - } - - - /** - * Retrieves the parameters that are set in this learner. - * - * @return An object containing all the values of the parameters that - * control the behavior of this learning algorithm. - **/ - public Learner.Parameters getParameters() { - Parameters p = new Parameters(super.getParameters()); - p.weightVector = weightVector.emptyClone(); - p.learningRate = learningRate; - return p; - } - - - /** - * Returns the current value of the {@link #learningRate} variable. - * - * @return The value of the {@link #learningRate} variable. 
- **/ - public double getLearningRate() { return learningRate; } - - - /** - * Sets the {@link #learningRate} member variable to the specified - * value. - * - * @param t The new value for {@link #learningRate}. - **/ - public void setLearningRate(double t) { learningRate = t; } - - - /** Resets the weight vector to all zeros. */ - public void forget() { - super.forget(); - weightVector = weightVector.emptyClone(); - bias = 0; - } - - - /** - * Returns a string describing the output feature type of this classifier. - * - * @return "real" - **/ - public String getOutputType() { return "real"; } - - - /** - * Trains the learning algorithm given an object as an example. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @param exampleLabels The example's label(s). - * @param labelValues The labels' values. - **/ - public void learn(int[] exampleFeatures, double[] exampleValues, - int[] exampleLabels, double[] labelValues) { - assert exampleLabels.length == 1 - : "Example must have a single label."; - - double labelValue = labelValues[0]; - double multiplier = - learningRate - * (labelValue - weightVector.dot(exampleFeatures, exampleValues) - - bias); - weightVector.scaledAdd(exampleFeatures, exampleValues, multiplier); - bias += multiplier; - } - - - /** - * Since this algorithm returns a real feature, it does not return scores. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @return null - **/ - public ScoreSet scores(int[] exampleFeatures, double[] exampleValues) { - return null; - } - - - /** - * Returns the classification of the given example as a single feature - * instead of a {@link FeatureVector}. - * - * @param f The features array. - * @param v The values array. - * @return The classification of the example as a feature. 
- **/ - public Feature featureValue(int[] f, double[] v) { - return - new RealPrimitiveStringFeature(containingPackage, name, "", - realValue(f, v)); - } - - - /** - * Simply computes the dot product of the weight vector and the example - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @return The computed real value. - **/ - public double realValue(int[] exampleFeatures, double[] exampleValues) { - return weightVector.dot(exampleFeatures, exampleValues) + bias; - } - - - /** - * Simply computes the dot product of the weight vector and the feature - * vector extracted from the example object. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @return The computed feature (in a vector). - **/ - public FeatureVector classify(int[] exampleFeatures, double[] exampleValues) - { - return new FeatureVector(featureValue(exampleFeatures, exampleValues)); - } - - - /** - * Writes the algorithm's internal representation as text. In the first - * line of output, the name of the classifier is printed, followed by - * {@link #learningRate} and {@link #bias}. - * - * @param out The output stream. - **/ - public void write(PrintStream out) { - out.println(name + ": " + learningRate + ", " + bias); - if (lexicon.size() == 0) weightVector.write(out); - else weightVector.write(out, lexicon); - } - - - /** - * Writes the learned function's internal representation in binary form. - * - * @param out The output stream. 
- **/ - public void write(ExceptionlessOutputStream out) { - super.write(out); - out.writeDouble(learningRate); - out.writeDouble(bias); - weightVector.write(out); - } - - - /** - * Reads the binary representation of a learner with this object's run-time - * type, overwriting any and all learned or manually specified parameters - * as well as the label lexicon but without modifying the feature lexicon. - * - * @param in The input stream. - **/ - public void read(ExceptionlessInputStream in) { - super.read(in); - learningRate = in.readDouble(); - bias = in.readDouble(); - weightVector = SparseWeightVector.readWeightVector(in); - } - - - /** Returns a deep clone of this learning algorithm. */ - public Object clone() { - StochasticGradientDescent clone = null; - - try { clone = (StochasticGradientDescent) super.clone(); } - catch (Exception e) { - System.err.println("Error cloning StochasticGradientDescent: " + e); - System.exit(1); +public class StochasticGradientDescent extends Learner { + /** Default value for {@link #learningRate}. */ + public static final double defaultLearningRate = 0.1; + /** Default for {@link #weightVector}. */ + public static final SparseWeightVector defaultWeightVector = new SparseWeightVector(); + + + /** The hypothesis vector; default {@link #defaultWeightVector}. */ + protected SparseWeightVector weightVector; + /** + * The bias is stored here rather than as an element of the weight vector. + **/ + protected double bias; + /** + * The rate at which weights are updated; default {@link #defaultLearningRate}. + **/ + protected double learningRate; + + + /** + * The learning rate takes the default value, while the name of the classifier gets the empty + * string. + **/ + public StochasticGradientDescent() { + this(""); } - clone.weightVector = (SparseWeightVector) weightVector.clone(); - return clone; - } + /** + * Sets the learning rate to the specified value, while the name of the classifier gets the + * empty string. 
+ * + * @param r The desired learning rate value. + **/ + public StochasticGradientDescent(double r) { + this("", r); + } + /** + * Initializing constructor. Sets all member variables to their associated settings in the + * {@link StochasticGradientDescent.Parameters} object. + * + * @param p The settings of all parameters. + **/ + public StochasticGradientDescent(Parameters p) { + this("", p); + } - /** - * Simply a container for all of {@link StochasticGradientDescent}'s - * configurable parameters. Using instances of this class should make code - * more readable and constructors less complicated. - * - * @author Nick Rizzolo - **/ - public static class Parameters extends Learner.Parameters - { /** - * The hypothesis vector; default - * {@link StochasticGradientDescent#defaultWeightVector}. + * The learning rate takes the default value. + * + * @param n The name of the classifier. **/ - public SparseWeightVector weightVector; + public StochasticGradientDescent(String n) { + this(n, defaultLearningRate); + } + /** - * The rate at which weights are updated; default - * {@link #defaultLearningRate}. + * Use this constructor to specify an alternative subclass of {@link SparseWeightVector}. + * + * @param n The name of the classifier. + * @param r The desired learning rate value. **/ - public double learningRate; + public StochasticGradientDescent(String n, double r) { + super(n); + Parameters p = new Parameters(); + p.learningRate = r; + setParameters(p); + } + + /** + * Initializing constructor. Sets all member variables to their associated settings in the + * {@link StochasticGradientDescent.Parameters} object. + * + * @param n The name of the classifier. + * @param p The settings of all parameters. + **/ + public StochasticGradientDescent(String n, Parameters p) { + super(n); + setParameters(p); + } - /** Sets all the default values. 
*/ - public Parameters() { - weightVector = (SparseWeightVector) defaultWeightVector.clone(); - learningRate = defaultLearningRate; + /** + * Sets the values of parameters that control the behavior of this learning algorithm. + * + * @param p The parameters. + **/ + public void setParameters(Parameters p) { + weightVector = p.weightVector; + learningRate = p.learningRate; } /** - * Sets the parameters from the parent's parameters object, giving - * defaults to all parameters declared in this object. + * Retrieves the parameters that are set in this learner. + * + * @return An object containing all the values of the parameters that control the behavior of + * this learning algorithm. **/ - public Parameters(Learner.Parameters p) { - super(p); - weightVector = (SparseWeightVector) defaultWeightVector.clone(); - learningRate = defaultLearningRate; + public Learner.Parameters getParameters() { + Parameters p = new Parameters(super.getParameters()); + p.weightVector = weightVector; + p.learningRate = learningRate; + return p; } - /** Copy constructor. */ - public Parameters(Parameters p) { - super(p); - weightVector = p.weightVector; - learningRate = p.learningRate; + /** + * Returns the current value of the {@link #learningRate} variable. + * + * @return The value of the {@link #learningRate} variable. + **/ + public double getLearningRate() { + return learningRate; } /** - * Calls the appropriate Learner.setParameters(Parameters) - * method for this Parameters object. - * - * @param l The learner whose parameters will be set. + * Sets the {@link #learningRate} member variable to the specified value. + * + * @param t The new value for {@link #learningRate}. **/ - public void setParameters(Learner l) { - ((StochasticGradientDescent) l).setParameters(this); + public void setLearningRate(double t) { + learningRate = t; + } + + + /** Resets the weight vector to all zeros. 
*/ + public void forget() { + super.forget(); + weightVector = weightVector.emptyClone(); + bias = 0; } /** - * Creates a string representation of these parameters in which only - * those parameters that differ from their default values are mentioned. + * Returns a string describing the output feature type of this classifier. + * + * @return "real" **/ - public String nonDefaultString() { - String result = super.nonDefaultString(); + public String getOutputType() { + return "real"; + } - if (learningRate != StochasticGradientDescent.defaultLearningRate) - result += ", learningRate = " + learningRate; - if (result.startsWith(", ")) result = result.substring(2); - return result; + /** + * Trains the learning algorithm given an object as an example. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @param exampleLabels The example's label(s). + * @param labelValues The labels' values. + **/ + public void learn(int[] exampleFeatures, double[] exampleValues, int[] exampleLabels, + double[] labelValues) { + assert exampleLabels.length == 1 : "Example must have a single label."; + + double labelValue = labelValues[0]; + double multiplier = + learningRate + * (labelValue - weightVector.dot(exampleFeatures, exampleValues) - bias); + weightVector.scaledAdd(exampleFeatures, exampleValues, multiplier); + bias += multiplier; + } + + + /** + * Since this algorithm returns a real feature, it does not return scores. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @return null + **/ + public ScoreSet scores(int[] exampleFeatures, double[] exampleValues) { + return null; + } + + + /** + * Returns the classification of the given example as a single feature instead of a + * {@link FeatureVector}. + * + * @param f The features array. + * @param v The values array. 
+ * @return The classification of the example as a feature. + **/ + public Feature featureValue(int[] f, double[] v) { + return new RealPrimitiveStringFeature(containingPackage, name, "", realValue(f, v)); + } + + + /** + * Simply computes the dot product of the weight vector and the example + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @return The computed real value. + **/ + public double realValue(int[] exampleFeatures, double[] exampleValues) { + return weightVector.dot(exampleFeatures, exampleValues) + bias; + } + + + /** + * Simply computes the dot product of the weight vector and the feature vector extracted from + * the example object. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @return The computed feature (in a vector). + **/ + public FeatureVector classify(int[] exampleFeatures, double[] exampleValues) { + return new FeatureVector(featureValue(exampleFeatures, exampleValues)); + } + + + /** + * Writes the algorithm's internal representation as text. In the first line of output, the name + * of the classifier is printed, followed by {@link #learningRate} and {@link #bias}. + * + * @param out The output stream. + **/ + public void write(PrintStream out) { + out.println(name + ": " + learningRate + ", " + bias); + if (lexicon.size() == 0) + weightVector.write(out); + else + weightVector.write(out, lexicon); } - } -} + + /** + * Writes the learned function's internal representation in binary form. + * + * @param out The output stream. 
+ **/ + public void write(ExceptionlessOutputStream out) { + super.write(out); + out.writeDouble(learningRate); + out.writeDouble(bias); + weightVector.write(out); + } + + + /** + * Reads the binary representation of a learner with this object's run-time type, overwriting + * any and all learned or manually specified parameters as well as the label lexicon but without + * modifying the feature lexicon. + * + * @param in The input stream. + **/ + public void read(ExceptionlessInputStream in) { + super.read(in); + learningRate = in.readDouble(); + bias = in.readDouble(); + weightVector = SparseWeightVector.readWeightVector(in); + } + + + /** Returns a deep clone of this learning algorithm. */ + public Object clone() { + StochasticGradientDescent clone = null; + + try { + clone = (StochasticGradientDescent) super.clone(); + } catch (Exception e) { + System.err.println("Error cloning StochasticGradientDescent: " + e); + System.exit(1); + } + + clone.weightVector = (SparseWeightVector) weightVector.clone(); + return clone; + } + + + /** + * Simply a container for all of {@link StochasticGradientDescent}'s configurable parameters. + * Using instances of this class should make code more readable and constructors less + * complicated. + * + * @author Nick Rizzolo + **/ + public static class Parameters extends Learner.Parameters { + /** + * The hypothesis vector; default {@link StochasticGradientDescent#defaultWeightVector}. + **/ + public SparseWeightVector weightVector; + /** + * The rate at which weights are updated; default {@link #defaultLearningRate}. + **/ + public double learningRate; + + + /** Sets all the default values. */ + public Parameters() { + weightVector = (SparseWeightVector) defaultWeightVector.clone(); + learningRate = defaultLearningRate; + } + + + /** + * Sets the parameters from the parent's parameters object, giving defaults to all + * parameters declared in this object. 
+ **/ + public Parameters(Learner.Parameters p) { + super(p); + weightVector = (SparseWeightVector) defaultWeightVector.clone(); + learningRate = defaultLearningRate; + } + + + /** Copy constructor. */ + public Parameters(Parameters p) { + super(p); + weightVector = p.weightVector; + learningRate = p.learningRate; + } + + + /** + * Calls the appropriate Learner.setParameters(Parameters) method for this + * Parameters object. + * + * @param l The learner whose parameters will be set. + **/ + public void setParameters(Learner l) { + ((StochasticGradientDescent) l).setParameters(this); + } + + + /** + * Creates a string representation of these parameters in which only those parameters that + * differ from their default values are mentioned. + **/ + public String nonDefaultString() { + String result = super.nonDefaultString(); + + if (learningRate != StochasticGradientDescent.defaultLearningRate) + result += ", learningRate = " + learningRate; + + if (result.startsWith(", ")) + result = result.substring(2); + return result; + } + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SupportVectorMachine.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SupportVectorMachine.java index 4c0e3228..2c930b57 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SupportVectorMachine.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SupportVectorMachine.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.learn; @@ -18,1190 +15,1290 @@ import java.util.Iterator; import de.bwaldvogel.liblinear.FeatureNode; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.IVector; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.OVector; import edu.illinois.cs.cogcomp.lbjava.classify.Classifier; import edu.illinois.cs.cogcomp.lbjava.classify.DiscretePrimitiveStringFeature; import edu.illinois.cs.cogcomp.lbjava.classify.Feature; import edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector; import edu.illinois.cs.cogcomp.lbjava.classify.ScoreSet; +import edu.illinois.cs.cogcomp.lbjava.learn.featurepruning.SupportVectorMachineOptimizer; import edu.illinois.cs.cogcomp.lbjava.util.ByteString; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessInputStream; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessOutputStream; import edu.illinois.cs.cogcomp.lbjava.util.FVector; -import edu.illinois.cs.cogcomp.lbjava.util.IVector; -import edu.illinois.cs.cogcomp.lbjava.util.OVector; /** - * Wrapper class for the - * - * liblinear library which supports support vector machine - * classification. That library must be downloaded separately and placed on - * your CLASSPATH for this class to work correctly. This class - * can perform both binary classification and multi-class classification. It - * is assumed that {@link Learner#labeler} is a single discrete classifier - * that produces the same feature for every example object. Assertions will - * produce error messages if this assumption does not hold. - * - *

When calling this algorithm in a with clause inside an - * LBJava source file, there is no need to specify the rounds - * clause. At runtime, calling {@link #learn(Object)} merely performs - * feature extraction and stores an indexed representation of the example - * vector in memory. The learning algorithm executes when - * {@link #doneLearning()} is called. This call also frees the memory in - * which the example vectors are stored. Thus, subsequent calls to - * {@link #learn(Object)} and {@link #doneLearning()} will discard the - * previous hypothesis and learn an entirely new one. - * - *

liblinear performs binary classification (as opposed to - * 1-vs.-all) whenever the solver type is not MCSVM_CS - * and exactly two class labels are observed in the training data. - * - *

This algorithm's user-configurable parameters are stored in member - * fields of this class. They may be set via either a constructor that names - * each parameter explicitly or a constructor that takes an instance of - * {@link edu.illinois.cs.cogcomp.lbjava.learn.SupportVectorMachine.Parameters Parameters} as input. - * The documentation in each member field in this class indicates the default - * value of the associated parameter when using the former type of - * constructor. The documentation of the associated member field in the - * {@link edu.illinois.cs.cogcomp.lbjava.learn.SupportVectorMachine.Parameters Parameters} class - * indicates the default value of the parameter when using the latter type of - * constructor. - * - * @author Michael Paul + * Wrapper class for the + * liblinear library which supports support vector machine classification. That + * library must be downloaded separately and placed on your CLASSPATH for this class to + * work correctly. This class can perform both binary classification and multi-class classification. + * It is assumed that {@link Learner#labeler} is a single discrete classifier that produces the same + * feature for every example object. Assertions will produce error messages if this assumption does + * not hold. + * + *

+ * When calling this algorithm in a with clause inside an LBJava source file, there is + * no need to specify the rounds clause. At runtime, calling {@link #learn(Object)} + * merely performs feature extraction and stores an indexed representation of the example vector in + * memory. The learning algorithm executes when {@link #doneLearning()} is called. This call also + * frees the memory in which the example vectors are stored. Thus, subsequent calls to + * {@link #learn(Object)} and {@link #doneLearning()} will discard the previous hypothesis and learn + * an entirely new one. + * + *

+ * liblinear performs binary classification (as opposed to 1-vs.-all) whenever the + * solver type is not MCSVM_CS and exactly two class labels are observed in the + * training data. + * + *

+ * This algorithm's user-configurable parameters are stored in member fields of this class. They may + * be set via either a constructor that names each parameter explicitly or a constructor that takes + * an instance of {@link edu.illinois.cs.cogcomp.lbjava.learn.SupportVectorMachine.Parameters + * Parameters} as input. The documentation in each member field in this class indicates the default + * value of the associated parameter when using the former type of constructor. The documentation of + * the associated member field in the + * {@link edu.illinois.cs.cogcomp.lbjava.learn.SupportVectorMachine.Parameters Parameters} class + * indicates the default value of the parameter when using the latter type of constructor. + * + * @author Michael Paul **/ -public class SupportVectorMachine extends Learner -{ - /** Default for {@link #solverType}. */ - public static final String defaultSolverType = "L2LOSS_SVM"; - /** Default for {@link #C}. */ - public static final double defaultC = 1.0; - /** Default for {@link #epsilon}. */ - public static final double defaultEpsilon = 0.1; - /** Default for {@link #bias}. */ - public static final double defaultBias = 1.0; - - /** - * Keeps track of whether the doneLearning() warning message has been - * printed. - **/ - private boolean warningPrinted; - - /** - * The type of solver; default {@link #defaultSolverType} unless there - * are more than 2 labels observed in the training data, in which case - * "MCSVM_CS" becomes the default. Note that if you are doing multi-class - * classification, you can still override the "MCSVM_CS" default to use - * another solver type. - * - *

Possible values: - *

    - *
  • "L2_LR" = L2-regularized logistic regression; - *
  • "L2LOSS_SVM_DUAL" = L2-loss support vector machines - * (dual); - *
  • "L2LOSS_SVM" = L2-loss support vector machines - * (primal); - *
  • "L1LOSS_SVM_DUAL" = L1-loss support vector machines - * (dual); - *
  • "MCSVM_CS" = multi-class support vector machines by - * Crammer and Singer - *
- **/ - protected String solverType; - - /** - * The cost parameter C; default {@link #defaultC} - **/ - protected double C; - - /** - * The tolerance of termination criterion; - * default {@link #defaultEpsilon}. - **/ - protected double epsilon; - - /** - * If {@link #bias} >= 0, an instance vector x becomes [x; bias]; - * otherwise, if {@link #bias} < 0, no bias term is added. - **/ - protected double bias; - /** The number of bias features; there are either 0 or 1 of them. */ - protected int biasFeatures; - - /** Controls if liblinear-related messages are output */ - protected boolean displayLL = false; - - /** The number of unique class labels seen during training. */ - protected int numClasses; - /** The number of unique features seen during training. */ - protected int numFeatures; - /** Whether or not this learner's labeler produces conjunctive features. */ - protected boolean conjunctiveLabels; - - /** - * An array of weights representing the weight vector learned - * after training with liblinear. - **/ - protected double[] weights; - - /** The array of example labels */ - protected IVector allLabels; - /** The array of example vectors. */ - protected OVector allExamples; - - /** The label producing classifier's allowable values. */ - protected String[] allowableValues; - - /** - * Created during {@link #doneLearning()} in case the training examples - * observed by {@link #learn(int[],double[],int[],double[])} are only a - * subset of a larger, pre-extracted set. If this is not the case, it - * will simply be a duplicate reference to {@link #labelLexicon}. - **/ - protected Lexicon newLabelLexicon; - - - /** - * Default constructor. C, epsilon, the bias, and the solver type - * take the default values while the name of the classifier - * gets the empty string. - **/ - public SupportVectorMachine() { this(""); } - - /** - * Initializing constructor. The name of the classifier gets - * the empty string. - * - * @param c The desired C value. 
- **/ - public SupportVectorMachine(double c) { this(c, defaultEpsilon); } - - /** - * Initializing constructor. The name of the classifier gets - * the empty string. - * - * @param c The desired C value. - * @param e The desired epsilon value. - **/ - public SupportVectorMachine(double c, double e) { this(c, e, defaultBias); } - - /** - * Initializing constructor. The name of the classifier gets - * the empty string. - * - * @param c The desired C value. - * @param e The desired epsilon value. - * @param b The desired bias. - **/ - public SupportVectorMachine(double c, double e, double b) { - this(c, e, b, ""); - } - - /** - * Initializing constructor. The name of the classifier gets - * the empty string. - * - * @param c The desired C value. - * @param e The desired epsilon value. - * @param b The desired bias. - * @param s The solver type. - **/ - public SupportVectorMachine(double c, double e, double b, String s) { - this("", c, e, b, s, false); - } - - /** - * Initializing constructor. The name of the classifier gets - * the empty string. - * - * @param c The desired C value. - * @param e The desired epsilon value. - * @param b The desired bias. - * @param s The solver type. - * @param d Toggles if the liblinear-related output should be - * displayed. - **/ - public SupportVectorMachine(double c, double e, double b, String s, - boolean d) { - this("", c, e, b, s, d); - } - - /** - * Initializing constructor. C, epsilon, the bias, and the solver type - * take the default values. - * - * @param n The name of the classifier. - **/ - public SupportVectorMachine(String n) { - this(n, new Parameters()); - } - - /** - * Initializing constructor. - * - * @param n The name of the classifier. - * @param c The desired C value. - **/ - public SupportVectorMachine(String n, double c) { - this(n, c, defaultEpsilon); - } - - /** - * Initializing constructor. - * - * @param n The name of the classifier. - * @param c The desired C value. - * @param e The desired epsilon value. 
- **/ - public SupportVectorMachine(String n, double c, double e) { - this(n, c, e, defaultBias); - } - - /** - * Initializing constructor. - * - * @param n The name of the classifier. - * @param c The desired C value. - * @param e The desired epsilon value. - * @param b The desired bias. - **/ - public SupportVectorMachine(String n, double c, double e, double b) { - this(n, c, e, b, ""); - } - - /** - * Initializing constructor. - * - * @param n The name of the classifier. - * @param c The desired C value. - * @param e The desired epsilon value. - * @param b The desired bias. - * @param s The solver type. - **/ - public SupportVectorMachine(String n, double c, double e, double b, - String s) { - this(n, c, e, b, s, false); - } - - /** - * Initializing constructor. - * - * @param n The name of the classifier. - * @param c The desired C value. - * @param e The desired epsilon value. - * @param b The desired bias. - * @param s The solver type. - * @param d Toggles if the liblinear-related output should be - * displayed. - **/ - public SupportVectorMachine(String n, double c, double e, double b, - String s, boolean d) { - super(n); - newLabelLexicon = labelLexicon; - Parameters p = new Parameters(); - p.C = c; - p.epsilon = e; - p.bias = b; - p.solverType = s; - p.displayLL = d; - allowableValues = new String[0]; - setParameters(p); - } - - /** - * Initializing constructor. Sets all member variables to their associated - * settings in the {@link SupportVectorMachine.Parameters} object. - * The name of the classifier gets the empty string. - * - * @param p The settings of all parameters. - **/ - public SupportVectorMachine(Parameters p) { this("", p); } - - /** - * Initializing constructor. Sets all member variables to their associated - * settings in the {@link SupportVectorMachine.Parameters} object. - * - * @param n The name of the classifier. - * @param p The settings of all parameters. 
- **/ - public SupportVectorMachine(String n, Parameters p) { - super(n); - newLabelLexicon = labelLexicon; - allowableValues = new String[0]; - setParameters(p); - } - - public double[] getWeights() { - return weights; - } - - public int getNumClasses() { return numClasses; } - - /** - * Sets the values of parameters that control the behavior of this learning - * algorithm. - * - * @param p The parameters. - **/ - public void setParameters(Parameters p) { - C = p.C; - epsilon = p.epsilon; - bias = p.bias; - biasFeatures = (bias >= 0) ? 1 : 0; - solverType = p.solverType; - displayLL = p.displayLL; - } - - - /** - * Retrieves the parameters that are set in this learner. - * - * @return An object containing all the values of the parameters that - * control the behavior of this learning algorithm. - **/ - public Learner.Parameters getParameters() { - Parameters p = new Parameters(super.getParameters()); - p.C = C; - p.epsilon = epsilon; - p.bias = bias; - p.solverType = solverType; - p.displayLL = displayLL; - return p; - } - - - /** - * Sets the labels list. - * - * @param l A new label producing classifier. - **/ - public void setLabeler(Classifier l) { - super.setLabeler(l); - allowableValues = l == null ? null : l.allowableValues(); - if (allowableValues == null) allowableValues = new String[0]; - } - - - /** - * Returns the array of allowable values that a feature returned by this - * classifier may take. - * - * @return The allowable values of this learner's labeler, or an array of - * length zero if the labeler has not yet been established or does - * not specify allowable values. - **/ - public String[] allowableValues() { return allowableValues; } - - - /** - * Initializes the example vector arrays. - * - * @param ne The number of examples to train. - * @param nf The number of features. 
- **/ - public void initialize(int ne, int nf) { - allLabels = new IVector(ne); - allExamples = new OVector(ne); - } - - - /** - * This method adds the example's features and labels to the arrays storing - * the training examples. These examples will eventually be passed to - * liblinear.Linear.train() for training. - * - *

Note that learning via the liblinear library does not - * actually take place until {@link #doneLearning()} is called. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @param exampleLabels The example's array of label indices. - * @param labelValues The example's array of label values. - **/ - public void learn(final int[] exampleFeatures, double[] exampleValues, - int[] exampleLabels, double[] labelValues) { - // Expand the size of the example arrays if they are full. - - if (allLabels == null || allLabels.size() == 0) { - if (allLabels == null) { - allLabels = new IVector(); - allExamples = new OVector(); - } - weights = null; - warningPrinted = false; +public class SupportVectorMachine extends Learner { + /** + * + */ + private static final long serialVersionUID = 1L; + /** Default for {@link #solverType}. */ + public static final String defaultSolverType = "L2LOSS_SVM"; + /** Default for {@link #C}. */ + public static final double defaultC = 1.0; + /** Default for {@link #epsilon}. */ + public static final double defaultEpsilon = 0.1; + /** Default for {@link #bias}. */ + public static final double defaultBias = 1.0; + /** any weight less than this is considered irrelevant. This is for prunning. */ + public static final double defaultFeaturePruningThreshold = 0.000001; + + /** feature pruning threshold caps magnitude of useful features. */ + public double featurePruningThreshold; + + /** + * Keeps track of whether the doneLearning() warning message has been printed. + **/ + private boolean warningPrinted; + + /** + * The type of solver; default {@link #defaultSolverType} unless there are more than 2 labels + * observed in the training data, in which case "MCSVM_CS" becomes the default. Note that if you + * are doing multi-class classification, you can still override the "MCSVM_CS" default to use + * another solver type. + * + *

+ * Possible values: + *

    + *
  • "L2_LR" = L2-regularized logistic regression; + *
  • "L2LOSS_SVM_DUAL" = L2-loss support vector machines (dual); + *
  • "L2LOSS_SVM" = L2-loss support vector machines (primal); + *
  • "L1LOSS_SVM_DUAL" = L1-loss support vector machines (dual); + *
  • "MCSVM_CS" = multi-class support vector machines by Crammer and Singer + *
+ **/ + protected String solverType; + + /** + * The cost parameter C; default {@link #defaultC} + **/ + protected double C; + + /** + * The tolerance of termination criterion; default {@link #defaultEpsilon}. + **/ + protected double epsilon; + + /** + * If {@link #bias} >= 0, an instance vector x becomes [x; bias]; otherwise, if {@link #bias} + * < 0, no bias term is added. + **/ + protected double bias; + /** The number of bias features; there are either 0 or 1 of them. */ + protected int biasFeatures; + + /** + * @return the biasFeatures + */ + public int getBiasFeatures() { + return biasFeatures; } - // Add the label to the examples array. - assert exampleLabels.length == 1 - : "Example must have a single label."; + /** Controls if liblinear-related messages are output */ + protected boolean displayLL = false; + + /** The number of unique class labels seen during training. */ + protected int numClasses; + /** The number of unique features seen during training. */ + protected int numFeatures; + /** Whether or not this learner's labeler produces conjunctive features. */ + protected boolean conjunctiveLabels; + + /** + * An array of weights representing the weight vector learned after training with + * liblinear. + **/ + protected double[] weights; + + /** The array of example labels */ + protected IVector allLabels; + /** The array of example vectors. */ + protected OVector allExamples; - allLabels.add(exampleLabels[0]); + /** The label producing classifier's allowable values. */ + protected String[] allowableValues; - // Add the example to the examples list. Space for the bias feature is - // allocated, but it isn't instantiated because we don't know its index - // yet. + /** + * Created during {@link #doneLearning()} in case the training examples observed by + * {@link #learn(int[],double[],int[],double[])} are only a subset of a larger, pre-extracted + * set. If this is not the case, it will simply be a duplicate reference to + * {@link #labelLexicon}. 
+ **/ + protected Lexicon newLabelLexicon; - int F = exampleFeatures.length; - FeatureNode[] liblinearExample = new FeatureNode[F + biasFeatures]; - allExamples.add(liblinearExample); - for (int i = 0; i < F; ++i) { - int featureIndex = exampleFeatures[i] + 1; - numFeatures = Math.max(numFeatures, featureIndex); - liblinearExample[i] = new FeatureNode(featureIndex, exampleValues[i]); + /** + * Default constructor. C, epsilon, the bias, and the solver type take the default values while + * the name of the classifier gets the empty string. + **/ + public SupportVectorMachine() { + this(""); } - Arrays.sort(liblinearExample, 0, F, - new Comparator() { - public int compare(Object o1, Object o2) { - FeatureNode f1 = (FeatureNode) o1; - FeatureNode f2 = (FeatureNode) o2; - return f1.index - f2.index; - } - }); - - // Check for duplicate features. If there are any, add up all strengths - // corresponding to a given feature index and put them in a single - // feature. - - int previousI = -1; - int realCount = F; - - for (int i = 0; i < F; i++) { - int f = liblinearExample[i].index; - - if (previousI != -1 && f == liblinearExample[previousI].index) { - realCount--; - liblinearExample[previousI] = - new FeatureNode(f, - liblinearExample[previousI].value - + liblinearExample[i].value); - liblinearExample[i] = null; - } - else previousI = i; + /** + * Initializing constructor. The name of the classifier gets the empty string. + * + * @param c The desired C value. + **/ + public SupportVectorMachine(double c) { + this(c, defaultEpsilon); } - // If duplicate features were observed, rebuild the example array without - // the duplicates. + /** + * Initializing constructor. The name of the classifier gets the empty string. + * + * @param c The desired C value. + * @param e The desired epsilon value. 
+ **/ + public SupportVectorMachine(double c, double e) { + this(c, e, defaultBias); + } - if (realCount < F) { - FeatureNode[] temp = new FeatureNode[realCount + biasFeatures]; - int k = 0; - for (int i = 0; i < F; i++) - if (liblinearExample[i] != null) - temp[k++] = liblinearExample[i]; - allExamples.set(allExamples.size() - 1, temp); + /** + * Initializing constructor. The name of the classifier gets the empty string. + * + * @param c The desired C value. + * @param e The desired epsilon value. + * @param b The desired bias. + **/ + public SupportVectorMachine(double c, double e, double b) { + this(c, e, b, ""); } - } - - - /** - * This method converts the arrays of examples stored in this class - * into input for the liblinear training method. - * The learned weight vector is stored in {@link #weights}. - **/ - public void doneLearning() { - super.doneLearning(); - - // Create the new lexicon of labels given the examples seen during - // training. This is necessary when doing cross-validation, where the - // supplied lexicon is based on all of the examples and so the lexicon - // might not match up with the subset of examples seen during the current - // fold. - - // liblinear expects that it sees y labels in increasing order, which - // might not be the case during some folds of cross-validation, where the - // label lexicon is not created during the particular fold. liblinear - // also only allocates space in the weight vector for the labels it - // encounters during training -- if the label lexicon contains more labels - // than what are observed during training, then liblinear's label - // representation will not match up with our label lexicon. Thus, - // creating a new lexicon here solves both of these problems. 
- - if (labelLexicon.size() > 2 || solverType.equals("MCSVM_CS")) { - newLabelLexicon = new Lexicon(); - boolean same = true; - for (int i = 0; i < allExamples.size(); i++) { - Feature label = labelLexicon.lookupKey(allLabels.get(i)); - int newLabel = newLabelLexicon.lookup(label, true); - same &= newLabel == allLabels.get(i); - allLabels.set(i, newLabel); - } - - if (same && newLabelLexicon.size() == labelLexicon.size()) - newLabelLexicon = labelLexicon; - else if (newLabelLexicon.size() > labelLexicon.size()) { - System.err.println( - "LBJava ERROR: SupportVectorMachine: new label lexicon is too big!"); - new Exception().printStackTrace(); - System.exit(1); - } - else { - int N = newLabelLexicon.size(); - predictions = new FVector(N); - for (int i = 0; i < N; ++i) - createPrediction(newLabelLexicon, i); - } + + /** + * Initializing constructor. The name of the classifier gets the empty string. + * + * @param c The desired C value. + * @param e The desired epsilon value. + * @param b The desired bias. + * @param s The solver type. + **/ + public SupportVectorMachine(double c, double e, double b, String s) { + this("", c, e, b, s, false); } - if (displayLL) - System.out.println(" Training via liblinear at " + new Date()); - if (allLabels == null) { - if (displayLL) { - System.out.println(" No training examples; no action taken."); - System.out.println(" Finished training at " + new Date()); - } - return; + /** + * Initializing constructor. The name of the classifier gets the empty string. + * + * @param c The desired C value. + * @param e The desired epsilon value. + * @param b The desired bias. + * @param s The solver type. + * @param d Toggles if the liblinear-related output should be displayed. 
+ **/ + public SupportVectorMachine(double c, double e, double b, String s, boolean d) { + this("", c, e, b, s, d); } - if (solverType.length() == 0) solverType = defaultSolverType; - numClasses = newLabelLexicon.size(); - for (int i = 0; i < numClasses && !conjunctiveLabels; ++i) - conjunctiveLabels = newLabelLexicon.lookupKey(i).isConjunctive(); - - int l = allExamples.size(); // number of examples - int n = numFeatures + biasFeatures; // number of features - - if (biasFeatures == 1) - for (int i = 0; i < l; i++) { - FeatureNode[] ex = (FeatureNode[]) allExamples.get(i); - ex[ex.length - 1] = new FeatureNode(n, bias); - } - - // In the binary case, liblinear will consider the integer label it sees - // on the first example to represent "positive". We need the string in - // allowableValues[1] to mean "positive". - boolean fixLabels = - !solverType.equals("MCSVM_CS") && numClasses == 2 - && allowableValues.length == 2; - - if (l > 0 && fixLabels) { - Feature f = - new DiscretePrimitiveStringFeature( - labeler.containingPackage, labeler.name, "", allowableValues[1], - (short) 1, (short) 2); - int p = newLabelLexicon.lookup(f); - int positive = 0; - - while (positive < l && allLabels.get(positive) == 1 - p) ++positive; - if (positive > 0 && positive < l) { - allLabels.set(0, p); - allLabels.set(positive, 1 - p); - allExamples.set(0, allExamples.set(positive, allExamples.get(0))); - - newLabelLexicon = new Lexicon(); - newLabelLexicon.lookup(f, true); - newLabelLexicon.lookup( - new DiscretePrimitiveStringFeature( - labeler.containingPackage, labeler.name, "", - allowableValues[0], (short) 0, (short) 2), - true); - predictions = new FVector(2); - createPrediction(newLabelLexicon, 0); - createPrediction(newLabelLexicon, 1); - } + /** + * Initializing constructor. The name of the classifier gets the empty string. + * + * @param c The desired C value. + * @param e The desired epsilon value. + * @param b The desired bias. + * @param s The solver type. 
+ * @param d Toggles if the liblinear-related output should be displayed. + * @param fpt the feature pruning threshold. + **/ + public SupportVectorMachine(double c, double e, double b, String s, boolean d, double fpt) { + this("", c, e, b, s, d, fpt); } - de.bwaldvogel.liblinear.Problem prob = new de.bwaldvogel.liblinear.Problem(); - prob.bias = bias; - prob.l = l; - prob.n = n; - prob.x = new FeatureNode[l][]; - for (int i = 0; i < l; ++i) - prob.x[i] = (FeatureNode[]) allExamples.get(i); - prob.y = allLabels.toArrayDouble(); - - de.bwaldvogel.liblinear.Parameter params = - new de.bwaldvogel.liblinear.Parameter(Parameters.getSolverType(solverType), C, - epsilon); - - de.bwaldvogel.liblinear.Model trainedModel = de.bwaldvogel.liblinear.Linear.train(prob, params); - weights = trainedModel.getFeatureWeights(); - allExamples = null; - allLabels = null; - - if (displayLL) - System.out.println(" Finished training at " + new Date()); - } - - - /** - * Writes the algorithm's internal representation as text. In the first - * line of output, the name of the classifier is printed, followed by - * {@link #C}, {@link #epsilon}, - * {@link #bias}, and finally {@link #solverType}. - * - * @param out The output stream. - **/ - public void write(PrintStream out) { - demandLexicon(); - out.println(name + ": " + C + ", " + epsilon + ", " - + bias + ", " + solverType); - - if (weights != null) { - out.println(); - - out.println("Feature weights:"); - out.println("========================================="); - - int F = numFeatures; - if (bias >= 0) F++; - - // only display one weight vector if binary solver - if (!solverType.equals("MCSVM_CS") && numClasses <= 2) numClasses = 1; - - for (int c = 0; c < numClasses; c++) { - if (numClasses > 1) { - String className = newLabelLexicon.lookupKey(c).getStringValue(); - out.println("Class = " + className); - } + /** + * Initializing constructor. C, epsilon, the bias, and the solver type take the default values. 
+ * + * @param n The name of the classifier. + **/ + public SupportVectorMachine(String n) { + this(n, new Parameters()); + } - for (int f = 0; f < F; f++) { - if (f < numFeatures) out.print(lexicon.lookupKey(f)); - else out.print("[bias]"); - double weight = weights[f*numClasses + c]; - out.println("\t\t\t" + weight); - } - } - out.println("========================================="); + /** + * Initializing constructor. + * + * @param n The name of the classifier. + * @param c The desired C value. + **/ + public SupportVectorMachine(String n, double c) { + this(n, c, defaultEpsilon); } - out.println("End of SupportVectorMachine"); - } - - - /** - * Writes the learned function's internal representation in binary form. - * - * @param out The output stream. - **/ - public void write(ExceptionlessOutputStream out) { - super.write(out); - out.writeString(solverType); - out.writeDouble(C); - out.writeDouble(epsilon); - out.writeDouble(bias); - out.writeBoolean(displayLL); - out.writeInt(numClasses); - out.writeInt(numFeatures); - out.writeBoolean(conjunctiveLabels); - - out.writeInt(allowableValues.length); - for (int i = 0; i < allowableValues.length; ++i) - out.writeString(allowableValues[i]); - - if (newLabelLexicon == labelLexicon) out.writeBoolean(false); - else { - out.writeBoolean(true); - newLabelLexicon.write(out); + /** + * Initializing constructor. + * + * @param n The name of the classifier. + * @param c The desired C value. + * @param e The desired epsilon value. + **/ + public SupportVectorMachine(String n, double c, double e) { + this(n, c, e, defaultBias); } - if (weights == null) out.writeInt(0); - else { - out.writeInt(weights.length); - for (int i = 0; i < weights.length; ++i) - out.writeDouble(weights[i]); + /** + * Initializing constructor. + * + * @param n The name of the classifier. + * @param c The desired C value. + * @param e The desired epsilon value. + * @param b The desired bias. 
+ **/ + public SupportVectorMachine(String n, double c, double e, double b) { + this(n, c, e, b, ""); } - } - - - /** - * Reads the binary representation of a learner with this object's run-time - * type, overwriting any and all learned or manually specified parameters - * as well as the label lexicon but without modifying the feature lexicon. - * - * @param in The input stream. - **/ - public void read(ExceptionlessInputStream in) { - super.read(in); - solverType = in.readString(); - C = in.readDouble(); - epsilon = in.readDouble(); - bias = in.readDouble(); - biasFeatures = (bias >= 0) ? 1 : 0; - displayLL = in.readBoolean(); - numClasses = in.readInt(); - numFeatures = in.readInt(); - conjunctiveLabels = in.readBoolean(); - - int N = in.readInt(); - allowableValues = new String[N]; - for (int i = 0; i < N; ++i) - allowableValues[i] = in.readString(); - - if (in.readBoolean()) newLabelLexicon = Lexicon.readLexicon(in); - else newLabelLexicon = labelLexicon; - - N = in.readInt(); - weights = new double[N]; - for (int i = 0; i < N; ++i) - weights[i] = in.readDouble(); - } - - - /** - * Returns the classification of the given example as a single feature - * instead of a {@link FeatureVector}. - * - * @param f The features array. - * @param v The values array. - * @return The classification of the example as a feature. - **/ - public Feature featureValue(int[] f, double[] v) { - if (weights == null && allLabels != null && !warningPrinted) { - System.err.println( - "LBJava WARNING: SupportVectorMachine's doneLearning() method should " - + "be called before attempting to make predictions."); - warningPrinted = true; + + /** + * Initializing constructor. + * + * @param n The name of the classifier. + * @param c The desired C value. + * @param e The desired epsilon value. + * @param b The desired bias. + * @param s The solver type. 
+ **/ + public SupportVectorMachine(String n, double c, double e, double b, String s) { + this(n, c, e, b, s, false); } - if (weights == null) return null; - double bestScore = Double.NEGATIVE_INFINITY; - int prediction = 0; + /** + * Initializing constructor. + * + * @param n The name of the classifier. + * @param c The desired C value. + * @param e The desired epsilon value. + * @param b The desired bias. + * @param s The solver type. + * @param d Toggles if the liblinear-related output should be displayed. + **/ + public SupportVectorMachine(String n, double c, double e, double b, String s, boolean d) { + this(n, c, e, b, s, d, SupportVectorMachine.defaultFeaturePruningThreshold); + } + + /** + * Initializing constructor. + * + * @param n The name of the classifier. + * @param c The desired C value. + * @param e The desired epsilon value. + * @param b The desired bias. + * @param s The solver type. + * @param d Toggles if the liblinear-related output should be displayed. + **/ + public SupportVectorMachine(String n, double c, double e, double b, String s, boolean d, double fpt) { + super(n); + newLabelLexicon = labelLexicon; + Parameters p = new Parameters(); + p.C = c; + p.epsilon = e; + p.bias = b; + p.solverType = s; + p.displayLL = d; + p.featurePruningThreshold = fpt; + allowableValues = new String[0]; + setParameters(p); + } - if (numClasses > 2 || solverType.equals("MCSVM_CS")) { - for (int c = 0; c < numClasses; c++) { - double s = score(f, v, c); - if (s > bestScore) { - bestScore = s; - prediction = c; - } - } + /** + * Initializing constructor. Sets all member variables to their associated settings in the + * {@link SupportVectorMachine.Parameters} object. The name of the classifier gets the empty + * string. + * + * @param p The settings of all parameters. + **/ + public SupportVectorMachine(Parameters p) { + this("", p); } - else { - double s = score(f, v, 0); - if (s < 0) prediction = 1; + + /** + * Initializing constructor. 
Sets all member variables to their associated settings in the + * {@link SupportVectorMachine.Parameters} object. + * + * @param n The name of the classifier. + * @param p The settings of all parameters. + **/ + public SupportVectorMachine(String n, Parameters p) { + super(n); + newLabelLexicon = labelLexicon; + allowableValues = new String[0]; + setParameters(p); } - return predictions.get(prediction); - } - - - /** - * The evaluate method returns the class label which yields the - * highest score for this example. - * - * @param exampleFeatures The example's array of feature indices - * @param exampleValues The example's array of feature values - * @return The computed feature (in a vector). - **/ - public String discreteValue(int[] exampleFeatures, double[] exampleValues) { - return featureValue(exampleFeatures, exampleValues).getStringValue(); - } - - - /** - * Evaluates the given example using liblinear's prediction - * method. Returns a {@link DiscretePrimitiveStringFeature} set to the - * label value. - * - * @param exampleFeatures The example's array of feature indices - * @param exampleValues The example's array of feature values - **/ - public FeatureVector classify(int[] exampleFeatures, double[] exampleValues) - { - return new FeatureVector(featureValue(exampleFeatures, exampleValues)); - } - - - /** - * An SVM returns a classification score for each class. The score for - * each class is the result of {@link #score(int[],double[],int)}. - * - * @param exampleFeatures The example's array of feature indices - * @param exampleValues The example's array of feature values - * @return The set of scores as described above. 
- **/ - public ScoreSet scores(int[] exampleFeatures, double[] exampleValues) { - // score the example and save the results - ScoreSet result = new ScoreSet(); - - if (weights == null) { - if (allLabels != null && !warningPrinted) { - System.err.println( - "LBJava WARNING: SupportVectorMachine's doneLearning() method should " - + "be called before attempting to make predictions."); - warningPrinted = true; - } - - return result; + public double[] getWeights() { + return weights; } - if (numClasses > 2 || solverType.equals("MCSVM_CS")) { - for (int c = 0; c < numClasses; c++) { - String className = newLabelLexicon.lookupKey(c).getStringValue(); - double s = score(exampleFeatures, exampleValues, c); - result.put(className, s); - } + /** + * @return the numFeatures + */ + public int getNumFeatures() { + return numFeatures; } - else { - String className = newLabelLexicon.lookupKey(0).getStringValue(); - double s = score(exampleFeatures, exampleValues, 0); - result.put(className, s); - className = newLabelLexicon.lookupKey(1).getStringValue(); - result.put(className, -s); + + public int getNumClasses() { + return numClasses; + } + + /** + * @return the solverType + */ + public String getSolverType() { + return solverType; } - return result; - } - - - /** - * Computes the dot product of the specified example vector - * and the weight vector associated with the supplied class. - * If no label is specified, it defaults to a label of 0 - * (that is, a positive example), but this should only be done - * in binary classification. - * - * @param example The example object. - * @return The score for the given example vector. - **/ - public double score(Object example) { - assert !solverType.equals("MCSVM_CS") && numClasses == 2 - : "Cannot call score(Object) in a multi-class classifier."; - - return score(example, 0); - } - - - /** - * Computes the dot product of the specified example vector - * and the weight vector associated with the supplied class. 
- * - * @param example The example object. - * @param label The class label - * @return The score for the given example vector. - **/ - public double score(Object example, int label) { - Object[] exampleArray = getExampleArray(example, false); - return score((int[]) exampleArray[0], (double[]) exampleArray[1], label); - } - - - /** - * Computes the dot product of the specified feature vector - * and the weight vector associated with the supplied class. - * - * @param exampleFeatures The example's array of feature indices - * @param exampleValues The example's array of feature values - * @param label The class label - * @return The score for the given example vector. - **/ - public double score(int[] exampleFeatures, double[] exampleValues, - int label) { - assert exampleFeatures.length == exampleValues.length - : "Array mismatch; improperly formatted input."; - - double s = 0; - - if (weights == null) { - if (allLabels != null && !warningPrinted) { - System.err.println( - "LBJava WARNING: SupportVectorMachine's doneLearning() method should " - + "be called before attempting to make predictions."); - warningPrinted = true; - } - - return 0; + /** + * Sets the values of parameters that control the behavior of this learning algorithm. + * + * @param p The parameters. + **/ + public void setParameters(Parameters p) { + C = p.C; + epsilon = p.epsilon; + bias = p.bias; + biasFeatures = (bias >= 0) ? 1 : 0; + solverType = p.solverType; + displayLL = p.displayLL; + featurePruningThreshold = p.featurePruningThreshold; } - // If binary classification, no special offset for the weight vector. - // Negate the final score if it is a negative example - boolean negate = false; - if (!(numClasses > 2 || solverType.equals("MCSVM_CS"))) { - if (label == 1) negate = true; - numClasses = 1; - label = 0; + /** + * Retrieves the parameters that are set in this learner. 
+ * + * @return An object containing all the values of the parameters that control the behavior of + * this learning algorithm. + **/ + public Learner.Parameters getParameters() { + Parameters p = new Parameters(super.getParameters()); + p.C = C; + p.epsilon = epsilon; + p.bias = bias; + p.solverType = solverType; + p.displayLL = displayLL; + p.featurePruningThreshold = this.featurePruningThreshold; + return p; } - for (int i = 0; i < exampleFeatures.length; i++) { - int f = exampleFeatures[i]; - if (f < numFeatures) { - double value = exampleValues[i]; - double weight = weights[f*numClasses + label]; + /** + * Sets the labels list. + * + * @param l A new label producing classifier. + **/ + public void setLabeler(Classifier l) { + super.setLabeler(l); + allowableValues = l == null ? null : l.allowableValues(); + if (allowableValues == null) + allowableValues = new String[0]; + } + - s += weight*value; - } + /** + * Returns the array of allowable values that a feature returned by this classifier may take. + * + * @return The allowable values of this learner's labeler, or an array of length zero if the + * labeler has not yet been established or does not specify allowable values. + **/ + public String[] allowableValues() { + return allowableValues; } - if (bias >= 0) s += bias * weights[numFeatures*numClasses + label]; - - return negate ? -s : s; - } - - - /** - * Using this method, the winner-take-all competition is narrowed to - * involve only those labels contained in the specified list. The list - * must contain only {@link ByteString}s. - * - * @param example The example object. - * @param candidates A list of the only labels the example may take. - * @return The prediction as a feature or null if the network - * did not contain any of the specified labels. 
- **/ - public Feature valueOf(Object example, Collection candidates) { - Object[] array = getExampleArray(example, false); - return valueOf((int[]) array[0], (double[]) array[1], candidates); - } - - - /** - * Using this method, the winner-take-all competition is narrowed to - * involve only those labels contained in the specified list. The list - * must contain only Strings. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @param candidates A list of the only labels the example may take. - * @return The prediction as a feature or null if the network - * did not contain any of the specified labels. - **/ - public Feature valueOf(int[] exampleFeatures, double[] exampleValues, - Collection candidates) { - if (weights == null && allLabels != null && !warningPrinted) { - System.err.println( - "LBJava WARNING: SupportVectorMachine's doneLearning() method should " - + "be called before attempting to make predictions."); - warningPrinted = true; + + /** + * Initializes the example vector arrays. + * + * @param ne The number of examples to train. + * @param nf The number of features. + **/ + public void initialize(int ne, int nf) { + allLabels = new IVector(ne); + allExamples = new OVector(ne); } - if (weights == null) return null; - double bestScore = Double.NEGATIVE_INFINITY; - int bestValue = -1; - Iterator I = candidates.iterator(); + /** + * This method adds the example's features and labels to the arrays storing the training + * examples. These examples will eventually be passed to liblinear.Linear.train() + * for training. + * + *

+ * Note that learning via the liblinear library does not actually take place until + * {@link #doneLearning()} is called. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @param exampleLabels The example's array of label indices. + * @param labelValues The example's array of label values. + **/ + @SuppressWarnings({ "unchecked", "rawtypes" }) + public void learn(final int[] exampleFeatures, double[] exampleValues, int[] exampleLabels, + double[] labelValues) { + // Expand the size of the example arrays if they are full. + + if (allLabels == null || allLabels.size() == 0) { + if (allLabels == null) { + allLabels = new IVector(); + allExamples = new OVector(); + } + weights = null; + warningPrinted = false; + } + + // Add the label to the examples array. + + assert exampleLabels.length == 1 : "Example must have a single label."; - if (I.hasNext()) { - if (conjunctiveLabels) - return conjunctiveValueOf(exampleFeatures, exampleValues, I); + allLabels.add(exampleLabels[0]); - while (I.hasNext()) { - double score = Double.NEGATIVE_INFINITY; - String label = (String) I.next(); + // Add the example to the examples list. Space for the bias feature is + // allocated, but it isn't instantiated because we don't know its index + // yet. 
- Feature f = - new DiscretePrimitiveStringFeature( - labeler.containingPackage, labeler.name, "", label, - valueIndexOf(label), (short) allowableValues.length); + int F = exampleFeatures.length; + FeatureNode[] liblinearExample = new FeatureNode[F + biasFeatures]; + allExamples.add(liblinearExample); - int key = -1; - if (newLabelLexicon.contains(f)) { - key = newLabelLexicon.lookup(f); - score = score(exampleFeatures, exampleValues, key); + for (int i = 0; i < F; ++i) { + int featureIndex = exampleFeatures[i] + 1; + numFeatures = Math.max(numFeatures, featureIndex); + liblinearExample[i] = new FeatureNode(featureIndex, exampleValues[i]); } - if (score > bestScore) { - bestValue = key; - bestScore = score; + Arrays.sort(liblinearExample, 0, F, new Comparator() { + public int compare(Object o1, Object o2) { + FeatureNode f1 = (FeatureNode) o1; + FeatureNode f2 = (FeatureNode) o2; + return f1.index - f2.index; + } + }); + + // Check for duplicate features. If there are any, add up all strengths + // corresponding to a given feature index and put them in a single + // feature. + + int previousI = -1; + int realCount = F; + + for (int i = 0; i < F; i++) { + int f = liblinearExample[i].index; + + if (previousI != -1 && f == liblinearExample[previousI].index) { + realCount--; + liblinearExample[previousI] = + new FeatureNode(f, liblinearExample[previousI].value + + liblinearExample[i].value); + liblinearExample[i] = null; + } else + previousI = i; + } + + // If duplicate features were observed, rebuild the example array without + // the duplicates. 
+ + if (realCount < F) { + FeatureNode[] temp = new FeatureNode[realCount + biasFeatures]; + int k = 0; + for (int i = 0; i < F; i++) + if (liblinearExample[i] != null) + temp[k++] = liblinearExample[i]; + allExamples.set(allExamples.size() - 1, temp); } - } } - else { - for (int l = 0; l < numClasses; l++) { - double score = score(exampleFeatures, exampleValues, l); - if (score > bestScore) { - bestValue = l; - bestScore = score; + + + /** + * This method converts the arrays of examples stored in this class into input for the + * liblinear training method. The learned weight vector is stored in + * {@link #weights}. + **/ + public void doneLearning() { + super.doneLearning(); + + // Create the new lexicon of labels given the examples seen during + // training. This is necessary when doing cross-validation, where the + // supplied lexicon is based on all of the examples and so the lexicon + // might not match up with the subset of examples seen during the current + // fold. + + // liblinear expects that it sees y labels in increasing order, which + // might not be the case during some folds of cross-validation, where the + // label lexicon is not created during the particular fold. liblinear + // also only allocates space in the weight vector for the labels it + // encounters during training -- if the label lexicon contains more labels + // than what are observed during training, then liblinear's label + // representation will not match up with our label lexicon. Thus, + // creating a new lexicon here solves both of these problems. 
+ + if (labelLexicon.size() > 2 || solverType.equals("MCSVM_CS")) { + newLabelLexicon = new Lexicon(); + boolean same = true; + for (int i = 0; i < allExamples.size(); i++) { + Feature label = labelLexicon.lookupKey(allLabels.get(i)); + int newLabel = newLabelLexicon.lookup(label, true); + same &= newLabel == allLabels.get(i); + allLabels.set(i, newLabel); + } + + if (same && newLabelLexicon.size() == labelLexicon.size()) + newLabelLexicon = labelLexicon; + else if (newLabelLexicon.size() > labelLexicon.size()) { + System.err + .println("LBJava ERROR: SupportVectorMachine: new label lexicon is too big!"); + new Exception().printStackTrace(); + System.exit(1); + } else { + int N = newLabelLexicon.size(); + predictions = new FVector(N); + for (int i = 0; i < N; ++i) + createPrediction(newLabelLexicon, i); + } + } + + if (displayLL) + System.out.println(" Training via liblinear at " + new Date()); + if (allLabels == null) { + if (displayLL) { + System.out.println(" No training examples; no action taken."); + System.out.println(" Finished training at " + new Date()); + } + return; } - } + + if (solverType.length() == 0) + solverType = defaultSolverType; + numClasses = newLabelLexicon.size(); + for (int i = 0; i < numClasses && !conjunctiveLabels; ++i) + conjunctiveLabels = newLabelLexicon.lookupKey(i).isConjunctive(); + + int l = allExamples.size(); // number of examples + int n = numFeatures + biasFeatures; // number of features + + if (biasFeatures == 1) + for (int i = 0; i < l; i++) { + FeatureNode[] ex = (FeatureNode[]) allExamples.get(i); + ex[ex.length - 1] = new FeatureNode(n, bias); + } + + // In the binary case, liblinear will consider the integer label it sees + // on the first example to represent "positive". We need the string in + // allowableValues[1] to mean "positive". 
+ boolean fixLabels = + !solverType.equals("MCSVM_CS") && numClasses == 2 && allowableValues.length == 2; + + if (l > 0 && fixLabels) { + Feature f = + new DiscretePrimitiveStringFeature(labeler.containingPackage, labeler.name, "", + allowableValues[1], (short) 1, (short) 2); + int p = newLabelLexicon.lookup(f); + int positive = 0; + + while (positive < l && allLabels.get(positive) == 1 - p) + ++positive; + if (positive > 0 && positive < l) { + allLabels.set(0, p); + allLabels.set(positive, 1 - p); + allExamples.set(0, allExamples.set(positive, allExamples.get(0))); + + newLabelLexicon = new Lexicon(); + newLabelLexicon.lookup(f, true); + newLabelLexicon.lookup(new DiscretePrimitiveStringFeature( + labeler.containingPackage, labeler.name, "", allowableValues[0], (short) 0, + (short) 2), true); + predictions = new FVector(2); + createPrediction(newLabelLexicon, 0); + createPrediction(newLabelLexicon, 1); + } + } + + de.bwaldvogel.liblinear.Problem prob = new de.bwaldvogel.liblinear.Problem(); + prob.bias = bias; + prob.l = l; + prob.n = n; + prob.x = new FeatureNode[l][]; + for (int i = 0; i < l; ++i) + prob.x[i] = (FeatureNode[]) allExamples.get(i); + prob.y = allLabels.toArrayDouble(); + + de.bwaldvogel.liblinear.Parameter params = + new de.bwaldvogel.liblinear.Parameter(Parameters.getSolverType(solverType), C, + epsilon); + + de.bwaldvogel.liblinear.Model trainedModel = + de.bwaldvogel.liblinear.Linear.train(prob, params); + weights = trainedModel.getFeatureWeights(); + allExamples = null; + allLabels = null; + if (displayLL) + System.out.println(" Finished training at " + new Date()); } - return predictions.get(bestValue); - } - - - /** - * This method is a surrogate for - * {@link #valueOf(int[],double[],Collection)} when the labeler is known to - * produce conjunctive features. It is necessary because when given a - * string label from the collection, we will not know how to construct the - * appropriate conjunctive feature key for lookup in the label lexicon. 
- * So, we must go through each feature in the label lexicon and use - * {@link edu.illinois.cs.cogcomp.lbjava.classify.Feature#valueEquals(String)}. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - * @param I An iterator over the set of labels to choose - * from. - * @return The label chosen by this classifier or null if the - * network did not contain any of the specified labels. - **/ - protected Feature conjunctiveValueOf(int[] exampleFeatures, - double[] exampleValues, Iterator I) { - double bestScore = Double.NEGATIVE_INFINITY; - int bestValue = -1; - - while (I.hasNext()) { - String label = (String) I.next(); - - for (int i = 0; i < numClasses; ++i) { - if (!labelLexicon.lookupKey(i).valueEquals(label)) continue; - double score = score(exampleFeatures, exampleValues, i); - if (score > bestScore) { - bestScore = score; - bestValue = i; + + /** + * Optimize the model by doing feature pruning, drop the low value weights. + */ + public void doneTraining() { + super.doneTraining(); + + // optimize the resulting model by discarding low weight features. + SupportVectorMachineOptimizer svmo = new SupportVectorMachineOptimizer(this); + svmo.optimize(); + } + + /** + * Writes the algorithm's internal representation as text. In the first line of output, the name + * of the classifier is printed, followed by {@link #C}, {@link #epsilon}, {@link #bias}, and + * finally {@link #solverType}. + * + * @param out The output stream. 
+ **/ + public void write(PrintStream out) { + demandLexicon(); + out.println(name + ": " + C + ", " + epsilon + ", " + bias + ", " + solverType); + + if (weights != null) { + out.println(); + + out.println("Feature weights:"); + out.println("========================================="); + + int F = numFeatures; + if (bias >= 0) + F++; + + // only display one weight vector if binary solver + if (!solverType.equals("MCSVM_CS") && numClasses <= 2) + numClasses = 1; + + for (int c = 0; c < numClasses; c++) { + if (numClasses > 1) { + String className = newLabelLexicon.lookupKey(c).getStringValue(); + out.println("Class = " + className); + } + + for (int f = 0; f < F; f++) { + if (f < numFeatures) + out.print(lexicon.lookupKey(f)); + else + out.print("[bias]"); + double weight = weights[f * numClasses + c]; + out.println("\t\t\t" + weight); + } + } + out.println("========================================="); } - break; - } + + out.println("End of SupportVectorMachine"); } - return predictions.get(bestValue); - } - - - /** - * Resets the internal bookkeeping. - **/ - public void forget() { - super.forget(); - - numClasses = numFeatures = 0; - allLabels = null; - allExamples = null; - weights = null; - conjunctiveLabels = false; - } - - - /** - * A container for all of {@link SupportVectorMachine}'s configurable - * parameters. Using instances of this class should make code more - * readable and constructors less complicated. - * - * @author Michael Paul - **/ - public static class Parameters extends Learner.Parameters - { + /** - * The type of solver; default - * {@link SupportVectorMachine#defaultSolverType}. - * - *

Possible values: - *

    - *
  • "L2_LR" = L2-regularized logistic regression; - *
  • "L2LOSS_SVM_DUAL" = L2-loss support vector machines (dual); - *
  • "L2LOSS_SVM" = L2-loss support vector machines (primal); - *
  • "L1LOSS_SVM_DUAL" = L1-loss support vector machines (dual); - *
  • "MCSVM_CS" = multi-class support vector machines by Crammer and - * Singer - *
+ * Writes the learned function's internal representation in binary form. + * + * @param out The output stream. **/ - public String solverType; + public void write(ExceptionlessOutputStream out) { + super.write(out); + out.writeString(solverType); + out.writeDouble(C); + out.writeDouble(epsilon); + out.writeDouble(bias); + out.writeBoolean(displayLL); + out.writeInt(numClasses); + out.writeInt(numFeatures); + out.writeBoolean(conjunctiveLabels); + + out.writeInt(allowableValues.length); + for (int i = 0; i < allowableValues.length; ++i) + out.writeString(allowableValues[i]); + + if (newLabelLexicon == labelLexicon) + out.writeBoolean(false); + else { + out.writeBoolean(true); + newLabelLexicon.write(out); + } + + if (weights == null) + out.writeInt(0); + else { + out.writeInt(weights.length); + for (int i = 0; i < weights.length; ++i) + out.writeDouble(weights[i]); + } + } + + /** - * The cost parameter C; default {@link SupportVectorMachine#defaultC} + * Reads the binary representation of a learner with this object's run-time type, overwriting + * any and all learned or manually specified parameters as well as the label lexicon but without + * modifying the feature lexicon. + * + * @param in The input stream. **/ - public double C; + public void read(ExceptionlessInputStream in) { + super.read(in); + solverType = in.readString(); + C = in.readDouble(); + epsilon = in.readDouble(); + bias = in.readDouble(); + biasFeatures = (bias >= 0) ? 
1 : 0; + displayLL = in.readBoolean(); + numClasses = in.readInt(); + numFeatures = in.readInt(); + conjunctiveLabels = in.readBoolean(); + + int N = in.readInt(); + allowableValues = new String[N]; + for (int i = 0; i < N; ++i) + allowableValues[i] = in.readString(); + + if (in.readBoolean()) + newLabelLexicon = Lexicon.readLexicon(in); + else + newLabelLexicon = labelLexicon; + + N = in.readInt(); + weights = new double[N]; + for (int i = 0; i < N; ++i) + weights[i] = in.readDouble(); + } + + + /** + * Returns the classification of the given example as a single feature instead of a + * {@link FeatureVector}. + * + * @param f The features array. + * @param v The values array. + * @return The classification of the example as a feature. + **/ + public Feature featureValue(int[] f, double[] v) { + if (weights == null && allLabels != null && !warningPrinted) { + System.err + .println("LBJava WARNING: SupportVectorMachine's doneLearning() method should " + + "be called before attempting to make predictions."); + warningPrinted = true; + } + + if (weights == null) + return null; + double bestScore = Double.NEGATIVE_INFINITY; + int prediction = 0; + + if (numClasses > 2 || solverType.equals("MCSVM_CS")) { + for (int c = 0; c < numClasses; c++) { + double s = score(f, v, c); + + if (s > bestScore) { + bestScore = s; + prediction = c; + } + } + } else { + double s = score(f, v, 0); + if (s < 0) + prediction = 1; + } + + return predictions.get(prediction); + } + + + /** + * The evaluate method returns the class label which yields the highest score for this example. + * + * @param exampleFeatures The example's array of feature indices + * @param exampleValues The example's array of feature values + * @return The computed feature (in a vector). 
+ **/ + public String discreteValue(int[] exampleFeatures, double[] exampleValues) { + return featureValue(exampleFeatures, exampleValues).getStringValue(); + } + + + /** + * Evaluates the given example using liblinear's prediction method. Returns a + * {@link DiscretePrimitiveStringFeature} set to the label value. + * + * @param exampleFeatures The example's array of feature indices + * @param exampleValues The example's array of feature values + **/ + public FeatureVector classify(int[] exampleFeatures, double[] exampleValues) { + return new FeatureVector(featureValue(exampleFeatures, exampleValues)); + } + + + /** + * An SVM returns a classification score for each class. The score for each class is the result + * of {@link #score(int[],double[],int)}. + * + * @param exampleFeatures The example's array of feature indices + * @param exampleValues The example's array of feature values + * @return The set of scores as described above. + **/ + public ScoreSet scores(int[] exampleFeatures, double[] exampleValues) { + // score the example and save the results + ScoreSet result = new ScoreSet(); + + if (weights == null) { + if (allLabels != null && !warningPrinted) { + System.err + .println("LBJava WARNING: SupportVectorMachine's doneLearning() method should " + + "be called before attempting to make predictions."); + warningPrinted = true; + } + + return result; + } + + if (numClasses > 2 || solverType.equals("MCSVM_CS")) { + for (int c = 0; c < numClasses; c++) { + String className = newLabelLexicon.lookupKey(c).getStringValue(); + double s = score(exampleFeatures, exampleValues, c); + result.put(className, s); + } + } else { + String className = newLabelLexicon.lookupKey(0).getStringValue(); + double s = score(exampleFeatures, exampleValues, 0); + result.put(className, s); + className = newLabelLexicon.lookupKey(1).getStringValue(); + result.put(className, -s); + } + + return result; + } + + /** - * The tolerance of termination criterion; - * default {@link 
SupportVectorMachine#defaultEpsilon}. + * Computes the dot product of the specified example vector and the weight vector associated + * with the supplied class. If no label is specified, it defaults to a label of 0 (that is, a + * positive example), but this should only be done in binary classification. + * + * @param example The example object. + * @return The score for the given example vector. **/ - public double epsilon; + public double score(Object example) { + assert !solverType.equals("MCSVM_CS") && numClasses == 2 : "Cannot call score(Object) in a multi-class classifier."; + + return score(example, 0); + } + + /** - * If {@link SupportVectorMachine#bias} >= 0, an instance vector x - * becomes [x; bias]; otherwise, if {@link SupportVectorMachine#bias} - * < 0, no bias term is added. + * Computes the dot product of the specified example vector and the weight vector associated + * with the supplied class. + * + * @param example The example object. + * @param label The class label + * @return The score for the given example vector. **/ - public double bias; + public double score(Object example, int label) { + Object[] exampleArray = getExampleArray(example, false); + return score((int[]) exampleArray[0], (double[]) exampleArray[1], label); + } + + /** - * Determines if liblinear-related output should be - * displayed; default false + * Computes the dot product of the specified feature vector and the weight vector associated + * with the supplied class. + * + * @param exampleFeatures The example's array of feature indices + * @param exampleValues The example's array of feature values + * @param label The class label + * @return The score for the given example vector. 
**/ - public boolean displayLL; + public double score(int[] exampleFeatures, double[] exampleValues, int label) { + assert exampleFeatures.length == exampleValues.length : "Array mismatch; improperly formatted input."; + + double s = 0; + + if (weights == null) { + if (allLabels != null && !warningPrinted) { + System.err + .println("LBJava WARNING: SupportVectorMachine's doneLearning() method should " + + "be called before attempting to make predictions."); + warningPrinted = true; + } + + return 0; + } + + // If binary classification, no special offset for the weight vector. + // Negate the final score if it is a negative example + boolean negate = false; + if (!(numClasses > 2 || solverType.equals("MCSVM_CS"))) { + if (label == 1) + negate = true; + + numClasses = 1; + label = 0; + } + + for (int i = 0; i < exampleFeatures.length; i++) { + int f = exampleFeatures[i]; + + if (f < numFeatures) { + double value = exampleValues[i]; + double weight = weights[f * numClasses + label]; + + s += weight * value; + } + } + if (bias >= 0) + s += bias * weights[numFeatures * numClasses + label]; - /** Sets all the default values. */ - public Parameters() { - solverType = ""; - C = defaultC; - epsilon = defaultEpsilon; - bias = defaultBias; - displayLL = false; + return negate ? -s : s; } /** - * Sets the parameters from the parent's parameters object, giving - * defaults to all parameters declared in this object. + * Using this method, the winner-take-all competition is narrowed to involve only those labels + * contained in the specified list. The list must contain only {@link ByteString}s. + * + * @param example The example object. + * @param candidates A list of the only labels the example may take. + * @return The prediction as a feature or null if the network did not contain any + * of the specified labels. 
**/ - public Parameters(Learner.Parameters p) { - super(p); - solverType = ""; - C = defaultC; - epsilon = defaultEpsilon; - bias = defaultBias; - displayLL = false; + public Feature valueOf(Object example, Collection candidates) { + Object[] array = getExampleArray(example, false); + return valueOf((int[]) array[0], (double[]) array[1], candidates); } - /** Copy constructor. */ - public Parameters(Parameters p) { - super(p); - solverType = p.solverType; - C = p.C; - epsilon = p.epsilon; - bias = p.bias; - displayLL = p.displayLL; + /** + * Given the index of the weights to prune, discard them, then shrink the weight vector down + * to save memory. + * @param uselessfeatures the indices of the features being pruned. + * @param numberFeatures the total number of features before pruning. + */ + public void pruneWeights(int[] uselessfeatures, int numberFeatures) { + int sz = numberFeatures - uselessfeatures.length; + double[] newweights = new double[sz+biasFeatures]; + int nextToPrune = 0; + int newweightindex = 0; + for (int i = 0; i < weights.length; i++) { + if (nextToPrune < uselessfeatures.length && i == uselessfeatures[nextToPrune]) { + if (Math.abs(weights[i]) > this.featurePruningThreshold) + throw new IllegalArgumentException("Pruning a high value weight : "+weights[i]+" at "+i); + nextToPrune++; + } else { + if (newweightindex >= newweights.length) + throw new IllegalArgumentException("Attempted to overpopulate the new weight : indx=" + +i+" features="+numberFeatures+" useless="+uselessfeatures.length); + newweights[newweightindex] = weights[i]; + newweightindex++; + } + } + + // do some sanity checks. + if (newweightindex != newweights.length) + throw new IllegalArgumentException("The new pruned weight vector was not fully populated!"); + if (nextToPrune != uselessfeatures.length) + throw new IllegalArgumentException("Not all the prunable features were pruned!"); + + // all good, do the replacement. 
+ System.out.println("SVM.pruneWeights: "+sz+" features, "+newweights.length+" weights size"); + numFeatures = sz; + weights = newweights; + } + + /** + * Using this method, the winner-take-all competition is narrowed to involve only those labels + * contained in the specified list. The list must contain only Strings. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @param candidates A list of the only labels the example may take. + * @return The prediction as a feature or null if the network did not contain any + * of the specified labels. + **/ + public Feature valueOf(int[] exampleFeatures, double[] exampleValues, Collection candidates) { + if (weights == null && allLabels != null && !warningPrinted) { + System.err + .println("LBJava WARNING: SupportVectorMachine's doneLearning() method should " + + "be called before attempting to make predictions."); + warningPrinted = true; + } + + if (weights == null) + return null; + + double bestScore = Double.NEGATIVE_INFINITY; + int bestValue = -1; + Iterator I = candidates.iterator(); + + if (I.hasNext()) { + if (conjunctiveLabels) + return conjunctiveValueOf(exampleFeatures, exampleValues, I); + + while (I.hasNext()) { + double score = Double.NEGATIVE_INFINITY; + String label = (String) I.next(); + + Feature f = + new DiscretePrimitiveStringFeature(labeler.containingPackage, labeler.name, + "", label, valueIndexOf(label), (short) allowableValues.length); + + int key = -1; + if (newLabelLexicon.contains(f)) { + key = newLabelLexicon.lookup(f); + score = score(exampleFeatures, exampleValues, key); + } + + if (score > bestScore) { + bestValue = key; + bestScore = score; + } + } + } else { + for (int l = 0; l < numClasses; l++) { + double score = score(exampleFeatures, exampleValues, l); + if (score > bestScore) { + bestValue = l; + bestScore = score; + } + } + } + + return predictions.get(bestValue); } /** - * Calls the appropriate 
Learner.setParameters(Parameters) - * method for this Parameters object. - * - * @param l The learner whose parameters will be set. + * This method is a surrogate for {@link #valueOf(int[],double[],Collection)} when the labeler + * is known to produce conjunctive features. It is necessary because when given a string label + * from the collection, we will not know how to construct the appropriate conjunctive feature + * key for lookup in the label lexicon. So, we must go through each feature in the label lexicon + * and use {@link edu.illinois.cs.cogcomp.lbjava.classify.Feature#valueEquals(String)}. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @param I An iterator over the set of labels to choose from. + * @return The label chosen by this classifier or null if the network did not + * contain any of the specified labels. **/ - public void setParameters(Learner l) { - ((SupportVectorMachine) l).setParameters(this); + protected Feature conjunctiveValueOf(int[] exampleFeatures, double[] exampleValues, Iterator I) { + double bestScore = Double.NEGATIVE_INFINITY; + int bestValue = -1; + + while (I.hasNext()) { + String label = (String) I.next(); + + for (int i = 0; i < numClasses; ++i) { + if (!labelLexicon.lookupKey(i).valueEquals(label)) + continue; + double score = score(exampleFeatures, exampleValues, i); + if (score > bestScore) { + bestScore = score; + bestValue = i; + } + break; + } + } + + return predictions.get(bestValue); } /** - * Converts the string representation of the solver type - * into a liblinear.SolverType object to be used - * by liblinear during training. - * - * @param stype The solver type string. - * @return The corresponding liblinear.SolverType object. + * Resets the internal bookkeeping. 
**/ - public static de.bwaldvogel.liblinear.SolverType getSolverType(String stype) { - if (stype.equals("L2_LR")) - return de.bwaldvogel.liblinear.SolverType.L2R_LR; - else if (stype.equals("L2LOSS_SVM_DUAL")) - return de.bwaldvogel.liblinear.SolverType.L2R_L2LOSS_SVC_DUAL; - else if (stype.equals("L2LOSS_SVM")) - return de.bwaldvogel.liblinear.SolverType.L2R_L2LOSS_SVC; - else if (stype.equals("L1LOSS_SVM_DUAL")) - return de.bwaldvogel.liblinear.SolverType.L2R_L1LOSS_SVC_DUAL; - else if (stype.equals("MCSVM_CS")) - return de.bwaldvogel.liblinear.SolverType.MCSVM_CS; - else - return de.bwaldvogel.liblinear.SolverType.L2R_L2LOSS_SVC; + public void forget() { + super.forget(); + + numClasses = numFeatures = 0; + allLabels = null; + allExamples = null; + weights = null; + conjunctiveLabels = false; } /** - * Creates a string representation of these parameters in which only - * those parameters that differ from their default values are mentioned. + * A container for all of {@link SupportVectorMachine}'s configurable parameters. Using + * instances of this class should make code more readable and constructors less complicated. + * + * @author Michael Paul **/ - public String nonDefaultString() { - String result = super.nonDefaultString(); - - if (!solverType.equals(SupportVectorMachine.defaultSolverType)) - result += ", solverType = \"" + solverType + "\""; - if (C != SupportVectorMachine.defaultC) - result += ", C = " + C; - if (epsilon != SupportVectorMachine.defaultEpsilon) - result += ", epsilon = " + epsilon; - if (bias != SupportVectorMachine.defaultBias) - result += ", bias = " + bias; - - if (result.startsWith(", ")) result = result.substring(2); - return result; + public static class Parameters extends Learner.Parameters { + /** + * The type of solver; default {@link SupportVectorMachine#defaultSolverType}. + * + *

+ * Possible values: + *

    + *
  • "L2_LR" = L2-regularized logistic regression; + *
  • "L2LOSS_SVM_DUAL" = L2-loss support vector machines (dual); + *
  • "L2LOSS_SVM" = L2-loss support vector machines (primal); + *
  • "L1LOSS_SVM_DUAL" = L1-loss support vector machines (dual); + *
  • "MCSVM_CS" = multi-class support vector machines by Crammer and Singer + *
+ **/ + public String solverType; + + /** + * @return the solverType + */ + public String getSolverType() { + return solverType; + } + + /** + * The cost parameter C; default {@link SupportVectorMachine#defaultC} + **/ + public double C; + /** + * The tolerance of termination criterion; default + * {@link SupportVectorMachine#defaultEpsilon}. + **/ + public double epsilon; + /** + * If {@link SupportVectorMachine#bias} >= 0, an instance vector x becomes [x; bias]; + * otherwise, if {@link SupportVectorMachine#bias} < 0, no bias term is added. + **/ + public double bias; + /** + * Determines if liblinear-related output should be displayed; default + * false + **/ + public boolean displayLL; + + /** feature pruning threshold caps magnitude of useful features. */ + public double featurePruningThreshold; + + /** Sets all the default values. */ + public Parameters() { + solverType = ""; + C = defaultC; + epsilon = defaultEpsilon; + bias = defaultBias; + displayLL = false; + featurePruningThreshold = defaultFeaturePruningThreshold; + } + + + /** + * Sets the parameters from the parent's parameters object, giving defaults to all + * parameters declared in this object. + **/ + public Parameters(Learner.Parameters p) { + super(p); + solverType = ""; + C = defaultC; + epsilon = defaultEpsilon; + bias = defaultBias; + displayLL = false; + featurePruningThreshold = defaultFeaturePruningThreshold; + } + + + /** Copy constructor. */ + public Parameters(Parameters p) { + super(p); + solverType = p.solverType; + C = p.C; + epsilon = p.epsilon; + bias = p.bias; + displayLL = p.displayLL; + featurePruningThreshold = p.featurePruningThreshold; + } + + + /** + * Calls the appropriate Learner.setParameters(Parameters) method for this + * Parameters object. + * + * @param l The learner whose parameters will be set. 
+ **/ + public void setParameters(Learner l) { + ((SupportVectorMachine) l).setParameters(this); + } + + + /** + * Converts the string representation of the solver type into a + * liblinear.SolverType object to be used by liblinear during + * training. + * + * @param stype The solver type string. + * @return The corresponding liblinear.SolverType object. + **/ + public static de.bwaldvogel.liblinear.SolverType getSolverType(String stype) { + if (stype.equals("L2_LR")) + return de.bwaldvogel.liblinear.SolverType.L2R_LR; + else if (stype.equals("L2LOSS_SVM_DUAL")) + return de.bwaldvogel.liblinear.SolverType.L2R_L2LOSS_SVC_DUAL; + else if (stype.equals("L2LOSS_SVM")) + return de.bwaldvogel.liblinear.SolverType.L2R_L2LOSS_SVC; + else if (stype.equals("L1LOSS_SVM_DUAL")) + return de.bwaldvogel.liblinear.SolverType.L2R_L1LOSS_SVC_DUAL; + else if (stype.equals("MCSVM_CS")) + return de.bwaldvogel.liblinear.SolverType.MCSVM_CS; + else + return de.bwaldvogel.liblinear.SolverType.L2R_L2LOSS_SVC; + } + + + /** + * Creates a string representation of these parameters in which only those parameters that + * differ from their default values are mentioned. 
+ **/ + public String nonDefaultString() { + String result = super.nonDefaultString(); + + if (!solverType.equals(SupportVectorMachine.defaultSolverType)) + result += ", solverType = \"" + solverType + "\""; + if (C != SupportVectorMachine.defaultC) + result += ", C = " + C; + if (epsilon != SupportVectorMachine.defaultEpsilon) + result += ", epsilon = " + epsilon; + if (bias != SupportVectorMachine.defaultBias) + result += ", bias = " + bias; + if (featurePruningThreshold != defaultFeaturePruningThreshold) + result += ", feature pruning threshold = " + featurePruningThreshold; + + if (result.startsWith(", ")) + result = result.substring(2); + return result; + } } - } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/TestingMetric.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/TestingMetric.java index ad3233de..158c0545 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/TestingMetric.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/TestingMetric.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.learn; @@ -15,27 +12,23 @@ /** - * TestingMetric is an interface through which the user may - * implement their own testing method for use by LBJava's internal cross - * validation algorithm. 
- * - * @author Dan Muriello + * TestingMetric is an interface through which the user may implement their own testing + * method for use by LBJava's internal cross validation algorithm. + * + * @author Dan Muriello **/ -public interface TestingMetric -{ - /** Returns the name of the testing metric. */ - public String getName(); +public interface TestingMetric { + /** Returns the name of the testing metric. */ + public String getName(); - /** - * Evaluates a classifier against an oracle on the data provided by a - * parser. - * - * @param classifier The classifier whose accuracy is being measured. - * @param oracle A classifier that returns the label of each example. - * @param parser A parser to supply the example objects. - * @return A value assessing the performance of the classifier. - **/ - public double test(Classifier classifier, Classifier oracle, Parser parser); + /** + * Evaluates a classifier against an oracle on the data provided by a parser. + * + * @param classifier The classifier whose accuracy is being measured. + * @param oracle A classifier that returns the label of each example. + * @param parser A parser to supply the example objects. + * @return A value assessing the performance of the classifier. + **/ + public double test(Classifier classifier, Classifier oracle, Parser parser); } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/WekaWrapper.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/WekaWrapper.java index 16284291..072e11fe 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/WekaWrapper.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/WekaWrapper.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. 
See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.learn; @@ -19,786 +16,740 @@ import weka.core.FastVector; import weka.core.Instance; import weka.core.Instances; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream; import edu.illinois.cs.cogcomp.lbjava.classify.Classifier; import edu.illinois.cs.cogcomp.lbjava.classify.DiscretePrimitiveStringFeature; import edu.illinois.cs.cogcomp.lbjava.classify.Feature; import edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector; import edu.illinois.cs.cogcomp.lbjava.classify.RealPrimitiveStringFeature; import edu.illinois.cs.cogcomp.lbjava.classify.ScoreSet; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessInputStream; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessOutputStream; - /** - * Translates LBJava's internal problem representation into that which can - * be handled by WEKA learning algorithms. This translation involves storing - * all examples in memory so they can be passed to WEKA at one time. - * - *

WEKA must be available on your CLASSPATH in order to use - * this class. WEKA source code and pre-compiled jar distributions are - * available at: - * http://www.cs.waikato.ac.nz/ml/weka/ - * - *

To use this class in the with clause of a learning - * classifier expression, the following restrictions must be recognized: - *

    - *
  • Feature pre-extraction must be enabled. - *
  • No hard-coded feature generators may be referenced in the - * using clause. - *
  • No array producing classifiers may be referenced in the - * using clause. - *
  • The names of classifiers referenced in the using - * clause may not contain the underscore character ('_'). - *
  • The values produced by discrete classifiers referenced in the - * using clause may not contain the underscore, colon, - * or comma characters ('_', ':', or - * ','). - *
- * - *

To use this class in a Java application, the following restrictions - * must be recognized: - *

    - *
  • {@link #doneLearning()} must be called before calls to - * {@link #classify(Object)} can be made. - *
  • After {@link #doneLearning()} is called, {@link #learn(Object)} may - * not be called without first calling {@link #forget()}. - *
- * - * @author Dan Muriello + * Translates LBJava's internal problem representation into that which can be handled by WEKA + * learning algorithms. This translation involves storing all examples in memory so they can be + * passed to WEKA at one time. + * + *

+ * WEKA must be available on your CLASSPATH in order to use this class. WEKA source + * code and pre-compiled jar distributions are available at: http://www.cs.waikato.ac.nz/ml/weka/ + * + *

+ * To use this class in the with clause of a learning classifier expression, the + * following restrictions must be recognized: + *

    + *
  • Feature pre-extraction must be enabled. + *
  • No hard-coded feature generators may be referenced in the using clause. + *
  • No array producing classifiers may be referenced in the using clause. + *
  • The names of classifiers referenced in the using clause may not contain the + * underscore character ('_'). + *
  • The values produced by discrete classifiers referenced in the using clause may + * not contain the underscore, colon, or comma characters ('_', ':', or ' + * ,'). + *
+ * + *

+ * To use this class in a Java application, the following restrictions must be recognized: + *

    + *
  • {@link #doneLearning()} must be called before calls to {@link #classify(Object)} can be made. + *
  • After {@link #doneLearning()} is called, {@link #learn(Object)} may not be called without + * first calling {@link #forget()}. + *
+ * + * @author Dan Muriello **/ -public class WekaWrapper extends Learner -{ - /** Default for the {@link #attributeString} field. */ - public static final String defaultAttributeString = ""; - /** Default for the {@link #baseClassifier} field. */ - public static final weka.classifiers.Classifier defaultBaseClassifier = - new weka.classifiers.bayes.NaiveBayes(); - - - /** A string encoding of the attributes used by this learner. */ - protected String attributeString; - /** - * Stores the instance of the WEKA classifier which we are training; - * default is weka.classifiers.bayes.NaiveBayes. - **/ - protected weka.classifiers.Classifier baseClassifier; - /** - * Stores a fresh instance of the WEKA classifier for the purposes of - * forgetting. - **/ - protected weka.classifiers.Classifier freshClassifier; - /** - * Information about the features this learner takes as input is parsed - * from an attribute string and stored here. This information is crucial - * in the task of interfacing with the WEKA algorithms, and must be present - * before the {@link #learn(Object)} method can be called. - * - *

Here is an example of a valid attribute string: - * nom_SimpleLabel_1,2,3,:str_First:nom_Second_a,b,c,d,r,t,:num_Third: - * - *

nom stands for "Nominal", i.e. the feature - * SimpleLabel was declared as discrete, and had - * the value list {"1","2","3"}. - * - *

str stands for "Stirng", i.e. the feature - * First was declared to be discrete, but was not - * provided with a value list. When using the WekaWrapper, it - * is best to provide value lists whenever possible, because very few WEKA - * classifiers can handle string attributes. - * - *

num stands for "Numerical", i.e. the feature - * Third was declared to be real. - **/ - protected FastVector attributeInfo = new FastVector(); - /** The main collection of Instance objects.*/ - protected Instances instances; - /** - * Indicates whether the {@link #doneLearning()} method has been called - * and the {@link #forget()} method has not yet been called. - **/ - protected boolean trained = false; - /** The label producing classifier's allowable values. */ - protected String[] allowableValues; - - - /** - * Empty constructor. Instantiates this wrapper with the default learning - * algorithm: weka.classifiers.bayes.NaiveBayes. Attribute - * information must be provided before any learning can occur. - **/ - public WekaWrapper() { - this(""); - } - - /** - * Partial constructor; attribute information must be provided before any - * learning can occur. - * - * @param base The classifier to be used in this system. - **/ - public WekaWrapper(weka.classifiers.Classifier base) { - this("", base); - } - - /** - * Redirecting constructor. - * - * @param base The classifier to be used in this system. - * @param attributeString The string describing the types of attributes - * example objects will have. - **/ - public WekaWrapper(weka.classifiers.Classifier base, String attributeString) - { - this("", base, attributeString); - } - - /** - * Initializing constructor. Sets all member variables to their associated - * settings in the {@link WekaWrapper.Parameters} object. - * - * @param p The settings of all parameters. - **/ - public WekaWrapper(Parameters p) { this("", p); } - - /** - * Empty constructor. Instantiates this wrapper with the default learning - * algorithm: weka.classifiers.bayes.NaiveBayes. Attribute - * information must be provided before any learning can occur. - * - * @param n The name of the classifier. 
- **/ - public WekaWrapper(String n) { this(n, new Parameters()); } - - /** - * Partial constructor; attribute information must be provided before any - * learning can occur. - * - * @param base The classifier to be used in this system. - **/ - public WekaWrapper(String n, weka.classifiers.Classifier base) { - this(n, base, defaultAttributeString); - } - - /** - * Default Constructor. Instantiates this wrapper with the default - * learning algorithm: weka.classifiers.bayes.NaiveBayes. - * - * @param n The name of the classifier. - * @param attributeString The string describing the types of attributes - * example objects will have. - **/ - public WekaWrapper(String n, String attributeString) { - this(n, defaultBaseClassifier, attributeString); - } - - /** - * Initializing constructor. Sets all member variables to their associated - * settings in the {@link WekaWrapper.Parameters} object. - * - * @param n The name of the classifier. - * @param p The settings of all parameters. - **/ - public WekaWrapper(String n, Parameters p) { - super(n); - setParameters(p); - freshClassifier = baseClassifier; - } - - /** - * Full Constructor. - * - * @param n The name of the classifier - * @param base The classifier to be used in this system. - * @param attributeString The string describing the types of attributes - * example objects will have. - **/ - public WekaWrapper(String n, weka.classifiers.Classifier base, - String attributeString) { - super(n); - Parameters p = new Parameters(); - p.baseClassifier = base; - p.attributeString = attributeString; - setParameters(p); - freshClassifier = base; - } - - - /** - * Sets the values of parameters that control the behavior of this learning - * algorithm. - * - * @param p The parameters. - **/ - public void setParameters(Parameters p) { - baseClassifier = p.baseClassifier; - attributeString = p.attributeString; - initializeAttributes(); - } - - - /** - * Retrieves the parameters that are set in this learner. 
- * - * @return An object containing all the values of the parameters that - * control the behavior of this learning algorithm. - **/ - public Learner.Parameters getParameters() { - Parameters p = new Parameters(super.getParameters()); - p.baseClassifier = baseClassifier; - p.attributeString = attributeString; - return p; - } - - - /** This learner's output type is "mixed%". */ - public String getOutputType() { return "mixed%"; } - - - /** - * Takes attributeString and initializes this wrapper's - * {@link #instances} collection to take those attributes. - **/ - public void initializeAttributes() { - String[] atts = attributeString.split(":"); - - for (int i = 0; i < atts.length; ++i) { - String[] parts = atts[i].split("_"); - - if (parts[0].equals("str")) { - String attributeName = parts[1]; - Attribute newAttribute = - new Attribute(attributeName, (FastVector) null); - attributeInfo.addElement(newAttribute); - } - else if (parts[0].equals("nom")) { - String[] valueStrings = parts[2].split(","); - FastVector valueVector = new FastVector(valueStrings.length); - for (int j = 0; j < valueStrings.length; ++j) - valueVector.addElement(valueStrings[j]); - - Attribute a = new Attribute(parts[1], valueVector); - attributeInfo.addElement(a); - } - else if (parts[0].equals("num")) { - attributeInfo.addElement(new Attribute(parts[1])); - } - else { - System.err.println( - "WekaWrapper: Error - Malformed attribute information string: " - + attributeString); - new Exception().printStackTrace(); - System.exit(1); - } - } +public class WekaWrapper extends Learner { + /** Default for the {@link #attributeString} field. */ + public static final String defaultAttributeString = ""; + /** Default for the {@link #baseClassifier} field. */ + public static final weka.classifiers.Classifier defaultBaseClassifier = + new weka.classifiers.bayes.NaiveBayes(); + + + /** A string encoding of the attributes used by this learner. 
*/ + protected String attributeString; + /** + * Stores the instance of the WEKA classifier which we are training; default is + * weka.classifiers.bayes.NaiveBayes. + **/ + protected weka.classifiers.Classifier baseClassifier; + /** + * Stores a fresh instance of the WEKA classifier for the purposes of forgetting. + **/ + protected weka.classifiers.Classifier freshClassifier; + /** + * Information about the features this learner takes as input is parsed from an attribute string + * and stored here. This information is crucial in the task of interfacing with the WEKA + * algorithms, and must be present before the {@link #learn(Object)} method can be called. + * + *

+ * Here is an example of a valid attribute string: + * nom_SimpleLabel_1,2,3,:str_First:nom_Second_a,b,c,d,r,t,:num_Third: + * + *

+ * nom stands for "Nominal", i.e. the feature SimpleLabel was declared + * as discrete, and had the value list {"1","2","3"}. + * + *

+ * str stands for "Stirng", i.e. the feature First was declared to be + * discrete, but was not provided with a value list. When using the + * WekaWrapper, it is best to provide value lists whenever possible, because very + * few WEKA classifiers can handle string attributes. + * + *

+ * num stands for "Numerical", i.e. the feature Third was declared to + * be real. + **/ + protected FastVector attributeInfo = new FastVector(); + /** The main collection of Instance objects. */ + protected Instances instances; + /** + * Indicates whether the {@link #doneLearning()} method has been called and the + * {@link #forget()} method has not yet been called. + **/ + protected boolean trained = false; + /** The label producing classifier's allowable values. */ + protected String[] allowableValues; - instances = new Instances(name, attributeInfo, 0); - instances.setClassIndex(0); - } - - - /** - * Sets the labeler. - * - * @param l A labeling classifier. - **/ - public void setLabeler(Classifier l) { - super.setLabeler(l); - allowableValues = l == null ? null : l.allowableValues(); - } - - - /** - * Returns the array of allowable values that a feature returned by this - * classifier may take. - * - * @return The allowable values of this learner's labeler, or an array of - * length zero if the labeler has not yet been established or does - * not specify allowable values. - **/ - public String[] allowableValues() { - if (allowableValues == null) return new String[0]; - return allowableValues; - } - - - /** - * Since WEKA classifiers cannot learn online, this method causes no actual - * learning to occur, it simply creates an Instance object - * from this example and adds it to a set of examples from which the - * classifier will be built once {@link #doneLearning()} is called. - **/ - public void learn(int[] exampleFeatures, double[] exampleValues, - int[] exampleLabels, double[] labelValues) { - instances.add(makeInstance(exampleFeatures, exampleValues, - exampleLabels, labelValues)); - } - - - /** - * This method makes one or more decisions about a single object, returning - * those decisions as Features in a vector. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. 
- * @return A feature vector with a single feature containing the prediction - * for this example. - **/ - public FeatureVector classify(int[] exampleFeatures, double[] exampleValues) - { - if (!trained) { - System.err.println( - "WekaWrapper: Error - Cannot make a classification with an " - + "untrained classifier."); - new Exception().printStackTrace(); - System.exit(1); + + /** + * Empty constructor. Instantiates this wrapper with the default learning algorithm: + * weka.classifiers.bayes.NaiveBayes. Attribute information must be provided before + * any learning can occur. + **/ + public WekaWrapper() { + this(""); } - /* - Assuming that the first Attribute in our attributeInfo vector is the - class attribute, decide which case we are in - */ - Attribute classAtt = (Attribute) attributeInfo.elementAt(0); - - if (classAtt.isNominal() || classAtt.isString()) { - double[] dist = getDistribution(exampleFeatures, exampleValues); - int best = 0; - for (int i = 1; i < dist.length; ++i) - if (dist[i] > dist[best]) best = i; - - Feature label = labelLexicon.lookupKey(best); - if (label == null) return new FeatureVector(); - String value = label.getStringValue(); - - return - new FeatureVector( - new DiscretePrimitiveStringFeature( - containingPackage, name, "", value, valueIndexOf(value), - (short) allowableValues().length)); + /** + * Partial constructor; attribute information must be provided before any learning can occur. + * + * @param base The classifier to be used in this system. + **/ + public WekaWrapper(weka.classifiers.Classifier base) { + this("", base); } - else if (classAtt.isNumeric()) { - return - new FeatureVector( - new RealPrimitiveStringFeature( - containingPackage, name, "", - getDistribution(exampleFeatures, exampleValues)[0])); + + /** + * Redirecting constructor. + * + * @param base The classifier to be used in this system. + * @param attributeString The string describing the types of attributes example objects will + * have. 
+ **/ + public WekaWrapper(weka.classifiers.Classifier base, String attributeString) { + this("", base, attributeString); } - else { - System.err.println("WekaWrapper: Error - illegal class type."); - new Exception().printStackTrace(); - System.exit(1); + + /** + * Initializing constructor. Sets all member variables to their associated settings in the + * {@link WekaWrapper.Parameters} object. + * + * @param p The settings of all parameters. + **/ + public WekaWrapper(Parameters p) { + this("", p); } - return new FeatureVector(); - } - - - /** - * Returns a discrete distribution of the classifier's prediction values. - * - * @param exampleFeatures The example's array of feature indices. - * @param exampleValues The example's array of feature values. - **/ - protected double[] getDistribution(int[] exampleFeatures, - double[] exampleValues) { - if (!trained) { - System.err.println( - "WekaWrapper: Error - Cannot make a classification with an " - + "untrained classifier."); - new Exception().printStackTrace(); - System.exit(1); + /** + * Empty constructor. Instantiates this wrapper with the default learning algorithm: + * weka.classifiers.bayes.NaiveBayes. Attribute information must be provided before + * any learning can occur. + * + * @param n The name of the classifier. + **/ + public WekaWrapper(String n) { + this(n, new Parameters()); } - Instance inQuestion = makeInstance(exampleFeatures, exampleValues, - new int[0], new double[0]); - - /* - For Numerical class values, this will return an array of size 1, - containing the class prediction. - For Nominal classes, an array of size equal to that of the class list, - representing probabilities. - For String classes, ? 
- */ - double[] dist = null; - try { dist = baseClassifier.distributionForInstance(inQuestion); } - catch (Exception e) { - System.err.println("WekaWrapper: Error while computing distribution."); - e.printStackTrace(); - System.exit(1); + /** + * Partial constructor; attribute information must be provided before any learning can occur. + * + * @param base The classifier to be used in this system. + **/ + public WekaWrapper(String n, weka.classifiers.Classifier base) { + this(n, base, defaultAttributeString); } - if (dist.length == 0) { - System.err.println( - "WekaWrapper: Error - The base classifier returned an empty " - + "probability distribution when attempting to classify an " - + "example."); - new Exception().printStackTrace(); - System.exit(1); + /** + * Default Constructor. Instantiates this wrapper with the default learning algorithm: + * weka.classifiers.bayes.NaiveBayes. + * + * @param n The name of the classifier. + * @param attributeString The string describing the types of attributes example objects will + * have. + **/ + public WekaWrapper(String n, String attributeString) { + this(n, defaultBaseClassifier, attributeString); } - return dist; - } + /** + * Initializing constructor. Sets all member variables to their associated settings in the + * {@link WekaWrapper.Parameters} object. + * + * @param n The name of the classifier. + * @param p The settings of all parameters. + **/ + public WekaWrapper(String n, Parameters p) { + super(n); + setParameters(p); + freshClassifier = baseClassifier; + } + /** + * Full Constructor. + * + * @param n The name of the classifier + * @param base The classifier to be used in this system. + * @param attributeString The string describing the types of attributes example objects will + * have. 
+ **/ + public WekaWrapper(String n, weka.classifiers.Classifier base, String attributeString) { + super(n); + Parameters p = new Parameters(); + p.baseClassifier = base; + p.attributeString = attributeString; + setParameters(p); + freshClassifier = base; + } - /** - * Destroys the learned version of the WEKA classifier and empties the - * {@link #instances} collection of examples. - **/ - public void forget() { - super.forget(); - try { baseClassifier = weka.classifiers.Classifier.makeCopy(freshClassifier); } - catch (Exception e) { - System.err.println( - "LBJava ERROR: WekaWrapper.forget: Can't copy classifier:"); - e.printStackTrace(); - System.exit(1); + /** + * Sets the values of parameters that control the behavior of this learning algorithm. + * + * @param p The parameters. + **/ + public void setParameters(Parameters p) { + baseClassifier = p.baseClassifier; + attributeString = p.attributeString; + initializeAttributes(); } - instances = new Instances(name, attributeInfo, 0); - instances.setClassIndex(0); - trained = false; - } - - - /** - * Creates a WEKA Instance object out of a {@link FeatureVector}. - **/ - private Instance makeInstance(int[] exampleFeatures, double[] exampleValues, - int[] exampleLabels, double[] labelValues) { - // Make sure attributeInfo has been filled - if (attributeInfo.size() == 0) { - System.err.println( - "WekaWrapper: Error - makeInstance was called while attributeInfo " - + "was empty."); - new Exception().printStackTrace(); - System.exit(1); - } - // Initialize an Instance object - Instance inst = new Instance(attributeInfo.size()); - - // Acknowledge that this instance will be a member of our dataset - // 'instances' - inst.setDataset(instances); - - // Assign values for its attributes - /* - Since we are iterating through this example's feature list, which does - not contain the label feature (the label feature is the first in the - 'attribute' list), we start attIndex at 1, while we start featureIndex - at 0. 
- */ - for (int featureIndex = 0, attIndex = 1; - featureIndex < exampleFeatures.length; ++featureIndex, ++attIndex) { - Feature f = (Feature) lexicon.lookupKey(exampleFeatures[featureIndex]); - Attribute att = (Attribute) attributeInfo.elementAt(attIndex); - - // make sure the feature's identifier and the attribute's name match - if (!(att.name().equals(f.getStringIdentifier()))) { - System.err.println( - "WekaWrapper: Error - makeInstance encountered a misaligned " - + "attribute-feature pair."); - System.err.println( - " " + att.name() + " and " + f.getStringIdentifier() - + " should have been identical."); - new Exception().printStackTrace(); - System.exit(1); - } - - if (!f.isDiscrete()) - inst.setValue(attIndex, exampleValues[featureIndex]); - else { // it's a discrete or conjunctive feature. - String attValue = - f.totalValues() == 2 ? att.value((int) exampleValues[featureIndex]) - : f.getStringValue(); - inst.setValue(attIndex, attValue); - } + /** + * Retrieves the parameters that are set in this learner. + * + * @return An object containing all the values of the parameters that control the behavior of + * this learning algorithm. + **/ + public Learner.Parameters getParameters() { + Parameters p = new Parameters(super.getParameters()); + p.baseClassifier = baseClassifier; + p.attributeString = attributeString; + return p; } - /* - Here, we assume that if either the labels FeatureVector is empty - of features, or is null, then this example is to be considered - unlabeled. 
- */ - if (exampleLabels.length == 0) { - inst.setClassMissing(); - } - else if (exampleLabels.length > 1) { - System.err.println( - "WekaWrapper: Error - Weka Instances may only take a single class " - + "value, "); - new Exception().printStackTrace(); - System.exit(1); - } - else { - Feature label = labelLexicon.lookupKey(exampleLabels[0]); - - // make sure the name of the label feature matches the name of the 0'th - // attribute - if (!(label.getStringIdentifier() - .equals(((Attribute) attributeInfo.elementAt(0)).name()))) { - System.err.println( - "WekaWrapper: Error - makeInstance found the wrong label name."); - new Exception().printStackTrace(); - System.exit(1); - } - - if (!label.isDiscrete()) inst.setValue(0, labelValues[0]); - else inst.setValue(0, label.getStringValue()); + + /** This learner's output type is "mixed%". */ + public String getOutputType() { + return "mixed%"; } - return inst; - } - - - /** - * Produces a set of scores indicating the degree to which each possible - * discrete classification value is associated with the given example - * object. - **/ - public ScoreSet scores(int[] exampleFeatures, double[] exampleValues) { - double[] dist = getDistribution(exampleFeatures, exampleValues); - - /* - Assuming that the first Attribute in our attributeInfo vector is the - class attribute, decide which case we are in - */ - Attribute classAtt = (Attribute) attributeInfo.elementAt(0); - - ScoreSet scores = new ScoreSet(); - - if (classAtt.isNominal() || classAtt.isString()) { - Enumeration enumeratedValues = classAtt.enumerateValues(); - - int i = 0; - while (enumeratedValues.hasMoreElements()) { - if (i >= dist.length) { - System.err.println( - "WekaWrapper: Error - scores found more possible values than " - + "probabilities."); - new Exception().printStackTrace(); - System.exit(1); + + /** + * Takes attributeString and initializes this wrapper's {@link #instances} + * collection to take those attributes. 
+ **/ + public void initializeAttributes() { + String[] atts = attributeString.split(":"); + + for (int i = 0; i < atts.length; ++i) { + String[] parts = atts[i].split("_"); + + if (parts[0].equals("str")) { + String attributeName = parts[1]; + Attribute newAttribute = new Attribute(attributeName, (FastVector) null); + attributeInfo.addElement(newAttribute); + } else if (parts[0].equals("nom")) { + String[] valueStrings = parts[2].split(","); + FastVector valueVector = new FastVector(valueStrings.length); + for (int j = 0; j < valueStrings.length; ++j) + valueVector.addElement(valueStrings[j]); + + Attribute a = new Attribute(parts[1], valueVector); + attributeInfo.addElement(a); + } else if (parts[0].equals("num")) { + attributeInfo.addElement(new Attribute(parts[1])); + } else { + System.err.println("WekaWrapper: Error - Malformed attribute information string: " + + attributeString); + new Exception().printStackTrace(); + System.exit(1); + } } - double s = dist[i]; - String v = (String) enumeratedValues.nextElement(); - scores.put(v,s); - ++i; - } - } - else if (classAtt.isNumeric()) { - System.err.println( - "WekaWrapper: Error - The 'scores' function should not be called " - + "when the class attribute is numeric."); - new Exception().printStackTrace(); - System.exit(1); - } - else { - System.err.println( - "WekaWrapper: Error - ScoreSet: Class Types must be either " - + "Nominal, String, or Numeric."); - new Exception().printStackTrace(); - System.exit(1); - } - return scores; - } - - - /** - * Indicates that the classifier is finished learning. This method - * must be called if the WEKA classifier is to learn anything. - * Since WEKA classifiers cannot learn online, all of the training examples - * must be gathered and committed to first. This method invokes the WEKA - * classifier's buildClassifier(Instances) method. 
- **/ - public void doneLearning() { - if (trained) { - System.err.println( - "WekaWrapper: Error - Cannot call 'doneLearning()' again without " - + "first calling 'forget()'"); - new Exception().printStackTrace(); - System.exit(1); + instances = new Instances(name, attributeInfo, 0); + instances.setClassIndex(0); } - /* - System.out.println("\nWekaWrapper Data Summary:"); - System.out.println(instances.toSummaryString()); - */ - - try { baseClassifier.buildClassifier(instances); } - catch (Exception e) { - System.err.println( - "WekaWrapper: Error - There was a problem building the classifier"); - if (baseClassifier == null) - System.out.println("WekaWrapper: baseClassifier was null."); - e.printStackTrace(); - System.exit(1); - } - trained = true; - instances = new Instances(name, attributeInfo, 0); - instances.setClassIndex(0); - } - - - /** - * Writes the settings of the classifier in use, and a string describing - * the classifier, if available. - **/ - public void write(PrintStream out) { - out.print(name + ": "); - String[] options = baseClassifier.getOptions(); - for (int i = 0; i < options.length; ++i) - out.println(options[i]); - out.println(baseClassifier); - } - - - /** - * Writes the learned function's internal representation in binary form. - * - * @param out The output stream. - **/ - public void write(ExceptionlessOutputStream out) { - super.write(out); - out.writeBoolean(trained); - - if (allowableValues == null) out.writeInt(0); - else { - out.writeInt(allowableValues.length); - for (int i = 0; i < allowableValues.length; ++i) - out.writeString(allowableValues[i]); + /** + * Sets the labeler. + * + * @param l A labeling classifier. + **/ + public void setLabeler(Classifier l) { + super.setLabeler(l); + allowableValues = l == null ? 
null : l.allowableValues(); } - ObjectOutputStream oos = null; - try { oos = new ObjectOutputStream(out); } - catch (Exception e) { - System.err.println("Can't create object stream for '" + name + "': " - + e); - System.exit(1); + + /** + * Returns the array of allowable values that a feature returned by this classifier may take. + * + * @return The allowable values of this learner's labeler, or an array of length zero if the + * labeler has not yet been established or does not specify allowable values. + **/ + public String[] allowableValues() { + if (allowableValues == null) + return new String[0]; + return allowableValues; } - try { - oos.writeObject(baseClassifier); - oos.writeObject(freshClassifier); - oos.writeObject(attributeInfo); - oos.writeObject(instances); + + /** + * Since WEKA classifiers cannot learn online, this method causes no actual learning to occur, + * it simply creates an Instance object from this example and adds it to a set of + * examples from which the classifier will be built once {@link #doneLearning()} is called. + **/ + public void learn(int[] exampleFeatures, double[] exampleValues, int[] exampleLabels, + double[] labelValues) { + instances.add(makeInstance(exampleFeatures, exampleValues, exampleLabels, labelValues)); } - catch (Exception e) { - System.err.println("Can't write to object stream for '" + name + "': " - + e); - System.exit(1); + + + /** + * This method makes one or more decisions about a single object, returning those decisions as + * Features in a vector. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @return A feature vector with a single feature containing the prediction for this example. 
+ **/ + public FeatureVector classify(int[] exampleFeatures, double[] exampleValues) { + if (!trained) { + System.err.println("WekaWrapper: Error - Cannot make a classification with an " + + "untrained classifier."); + new Exception().printStackTrace(); + System.exit(1); + } + + /* + * Assuming that the first Attribute in our attributeInfo vector is the class attribute, + * decide which case we are in + */ + Attribute classAtt = (Attribute) attributeInfo.elementAt(0); + + if (classAtt.isNominal() || classAtt.isString()) { + double[] dist = getDistribution(exampleFeatures, exampleValues); + int best = 0; + for (int i = 1; i < dist.length; ++i) + if (dist[i] > dist[best]) + best = i; + + Feature label = labelLexicon.lookupKey(best); + if (label == null) + return new FeatureVector(); + String value = label.getStringValue(); + + return new FeatureVector(new DiscretePrimitiveStringFeature(containingPackage, name, + "", value, valueIndexOf(value), (short) allowableValues().length)); + } else if (classAtt.isNumeric()) { + return new FeatureVector(new RealPrimitiveStringFeature(containingPackage, name, "", + getDistribution(exampleFeatures, exampleValues)[0])); + } else { + System.err.println("WekaWrapper: Error - illegal class type."); + new Exception().printStackTrace(); + System.exit(1); + } + + return new FeatureVector(); } - } - - - /** - * Reads the binary representation of a learner with this object's run-time - * type, overwriting any and all learned or manually specified parameters - * as well as the label lexicon but without modifying the feature lexicon. - * - * @param in The input stream. 
- **/ - public void read(ExceptionlessInputStream in) { - super.read(in); - trained = in.readBoolean(); - allowableValues = new String[in.readInt()]; - for (int i = 0; i < allowableValues.length; ++i) - allowableValues[i] = in.readString(); - - ObjectInputStream ois = null; - try { ois = new ObjectInputStream(in); } - catch (Exception e) { - System.err.println("Can't create object stream for '" + name + "': " - + e); - System.exit(1); + + + /** + * Returns a discrete distribution of the classifier's prediction values. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + **/ + protected double[] getDistribution(int[] exampleFeatures, double[] exampleValues) { + if (!trained) { + System.err.println("WekaWrapper: Error - Cannot make a classification with an " + + "untrained classifier."); + new Exception().printStackTrace(); + System.exit(1); + } + + Instance inQuestion = + makeInstance(exampleFeatures, exampleValues, new int[0], new double[0]); + + /* + * For Numerical class values, this will return an array of size 1, containing the class + * prediction. For Nominal classes, an array of size equal to that of the class list, + * representing probabilities. For String classes, ? 
+ */ + double[] dist = null; + try { + dist = baseClassifier.distributionForInstance(inQuestion); + } catch (Exception e) { + System.err.println("WekaWrapper: Error while computing distribution."); + e.printStackTrace(); + System.exit(1); + } + + if (dist.length == 0) { + System.err.println("WekaWrapper: Error - The base classifier returned an empty " + + "probability distribution when attempting to classify an " + "example."); + new Exception().printStackTrace(); + System.exit(1); + } + + return dist; } - try { - baseClassifier = (weka.classifiers.Classifier) ois.readObject(); - freshClassifier = (weka.classifiers.Classifier) ois.readObject(); - attributeInfo = (FastVector) ois.readObject(); - instances = (Instances) ois.readObject(); + + /** + * Destroys the learned version of the WEKA classifier and empties the {@link #instances} + * collection of examples. + **/ + public void forget() { + super.forget(); + + try { + baseClassifier = weka.classifiers.Classifier.makeCopy(freshClassifier); + } catch (Exception e) { + System.err.println("LBJava ERROR: WekaWrapper.forget: Can't copy classifier:"); + e.printStackTrace(); + System.exit(1); + } + + instances = new Instances(name, attributeInfo, 0); + instances.setClassIndex(0); + trained = false; } - catch (Exception e) { - System.err.println("Can't read from object stream for '" + name + "': " - + e); - System.exit(1); + + + /** + * Creates a WEKA Instance object out of a {@link FeatureVector}. 
+ **/ + private Instance makeInstance(int[] exampleFeatures, double[] exampleValues, + int[] exampleLabels, double[] labelValues) { + // Make sure attributeInfo has been filled + if (attributeInfo.size() == 0) { + System.err.println("WekaWrapper: Error - makeInstance was called while attributeInfo " + + "was empty."); + new Exception().printStackTrace(); + System.exit(1); + } + + // Initialize an Instance object + Instance inst = new Instance(attributeInfo.size()); + + // Acknowledge that this instance will be a member of our dataset + // 'instances' + inst.setDataset(instances); + + // Assign values for its attributes + /* + * Since we are iterating through this example's feature list, which does not contain the + * label feature (the label feature is the first in the 'attribute' list), we start attIndex + * at 1, while we start featureIndex at 0. + */ + for (int featureIndex = 0, attIndex = 1; featureIndex < exampleFeatures.length; ++featureIndex, ++attIndex) { + Feature f = (Feature) lexicon.lookupKey(exampleFeatures[featureIndex]); + Attribute att = (Attribute) attributeInfo.elementAt(attIndex); + + // make sure the feature's identifier and the attribute's name match + if (!(att.name().equals(f.getStringIdentifier()))) { + System.err.println("WekaWrapper: Error - makeInstance encountered a misaligned " + + "attribute-feature pair."); + System.err.println(" " + att.name() + " and " + f.getStringIdentifier() + + " should have been identical."); + new Exception().printStackTrace(); + System.exit(1); + } + + if (!f.isDiscrete()) + inst.setValue(attIndex, exampleValues[featureIndex]); + else { // it's a discrete or conjunctive feature. + String attValue = + f.totalValues() == 2 ? att.value((int) exampleValues[featureIndex]) : f + .getStringValue(); + inst.setValue(attIndex, attValue); + } + } + + /* + * Here, we assume that if either the labels FeatureVector is empty of features, or is null, + * then this example is to be considered unlabeled. 
+ */ + if (exampleLabels.length == 0) { + inst.setClassMissing(); + } else if (exampleLabels.length > 1) { + System.err.println("WekaWrapper: Error - Weka Instances may only take a single class " + + "value, "); + new Exception().printStackTrace(); + System.exit(1); + } else { + Feature label = labelLexicon.lookupKey(exampleLabels[0]); + + // make sure the name of the label feature matches the name of the 0'th + // attribute + if (!(label.getStringIdentifier().equals(((Attribute) attributeInfo.elementAt(0)) + .name()))) { + System.err.println("WekaWrapper: Error - makeInstance found the wrong label name."); + new Exception().printStackTrace(); + System.exit(1); + } + + if (!label.isDiscrete()) + inst.setValue(0, labelValues[0]); + else + inst.setValue(0, label.getStringValue()); + } + + return inst; } - } - - - /** - * Simply a container for all of {@link WekaWrapper}'s configurable - * parameters. Using instances of this class should make code more - * readable and constructors less complicated. - * - * @author Nick Rizzolo - **/ - public static class Parameters extends Learner.Parameters - { + + /** - * Stores the instance of the WEKA classifier which we are training; - * default {@link WekaWrapper#defaultBaseClassifier}. + * Produces a set of scores indicating the degree to which each possible discrete classification + * value is associated with the given example object. 
**/ - public weka.classifiers.Classifier baseClassifier; + public ScoreSet scores(int[] exampleFeatures, double[] exampleValues) { + double[] dist = getDistribution(exampleFeatures, exampleValues); + + /* + * Assuming that the first Attribute in our attributeInfo vector is the class attribute, + * decide which case we are in + */ + Attribute classAtt = (Attribute) attributeInfo.elementAt(0); + + ScoreSet scores = new ScoreSet(); + + if (classAtt.isNominal() || classAtt.isString()) { + Enumeration enumeratedValues = classAtt.enumerateValues(); + + int i = 0; + while (enumeratedValues.hasMoreElements()) { + if (i >= dist.length) { + System.err + .println("WekaWrapper: Error - scores found more possible values than " + + "probabilities."); + new Exception().printStackTrace(); + System.exit(1); + } + double s = dist[i]; + String v = (String) enumeratedValues.nextElement(); + scores.put(v, s); + ++i; + } + } else if (classAtt.isNumeric()) { + System.err.println("WekaWrapper: Error - The 'scores' function should not be called " + + "when the class attribute is numeric."); + new Exception().printStackTrace(); + System.exit(1); + } else { + System.err.println("WekaWrapper: Error - ScoreSet: Class Types must be either " + + "Nominal, String, or Numeric."); + new Exception().printStackTrace(); + System.exit(1); + } + + return scores; + } + + /** - * A string encoding of the return types of each of the feature - * extractors in use; default {@link WekaWrapper#defaultAttributeString}. + * Indicates that the classifier is finished learning. This method must be called if the + * WEKA classifier is to learn anything. Since WEKA classifiers cannot learn online, all of the + * training examples must be gathered and committed to first. This method invokes the WEKA + * classifier's buildClassifier(Instances) method. 
**/ - public String attributeString; + public void doneLearning() { + if (trained) { + System.err.println("WekaWrapper: Error - Cannot call 'doneLearning()' again without " + + "first calling 'forget()'"); + new Exception().printStackTrace(); + System.exit(1); + } + /* + * System.out.println("\nWekaWrapper Data Summary:"); + * System.out.println(instances.toSummaryString()); + */ + + try { + baseClassifier.buildClassifier(instances); + } catch (Exception e) { + System.err.println("WekaWrapper: Error - There was a problem building the classifier"); + if (baseClassifier == null) + System.out.println("WekaWrapper: baseClassifier was null."); + e.printStackTrace(); + System.exit(1); + } - /** Sets all the default values. */ - public Parameters() { - baseClassifier = defaultBaseClassifier; - attributeString = defaultAttributeString; + trained = true; + instances = new Instances(name, attributeInfo, 0); + instances.setClassIndex(0); } /** - * Sets the parameters from the parent's parameters object, giving - * defaults to all parameters declared in this object. + * Writes the settings of the classifier in use, and a string describing the classifier, if + * available. **/ - public Parameters(Learner.Parameters p) { - super(p); - baseClassifier = defaultBaseClassifier; - attributeString = defaultAttributeString; + public void write(PrintStream out) { + out.print(name + ": "); + String[] options = baseClassifier.getOptions(); + for (int i = 0; i < options.length; ++i) + out.println(options[i]); + out.println(baseClassifier); } - /** Copy constructor. */ - public Parameters(Parameters p) { - super(p); - baseClassifier = p.baseClassifier; - attributeString = p.attributeString; + /** + * Writes the learned function's internal representation in binary form. + * + * @param out The output stream. 
+ **/ + public void write(ExceptionlessOutputStream out) { + super.write(out); + out.writeBoolean(trained); + + if (allowableValues == null) + out.writeInt(0); + else { + out.writeInt(allowableValues.length); + for (int i = 0; i < allowableValues.length; ++i) + out.writeString(allowableValues[i]); + } + + ObjectOutputStream oos = null; + try { + oos = new ObjectOutputStream(out); + } catch (Exception e) { + System.err.println("Can't create object stream for '" + name + "': " + e); + System.exit(1); + } + + try { + oos.writeObject(baseClassifier); + oos.writeObject(freshClassifier); + oos.writeObject(attributeInfo); + oos.writeObject(instances); + } catch (Exception e) { + System.err.println("Can't write to object stream for '" + name + "': " + e); + System.exit(1); + } } /** - * Calls the appropriate Learner.setParameters(Parameters) - * method for this Parameters object. - * - * @param l The learner whose parameters will be set. + * Reads the binary representation of a learner with this object's run-time type, overwriting + * any and all learned or manually specified parameters as well as the label lexicon but without + * modifying the feature lexicon. + * + * @param in The input stream. 
**/ - public void setParameters(Learner l) { - ((WekaWrapper) l).setParameters(this); + public void read(ExceptionlessInputStream in) { + super.read(in); + trained = in.readBoolean(); + allowableValues = new String[in.readInt()]; + for (int i = 0; i < allowableValues.length; ++i) + allowableValues[i] = in.readString(); + + ObjectInputStream ois = null; + try { + ois = new ObjectInputStream(in); + } catch (Exception e) { + System.err.println("Can't create object stream for '" + name + "': " + e); + System.exit(1); + } + + try { + baseClassifier = (weka.classifiers.Classifier) ois.readObject(); + freshClassifier = (weka.classifiers.Classifier) ois.readObject(); + attributeInfo = (FastVector) ois.readObject(); + instances = (Instances) ois.readObject(); + } catch (Exception e) { + System.err.println("Can't read from object stream for '" + name + "': " + e); + System.exit(1); + } } /** - * Creates a string representation of these parameters in which only - * those parameters that differ from their default values are mentioned. + * Simply a container for all of {@link WekaWrapper}'s configurable parameters. Using instances + * of this class should make code more readable and constructors less complicated. + * + * @author Nick Rizzolo **/ - public String nonDefaultString() { - String result = super.nonDefaultString(); + public static class Parameters extends Learner.Parameters { + /** + * Stores the instance of the WEKA classifier which we are training; default + * {@link WekaWrapper#defaultBaseClassifier}. + **/ + public weka.classifiers.Classifier baseClassifier; + /** + * A string encoding of the return types of each of the feature extractors in use; default + * {@link WekaWrapper#defaultAttributeString}. + **/ + public String attributeString; + + + /** Sets all the default values. 
*/ + public Parameters() { + baseClassifier = defaultBaseClassifier; + attributeString = defaultAttributeString; + } + + + /** + * Sets the parameters from the parent's parameters object, giving defaults to all + * parameters declared in this object. + **/ + public Parameters(Learner.Parameters p) { + super(p); + baseClassifier = defaultBaseClassifier; + attributeString = defaultAttributeString; + } + + + /** Copy constructor. */ + public Parameters(Parameters p) { + super(p); + baseClassifier = p.baseClassifier; + attributeString = p.attributeString; + } + + + /** + * Calls the appropriate Learner.setParameters(Parameters) method for this + * Parameters object. + * + * @param l The learner whose parameters will be set. + **/ + public void setParameters(Learner l) { + ((WekaWrapper) l).setParameters(this); + } - if (!attributeString.equals(WekaWrapper.defaultAttributeString)) - result += ", attributeString = \"" + attributeString + "\""; - if (result.startsWith(", ")) result = result.substring(2); - return result; + /** + * Creates a string representation of these parameters in which only those parameters that + * differ from their default values are mentioned. + **/ + public String nonDefaultString() { + String result = super.nonDefaultString(); + + if (!attributeString.equals(WekaWrapper.defaultAttributeString)) + result += ", attributeString = \"" + attributeString + "\""; + + if (result.startsWith(", ")) + result = result.substring(2); + return result; + } } - } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/LexiconOptimizer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/LexiconOptimizer.java new file mode 100644 index 00000000..f6a68d15 --- /dev/null +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/LexiconOptimizer.java @@ -0,0 +1,181 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. 
See + * the LICENSE file in the root folder for details. Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.learn.featurepruning; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.Map.Entry; + +import edu.illinois.cs.cogcomp.lbjava.classify.DiscreteConjunctiveFeature; +import edu.illinois.cs.cogcomp.lbjava.classify.Feature; +import edu.illinois.cs.cogcomp.lbjava.classify.RealConjunctiveFeature; +import edu.illinois.cs.cogcomp.lbjava.learn.Lexicon; + +/** + * This class defines the life cycle methods for pruning useless features + * from a lexicon. Features for example that carry zero weights with them are + * not useful to the model, so can be eliminated saving space and execution time, without + * affecting accuracy (much). + * @author redman + */ +abstract public class LexiconOptimizer { + + /** any weight less than this is considered irrelevant. This is for prunning. */ + private static final double PRUNING_THRESHOLD = 0.000001; + + /** lexicon contains the features we will operate on. */ + protected Lexicon lexicon; + + /** this also for testing, save feature names we will delete, check the names when we do. */ + final protected ArrayList uselessFeatureNames = new ArrayList(); + + /** this is the threshold we use to discard useless features. */ + protected double threshold = PRUNING_THRESHOLD; + + /** + * We must have a lexicon to perform this operation. + * @param lexicon the lexicon object. + * @param threshold the feature pruning threshold. + */ + protected LexiconOptimizer(Lexicon lexicon, double threshold) { + this.lexicon = lexicon; + this.threshold = threshold; + } + + /** + * Determine if the provided feature has sum of weights greater than a threshold value, + * and discard the feature if it falls below. + * @param lex the lexicon. + * @param f the feature. 
+ * @return true if the feature has any value, there is a + */ + abstract protected boolean hasWeight(Lexicon lex, Feature f); + + /** + * This method returns the number of features. This implementation assumes the + * lexicon is populated, but that's not always the case (with SVM for example appears + * to not always have a populated lexicon). In these cases, this method may be overriden. + * @return the number of featues. + */ + protected int getNumberFeatures() { + return lexicon.size(); + } + + /** + * do the optimization + */ + public void optimize () { + + int originalNumFeatures = this.getNumberFeatures(); + int [] uselessfeatures = identifyUselessFeatures(); + pruneWeights(uselessfeatures, originalNumFeatures); + pruneLexicon(uselessfeatures); + + System.out.println("LexiconOptimizer optimization complete, pruned " + +uselessfeatures.length+" features of "+originalNumFeatures+", leaving "+(originalNumFeatures - uselessfeatures.length)+ + " at threshold of "+threshold); + } + + /** + * @param f the feature. + * @return true if the feature is conjunctive. + */ + static private boolean isConjunctive(Feature f) { + return (f instanceof DiscreteConjunctiveFeature || f instanceof RealConjunctiveFeature); + } + + /** + * If this conjunctive feature has weight, add it and all it's children to the white list. + * @param lex the lexicon maps feature to index. + * @param whitelist the white list we will add to. + * @param f the conjunctive feature. + */ + private void traverseConjunctiveTree(HashSet whitelist, Feature f) { + + // add the conjunctive feature. + whitelist.add(f); + + if (f instanceof DiscreteConjunctiveFeature) { + + // add it's direct children + DiscreteConjunctiveFeature dcf = (DiscreteConjunctiveFeature) f; + whitelist.add(dcf.getLeft()); + whitelist.add(dcf.getRight()); + + // possible add any children of children. 
+ if (isConjunctive(dcf.getLeft())) + traverseConjunctiveTree(whitelist, dcf.getLeft()); + if (isConjunctive(dcf.getRight())) + traverseConjunctiveTree(whitelist, dcf.getRight()); + } else { + + // add it's direct children + RealConjunctiveFeature rcf = (RealConjunctiveFeature) f; + whitelist.add(rcf.getLeft()); + whitelist.add(rcf.getRight()); + + // possible add any children of children. + if (isConjunctive(rcf.getLeft())) + traverseConjunctiveTree(whitelist, rcf.getLeft()); + if (isConjunctive(rcf.getRight())) + traverseConjunctiveTree(whitelist, rcf.getRight()); + } + } + + /** + * Find all features we must whitelist. For each conjunctive feature that has weight, we must keep + * all it's children, regardless of weight, and the rest of the tree from there on down. + * @param lex the lexicon. + * @return the conjunctive features. + */ + protected HashSet compileWhitelist(Lexicon lex) { + HashSet whitelist = new HashSet(); + for (Object e : lex.getMap().entrySet()) { + @SuppressWarnings("unchecked") + Entry entry = (Entry) e; + Feature f = entry.getKey(); + if (isConjunctive(f) && this.hasWeight(lex, f)) { + + // add this conjunctive feature and all it's kids to the whitelist. + traverseConjunctiveTree(whitelist, f); + } + } + return whitelist; + } + + + /** + * Given a list of useless features, prune the entries from the lexicon. + * @param uselessfeatures + */ + protected void pruneLexicon(int[] uselessfeatures) { + lexicon.discardPrunedFeatures(uselessfeatures); + for (Feature f : this.uselessFeatureNames) { + if (lexicon.contains(f)) { + throw new RuntimeException("The features were not correctly removed from the lexicon : " + f.getStringIdentifier()); + } + } + } + + /** + * This method selects the features to be pruned. If weights + * are needed, they must be passed to the constructor and stored in fields of + * the implementing class. In this way, we make no assumptions about the + * structure of the weight classes. 
+ * @return + */ + abstract protected int[] identifyUselessFeatures(); + + /** + * Once we have identified the useless entries, we need to optimize the + * model components. + * @param uselessfeatures the indices of those features with no significant weights. + * @param originalNumFeatures the number of features in the original lexicon. + */ + abstract public void pruneWeights(int[] uselessfeatures, int originalNumFeatures); +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/LinearThresholdUnitOptimizer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/LinearThresholdUnitOptimizer.java new file mode 100644 index 00000000..44101b33 --- /dev/null +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/LinearThresholdUnitOptimizer.java @@ -0,0 +1,144 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.learn.featurepruning; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Map.Entry; + +import edu.illinois.cs.cogcomp.lbjava.classify.Feature; +import edu.illinois.cs.cogcomp.lbjava.learn.Lexicon; +import edu.illinois.cs.cogcomp.lbjava.learn.LinearThresholdUnit; +import edu.illinois.cs.cogcomp.lbjava.learn.SparseAveragedPerceptron; +import gnu.trove.set.hash.TIntHashSet; + +/** + * This class will optimize any working LinearThresholdUnit subclass by pruning + * low value features. + * @author redman + */ +public class LinearThresholdUnitOptimizer extends LexiconOptimizer { + + /** the LTU learner we want to optimize. 
*/ + private LinearThresholdUnit ltuLearner; + + /** this also for testing, save feature names we will delete, check the names when we do. */ + final ArrayList uselessFeatureNames = new ArrayList(); + + /** + * Given the LTU learner to optimize. + * @param snl the LTU learner. + */ + public LinearThresholdUnitOptimizer(LinearThresholdUnit ltu) { + super(ltu.demandLexicon(), ltu.featurePruningThreshold); + ltuLearner = ltu; + } + + /** + * Determine if the provided feature has sum of weights greater than a threshold value, + * and discard the feature if it falls below. + * @param lex the lexicon. + * @param f the feature. + * @return true if the feature has any value, there is a + */ + protected boolean hasWeight(Lexicon lex, Feature f) { + int featureindex = lex.lookup(f); + + // we assume each element of the network is of the same type, if that type is sparse averaged + // perceptron, we check both the averaged and current weight + double sum; + if (this.ltuLearner instanceof SparseAveragedPerceptron) { + SparseAveragedPerceptron sap = (SparseAveragedPerceptron) this.ltuLearner; + double wt = sap.getWeightVector().getRawWeights().get(featureindex); + double avg = sap.getAveragedWeightVector().getRawWeights().get(featureindex); + sum = Math.abs(wt); + sum += Math.abs(avg); + } else { + double wt = this.ltuLearner.getWeightVector().getRawWeights().get(featureindex); + sum = Math.abs(wt); + } + + // if the value is sufficiently large, then we have a good weight and should keep. + if (sum > this.threshold) + return true; + else + return false; + } + + /** + * In this case, we must check, for each feature, the associated set of weight in each weight + * vector, if they are all very small, it is useless. The array returned is sorted ascending. + * @return the set of useless features. 
+ * @see edu.illinois.cs.cogcomp.lbjava.learn.featurepruning.LexiconOptimizer#identifyUselessFeatures() + */ + @SuppressWarnings("unchecked") + @Override + protected int[] identifyUselessFeatures() { + Lexicon lex = this.ltuLearner.demandLexicon(); + if (lex != null) { + HashSet whitelist = compileWhitelist(lex); + + // we have the conjunctive features, if left, right, or the parent itself has a non zero weight, + // consider non of the features (parent, left or right) useless, whitelist them. + int count = 0; + int numberfeatures = lex.size(); + int[] all = new int[numberfeatures]; + TIntHashSet defunct = new TIntHashSet(); + for (Object e : lex.getMap().entrySet()) { + Entry entry = (Entry) e; + int fi = entry.getValue(); + if (!whitelist.contains(entry.getKey())) { + double wt = Math.abs(this.ltuLearner.getWeightVector().getRawWeights().get(fi)); + + // if the value is sufficiently large, then we have a good weight and should keep. + if (wt < this.threshold) { + + // This is a useless feature + all[count] = fi; + if (defunct.contains(fi)) { + System.err.println("There was a feature discarded twice during feature pruning!"); + } else { + defunct.add(fi); + } + this.uselessFeatureNames.add(entry.getKey()); + count++; + } + } + } + + int[] useless = new int[count]; + System.arraycopy(all, 0, useless, 0, count); + Arrays.sort(useless); + return useless; + } else + return new int[0]; + } + + /** + * Check it out when done, make sure it worked. + */ + protected void pruneLexicon(int[] uselessfeatures) { + super.pruneLexicon(uselessfeatures); + for (Feature f : this.uselessFeatureNames) { + if (lexicon.contains(f)) { + throw new RuntimeException("The features were not correctly removed from the lexicon : " + f.getStringIdentifier()); + } + } + } + + /** + * Not we remove the useless weights from ALL weight vectors. There must be the same number + * of entries in each weight vector as there is in the lexicon. 
+ * @see edu.illinois.cs.cogcomp.lbjava.learn.featurepruning.LexiconOptimizer#pruneWeights(int[]) + */ + @Override + public void pruneWeights(int[] uselessfeatures, int origNumFeatures) { + this.ltuLearner.pruneWeights(uselessfeatures, origNumFeatures); + } +} \ No newline at end of file diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/SparseNetworkOptimizer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/SparseNetworkOptimizer.java new file mode 100644 index 00000000..45cfe812 --- /dev/null +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/SparseNetworkOptimizer.java @@ -0,0 +1,149 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.learn.featurepruning; + +import java.util.Arrays; +import java.util.HashSet; +import java.util.Map.Entry; + +import edu.illinois.cs.cogcomp.core.datastructures.vectors.OVector; +import edu.illinois.cs.cogcomp.lbjava.classify.Feature; +import edu.illinois.cs.cogcomp.lbjava.learn.Lexicon; +import edu.illinois.cs.cogcomp.lbjava.learn.LinearThresholdUnit; +import edu.illinois.cs.cogcomp.lbjava.learn.SparseAveragedPerceptron; +import edu.illinois.cs.cogcomp.lbjava.learn.SparseNetworkLearner; +import gnu.trove.set.hash.TIntHashSet; + +/** + * This class will optimize the SparseNetworkLearner by discarding all features + * associated with no sufficiently high weight values. For the network learner, we + * much check the weights across all the binary learners to determin the value + * of a particular feature. + * @author redman + */ +public class SparseNetworkOptimizer extends LexiconOptimizer { + + /** the network learner we want to optimize. 
*/ + private SparseNetworkLearner networkLearner; + + /** + * Given the sparse net learner to optimize. + * @param snl the sparse net learner. + */ + public SparseNetworkOptimizer(SparseNetworkLearner snl) { + super(snl.demandLexicon(), snl.getBaseLTU().featurePruningThreshold); + networkLearner = snl; + } + + /** + * Determine if the provided feature has sum of weights greater than a threshold value, + * and discard the feature if it falls below. + * @param lex the lexicon. + * @param f the feature. + * @return true if the feature has any value, there is a + */ + protected boolean hasWeight(Lexicon lex, Feature f) { + OVector net = networkLearner.getNetwork(); + if (net.size() == 0) + return false; + int numberclasses = net.size(); + int i = 0; + double sum = 0; + int featureindex = lex.lookup(f); + + // we assume each element of the network is of the same type, if that type is sparse averaged + // perceptron, we check both the averaged and current weight + if (net.get(0) instanceof SparseAveragedPerceptron) { + for (; i < numberclasses; ++i) { + SparseAveragedPerceptron sap = (SparseAveragedPerceptron) net.get(i); + double wt = sap.getWeightVector().getRawWeights().get(featureindex); + double avg = sap.getAveragedWeightVector().getRawWeights().get(featureindex); + sum += Math.abs(wt); + sum += Math.abs(avg); + + // if the value is sufficiently large, then we have a good weight and should keep. + if (sum >= this.threshold) + return true; + } + } else { + for (; i < numberclasses; ++i) { + LinearThresholdUnit ltu = (LinearThresholdUnit) net.get(i); + double wt = ltu.getWeightVector().getRawWeights().get(featureindex); + sum += Math.abs(wt); + + // if the value is sufficiently large, then we have a good weight and should keep. + if (sum >= this.threshold) + return true; + } + } + return false; + } + + /** + * In this case, we must check, for each feature, the associated set of weight in each weight + * vector, if they are all very small, it is useless. 
The array returned is sorted ascending. + * @return the set of useless features. + * @see edu.illinois.cs.cogcomp.lbjava.learn.featurepruning.LexiconOptimizer#identifyUselessFeatures() + */ + @SuppressWarnings("unchecked") + @Override + protected int[] identifyUselessFeatures() { + Lexicon lex = networkLearner.demandLexicon(); + if (lex != null) { + + // we have the conjunctive features, if left, right, or the parent itself has a non zero weight, + // consider non of the features (parent, left or right) useless, whitelist them. + HashSet whitelist = compileWhitelist(lex); + int count = 0; + int numberfeatures = lex.size(); + int[] all = new int[numberfeatures]; + TIntHashSet defunct = new TIntHashSet(); + + // For each feature, determin it's value. We will interate over a map with features as key + // and the integer index of the feature. If the feature is whitelisted, we keep, otherwise + // check for uselessness and if so, add to the list. + for (Object e : lex.getMap().entrySet()) { + Entry entry = (Entry) e; + if (!whitelist.contains(entry.getKey())) { + int fi = entry.getValue(); + if (!hasWeight(lexicon, entry.getKey())) { + all[count] = fi; + if (defunct.contains(fi)) { + System.err.println("There was a feature discarded twice during feature pruning!"); + } else { + defunct.add(fi); + } + + this.uselessFeatureNames.add(entry.getKey()); + count++; + } + } + } + + int[] useless = new int[count]; + System.arraycopy(all, 0, useless, 0, count); + Arrays.sort(useless); + return useless; + } else + return new int[0]; + } + + /** + * Not we remove the useless weights from ALL weight vectors. There must be the same number + * of entries in each weight vector as there is in the lexicon. 
+ * @see edu.illinois.cs.cogcomp.lbjava.learn.featurepruning.LexiconOptimizer#pruneWeights(int[]) + */ + @Override + public void pruneWeights(int[] uselessfeatures, int origNumFeatures) { + OVector ltus = networkLearner.getNetwork(); + for (int i = 0; i < ltus.size(); i++) { + LinearThresholdUnit ltu = (LinearThresholdUnit) ltus.get(i); + ltu.pruneWeights(uselessfeatures, origNumFeatures); + } + } +} \ No newline at end of file diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/SupportVectorMachineOptimizer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/SupportVectorMachineOptimizer.java new file mode 100644 index 00000000..5b1fa976 --- /dev/null +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/SupportVectorMachineOptimizer.java @@ -0,0 +1,125 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.learn.featurepruning; + +import java.util.*; +import java.util.Map.Entry; + +import edu.illinois.cs.cogcomp.lbjava.classify.Feature; +import edu.illinois.cs.cogcomp.lbjava.learn.Lexicon; +import edu.illinois.cs.cogcomp.lbjava.learn.SupportVectorMachine; + +/** + * Optimized a support vector machine by discarding any sufficiently low weights. + * @author redman + */ +public class SupportVectorMachineOptimizer extends LexiconOptimizer { + + /** the model we are going to optimize. */ + private SupportVectorMachine svm = null; + + /** the number of classes, if the numclasses is two, consider it binary and change to one. */ + public int numberclasses = -1; + + /** the biasfeatures are 0 for no added bias features, or 1 if bias is added. 
*/ + public int biasfeatures = 0; + + /** + * Take lex and model, and optimize the model by pruning the weights. Any zero weights get pruned. + * @param lexicon the lexicon with the feature map. + * @param s the support vector machine. + */ + public SupportVectorMachineOptimizer(SupportVectorMachine s) { + super(s.demandLexicon(), s.featurePruningThreshold); + this.svm = s; + + // the numClasses field gets change in the write method to allow for the binary case + // which is actually two classes to behave as one class (binary). + if (!s.getSolverType().equals("MCSVM_CS") && s.getNumClasses() <= 2) + numberclasses = 1; + else + numberclasses = s.getNumClasses(); + + // we need to figure out if we have a bias feature introduced + this.biasfeatures = svm.getBiasFeatures(); + } + + /** + * Determine if the provided feature has sum of weights greater than a threshold value, + * and discard the feature if it falls below. + * @param lex the lexicon. + * @param f the feature. + * @return true if the feature has any value, there is a + */ + protected boolean hasWeight(Lexicon lex, Feature f) { + int index = lex.lookup(f); + return getWeight(index) > this.threshold; + } + + /** + * Compute the single weight at the index as the sum of all weights for all classes. + * @param index the index of the feature + * @return the sum of the absolute value of all weights for the feature. + */ + private double getWeight(int index) { + double sum = 0; + for (int i = 0; i < this.numberclasses; i++) { + sum += Math.abs(svm.getWeights()[index]); + index += (this.lexicon.size() + biasfeatures); + } + return sum; + } + + /** + * @see edu.illinois.cs.cogcomp.lbjava.learn.featurepruning.LexiconOptimizer#identifyUselessFeatures() + */ + @Override + protected int[] identifyUselessFeatures() { + + // compile the whitelist + HashSet whitelist = compileWhitelist(lexicon); + + // look at each feature in the lexicon, any with zero weights can be safely discarded. 
+ int [] all = new int [this.lexicon.size()]; + int count = 0; + for (Object e : lexicon.getMap().entrySet()) { + @SuppressWarnings("unchecked") + Entry entry = (Entry) e; + if (!whitelist.contains(entry.getKey())) { + int fi = entry.getValue(); + double wt = getWeight(fi); + if (wt < this.threshold) { + all[count] = fi; + count++; + } + } + } + int[] useless = new int[count]; + System.arraycopy(all, 0, useless, 0, count); + Arrays.sort(useless); + return useless; + } + + /** + * This method returns the number of features. This implementation assumes the + * lexicon is populated, but that's not always the case (with SVM for example appears + * to not always have a populated lexicon). In these cases, this method may be overriden. + * @return the number of featues. + */ + protected int getNumberFeatures() { + return this.svm.getNumFeatures(); + } + + /** + * @see edu.illinois.cs.cogcomp.lbjava.learn.featurepruning.LexiconOptimizer#pruneWeights(int[], int) + */ + @Override + public void pruneWeights(int[] uselessfeatures, int originalNumFeatures) { + this.svm.pruneWeights(uselessfeatures, originalNumFeatures); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/package-info.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/package-info.java new file mode 100644 index 00000000..c9b185bc --- /dev/null +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/featurepruning/package-info.java @@ -0,0 +1,38 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +/** + *

For sparse learners, it is often the case that the array of features you learn + * contains only a subset of useful features. When we leave these features in the lexicon, + * we end up with bloated lexicons and weight vectors. This leads to larger-than-necessary + * models.

+ * + *

This package contains an interface that defines the life cycle for the feature pruning + * process, as well as some implementations, one that takes multiple weight vectors (for + * multi-class network learners), and some that take only one weight vector.

+ * + *

All optimizers should subclass {@link LexiconOptimizer}, which implements most of the + * optimization. Subclasses will need to provide methods to compute the weight value to compare + * against the threshold, a method to identify the useless features, and a method to prune + * those features.

+ * + *

The optimizers are invoked by the {@link edu.illinois.cs.cogcomp.lbjava.learn.Learner#doneTraining} + * method of the Learner class when all learning is complete. For those learners that include a feature + * pruning implementation, they must override this method to invoke the optimizer. In this way, during the + * normal LBJava compile and model build cycle, the optimization is performed automatically. For those + * who have built their own training procedure, they are required to invoke the doneTraining and + * {@link edu.illinois.cs.cogcomp.lbjava.learn.Learner#startTraining} methods at appropriate points during + * their training process.

+ * + *

The learner classes typically have a parameter that can be set to change the default feature + * pruning threshold to any value the user might choose, or it can be set to 0.0 to disable pruning.

+ * + *

The pruning threshold value is provided by the specific learner, and should be, in one way or + * another, parameterized.

+ * @author redman + */ +package edu.illinois.cs.cogcomp.lbjava.learn.featurepruning; \ No newline at end of file diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/ActThread.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/ActThread.java new file mode 100644 index 00000000..20929909 --- /dev/null +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/ActThread.java @@ -0,0 +1,71 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.neuralnet; + +import edu.illinois.cs.cogcomp.lbjava.neuralnet.ThreadedNNTrainer.Range; + +/** + * This thread will compute a single activtion value, for each layer + * setup must be called to provide the output array, the layer and the + * input values. + * @author redman + */ +class ActThread extends PushThread { + + /** the input data. */ + float[] currentInputs = null; + + /** the layer we are operating on. */ + Layer layer = null; + + /** the resulting outputs are stored here, this array is shared + * by all threads activating on this layer. */ + float [] layerActivations = null; + + /** used to make the name of the thread unique. */ + private static int inc = 0; + + /** + * init with a mux. + * @param m the multiplexer. + */ + ActThread() { + super("ActThread-"+(inc++)); + } + + /** + * before we start a layer, this is called to set up the thread. + * @param ci the input data. + * @param l the layer. + * @param la the layer actvation values. + * @param mux the multiplexer. + */ + void setup(float[] ci, Layer l, float[] la) { + this.currentInputs = ci; + this.layer = l; + this.layerActivations = la; + } + + /** + * Run forever never quite. 
+ */ + public void run() { + synchronized (this) { + while(true) { + + // wait for the range object to be set. + Range r = this.getRange(); + if (r == null) + return; + for (int indx = r.start; indx < r.end; indx++) { + layerActivations[indx] = layer.computeOneOutput(indx, currentInputs); + } + } + } + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/Activator.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/Activator.java new file mode 100644 index 00000000..74b25f0a --- /dev/null +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/Activator.java @@ -0,0 +1,25 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.neuralnet; + +/** + * Implementations will activate all the layers of the net and + * produce a set of outputs. The one required method will return + * all the output values. + * @author redman + */ +public interface Activator { + + /** + * Activate the provided layer, return the resulting outputs. + * @param inputs the input data. + * @param layer the layer to supply the inputs to. + * @return the output values. + */ + public float[] prediction(float[] inputs); +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/ConvergenceMeasure.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/ConvergenceMeasure.java new file mode 100644 index 00000000..8237a18b --- /dev/null +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/ConvergenceMeasure.java @@ -0,0 +1,27 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.neuralnet; + +/** + * Measure convergence, simplest implementation may simply run some number of epochs, + * more sophosticated will look some function of cumulative error going to zero at the + * end of an epoch. Conversion is always measured at the end of a training cycle. + * @author redman + */ +public interface ConvergenceMeasure { + + /** + * With the given inputs and outputs, evaluate the results of the last iteration, + * determine the error, probably store that, and if convergence (what whatever measure) + * is achieved, return true, else return false. + * + * @param learner the learner being used to train up the neural net, contains the cummulative error. + * @return true if converged. + */ + public boolean evaluate(NNTrainingInterface learner); +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/DatasetReader.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/DatasetReader.java new file mode 100644 index 00000000..acd54807 --- /dev/null +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/DatasetReader.java @@ -0,0 +1,98 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.neuralnet; + +import java.io.BufferedInputStream; +import java.io.File; +import java.io.InputStream; +import java.io.FileInputStream; +import java.io.IOException; + +/** + * @author redman + * + */ +public class DatasetReader { + + /** + * flip the byte order. + * @param is input stream. + * @return the integer. 
+ * @throws IOException + */ + private static int readInt(InputStream is) throws IOException { + int i0 = is.read(); + int i1 = is.read(); + int i2 = is.read(); + int i3 = is.read(); + return (i0<<24) + (i1<<16) + (i2<<8) + i3; + } + + /** + * get the examples form an NIST dataset, return everything at once. There are + * 60k examples, at 28x28 pixel values per example, so 60000 x 28 x 28 floats = + * 47 million floats. These are input examples, so they are image data. + * @param filename + * @return the input examples. + * @throws IOException + */ + public static float[][] getExampleInputs(String filename) throws IOException { + InputStream dis = new BufferedInputStream(new FileInputStream(new File(filename))); + int m1 = readInt(dis); + if (m1 != 2051) + throw new IOException("That was not an example file! magic code = "+m1); + int numExamples = readInt(dis); + if (numExamples != 60000) + System.out.println("We expecting 60k examples "+m1); + int numRows = readInt(dis); + if (numRows != 28) + System.out.println("We expecting 28 rows "+numRows); + int numColumns = readInt(dis); + if (numColumns != 28) + System.out.println("We expecting 28 columns "+numColumns); + int totalpixels = numRows*numColumns; + float [][] examples = new float [numExamples][totalpixels]; + for (int i = 0 ; i < examples.length; i++) { + for (int j = 0; j < totalpixels; j++) { + examples[i][j] = (float)(dis.read()/128f) - 1f; + } + } + return examples; + } + + /** + * get the examples form an NIST dataset, return everything at once. There are + * 60k examples, at 28x28 pixel values per example, so 60000 x 28 x 28 floats = + * 47 million floats. These are input examples, so they are image data. + * @param filename + * @return the output examples. 
+ * @throws IOException + */ + public static float[][] getExampleOutputs(String filename) throws IOException { + InputStream dis = new BufferedInputStream(new FileInputStream(new File(filename))); + int m1 = readInt(dis); + if (m1 != 2049) + throw new IOException("That was not an example file! magic code = "+m1); + int numExamples = readInt(dis); + float [][] examples = new float [numExamples][1]; + for (int i = 0 ; i < numExamples; i++) { + examples[i][0] = (float)(dis.read()/5f) - 1f; + } + return examples; + } + + /** + * @param a + * @throws IOException + */ + @SuppressWarnings("unused") + public static void main(String[]a) throws IOException { + float[][] examples = getExampleInputs("/Users/redman/Desktop/NNTrainingData/train-images-idx3-ubyte"); + float[][] labels = getExampleOutputs("/Users/redman/Desktop/NNTrainingData/train-labels-idx1-ubyte"); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/EpochConvergenceMeasure.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/EpochConvergenceMeasure.java new file mode 100644 index 00000000..a794cf6a --- /dev/null +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/EpochConvergenceMeasure.java @@ -0,0 +1,42 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.neuralnet; + +/** + * Terminate agfter so many iterations. + * @author redman + */ +public class EpochConvergenceMeasure implements ConvergenceMeasure { + + /** the current epoch count. */ + private int epoch = 0; + + /** the current epoch count. */ + private int max; + + /** + * Takes the number of iterations. + * @param m the max iterations. 
+ */ + public EpochConvergenceMeasure(int m) { + this.max = m; + } + + /** + * @see edu.illinois.cs.cogcomp.lbjava.neuralnet.ConvergenceMeasure#evaluate(edu.illinois.cs.cogcomp.lbjava.neuralnet.NNTrainingInterface) + */ + @Override + public boolean evaluate(NNTrainingInterface learner) { + epoch++; + if (epoch > max) { + return true; + } else + return false; + } + +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/Layer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/Layer.java new file mode 100644 index 00000000..29adfeb0 --- /dev/null +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/Layer.java @@ -0,0 +1,277 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.neuralnet; + +import java.io.PrintStream; +import java.util.Random; + +/** + * This is a layer in a neural net. it is characterized by a number of inputs + * and a number of outputs. The neurons (perceptrons) are hidden within, I see + * no reason to expose them. this allows the layer class to do all computations + * across the entire layer in one pass, which is very efficient. Downside; Nothing + * in this implementation will allow you to assign per neuron attributes. Also, the + * weights are represented by a primitive array, so only 32 bit indices meaning no + * more than 2 ^ 32 weights are allowed. + * @author redman + */ +public class Layer { + + /** number of inputs to this layer. */ + private int numberInputs; + + /** the number of outputs from this layer. */ + private int numberOutputs; + + /** the neuron weights. */ + private float[] weights; + + /** the derived outputs. 
*/ + private float[] dweights; + + /** collects output values. */ + private float[] outputs; + + /** + * The layer constructed. + * @param numIn the number of inputs. + * @param numOut the number of outputs. + */ + public Layer(int numIn, int numOut) { + this(numIn, numOut, new Random()); + outputs = new float[numOut]; + } + + /** + * The layer constructed. + * @param numIn the number of inputs. + * @param numOut the number of outputs. + * @param r the random num generator. + */ + public Layer(int numIn, int numOut, Random r) { + this.numberInputs = numIn; + this.numberOutputs = numOut; + int wl = (numIn+1)*numOut; + weights = new float[wl]; + dweights = new float[wl]; + for (int i = 0; i < wl; i++) + weights [i] = (r.nextFloat() - 0.5f) * 4f; + outputs = new float[numOut]; + } + + /** + * Compute the sigmoid first derivative. + * @param x the input value + * @return the sigmoid + */ + final private float sigmoid(float x) { + return (float) (1.0 / (1.0 + Math.exp(-x))); + } + + /** + * @return the weights + */ + public float[] getWeights() { + return weights; + } + + /** + * @param weights the weights to set + */ + public void setWeights(float[] weights) { + this.weights = weights; + } + + /** + * @return the numberInputs + */ + public int getNumberInputs() { + return numberInputs; + } + + /** + * @param numberInputs the numberInputs to set + */ + public void setNumberInputs(int numberInputs) { + this.numberInputs = numberInputs; + } + + /** + * @return the numberOutputs + */ + public int getNumberOutputs() { + return numberOutputs; + } + + /** + * @param numberOutputs the numberOutputs to set + */ + public void setNumberOutputs(int numberOutputs) { + this.numberOutputs = numberOutputs; + } + + /** + * This granularity of method invocation is only necessary so parallelize + * the process. + * @param index the index of the input to compute the output for. + * @param inputs the inputs. + * @return the activation output. 
+ */ + final float computeOneOutput(int index, float[] inputs) { + float result = 0.0f; + int nI = this.numberInputs; + int start = index * (nI+1); + for (int k = 0 ; k < nI ; k++) { + result += weights[start+k] * inputs[k]; + } + result += weights[start+nI]; + return (float) sigmoid(result); + } + + /** + * Given a set of inputs, produce the set of activation + * values. + * @param inputs the inputs to produce the predictions for. + * @return the set of predictions. + */ + final public float[] activate(float[] inputs) { + int nO = this.numberOutputs; + float[] o = this.outputs; + for (int j = 0 ; j < nO ; j++) { + o[j] = this.computeOneOutput(j, inputs); + } + return outputs; + } + + /** + * train up weights for just one output. Thread safety must be noted here, since everybody will be + * updating the nextError array at the same time. To avoid doing repeated synchronizations which are + * expensive here, for multithreaded trainer, we pass in a dummy error array, update at will, then + * the caller is responsible for synchronizing on the real one and updating the shared sum error array. + * @param error the activation errors used to compute the backprop value. + * @param input the input date. + * @param output the computed output data. + * @param learningRate the learning rate. 
+ * @param momentum the momentum + * @param nextError the array where the error values will be updated + * @param outIndex the output index; + */ + final public void trainOne(float[] error, float[] input, float[] output, float learningRate, float momentum, float[] nextError, int outIndex) { + int woffset = (this.numberInputs+1) * outIndex; + float d = error[outIndex] * (output[outIndex] * (1 - output[outIndex])); + for (int j = 0; j < this.numberInputs; j++) { + int windx = woffset + j; + nextError[j] += weights[windx] * d; + float dw = input[j] * d * learningRate; + weights[windx] += dweights[windx] * momentum + dw; + dweights[windx] = dw; + } + + // compute the error for the bias, the fake bias input is always 1. + int windx = woffset + input.length; + nextError[input.length] += weights[windx] * d; + float dw = d * learningRate; + weights[windx] += dweights[windx] * momentum + dw; + dweights[windx] = dw; + } + + /** + * given a set of errors (errors from the next layer on), and adjust the weights + * to do a gradient descent. + * @param error the output errors. + * @param input the input data. + * @param output the desired output. + * @param learningRate the rate of learning. + * @param momentum helps to avoid local minima. + * @return the errors from this layer. 
+ */ + final public float[] train(float[] error, float[] input, float[] output, float learningRate, float momentum) { + int nI = this.numberInputs+1/*for the bias*/; + float[] nextError = new float[nI]; + for (int i = 0; i < this.numberOutputs; i++) { + //this.trainOne(error, input, output, learningRate, momentum, nextError, i); + + int woffset = nI * i; + float d = error[i] * (output[i] * (1 - output[i])); + for (int j = 0; j < this.numberInputs; j++) { + int windx = woffset + j; + nextError[j] += weights[windx] * d; + float dw = input[j] * d * learningRate; + weights[windx] += dweights[windx] * momentum + dw; + dweights[windx] = dw; + } + + // compute the error for the bias, the fake bias input is always 1. + int windx = woffset + input.length; + nextError[input.length] += weights[windx] * d; + float dw = d * learningRate; + weights[windx] += dweights[windx] * momentum + dw; + dweights[windx] = dw; + } + return nextError; + } + + /** + * print out the weights. + */ + public void print() { + System.out.print(this.numberInputs+":"+this.numberOutputs); + System.out.print(" "); + for (float w : weights) { + System.out.format(" %.8f",w); + } + System.out.print(" ("); + for (float w : dweights) { + System.out.format(" %.8f",w); + } + System.out.println(")"); + } + + /** + * @return the dweights + */ + public float[] getDweights() { + return dweights; + } + + /** + * @param dweights the dweights to set + */ + public void setDweights(float[] dweights) { + this.dweights = dweights; + } + + /** + * used for reporting mostely. + */ + public String toString() { + StringBuffer sb = new StringBuffer(); + sb.append("in : "+this.numberInputs+" out : "+this.numberOutputs); + sb.append("\n"); + for (int i = 0; i < weights.length;) { + for (int j = 0; j < this.numberInputs;j++,i++) { + sb.append(" "+weights[i]); + sb.append(" "); + } + sb.append("\n"); + } + return sb.toString(); + } + + /** + * Write the representation to a digital output stream. 
+ * @param out the output stream for serialization. + */ + public void write(PrintStream out) { + out.print(numberInputs); + out.print(numberOutputs); + out.print(weights.length); + for (int i = 0; i < weights.length; ++i) + out.print(weights[i]); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/LearnerThread.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/LearnerThread.java new file mode 100644 index 00000000..d3568330 --- /dev/null +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/LearnerThread.java @@ -0,0 +1,96 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.neuralnet; + +import java.util.Arrays; + +import edu.illinois.cs.cogcomp.lbjava.neuralnet.ThreadedNNTrainer.Range; + +/** + * This thread will compute a single activtion value, for each layer + * setup must be called to provide the output array, the layer and the + * input values. + * @author redman + */ +class LearnerThread extends PushThread { + + /** the input error from the next layer being back propogated. */ + float[] error = null; + + /** the input labeled data. */ + float[] input = null; + + /** the input data. */ + float[] output = null; + + /** the result error SHARED ACROSS THREADS, must be synced to update. */ + float [] nextError; + + /** the space where updates to the errors will be set, later used to update nextError.*/ + float [] errorWorkspace; + + /** the learning rate. */ + float learnRate; + + /** the momentum. */ + float momentum; + + /** the layer we are operating on. */ + Layer layer = null;; + + /** the unique id. 
*/ + private static int inc = 0; + + /** + * The learning rate and momentum will not change, so we will take them initially. + * @param lR the learning rate. + * @param m the momentum. + * @param mux the multiplexer. + */ + LearnerThread(float lR, float m) { + super("LearnerThread-"+(inc++)); + this.learnRate = lR; + this.momentum = m; + } + + /** + * before we start a layer, this is called to set up the thread. + * @param error the error from the next layer, used to calc this layers error. + * @param input the input data. + * @param output the result data. + * @param nextError put the next layers input error here. + * @param layer the layer we operate on. + */ + void setup(float [] error, float [] input, float [] output, float[] nextError, Layer layer) { + this.error = error; + this.input = input; + this.output = output; + this.nextError = nextError; + this.layer = layer; + this.errorWorkspace = new float[nextError.length]; + Arrays.fill(this.errorWorkspace, 0); + } + + /** + * Run till we complete the layer, then finish up. + */ + public void run() { + synchronized (this) { + while(true) { + + // wait for the range object to be set. + Range r = this.getRange(); + if (r == null) + return; + for (int indx = r.start; indx < r.end; indx++) { + layer.trainOne(error, input, output, learnRate, momentum, errorWorkspace, indx); + } + } + } + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/NNTrainingInterface.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/NNTrainingInterface.java new file mode 100644 index 00000000..84e29922 --- /dev/null +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/NNTrainingInterface.java @@ -0,0 +1,23 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.neuralnet; + +/** + * @author redman + */ +public interface NNTrainingInterface { + + /** + * Given a set of examples, and a set of desired outputs, train the network + * represented by the provided network layers the provided number of epochs. + * @param inputs the input data to train against. + * @param outputs the desired outputs. + * @param epochs the number of training iterations to run. + */ + public void train(float[][] inputs, float[][]outputs, int epochs); +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/PushThread.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/PushThread.java new file mode 100644 index 00000000..9dada667 --- /dev/null +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/PushThread.java @@ -0,0 +1,84 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.neuralnet; + +import edu.illinois.cs.cogcomp.lbjava.neuralnet.ThreadedNNTrainer.Range; + +/** + * Threads will operate on a range, this superclass contains that + * range and handles atomic synchronized access. + * @author redman + */ +public class PushThread extends Thread { + + /** the range to operate on. */ + protected Range range = null; + + /** set when this thread is waiting for input. */ + private boolean idle = false; + /** + * the push thread takes the name ofthe thread, to pass to + * the super. + * @param name the name of the thread. 
+ */ + PushThread(String name) { + super(name); + } + + /** + * set the range of things to operate on. + * @param range + */ + synchronized void setRange(Range range) { + this.range = range; + this.notifyAll(); + } + + /** + * call this when we are done. + */ + synchronized void done() { + this.range = null; + this.interrupt(); + } + + /** + * wait for the thread to complete it's run, it will set + * poised and block till it gets data. + */ + final synchronized public void waitIdle() { + while(!idle || range != null) + try { + this.wait(); + } catch (InterruptedException e) { + } + } + + /** + * wait for the next range. + * @return the range. + */ + final synchronized protected Range getRange() { + while (range == null) + try { + this.idle = true; + this.notify(); // somebody waiting for completion? + this.wait(); + } catch (InterruptedException e) { + if (this.isInterrupted()) { + System.out.println("Interrupted error."); + return null; + } + } + Range r = range; + range = null; + this.idle = false; + return r; + } + +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/SimpleNNTrainer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/SimpleNNTrainer.java new file mode 100644 index 00000000..951c3144 --- /dev/null +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/SimpleNNTrainer.java @@ -0,0 +1,150 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.neuralnet; + +import java.util.Random; + +/** + * This class will simply learn up the NeuralNet layers, single threaded. + * @author redman + */ +public class SimpleNNTrainer implements NNTrainingInterface { + + /** the layers of the neural network. 
*/ + private Layer[] layers; + + /** scales the weight deltas for each iteration. */ + private float learningRate = .3f; + + /** this prevents local minimum capture. */ + private float momentum = .6f; + + /** + * Need the layer data, learning rate and momentum. + * @param l the layers of the neural net. + * @param rate the learning rate. + * @param mom the momentum. + */ + public SimpleNNTrainer(Layer[] l, float rate, float mom) { + this.layers = l; + this.learningRate = rate; + this.momentum = mom; + } + + /** + * given an input set of example, compute the output values, also return all the + * activation values in between, return them all. The results will be in the last + * vector in the returned array. + * @param inputs the inputs. + * @return the activation energies from all layers/ + */ + public final float [] classify(float[] inputs) { + + int layerCount = layers.length; + + // storage for each output of each layer, and the error computed for each activation. + float [][] activations = new float[layerCount][]; + + // This array contains inputs from previous layer + float [] currentinputs = inputs; + for (int i = 0 ; i < layerCount ; i++) { + + // compute the activations for this layer. + Layer layer = layers[i]; + activations[i] = layer.activate(currentinputs); + currentinputs = activations[i]; + } + return activations[layerCount-1]; + } + + /** + * given an input set of example, compute the output values, also return all the + * activation values in between, return them all. The results will be in the last + * vector in the returned array. + * @param inputs the inputs. + * @return the activation energies from all layers/ + */ + public final float [][] activate(float[] inputs) { + + int layerCount = layers.length; + + // storage for each output of each layer, and the error computed for each activation. 
+ float [][] activations = new float[layerCount][]; + + // This array contains inputs from previous layer + float [] currentinputs = inputs; + for (int i = 0 ; i < layerCount ; i++) { + + // compute the activations for this layer. + Layer layer = layers[i]; + activations[i] = layer.activate(currentinputs); + currentinputs = activations[i]; + } + return activations; + } + + /** + * Train with one example. + * @param inputs input data. + * @param outputs the labeled data. + * @param epochs + */ + public void train(float[] inputs, float[]outputs) { + // storage for each output of each layer, and the error computed for each activation. + float [][] activations = this.activate(inputs); + + // now we have all the activations. + float[] calcOut = activations[activations.length-1]; + int errlen = calcOut.length; + float [] error = new float[errlen]; + for (int i = 0; i < errlen; i++) { + error[i] = outputs[i] - calcOut[i]; // negative error + } + for (int i = layers.length - 1; i > 0; i--) { + error = layers[i].train(error,activations[i-1],activations[i], this.learningRate, this.momentum); + } + error = layers[0].train(error,inputs, activations[0], this.learningRate, this.momentum); + } + /** + * Execute the given number of epochs, then exit whatever the error. + * @param inputs the input examples. + * @param outputs the labels. + * @param layers + */ + @Override + public void train(float[][] inputs, float[][]outputs, int epochs) { + + // error checking. + if (inputs.length != outputs.length) + throw new RuntimeException("There must be the same number of input data records and output data records."); + int totalInputs = inputs.length; + + // set up our counts. + int layerCount = layers.length; + Random r = new Random(34565); + for (int epoch = 0; epoch < epochs; epoch++) { + for (int inindx = 0; inindx < totalInputs; inindx++) { + int iI = r.nextInt(totalInputs); + // storage for each output of each layer, and the error computed for each activation. 
+ float [][] activations = this.activate(inputs[iI]); + + // now we have all the activations. + float[] calcOut = activations[layerCount-1]; + int errlen = calcOut.length; + float [] error = new float[errlen]; + for (int i = 0; i < errlen; i++) { + error[i] = outputs[iI][i] - calcOut[i]; // negative error + } + for (int i = layers.length - 1; i > 0; i--) { + error = layers[i].train(error,activations[i-1],activations[i], this.learningRate, this.momentum); + } + error = layers[0].train(error,inputs[iI],activations[0], this.learningRate, this.momentum); + } + } + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/ThreadedNNTrainer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/ThreadedNNTrainer.java new file mode 100644 index 00000000..01980fb2 --- /dev/null +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/ThreadedNNTrainer.java @@ -0,0 +1,338 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.neuralnet; + +import java.util.Arrays; +import java.util.Random; + +/** + * This class will simply learn up the NeuralNet layers, single threaded. + * @author redman + */ +public class ThreadedNNTrainer implements NNTrainingInterface { + + /** the layers of the neural network. */ + private Layer[] layers; + + /** scales the weight deltas for each iteration. */ + private float learningRate = .3f; + + /** this prevents local minimum capture. */ + private float momentum = .6f; + + /** this is the number of threads we will use, by default, number of processors on the machine. */ + private int numThreads = Runtime.getRuntime().availableProcessors(); + + /** + * Need the layer data, learning rate and momentum. 
+ * @param l the layers of the neural net. + * @param rate the learning rate. + * @param mom the momentum. + */ + public ThreadedNNTrainer(Layer[] l, float rate, float mom) { + this.layers = l; + this.learningRate = rate; + this.momentum = mom; + } + + /** + * Need the layer data, learning rate and momentum. + * @param l the layers of the neural net. + * @param rate the learning rate. + * @param mom the momentum. + * @param numThreads number of threads to deploy. + */ + public ThreadedNNTrainer(Layer[] l, float rate, float mom, int numThreads) { + this.layers = l; + this.learningRate = rate; + this.momentum = mom; + this.numThreads = numThreads; + } + + /** + * given an input set of example, compute the output values, also return all the + * activation values in between, return them all. The results will be in the last + * vector in the returned array. + * @param inputs the inputs. + * @return the activation energies from all layers/ + */ + public final float [][] activate(float[] inputs) { + + int layerCount = layers.length; + + // storage for each output of each layer, and the error computed for each activation. + float [][] activations = new float[layerCount][]; + + // This array contains inputs from previous layer + float [] currentinputs = inputs; + for (int i = 0 ; i < layerCount ; i++) { + + // compute the activations for this layer. + Layer layer = layers[i]; + activations[i] = layer.activate(currentinputs); + currentinputs = activations[i]; + } + return activations; + } + + /** + * Execute the given number of epochs, then exit whatever the error. + * @param inputs the input examples. + * @param outputs the labels. + * @param epochs the number of iterations to perform. + */ + @Override + final public void train(float[][] inputs, float[][] outputs, int epochs) { + // error checking. 
+ if (inputs.length != outputs.length) + throw new RuntimeException("There must be the same number of input data records and output data records."); + + // iterate this number of times. + int numExamples = inputs.length; + + // For each layer, compute the ranges of indices to operate on. This will allow us to + // continue computing on a thread without handshakes. + int ll = layers.length; + Range[][] ranges = new Range[ll][]; + for (int i = 0; i < ll ; i++) { + Layer l = layers[i]; + int no = l.getNumberOutputs(); + int increment = no / numThreads; + int onsies; + if (increment == 0) { + onsies = no; + ranges[i] = new Range[onsies]; + } else { + onsies = no % numThreads; + ranges[i] = new Range[numThreads]; + } + int start = 0; + for (int j = 0 ; j < ranges[i].length && start < no; j++) { + int end = start + increment; + if (onsies != 0) { + end++; + onsies--; + } + ranges[i][j] = new Range(start, end); + start = end; + } + } + + // create the threads to run against the activation mux. + ActThread[] actThreads = new ActThread[numThreads]; + for (int i = 0; i < numThreads; i++) { + actThreads[i] = new ActThread(); + actThreads[i].start(); + } + + // create the threads to run against the activation mux. + LearnerThread[] learnerThreads = new LearnerThread[numThreads]; + for (int i = 0; i < numThreads; i++) { + learnerThreads[i] = new LearnerThread(this.learningRate, this.momentum); + learnerThreads[i].start(); + } + + // set up our counts. + int layerCount = layers.length; + + // storage for each output of each layer, and the error computed for each activation. + float[][] activations = new float[layerCount][]; + for (int i = 0; i < layerCount; i++) { + activations[i] = new float[layers[i].getNumberOutputs()]; + } + + Thread.yield(); + Thread.yield(); + Thread.yield(); + Random r = new Random(34565); + + // do the specified number of epochs. 
+ for (int epoch = 0; epoch < epochs; epoch++) { + for (int inindx = 0; inindx < numExamples; inindx++) { + int iI = r.nextInt(numExamples); + + // zero activations + for (int i = 0; i < layerCount; i++) { + Arrays.fill(activations[i], 0.0f); + } + + // This array contains inputs from previous layer output + float[] currentinputs = inputs[iI]; + + // for each layer, do the activations. + for (int i = 0; i < layerCount; i++) { + Layer layer = layers[i]; + + // set up the threads + float[] acts = activations[i]; + int rl = ranges[i].length; + for (int j = 0; j < rl; j++) { + actThreads[j].setup(currentinputs, layer, acts); + actThreads[j].setRange(ranges[i][j]); + } + + // wait for them to finish. + for (int j = 0; j < rl; j++) { + actThreads[j].waitIdle(); + } + currentinputs = acts; + } + + ////////////////////////////////// + // compute output errors. + // now we have all the activations, lets do error propogation. + float[] calcOut = activations[layerCount - 1]; + int errlen = calcOut.length; + float[] error = new float[errlen]; + for (int i = 0; i < errlen; i++) { + error[i] = outputs[iI][i] - calcOut[i]; // negative error + } + + ////////////////////////////////// + // propogate the errors back and adjust weights. + // now learn from out errors. + for (int i = layerCount - 1; i > 0; i--) { + Layer layer = layers[i]; + int nI = layer.getNumberInputs() + 1/*for the bias*/; + float[] nextError = new float[nI]; + + // set up the threads + int rl = ranges[i].length; + for (int j = 0; j < rl; j++) { + learnerThreads[j].setup(error, activations[i - 1], activations[i], nextError, layer); + learnerThreads[j].setRange(ranges[i][j]); + } + + // wait for complete, then set up next layer run. + // wait for them to finish. + for (int j = 0; j < rl; j++) { + learnerThreads[j].waitIdle(); + } + + // now we must sum all the errors for each of the threads. 
+ int esize = nextError.length; + for (int ei = 0; ei < esize; ei++) { + for (int j = 0; j < rl; j++) { + nextError[ei] += learnerThreads[j].errorWorkspace[ei]; + } + } + error = nextError; + } + + // The setup for the first layer is computed using the actual inputs, so we do this + // a bit differently. + Layer layer = layers[0]; + int rl = ranges[0].length; + int nI = layer.getNumberInputs() + 1/*for the bias*/; + float[] nextError = new float[nI]; + for (int j = 0; j < rl; j++) { + learnerThreads[j].setup(error, inputs[iI], activations[0], nextError, layer); + learnerThreads[j].setRange(ranges[0][j]); + } + + // wait for complete, then set up next layer run. + // wait for them to finish. + for (int j = 0; j < rl; j++) { + learnerThreads[j].waitIdle(); + } + } + + // check for convergence. + float sumerr = 0; + for (int inputIdx = 0; inputIdx < outputs.length; inputIdx++) { + + // storage for each output of each layer, and the error computed for each activation. + float [][] a = this.activate(inputs[inputIdx]); + float[] outs = a[layerCount-1]; + float pred = outs[0]; + float label = outputs[inputIdx][0]; + sumerr = pred > label ? pred - label : label - pred; + } + System.out.format("%d) error = %.18f\n",epoch,(sumerr/(float)outputs.length)); + } + } + + /** just holds range of datums to operate on. */ + static class Range { + int start; + int end; + Range(int s, int e) { + start = s; + end = e; + } + public String toString() { + return start+"-"+end; + } + } + + /** + * this class coordinates the activities of a set of threads by handing + * out indexes that need operated on in a threadsafe way. If a request is made + * for an index, and non are available, the thread will wait until notified. + * @author redman + */ + static class Multiplexer { + + /** these are the ranges for the layer we operate on, these inited once and reused each epoch. */ + private Range[] ranges = null; + + /** the number of elements we are counting down from. 
*/ + private int count = 0; + + /** number of threads operating. */ + private int waiting = 0; + + /** the number of threads sharing this multiplexer. */ + private int numThreads = 0; + + /** + * We need the number of elements in the layer to operate on. + * @param numThreads the total number of threads. + */ + Multiplexer(int numThreads) { + this.numThreads = numThreads; + } + + /** + * Start this process. This should be called by the main thread where + * coordination occures. This will be accessed by the done method. + * @param ranges the range of indices to operate on. + * @param compLock use this as a semaphor + */ + synchronized void startAndWait(Range[] ranges) { + this.count = 0; + this.ranges = ranges; + this.waiting = 0; + this.notifyAll(); + while (waiting != numThreads) { + try { + this.wait(); + } catch (InterruptedException e1) { + } + } + } + + /** + * get the next available index, or block till one is available. + * @return the index. + */ + synchronized Range getNextIndex() { + while (ranges == null || count == ranges.length) { + try { + this.waiting++; + if (waiting == numThreads) + this.notifyAll(); + this.wait(); + this.waiting--; + } catch (InterruptedException e) { + } + } + return ranges[count++]; + } + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/package-info.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/package-info.java new file mode 100644 index 00000000..c8623369 --- /dev/null +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/package-info.java @@ -0,0 +1,21 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +/** + * This package contains the Neural Network implemented employed by LBJava. 
This + * implementation supports bias, momentum and back prop, and is designed with + * efficiency in mind. The implementation contract includes an API for trainers + * {@see NNTrainingInterface} that defines the API for the any trainers. A single + * threaded trainer is provided. There is also a multithreaded trainer, which helps + * when there are a very large number of weights between layers.

+ * + * There is also a {@see Layer} class which implements functionality specific + * to neural net layers within the system. However, there is no representation of + * neuron within the API, this was decided upon to ensure good performance. + * @author redman + */ +package edu.illinois.cs.cogcomp.lbjava.neuralnet; diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/parse/ArrayFileParser.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/parse/ArrayFileParser.java index c9558bfd..cbae0aa4 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/parse/ArrayFileParser.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/parse/ArrayFileParser.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.parse; @@ -18,256 +15,243 @@ import java.util.zip.ZipFile; import java.util.zip.ZipInputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream; import edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector; import edu.illinois.cs.cogcomp.lbjava.learn.Learner; -import edu.illinois.cs.cogcomp.lbjava.util.ExceptionlessInputStream; /** - * This parser returns an array of arrays representing each example. The - * first array represents the integer keys of the example's features; the - * second array holds the values of those features. 
The third array holds the - * example's label(s), and the fourth array holds the values of those labels. - * These arrays are read in through files, and the paths to these files are - * passed in through the constructor. - * - *

When run as a stand-alone program, this class takes the names of - * example, lexicon, and model files as input and prints all the feature - * vectors in the dataset to STDOUT. - * - * @author Michael Paul + * This parser returns an array of arrays representing each example. The first array represents the + * integer keys of the example's features; the second array holds the values of those features. The + * third array holds the example's label(s), and the fourth array holds the values of those labels. + * These arrays are read in through files, and the paths to these files are passed in through the + * constructor. + * + *

+ * When run as a stand-alone program, this class takes the names of example, lexicon, and model + * files as input and prints all the feature vectors in the dataset to STDOUT. + * + * @author Michael Paul **/ -public class ArrayFileParser implements Parser -{ - /** Reader for file currently being parsed. */ - protected DataInputStream in; - /** The name of the file to parse. */ - protected String exampleFileName; - /** A single array from which all examples can be parsed. */ - protected byte[] exampleData; - /** Whether or not the input stream is zipped. */ - protected boolean zipped; - /** Whether the returned example arrays should include pruned features. */ - protected boolean includePruned = false; - - - /** - * Initializes the parser with a file name assuming the input stream is not - * zipped. - * - * @param exampleFile The name of the file containing the examples. - **/ - public ArrayFileParser(String exampleFile) { this(exampleFile, false); } - - /** - * Initializes the parser with a file name, specifying whether the data is - * zipped. - * - * @param exampleFile The name of the file containing the examples. - * @param zip Whether or not the input stream is zipped. - **/ - public ArrayFileParser(String exampleFile, boolean zip) { - exampleFileName = exampleFile; - zipped = zip; - reset(); - } - - /** - * Initializes the parser with a data array assuming the input stream is - * not zipped. - * - * @param data The examples can be parsed out of this array. - **/ - public ArrayFileParser(byte[] data) { this(data, true); } - - /** - * Initializes the parser with a data array, specifying whether the data is - * zipped. - * - * @param data The examples can be parsed out of this array. - * @param zip Whether or not the input stream is zipped. - **/ - public ArrayFileParser(byte[] data, boolean zip) { - exampleData = data; - zipped = zip; - reset(); - } - - - /** Setter for {@link #includePruned}. 
*/ - public void setIncludePruned(boolean b) { includePruned = b; } - - - /** - * Returns the number of examples left in the example file. This may be - * slow to compute as it must read through the entire file and increment - * the count. {@link #reset()} is called after the examples are counted. - * - * @return The number of examples left in the example file. - **/ - public int getNumExamples() { - int result = 0; - - try { - while (true) { - int L = in.readInt(); - if (L == -1) continue; - ++result; - in.skipBytes(12 * L); // 4 for label index, 8 for its value - L = in.readInt() + in.readInt(); - in.skipBytes(12 * L); // 4 for feature index, 8 for its value - } - } - catch (EOFException eof) { +public class ArrayFileParser implements Parser { + /** Reader for file currently being parsed. */ + protected DataInputStream in; + /** The name of the file to parse. */ + protected String exampleFileName; + /** A single array from which all examples can be parsed. */ + protected byte[] exampleData; + /** Whether or not the input stream is zipped. */ + protected boolean zipped; + /** Whether the returned example arrays should include pruned features. */ + protected boolean includePruned = false; + + + /** + * Initializes the parser with a file name assuming the input stream is not zipped. + * + * @param exampleFile The name of the file containing the examples. + **/ + public ArrayFileParser(String exampleFile) { + this(exampleFile, false); } - catch (Exception e) { - System.err.println("Can't read from '" + exampleFileName + "':"); - e.printStackTrace(); - System.exit(1); + + /** + * Initializes the parser with a file name, specifying whether the data is zipped. + * + * @param exampleFile The name of the file containing the examples. + * @param zip Whether or not the input stream is zipped. 
+ **/ + public ArrayFileParser(String exampleFile, boolean zip) { + exampleFileName = exampleFile; + zipped = zip; + reset(); } - reset(); + /** + * Initializes the parser with a data array assuming the input stream is not zipped. + * + * @param data The examples can be parsed out of this array. + **/ + public ArrayFileParser(byte[] data) { + this(data, true); + } - return result; - } + /** + * Initializes the parser with a data array, specifying whether the data is zipped. + * + * @param data The examples can be parsed out of this array. + * @param zip Whether or not the input stream is zipped. + **/ + public ArrayFileParser(byte[] data, boolean zip) { + exampleData = data; + zipped = zip; + reset(); + } - /** - * Returns either an Object[] or a {@link FoldSeparator} - * deserialized out of the given file. - **/ - public Object next() { - Object[] result = new Object[4]; + /** Setter for {@link #includePruned}. */ + public void setIncludePruned(boolean b) { + includePruned = b; + } - try { - int L = in.readInt(); - // A -1 means that there was a fold separator here - if (L == -1) return FoldSeparator.separator; - else { - int[] exampleLabels = new int[L]; - double[] labelValues = new double[L]; - for (int i = 0; i < L; ++i) { - exampleLabels[i] = in.readInt(); - labelValues[i] = in.readDouble(); + /** + * Returns the number of examples left in the example file. This may be slow to compute as it + * must read through the entire file and increment the count. {@link #reset()} is called after + * the examples are counted. + * + * @return The number of examples left in the example file. 
+ **/ + public int getNumExamples() { + int result = 0; + + try { + while (true) { + int L = in.readInt(); + if (L == -1) + continue; + ++result; + in.skipBytes(12 * L); // 4 for label index, 8 for its value + L = in.readInt() + in.readInt(); + in.skipBytes(12 * L); // 4 for feature index, 8 for its value + } + } catch (EOFException eof) { + } catch (Exception e) { + System.err.println("Can't read from '" + exampleFileName + "':"); + e.printStackTrace(); + System.exit(1); } - int Fup = in.readInt(); // # unpruned - int Fp = in.readInt(); // # pruned - int F = (includePruned) ? (Fup+Fp) : Fup; + reset(); - int[] exampleFeatures = new int[F]; - double[] exampleValues = new double[F]; + return result; + } - for (int i = 0; i < Fup+Fp; ++i) { - int ef = in.readInt(); - double ev = in.readDouble(); - if (i < F) { - exampleFeatures[i] = ef; - exampleValues[i] = ev; - } + /** + * Returns either an Object[] or a {@link FoldSeparator} deserialized out of the + * given file. + **/ + public Object next() { + Object[] result = new Object[4]; + + try { + int L = in.readInt(); + + // A -1 means that there was a fold separator here + if (L == -1) + return FoldSeparator.separator; + else { + int[] exampleLabels = new int[L]; + double[] labelValues = new double[L]; + for (int i = 0; i < L; ++i) { + exampleLabels[i] = in.readInt(); + labelValues[i] = in.readDouble(); + } + + int Fup = in.readInt(); // # unpruned + int Fp = in.readInt(); // # pruned + int F = (includePruned) ? 
(Fup + Fp) : Fup; + + int[] exampleFeatures = new int[F]; + double[] exampleValues = new double[F]; + + for (int i = 0; i < Fup + Fp; ++i) { + int ef = in.readInt(); + double ev = in.readDouble(); + + if (i < F) { + exampleFeatures[i] = ef; + exampleValues[i] = ev; + } + } + + result[0] = exampleFeatures; + result[1] = exampleValues; + result[2] = exampleLabels; + result[3] = labelValues; + } + } catch (EOFException eof) { + result = null; + } catch (Exception e) { + System.err.println("Can't read from '" + exampleFileName + "':"); + e.printStackTrace(); + System.exit(1); } - result[0] = exampleFeatures; - result[1] = exampleValues; - result[2] = exampleLabels; - result[3] = labelValues; - } - } - catch (EOFException eof) { - result = null; - } - catch (Exception e) { - System.err.println("Can't read from '" + exampleFileName + "':"); - e.printStackTrace(); - System.exit(1); + return result; } - return result; - } - - /** Resets the example file stream to the beginning. */ - public void reset() { - close(); - - try { - if (exampleFileName != null) { - if (zipped) { - ZipFile zip = new ZipFile(exampleFileName); - in = - new DataInputStream( - new BufferedInputStream( - zip.getInputStream( - zip.getEntry(ExceptionlessInputStream.zipEntryName)))); + /** Resets the example file stream to the beginning. 
*/ + public void reset() { + close(); + + try { + if (exampleFileName != null) { + if (zipped) { + ZipFile zip = new ZipFile(exampleFileName); + in = + new DataInputStream(new BufferedInputStream(zip.getInputStream(zip + .getEntry(ExceptionlessInputStream.zipEntryName)))); + } else + in = + new DataInputStream(new BufferedInputStream(new FileInputStream( + exampleFileName))); + } else if (zipped) { + ZipInputStream zip = new ZipInputStream(new ByteArrayInputStream(exampleData)); + zip.getNextEntry(); + in = new DataInputStream(new BufferedInputStream(zip)); + } else + in = new DataInputStream(new ByteArrayInputStream(exampleData)); + } catch (Exception e) { + System.err.println("Can't open '" + exampleFileName + "' for input:"); + e.printStackTrace(); + System.exit(1); } - else - in = - new DataInputStream( - new BufferedInputStream( - new FileInputStream(exampleFileName))); - } - else if (zipped) { - ZipInputStream zip = - new ZipInputStream( - new ByteArrayInputStream(exampleData)); - zip.getNextEntry(); - in = new DataInputStream(new BufferedInputStream(zip)); - } - else - in = - new DataInputStream( - new ByteArrayInputStream(exampleData)); - } - catch (Exception e) { - System.err.println("Can't open '" + exampleFileName + "' for input:"); - e.printStackTrace(); - System.exit(1); } - } - /** Frees any resources this parser may be holding. */ - public void close() { - if (in == null) return; - try { in.close(); } - catch (Exception e) { - System.err.println("Can't close '" + exampleFileName + "':"); - e.printStackTrace(); - System.exit(1); + /** Frees any resources this parser may be holding. 
*/ + public void close() { + if (in == null) + return; + try { + in.close(); + } catch (Exception e) { + System.err.println("Can't close '" + exampleFileName + "':"); + e.printStackTrace(); + System.exit(1); + } } - } - - public static void main(String[] args) { - String exFileName = null; - String lexFileName = null; - String lcFileName = null; - try { - exFileName = args[0]; - lexFileName = args[1]; - lcFileName = args[2]; - if (args.length > 3) throw new Exception(); - } - catch (Exception e) { - System.err.println( -"usage: java edu.illinois.cs.cogcomp.lbjava.parse.ArrayFileParser "); - System.exit(1); - } + public static void main(String[] args) { + String exFileName = null; + String lexFileName = null; + String lcFileName = null; + + try { + exFileName = args[0]; + lexFileName = args[1]; + lcFileName = args[2]; + if (args.length > 3) + throw new Exception(); + } catch (Exception e) { + System.err + .println("usage: java edu.illinois.cs.cogcomp.lbjava.parse.ArrayFileParser "); + System.exit(1); + } - ArrayFileParser parser = new ArrayFileParser(exFileName); - Learner learner = Learner.readLearner(lcFileName); - learner.readLexicon(lexFileName); + ArrayFileParser parser = new ArrayFileParser(exFileName); + Learner learner = Learner.readLearner(lcFileName); + learner.readLexicon(lexFileName); - for (Object e = parser.next(); e != null; e = parser.next()) { - FeatureVector v = - new FeatureVector((Object[]) e, learner.getLexicon(), - learner.getLabelLexicon()); - v.sort(); - System.out.println(v); + for (Object e = parser.next(); e != null; e = parser.next()) { + FeatureVector v = + new FeatureVector((Object[]) e, learner.getLexicon(), learner.getLabelLexicon()); + v.sort(); + System.out.println(v); + } } - } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/parse/ArrayParser.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/parse/ArrayParser.java index d3b4fad2..c3fcc41f 100644 --- 
a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/parse/ArrayParser.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/parse/ArrayParser.java @@ -1,70 +1,67 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.parse; /** - * This parser returns the example objects in an array one at a time. - * - * @author Michael Paul + * This parser returns the example objects in an array one at a time. + * + * @author Michael Paul **/ -public class ArrayParser implements Parser -{ - /** The pointer to the current cell of the {@link #examples} array. */ - protected int index; - /** An array of examples, returned one at a time by the parser. */ - protected Object[] examples; +public class ArrayParser implements Parser { + /** The pointer to the current cell of the {@link #examples} array. */ + protected int index; + /** An array of examples, returned one at a time by the parser. */ + protected Object[] examples; - /** - * Initializes the parser with an empty example array. - **/ - public ArrayParser() { - this(new Object[0]); - } + /** + * Initializes the parser with an empty example array. + **/ + public ArrayParser() { + this(new Object[0]); + } - /** - * Creates the parser with the supplied example array. - * - * @param e The array of examples - **/ - public ArrayParser(Object[] e) { - index = 0; - examples = e; - } + /** + * Creates the parser with the supplied example array. 
+ * + * @param e The array of examples + **/ + public ArrayParser(Object[] e) { + index = 0; + examples = e; + } - /** Returns the value of {@link #examples}. */ - public Object[] getExamples() { return examples; } + /** Returns the value of {@link #examples}. */ + public Object[] getExamples() { + return examples; + } - /** - * Returns the next example in the array and increments - * the {@link #index} pointer. - **/ - public Object next() { - if (index >= examples.length) return null; - return (Object[]) examples[index++]; - } + /** + * Returns the next example in the array and increments the {@link #index} pointer. + **/ + public Object next() { + if (index >= examples.length) + return null; + return examples[index++]; + } - /** - * Resets the {@link #index} pointer to 0. - **/ - public void reset() { - index = 0; - } + /** + * Resets the {@link #index} pointer to 0. + **/ + public void reset() { + index = 0; + } - /** Frees any resources this parser may be holding. */ - public void close() { } + /** Frees any resources this parser may be holding. */ + public void close() {} } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/parse/ChildrenFromVectors.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/parse/ChildrenFromVectors.java index 55717d9b..1a62dd7d 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/parse/ChildrenFromVectors.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/parse/ChildrenFromVectors.java @@ -1,66 +1,65 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.parse; /** - * Use this parser in conjunction with another parser that returns - * LinkedVectors, and this parser will return their - * LinkedChildren. - * - * @author Nick Rizzolo + * Use this parser in conjunction with another parser that returns LinkedVectors, and + * this parser will return their LinkedChildren. + * + * @author Nick Rizzolo **/ -public class ChildrenFromVectors implements Parser -{ - /** A parser that returns LinkedVectors. */ - protected Parser parser; - /** The next child to be returned. */ - protected LinkedChild next; +public class ChildrenFromVectors implements Parser { + /** A parser that returns LinkedVectors. */ + protected Parser parser; + /** The next child to be returned. */ + protected LinkedChild next; + + /** + * Creates the parser. + * + * @param p A parser that returns LinkedVectors. + **/ + public ChildrenFromVectors(Parser p) { + parser = p; + } - /** - * Creates the parser. - * - * @param p A parser that returns LinkedVectors. - **/ - public ChildrenFromVectors(Parser p) { parser = p; } + /** + * Returns the next LinkedChild parsed. + * + * @return The next LinkedChild parsed, or null if there are no more + * children in the stream. + **/ + public Object next() { + while (next == null) { + LinkedVector v = (LinkedVector) parser.next(); + if (v == null) + return null; + next = v.get(0); + } - /** - * Returns the next LinkedChild parsed. - * - * @return The next LinkedChild parsed, or null - * if there are no more children in the stream. 
- **/ - public Object next() { - while (next == null) { - LinkedVector v = (LinkedVector) parser.next(); - if (v == null) return null; - next = v.get(0); + LinkedChild result = next; + next = next.next; + return result; } - LinkedChild result = next; - next = next.next; - return result; - } - - /** Sets this parser back to the beginning of the raw data. */ - public void reset() { - parser.reset(); - next = null; - } + /** Sets this parser back to the beginning of the raw data. */ + public void reset() { + parser.reset(); + next = null; + } - /** Frees any resources this parser may be holding. */ - public void close() { parser.close(); } + /** Frees any resources this parser may be holding. */ + public void close() { + parser.close(); + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/parse/FoldParser.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/parse/FoldParser.java index 55c5c05e..9b663e19 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/parse/FoldParser.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/parse/FoldParser.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.parse; @@ -15,333 +12,330 @@ /** - * Useful when performing k-fold cross validation, this parser filters - * the examples coming from another parser. 
Conceptually, the examples from - * the original parser are first split into k "folds" (or partitions) - * depending on the selected splitting policy. A particular fold is then - * selected as the pivot, and this parser can be configured either to return - * all and only the examples from that fold, or all and only the examples - * from other folds. - * - *

The k folds are referred to by their indexes, which are 0, 1, - * ..., k - 1. This index is used to select the pivot fold. - * - * @see FoldParser.SplitPolicy - * @author Dan Muriello, Nick Rizzolo + * Useful when performing k-fold cross validation, this parser filters the examples coming + * from another parser. Conceptually, the examples from the original parser are first split into + * k "folds" (or partitions) depending on the selected splitting policy. A particular fold is + * then selected as the pivot, and this parser can be configured either to return all and only the + * examples from that fold, or all and only the examples from other folds. + * + *

+ * The k folds are referred to by their indexes, which are 0, 1, ..., k - 1. This + * index is used to select the pivot fold. + * + * @see FoldParser.SplitPolicy + * @author Dan Muriello, Nick Rizzolo **/ -public class FoldParser implements Parser -{ - /** The parser whose examples are being filtered. */ - protected Parser parser; - /** The total number of folds. */ - protected int K; - /** The way in which examples are partitioned into folds. */ - protected SplitPolicy splitPolicy; - /** - * The examples from this fold are exclusively selected for or excluded - * from the set of examples returned by this parser. - **/ - protected int pivot; - /** Whether examples will come from the pivot fold or not. */ - protected boolean fromPivot; - /** The total number of examples coming from {@link #parser}. */ - protected int examples; - - /** Keeps track of the index of the next example to be returned. */ - protected int exampleIndex; - /** Keeps track of the current fold; used only in manual splitting. */ - protected int fold; - /** - * A lower bound for an index relating to the pivot fold. The index - * variable in question may either be {@link #exampleIndex} or - * {@link #shuffleIndex}. - **/ - protected int lowerBound; - /** - * An upper bound for an index relating to the pivot fold. The index - * variable in question may either be {@link #exampleIndex} or - * {@link #shuffleIndex}. - **/ - protected int upperBound; - /** - * Used only by the random splitting policy to remember which example - * indexes are in which folds. - **/ - protected int[] shuffled; - /** An index pointing into {@link #shuffled}. */ - protected int shuffleIndex; - - - /** - * Constructor for when you don't know how many examples are in the data. - * Using a constructor that allows specification of the number of examples - * in the data only saves computation when the splitting policy is either - * sequential or random. - * - * @param parser The parser whose examples are being filtered. 
- * @param K The total number of folds; this value is ignored if the - * splitting policy is manual. - * @param split The way in which examples are partitioned into folds. - * @param pivot The index of the pivot fold. - * @param f Whether to extract examples from the pivot. - **/ - public FoldParser(Parser parser, int K, SplitPolicy split, int pivot, - boolean f) { - this(parser, K, split, pivot, f, -1); - } - - /** - * Constructor for when you know neither how many examples are in the data - * nor K, i.e., how many folds are in the data. This constructor - * can only be used when the splitting policy is manual. Using a - * constructor that allows specification of the number of examples in the - * data only saves computation when the splitting policy is either - * sequential or random. - * - * @param parser The parser whose examples are being filtered. - * @param split The way in which examples are partitioned into folds. - * @param pivot The index of the pivot fold. - * @param f Whether to extract examples from the pivot. - **/ - public FoldParser(Parser parser, SplitPolicy split, int pivot, boolean f) { - this(parser, -1, split, pivot, f, -1); - } - - /** - * Full constructor. - * - * @param parser The parser whose examples are being filtered. - * @param K The total number of folds; this value is ignored if the - * splitting policy is manual. - * @param split The way in which examples are partitioned into folds. - * @param pivot The index of the pivot fold. - * @param f Whether to extract examples from the pivot. - * @param e The total number of examples coming from - * parser, or -1 if unknown. 
- **/ - public FoldParser(Parser parser, int K, SplitPolicy split, int pivot, - boolean f, int e) { - this.K = K; - splitPolicy = split; - fromPivot = f; - examples = e; - - if (examples == -1 - && (splitPolicy == SplitPolicy.sequential - || splitPolicy == SplitPolicy.random)) { - ++examples; - for (Object example = parser.next(); example != null; - example = parser.next()) - if (example != FoldSeparator.separator) ++examples; - parser.reset(); +public class FoldParser implements Parser { + /** The parser whose examples are being filtered. */ + protected Parser parser; + /** The total number of folds. */ + protected int K; + /** The way in which examples are partitioned into folds. */ + protected SplitPolicy splitPolicy; + /** + * The examples from this fold are exclusively selected for or excluded from the set of examples + * returned by this parser. + **/ + protected int pivot; + /** Whether examples will come from the pivot fold or not. */ + protected boolean fromPivot; + /** The total number of examples coming from {@link #parser}. */ + protected int examples; + + /** Keeps track of the index of the next example to be returned. */ + protected int exampleIndex; + /** Keeps track of the current fold; used only in manual splitting. */ + protected int fold; + /** + * A lower bound for an index relating to the pivot fold. The index variable in question may + * either be {@link #exampleIndex} or {@link #shuffleIndex}. + **/ + protected int lowerBound; + /** + * An upper bound for an index relating to the pivot fold. The index variable in question may + * either be {@link #exampleIndex} or {@link #shuffleIndex}. + **/ + protected int upperBound; + /** + * Used only by the random splitting policy to remember which example indexes are in which + * folds. + **/ + protected int[] shuffled; + /** An index pointing into {@link #shuffled}. */ + protected int shuffleIndex; + + + /** + * Constructor for when you don't know how many examples are in the data. 
Using a constructor + * that allows specification of the number of examples in the data only saves computation when + * the splitting policy is either sequential or random. + * + * @param parser The parser whose examples are being filtered. + * @param K The total number of folds; this value is ignored if the splitting policy is manual. + * @param split The way in which examples are partitioned into folds. + * @param pivot The index of the pivot fold. + * @param f Whether to extract examples from the pivot. + **/ + public FoldParser(Parser parser, int K, SplitPolicy split, int pivot, boolean f) { + this(parser, K, split, pivot, f, -1); } - if (splitPolicy == SplitPolicy.random) { - shuffled = new int[examples]; - for (int i = 0; i < examples; ++i) shuffled[i] = i; - Random r = new Random(); - - for (int i = 0; i < examples; ++i) { - int j = i + r.nextInt(examples - i); - int t = shuffled[i]; - shuffled[i] = shuffled[j]; - shuffled[j] = t; - } - - for (int i = 0; i < K; ++i) { - setPivot(i); - Arrays.sort(shuffled, lowerBound, upperBound); - } + /** + * Constructor for when you know neither how many examples are in the data nor K, i.e., + * how many folds are in the data. This constructor can only be used when the splitting policy + * is manual. Using a constructor that allows specification of the number of examples in the + * data only saves computation when the splitting policy is either sequential or random. + * + * @param parser The parser whose examples are being filtered. + * @param split The way in which examples are partitioned into folds. + * @param pivot The index of the pivot fold. + * @param f Whether to extract examples from the pivot. 
+ **/ + public FoldParser(Parser parser, SplitPolicy split, int pivot, boolean f) { + this(parser, -1, split, pivot, f, -1); } - if (splitPolicy == SplitPolicy.manual) { - this.K = 1; - for (Object example = parser.next(); example != null; - example = parser.next()) - if (example == FoldSeparator.separator) ++this.K; - parser.reset(); + /** + * Full constructor. + * + * @param parser The parser whose examples are being filtered. + * @param K The total number of folds; this value is ignored if the splitting policy is manual. + * @param split The way in which examples are partitioned into folds. + * @param pivot The index of the pivot fold. + * @param f Whether to extract examples from the pivot. + * @param e The total number of examples coming from parser, or -1 if unknown. + **/ + public FoldParser(Parser parser, int K, SplitPolicy split, int pivot, boolean f, int e) { + this.K = K; + splitPolicy = split; + fromPivot = f; + examples = e; + + if (examples == -1 + && (splitPolicy == SplitPolicy.sequential || splitPolicy == SplitPolicy.random)) { + ++examples; + for (Object example = parser.next(); example != null; example = parser.next()) + if (example != FoldSeparator.separator) + ++examples; + parser.reset(); + } + + if (splitPolicy == SplitPolicy.random) { + shuffled = new int[examples]; + for (int i = 0; i < examples; ++i) + shuffled[i] = i; + Random r = new Random(); + + for (int i = 0; i < examples; ++i) { + int j = i + r.nextInt(examples - i); + int t = shuffled[i]; + shuffled[i] = shuffled[j]; + shuffled[j] = t; + } + + for (int i = 0; i < K; ++i) { + setPivot(i); + Arrays.sort(shuffled, lowerBound, upperBound); + } + } + + if (splitPolicy == SplitPolicy.manual) { + this.K = 1; + for (Object example = parser.next(); example != null; example = parser.next()) + if (example == FoldSeparator.separator) + ++this.K; + parser.reset(); + } + + setPivot(pivot); + this.parser = parser; } - setPivot(pivot); - this.parser = parser; - } - - - /** - * Retrieves the value 
of {@link #K}, which may have been computed in the - * constructor if the splitting policy is manual. - **/ - public int getK() { return K; } - - - /** - * Sets the value of {@link #fromPivot}, which controls whether examples - * will be taken from the pivot fold or from all other folds. - * - * @param f The new value for {@link #fromPivot}. - **/ - public void setFromPivot(boolean f) { fromPivot = f; } - - - /** - * Sets the pivot fold, which also causes {@link #parser} to be reset. - * - * @param p The index of the new pivot fold. - **/ - public void setPivot(int p) { - pivot = p; - if (p < K) reset(); - } - - - /** Returns the value of {@link #pivot}. */ - public int getPivot() { return pivot; } - /** Returns the value of {@link #parser}. */ - public Parser getParser() { return parser; } - - - /** - * Sets this parser back to the beginning of the raw data. This means - * arranging for all relevant state variables to be reset appropriately as - * well, since the value of {@link #pivot} may have changed. - * - * @see #setPivot(int) - **/ - public void reset() { - if (parser != null) parser.reset(); - - if (splitPolicy == SplitPolicy.sequential - || splitPolicy == SplitPolicy.random) { - lowerBound = pivot * (examples / K) + Math.min(pivot, examples % K); - upperBound = - (pivot + 1) * (examples / K) + Math.min(pivot + 1, examples % K); + + /** + * Retrieves the value of {@link #K}, which may have been computed in the constructor if the + * splitting policy is manual. + **/ + public int getK() { + return K; } - if (splitPolicy == SplitPolicy.random) shuffleIndex = lowerBound; - if (splitPolicy == SplitPolicy.manual) fold = 0; - exampleIndex = 0; - } - - - /** - * Convenient for determining if the next example should be returned or - * not. - * - * @param example The next example object. - * @return true iff the next example should be returned. 
- **/ - protected boolean filter(Object example) { - if (example == FoldSeparator.separator) return false; - if (splitPolicy == SplitPolicy.sequential) - return fromPivot - == (exampleIndex >= lowerBound && exampleIndex < upperBound); - if (splitPolicy == SplitPolicy.random) - return fromPivot - == (shuffleIndex < upperBound - && shuffled[shuffleIndex] == exampleIndex); - if (splitPolicy == SplitPolicy.kth) - return fromPivot == (exampleIndex % K == pivot); - // splitPolicy == SplitPolicy.manual - return fromPivot == (fold == pivot); - } - - - /** - * Changes state to reflect retrieval of the next example from the parser. - * - * @param example The previous example object. - **/ - protected void increment(Object example) { - if (example == FoldSeparator.separator) { - if (splitPolicy == SplitPolicy.manual) ++fold; + + /** + * Sets the value of {@link #fromPivot}, which controls whether examples will be taken from the + * pivot fold or from all other folds. + * + * @param f The new value for {@link #fromPivot}. + **/ + public void setFromPivot(boolean f) { + fromPivot = f; } - else { - if (splitPolicy == SplitPolicy.random) { - if (shuffleIndex < upperBound - && shuffled[shuffleIndex] == exampleIndex) - ++shuffleIndex; - } - - ++exampleIndex; + + + /** + * Sets the pivot fold, which also causes {@link #parser} to be reset. + * + * @param p The index of the new pivot fold. + **/ + public void setPivot(int p) { + pivot = p; + if (p < K) + reset(); } - } - - - /** Retrieves the next example object. */ - public Object next() { - Object result = parser.next(); - for (; result != null && !filter(result); result = parser.next()) - increment(result); - if (result != null) increment(result); - return result; - } - - - /** Frees any resources this parser may be holding. */ - public void close() { parser.close(); } - - - /** - * Immutable type representing the way in which examples are partitioned - * into folds. 
- * - * TODO: When LBJava's self imposed restriction to use Java 1.4 is lifted, this class will be replaced by an enum. - * - *

The four implemented splitting strategies are described below. Note - * that in all cases except "Manual", the size of the folds are as equal as - * possible, with any extra examples allocated to earlier folds. - * - *

- *
- *
Sequential
- *
The examples are simply partitioned into sequential folds.
- *
kth
- *
Every kth example is in the same fold.
- *
Random
- *
Examples are randomly assigned to folds.
- *
Manual
- *
- * Same as sequential, except fold boundaries are indicated by an - * appearance of the {@link FoldSeparator} in place of an example - * object. - *
- *
- *
- * - * @author Nick Rizzolo - **/ - public static class SplitPolicy - { - /** Represents the random split policy. */ - public static final SplitPolicy random = new SplitPolicy(0); - /** Represents the sequential split policy. */ - public static final SplitPolicy sequential = new SplitPolicy(1); + + + /** Returns the value of {@link #pivot}. */ + public int getPivot() { + return pivot; + } + + /** Returns the value of {@link #parser}. */ + public Parser getParser() { + return parser; + } + + /** - * Represents the split policy in which every kth example is - * part of the same fold. - **/ - public static final SplitPolicy kth = new SplitPolicy(2); + * Sets this parser back to the beginning of the raw data. This means arranging for all relevant + * state variables to be reset appropriately as well, since the value of {@link #pivot} may have + * changed. + * + * @see #setPivot(int) + **/ + public void reset() { + if (parser != null) + parser.reset(); + + if (splitPolicy == SplitPolicy.sequential || splitPolicy == SplitPolicy.random) { + lowerBound = pivot * (examples / K) + Math.min(pivot, examples % K); + upperBound = (pivot + 1) * (examples / K) + Math.min(pivot + 1, examples % K); + } + + if (splitPolicy == SplitPolicy.random) + shuffleIndex = lowerBound; + if (splitPolicy == SplitPolicy.manual) + fold = 0; + exampleIndex = 0; + } + + /** - * Represents the split policy in which the user manually inserts fold - * separation objects. - **/ - public static final SplitPolicy manual = new SplitPolicy(3); + * Convenient for determining if the next example should be returned or not. + * + * @param example The next example object. + * @return true iff the next example should be returned. 
+ **/ + protected boolean filter(Object example) { + if (example == FoldSeparator.separator) + return false; + if (splitPolicy == SplitPolicy.sequential) + return fromPivot == (exampleIndex >= lowerBound && exampleIndex < upperBound); + if (splitPolicy == SplitPolicy.random) + return fromPivot == (shuffleIndex < upperBound && shuffled[shuffleIndex] == exampleIndex); + if (splitPolicy == SplitPolicy.kth) + return fromPivot == (exampleIndex % K == pivot); + // splitPolicy == SplitPolicy.manual + return fromPivot == (fold == pivot); + } - /** The names of the different split strategies as strings. */ - private static final String[] names = - { "random", "sequential", "kth", "manual" }; + /** + * Changes state to reflect retrieval of the next example from the parser. + * + * @param example The previous example object. + **/ + protected void increment(Object example) { + if (example == FoldSeparator.separator) { + if (splitPolicy == SplitPolicy.manual) + ++fold; + } else { + if (splitPolicy == SplitPolicy.random) { + if (shuffleIndex < upperBound && shuffled[shuffleIndex] == exampleIndex) + ++shuffleIndex; + } + + ++exampleIndex; + } + } - /** Can be used to index the {@link #names} array. */ - private int index; + /** Retrieves the next example object. */ + public Object next() { + Object result = parser.next(); + for (; result != null && !filter(result); result = parser.next()) + increment(result); + if (result != null) + increment(result); + return result; + } - /** Initializes the object with an index. */ - private SplitPolicy(int i) { index = i; } + /** Frees any resources this parser may be holding. */ + public void close() { + parser.close(); + } - /** Retrieves the name of the policy represented by this object. */ - public String toString() { return names[index]; } - } -} + /** + * Immutable type representing the way in which examples are partitioned into folds. 
+ * + * TODO: When LBJava's self imposed restriction to use Java 1.4 is lifted, this class will be + * replaced by an enum. + * + *

+ * The four implemented splitting strategies are described below. Note that in all cases except + * "Manual", the size of the folds are as equal as possible, with any extra examples allocated + * to earlier folds. + * + *

+ *
+ *
Sequential
+ *
The examples are simply partitioned into sequential folds.
+ *
kth
+ *
Every kth example is in the same fold.
+ *
Random
+ *
Examples are randomly assigned to folds.
+ *
Manual
+ *
+ * Same as sequential, except fold boundaries are indicated by an appearance of the + * {@link FoldSeparator} in place of an example object.
+ *
+ *
+ * + * @author Nick Rizzolo + **/ + public static class SplitPolicy { + /** Represents the random split policy. */ + public static final SplitPolicy random = new SplitPolicy(0); + /** Represents the sequential split policy. */ + public static final SplitPolicy sequential = new SplitPolicy(1); + /** + * Represents the split policy in which every kth example is part of the same + * fold. + **/ + public static final SplitPolicy kth = new SplitPolicy(2); + /** + * Represents the split policy in which the user manually inserts fold separation objects. + **/ + public static final SplitPolicy manual = new SplitPolicy(3); + + /** The names of the different split strategies as strings. */ + private static final String[] names = {"random", "sequential", "kth", "manual"}; + + + /** Can be used to index the {@link #names} array. */ + private int index; + + + /** Initializes the object with an index. */ + private SplitPolicy(int i) { + index = i; + } + + + /** Retrieves the name of the policy represented by this object. */ + public String toString() { + return names[index]; + } + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/parse/FoldSeparator.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/parse/FoldSeparator.java index 547df662..a6ac7ff9 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/parse/FoldSeparator.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/parse/FoldSeparator.java @@ -1,32 +1,26 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.parse; /** - * This is a dummy class which is only used to signify the separation between - * folds for use in the cross validation method. This class is ignored by - * the LBJava compiler unless the manual split policy is being - * used. In that case, whenever the object referenced by {@link #separator} - * is encountered, it is interpreted as a division between two folds. - * - * @author Dan Muriello + * This is a dummy class which is only used to signify the separation between folds for use in the + * cross validation method. This class is ignored by the LBJava compiler unless the + * manual split policy is being used. In that case, whenever the object referenced by + * {@link #separator} is encountered, it is interpreted as a division between two folds. + * + * @author Dan Muriello **/ -public class FoldSeparator -{ - /** The only instance of this class is stored here. */ - public static final FoldSeparator separator = new FoldSeparator(); +public class FoldSeparator { + /** The only instance of this class is stored here. */ + public static final FoldSeparator separator = new FoldSeparator(); - /** Blank Constructor, takes nothing, does nothing. */ - private FoldSeparator(){ } + /** Blank Constructor, takes nothing, does nothing. 
*/ + private FoldSeparator() {} } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/parse/LineByLine.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/parse/LineByLine.java index 345330b3..d803f237 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/parse/LineByLine.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/parse/LineByLine.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.parse; @@ -15,91 +12,93 @@ /** - * This abstract Parser does not define the next() - * method, but it does define a constructor that opens the specified file and - * a readLine() method that fetches the next line of text from - * that file, taking care of exception handling. - * - * @author Nick Rizzolo + * This abstract Parser does not define the next() method, but it does + * define a constructor that opens the specified file and a readLine() method that + * fetches the next line of text from that file, taking care of exception handling. + * + * @author Nick Rizzolo **/ -public abstract class LineByLine implements Parser -{ - /** Reader for file currently being parsed. */ - protected BufferedReader in; - /** The name of the file to parse. */ - protected String fileName; - - - /** Leaves the member variables uninitialized. */ - protected LineByLine() { } - - /** - * Creates the parser. - * - * @param file The name of the file to parse. 
- **/ - public LineByLine(String file) { - fileName = file; - try { in = new BufferedReader(new FileReader(fileName)); } - catch (Exception e) { - System.err.println("Can't open '" + fileName + "' for input:"); - e.printStackTrace(); - System.exit(1); - } - } - - - /** - * Reads a line from the current buffer and returns it. When there are no - * more lines in the input file, the stream is closed, and - * null will be returned by this method thereafter. Returned - * strings do not contain line termination characters. - * - * @return The next line of text from the input file, or null - * if no more lines remain. - **/ - protected String readLine() { - if (in == null) return null; - - String line = null; - try { line = in.readLine(); } - catch (Exception e) { - System.err.println("Can't read from '" + fileName + "':"); - e.printStackTrace(); - System.exit(1); +public abstract class LineByLine implements Parser { + /** Reader for file currently being parsed. */ + protected BufferedReader in; + /** The name of the file to parse. */ + protected String fileName; + + + /** Leaves the member variables uninitialized. */ + protected LineByLine() {} + + /** + * Creates the parser. + * + * @param file The name of the file to parse. + **/ + public LineByLine(String file) { + fileName = file; + try { + in = new BufferedReader(new FileReader(fileName)); + } catch (Exception e) { + System.err.println("Can't open '" + fileName + "' for input:"); + e.printStackTrace(); + System.exit(1); + } } - if (line == null) { - close(); - in = null; - } - return line; - } + /** + * Reads a line from the current buffer and returns it. When there are no more lines in the + * input file, the stream is closed, and null will be returned by this method + * thereafter. Returned strings do not contain line termination characters. + * + * @return The next line of text from the input file, or null if no more lines + * remain. 
+ **/ + protected String readLine() { + if (in == null) + return null; + + String line = null; + try { + line = in.readLine(); + } catch (Exception e) { + System.err.println("Can't read from '" + fileName + "':"); + e.printStackTrace(); + System.exit(1); + } + + if (line == null) { + close(); + in = null; + } + + return line; + } - /** Sets this parser back to the beginning of the raw data. */ - public void reset() { - close(); + /** Sets this parser back to the beginning of the raw data. */ + public void reset() { + close(); - try { in = new BufferedReader(new FileReader(fileName)); } - catch (Exception e) { - System.err.println("Can't open '" + fileName + "' for input:"); - e.printStackTrace(); - System.exit(1); + try { + in = new BufferedReader(new FileReader(fileName)); + } catch (Exception e) { + System.err.println("Can't open '" + fileName + "' for input:"); + e.printStackTrace(); + System.exit(1); + } } - } - /** Frees any resources this parser may be holding. */ - public void close() { - if (in == null) return; - try { in.close(); } - catch (Exception e) { - System.err.println("Can't close '" + fileName + "':"); - e.printStackTrace(); - System.exit(1); + /** Frees any resources this parser may be holding. */ + public void close() { + if (in == null) + return; + try { + in.close(); + } catch (Exception e) { + System.err.println("Can't close '" + fileName + "':"); + e.printStackTrace(); + System.exit(1); + } } - } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/parse/LinkedChild.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/parse/LinkedChild.java index 4bc8f629..fea87be2 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/parse/LinkedChild.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/parse/LinkedChild.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. 
- * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.parse; @@ -14,73 +11,73 @@ /** - * A LinkedChild is the child of a LinkedVector. - * LinkedVectors use the methods of this interface to maintain - * links between parent and child. - * - * @see LinkedVector - * @author Nick Rizzolo + * A LinkedChild is the child of a LinkedVector. LinkedVector + * s use the methods of this interface to maintain links between parent and child. + * + * @see LinkedVector + * @author Nick Rizzolo **/ -public abstract class LinkedChild implements Cloneable, Serializable -{ - /** A link to this child's parent. */ - public LinkedVector parent; - /** A link to the previous child in the parent vector. */ - public LinkedChild previous; - /** A link to the next child in the parent vector. */ - public LinkedChild next; - /** The offset into the raw data input file at which this child starts. */ - public int start; - /** The offset into the raw data input file at which this child ends. */ - public int end; - /** Space for a label for this linked child. */ - public String label; +public abstract class LinkedChild implements Cloneable, Serializable { + /** A link to this child's parent. */ + public LinkedVector parent; + /** A link to the previous child in the parent vector. */ + public LinkedChild previous; + /** A link to the next child in the parent vector. */ + public LinkedChild next; + /** The offset into the raw data input file at which this child starts. */ + public int start; + /** The offset into the raw data input file at which this child ends. 
*/ + public int end; + /** Space for a label for this linked child. */ + public String label; - /** Does nothing. */ - protected LinkedChild() { } + /** Does nothing. */ + protected LinkedChild() {} - /** - * Useful when the information that this child represents is parsed - * forwards. - * - * @param p The previous child in the parent vector. - **/ - protected LinkedChild(LinkedChild p) { this(p, -1, -1); } + /** + * Useful when the information that this child represents is parsed forwards. + * + * @param p The previous child in the parent vector. + **/ + protected LinkedChild(LinkedChild p) { + this(p, -1, -1); + } - /** - * Constructor that sets the byte offsets of this child. - * - * @param s The offset at which this child starts. - * @param e The offset at which this child ends. - **/ - public LinkedChild(int s, int e) { this(null, s, e); } + /** + * Constructor that sets the byte offsets of this child. + * + * @param s The offset at which this child starts. + * @param e The offset at which this child ends. + **/ + public LinkedChild(int s, int e) { + this(null, s, e); + } - /** - * Useful when the information that this child represents is parsed - * forwards. - * - * @param p The previous child in the parent vector. - * @param s The offset at which this child starts. - * @param e The offset at which this child ends. - **/ - public LinkedChild(LinkedChild p, int s, int e) { - previous = p; - start = s; - end = e; - } + /** + * Useful when the information that this child represents is parsed forwards. + * + * @param p The previous child in the parent vector. + * @param s The offset at which this child starts. + * @param e The offset at which this child ends. + **/ + public LinkedChild(LinkedChild p, int s, int e) { + previous = p; + start = s; + end = e; + } - /** Returns a shallow clone of this object. 
*/ - public Object clone() { - LinkedChild clone = null; - try { clone = (LinkedChild) super.clone(); } - catch (Exception e) { - System.err.println("Problem with LinkedChild clone: " + e); - System.exit(1); - } + /** Returns a shallow clone of this object. */ + public Object clone() { + LinkedChild clone = null; + try { + clone = (LinkedChild) super.clone(); + } catch (Exception e) { + System.err.println("Problem with LinkedChild clone: " + e); + System.exit(1); + } - return clone; - } + return clone; + } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/parse/LinkedVector.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/parse/LinkedVector.java index dceb007f..4682a2d0 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/parse/LinkedVector.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/parse/LinkedVector.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.parse; @@ -14,205 +11,214 @@ /** - * A LinkedVector is used to store a vector of - * LinkedChildren which all maintain links between each other - * and the parent LinkedVector. - * - * @see LinkedChild - * @author Nick Rizzolo + * A LinkedVector is used to store a vector of LinkedChildren which all + * maintain links between each other and the parent LinkedVector. 
+ * + * @see LinkedChild + * @author Nick Rizzolo **/ -public class LinkedVector extends LinkedChild -{ - /** The linked vector is simply represented as a vector of children. */ - protected Vector children; - - - /** Initializes the vector. */ - public LinkedVector() { children = new Vector(); } - - /** - * Constructor for when only a single child from anywhere in this vector is - * available. It is assumed that the previous and - * next links are filled in by every child. - * - * @param c Any child in this vector. - **/ - public LinkedVector(LinkedChild c) { - children = new Vector(); - - while (c.previous != null) c = c.previous; - start = c.start; - - for (; c.next != null; c = c.next) { - c.parent = this; - children.add(c); +public class LinkedVector extends LinkedChild { + /** The linked vector is simply represented as a vector of children. */ + protected Vector children; + + + /** Initializes the vector. */ + public LinkedVector() { + children = new Vector(); } - c.parent = this; - children.add(c); - end = c.end; - } - - /** - * Useful when the information that this child represents is parsed - * forwards. - * - * @param p The previous child in the parent vector. - **/ - public LinkedVector(LinkedVector p) { - super(p); - children = new Vector(); - } - - /** - * Constructor that sets the character offsets of this vector. - * - * @param s The offset at which this sentence starts. - * @param e The offset at which this sentence ends. - **/ - public LinkedVector(int s, int e) { - super(s, e); - children = new Vector(); - } - - /** - * Constructor for when only a single child from anywhere in this vector is - * available. It is assumed that the previous and - * next links are filled in by every child. - * - * @param c Any child in this vector. - * @param s The offset at which this sentence starts. - * @param e The offset at which this sentence ends. 
- **/ - public LinkedVector(LinkedChild c, int s, int e) { - super(s, e); - children = new Vector(); - while (c.previous != null) c = c.previous; - for (; c != null; c = c.next) add(c); - } - - /** - * Useful when the information that this child represents is parsed - * forwards. - * - * @param p The previous child in the parent vector. - * @param s The offset at which this sentence starts. - * @param e The offset at which this sentence ends. - **/ - public LinkedVector(LinkedVector p, int s, int e) { - super(p, s, e); - children = new Vector(); - } - - - /** - * Adds the specified child to the end of the vector, informing the child - * of its parent and index and linking the child to its only neighbor - * (which was previously the last child in the vector). - * - * @param c The child to add. - **/ - public boolean add(LinkedChild c) { - c.parent = this; - if (children.size() > 0) { - LinkedChild p = get(children.size() - 1); - p.next = c; - c.previous = p; + /** + * Constructor for when only a single child from anywhere in this vector is available. It is + * assumed that the previous and next links are filled in by every + * child. + * + * @param c Any child in this vector. + **/ + public LinkedVector(LinkedChild c) { + children = new Vector(); + + while (c.previous != null) + c = c.previous; + start = c.start; + + for (; c.next != null; c = c.next) { + c.parent = this; + children.add(c); + } + + c.parent = this; + children.add(c); + end = c.end; + } + + /** + * Useful when the information that this child represents is parsed forwards. + * + * @param p The previous child in the parent vector. + **/ + public LinkedVector(LinkedVector p) { + super(p); + children = new Vector(); + } + + /** + * Constructor that sets the character offsets of this vector. + * + * @param s The offset at which this sentence starts. + * @param e The offset at which this sentence ends. 
+ **/ + public LinkedVector(int s, int e) { + super(s, e); + children = new Vector(); + } + + /** + * Constructor for when only a single child from anywhere in this vector is available. It is + * assumed that the previous and next links are filled in by every + * child. + * + * @param c Any child in this vector. + * @param s The offset at which this sentence starts. + * @param e The offset at which this sentence ends. + **/ + public LinkedVector(LinkedChild c, int s, int e) { + super(s, e); + children = new Vector(); + while (c.previous != null) + c = c.previous; + for (; c != null; c = c.next) + add(c); + } + + /** + * Useful when the information that this child represents is parsed forwards. + * + * @param p The previous child in the parent vector. + * @param s The offset at which this sentence starts. + * @param e The offset at which this sentence ends. + **/ + public LinkedVector(LinkedVector p, int s, int e) { + super(p, s, e); + children = new Vector(); + } + + + /** + * Adds the specified child to the end of the vector, informing the child of its parent and + * index and linking the child to its only neighbor (which was previously the last child in the + * vector). + * + * @param c The child to add. + **/ + public boolean add(LinkedChild c) { + c.parent = this; + if (children.size() > 0) { + LinkedChild p = get(children.size() - 1); + p.next = c; + c.previous = p; + } + + return children.add(c); } - return children.add(c); - } - - - /** - * Removes the child at the specified index. - * - * @param i The index of the child to remove. - * @return The child removed, or null if there was no child at - * that index. - **/ - public LinkedChild remove(int i) { - LinkedChild before = - (i - 1 < 0) ? null : (LinkedChild) children.get(i - 1); - LinkedChild after = - (i + 1 >= children.size()) ? 
null : (LinkedChild) children.get(i + 1); - - if (before != null) before.next = after; - if (after != null) after.previous = before; - - LinkedChild removed = null; - try { removed = (LinkedChild) children.remove(i); } - catch (ArrayIndexOutOfBoundsException e) { return null; } - - removed.parent = null; - removed.next = removed.previous = null; - return removed; - } - - - /** - * Inserts the specified child into the specified index. All children that - * previously had index greater than or equal to the specified index are - * shifted up one. - * - * @param c The child to insert. - * @param i The index at which to insert the child. - * @return true if and only if the insert was successful. - **/ - public boolean insert(LinkedChild c, int i) { - try { children.insertElementAt(c, i); } - catch (ArrayIndexOutOfBoundsException e) { return false; } - - c.parent = this; - c.previous = (i - 1 < 0) ? null : (LinkedChild) children.get(i - 1); - c.next = - (i + 1 >= children.size()) ? null : (LinkedChild) children.get(i + 1); - - if (c.previous != null) c.previous.next = c; - if (c.next != null) c.next.previous = c; - - return true; - } - - - /** - * Retrieves the child at the specified index in the vector. - * - * @param i The index from which to retrieve a child. - * @return The child at the specified index, or null if there - * was no child at that index. - **/ - public LinkedChild get(int i) { - try { return (LinkedChild) children.get(i); } - catch (ArrayIndexOutOfBoundsException e) { } - return null; - } - - - /** - * Returns the size of the vector. - * - * @return The size of the vector. - **/ - public int size() { return children.size(); } - - - /** - * Returns a clone of this object that is deep in the sense that all of the - * children objects are cloned. - * - * @return A deep clone of this object. 
- **/ - public Object clone() { - LinkedVector clone = (LinkedVector) super.clone(); - clone.children = (Vector) clone.children.clone(); - // This may look inefficient, but there is a purpose. Subclasses of - // LinkedVector that don't define any new non-primitive member fields will - // not need to override this method, as it already produces an object of - // the subclass's type. - for (int i = 0; i < size(); ++i) - clone.insert((LinkedChild) clone.remove(i).clone(), i); - return clone; - } -} + /** + * Removes the child at the specified index. + * + * @param i The index of the child to remove. + * @return The child removed, or null if there was no child at that index. + **/ + public LinkedChild remove(int i) { + LinkedChild before = (i - 1 < 0) ? null : (LinkedChild) children.get(i - 1); + LinkedChild after = (i + 1 >= children.size()) ? null : (LinkedChild) children.get(i + 1); + + if (before != null) + before.next = after; + if (after != null) + after.previous = before; + + LinkedChild removed = null; + try { + removed = (LinkedChild) children.remove(i); + } catch (ArrayIndexOutOfBoundsException e) { + return null; + } + + removed.parent = null; + removed.next = removed.previous = null; + return removed; + } + + + /** + * Inserts the specified child into the specified index. All children that previously had index + * greater than or equal to the specified index are shifted up one. + * + * @param c The child to insert. + * @param i The index at which to insert the child. + * @return true if and only if the insert was successful. + **/ + public boolean insert(LinkedChild c, int i) { + try { + children.insertElementAt(c, i); + } catch (ArrayIndexOutOfBoundsException e) { + return false; + } + + c.parent = this; + c.previous = (i - 1 < 0) ? null : (LinkedChild) children.get(i - 1); + c.next = (i + 1 >= children.size()) ? 
null : (LinkedChild) children.get(i + 1); + + if (c.previous != null) + c.previous.next = c; + if (c.next != null) + c.next.previous = c; + + return true; + } + + + /** + * Retrieves the child at the specified index in the vector. + * + * @param i The index from which to retrieve a child. + * @return The child at the specified index, or null if there was no child at that + * index. + **/ + public LinkedChild get(int i) { + try { + return (LinkedChild) children.get(i); + } catch (ArrayIndexOutOfBoundsException e) { + } + return null; + } + + + /** + * Returns the size of the vector. + * + * @return The size of the vector. + **/ + public int size() { + return children.size(); + } + + + /** + * Returns a clone of this object that is deep in the sense that all of the children objects are + * cloned. + * + * @return A deep clone of this object. + **/ + public Object clone() { + LinkedVector clone = (LinkedVector) super.clone(); + clone.children = (Vector) clone.children.clone(); + // This may look inefficient, but there is a purpose. Subclasses of + // LinkedVector that don't define any new non-primitive member fields will + // not need to override this method, as it already produces an object of + // the subclass's type. + for (int i = 0; i < size(); ++i) + clone.insert((LinkedChild) clone.remove(i).clone(), i); + return clone; + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/parse/Parser.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/parse/Parser.java index a158e8f3..4497108e 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/parse/Parser.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/parse/Parser.java @@ -1,39 +1,32 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. 
See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.parse; /** - * Any parser that extends this interface can be sent to a - * Learner for batch training. - * - * @see edu.illinois.cs.cogcomp.lbjava.learn.Learner - * @author Nick Rizzolo + * Any parser that extends this interface can be sent to a Learner for batch training. + * + * @see edu.illinois.cs.cogcomp.lbjava.learn.Learner + * @author Nick Rizzolo **/ -public interface Parser -{ - /** - * Use this method to retrieve the next object parsed from the raw input - * data. - * - * @return The next object parsed from the input data. - **/ - Object next(); +public interface Parser { + /** + * Use this method to retrieve the next object parsed from the raw input data. + * + * @return The next object parsed from the input data. + **/ + Object next(); - /** Sets this parser back to the beginning of the raw data. */ - void reset(); + /** Sets this parser back to the beginning of the raw data. */ + void reset(); - /** Frees any resources this parser may be holding. */ - void close(); + /** Frees any resources this parser may be holding. */ + void close(); } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/ByteString.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/ByteString.java index b26daaba..e592239d 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/ByteString.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/ByteString.java @@ -1,437 +1,453 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. 
- * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.util; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream; + import java.util.Arrays; /** - * Represents a String by directly storing an encoding of that - * String in an array of bytes. This can save a - * lot of memory if all of the application's characters fit in a single byte - * when encoded by, for instance, UTF-8. In fact, the default encoding used - * by this class is UTF-8. - * - * @author Nick Rizzolo + * Represents a String by directly storing an encoding of that String in + * an array of bytes. This can save a lot of memory if all of the application's + * characters fit in a single byte when encoded by, for instance, UTF-8. In fact, the default + * encoding used by this class is UTF-8. + * + * @author Nick Rizzolo **/ -public class ByteString implements Cloneable, Comparable -{ - /** The default character encoding for instances of this class. */ - public static final String defaultEncoding = "UTF-8"; - /** A byte string representing "". */ - public static final ByteString emptyString = - new ByteString("", defaultEncoding); - - - /** - * Handles exceptions generated by unsupported encodings. - * - * @param e The exception. 
- **/ - protected void handleEncodingException(Exception e) { - System.err.println( - "ERROR: Encoding \"" + encoding + "\" is not supported."); - e.printStackTrace(); - System.exit(1); - } - - - /** - * Reads and returns a byte string from an input stream. - * - * @param in The input stream. - * @return The byte string. - **/ - public static ByteString readByteString(ExceptionlessInputStream in) { - ByteString result = new ByteString(false); - result.read(in); - return result; - } - - - /** - * Reads and returns a byte string as written by a lexicon. - * - * @param in The input stream. - * @param i The assumed identifier. If no identifier is given in the - * input stream, the instantiated feature is given this - * identifier. - * @return The byte string. - **/ - public static ByteString lexReadByteString(ExceptionlessInputStream in, - ByteString i) { - ByteString result = new ByteString(false); - result.lexRead(in, i); - return result; - } - - - /** The encoding method used by this instance. */ - protected String encoding; - /** The encoded characters. */ - protected byte[] value; - /** - * The hash code of the String decoding of this byte string. - **/ - protected int hashCode; - - - /** - * For internal use only. - * - * @param b Dummy variable to make a new signature. - **/ - protected ByteString(boolean b) { } - - /** Creates an empty byte string. */ - public ByteString() { this(""); } - - /** - * Creates a byte string by using the default encoding to encode the - * specified string. - * - * @param s The string to encode. - **/ - public ByteString(String s) { this(s, null); } - - /** - * Creates a byte string by using the specified encoding to encode the - * specified string. - * - * @param s The string to encode. - * @param e The encoding method. - **/ - public ByteString(String s, String e) { - encoding = e == null ? 
defaultEncoding : e.intern(); - setValue(s); - } - - /** - * Creates a byte string with the given encoding, which may involve - * converting the specified byte string's contents if the encodings differ. - * - * @param b The original byte string. - * @param e The new encoding. - **/ - public ByteString(ByteString b, String e) { - encoding = e.intern(); - if (b.encoding == encoding) { - value = b.value; - hashCode = b.hashCode; +public class ByteString implements Cloneable, Comparable { + /** The default character encoding for instances of this class. */ + public static final String defaultEncoding = "UTF-8"; + /** A byte string representing "". */ + public static final ByteString emptyString = new ByteString("", defaultEncoding); + + + /** + * Handles exceptions generated by unsupported encodings. + * + * @param e The exception. + **/ + protected void handleEncodingException(Exception e) { + System.err.println("ERROR: Encoding \"" + encoding + "\" is not supported."); + e.printStackTrace(); + System.exit(1); + } + + + /** + * Reads and returns a byte string from an input stream. + * + * @param in The input stream. + * @return The byte string. + **/ + public static ByteString readByteString(ExceptionlessInputStream in) { + ByteString result = new ByteString(false); + result.read(in); + return result; + } + + + /** + * Reads and returns a byte string as written by a lexicon. + * + * @param in The input stream. + * @param i The assumed identifier. If no identifier is given in the input stream, the + * instantiated feature is given this identifier. + * @return The byte string. + **/ + public static ByteString lexReadByteString(ExceptionlessInputStream in, ByteString i) { + ByteString result = new ByteString(false); + result.lexRead(in, i); + return result; + } + + + /** The encoding method used by this instance. */ + protected String encoding; + /** The encoded characters. */ + protected byte[] value; + /** + * The hash code of the String decoding of this byte string. 
+ **/ + protected int hashCode; + + + /** + * For internal use only. + * + * @param b Dummy variable to make a new signature. + **/ + protected ByteString(boolean b) {} + + /** Creates an empty byte string. */ + public ByteString() { + this(""); + } + + /** + * Creates a byte string by using the default encoding to encode the specified string. + * + * @param s The string to encode. + **/ + public ByteString(String s) { + this(s, null); } - else setValue(b.toString()); - } - - - /** Returns the name of the encoding method of this byte string. */ - public String getEncoding() { return encoding; } - - - /** - * Sets the value of this byte string to the byte encoding of the specified - * string. - * - * @param s The string to encode. - **/ - public void setValue(String s) { - try { value = s.getBytes(encoding); } - catch (Exception e) { handleEncodingException(e); } - hashCode = s.hashCode(); - } - - - /** Returns the length of {@link #value}. */ - public int length() { return value.length; } - - - /** - * Returns the byte at index i of {@link #value}. - * - * @param i The index of the requested byte. - * @return The value of the requested byte. - **/ - public byte getByte(int i) { return value[i]; } - - - /** - * Appends the encoding of the given string onto the existing encoding in - * this object. This operation changes the {@link #value} reference in - * this object. - * - *

Warning: Depending on the character encoding in use, this may - * introduce byte order markers into the middle of this object's byte - * array, which usually is not desired. - * - * @param s The string whose encoding will be appended. - * @return This object. - **/ - public ByteString append(String s) { - hashCode = (toString() + s).hashCode(); - byte[] v = null; - try { v = s.getBytes(encoding); } - catch (Exception e) { handleEncodingException(e); } - - byte[] t = new byte[value.length + v.length]; - System.arraycopy(value, 0, t, 0, value.length); - System.arraycopy(v, 0, t, value.length, v.length); - value = t; - return this; - } - - - /** - * Appends the encodings of all the given strings onto the existing - * encoding in this object. This operation changes the {@link #value} - * reference in this object. - * - *

Warning: Depending on the character encoding in use, this may - * introduce byte order markers into the middle of this object's byte - * array, which usually is not desired. - * - * @param s The strings whose encodings will be appended. - * @return This object. - **/ - public ByteString append(String[] s) { - StringBuffer buffer = new StringBuffer(toString()); - for (int i = 0; i < s.length; ++i) buffer.append(s[i]); - hashCode = buffer.toString().hashCode(); - - byte[][] v = new byte[s.length][]; - int length = 0; - try { - for (int i = 0; i < v.length; ++i) { - v[i] = s[i].getBytes(encoding); - length += v[i].length; - } + + /** + * Creates a byte string by using the specified encoding to encode the specified string. + * + * @param s The string to encode. + * @param e The encoding method. + **/ + public ByteString(String s, String e) { + encoding = e == null ? defaultEncoding : e.intern(); + setValue(s); } - catch (Exception e) { handleEncodingException(e); } - - byte[] t = new byte[length]; - length = value.length; - System.arraycopy(value, 0, t, 0, length); - for (int i = 0; i < v.length; ++i) { - System.arraycopy(v[i], 0, t, length, v[i].length); - length += v[i].length; + + /** + * Creates a byte string with the given encoding, which may involve converting the specified + * byte string's contents if the encodings differ. + * + * @param b The original byte string. + * @param e The new encoding. + **/ + public ByteString(ByteString b, String e) { + encoding = e.intern(); + if (b.encoding == encoding) { + value = b.value; + hashCode = b.hashCode; + } else + setValue(b.toString()); } - value = t; - return this; - } - - - /** - * Appends the string represented by the given byte string onto the - * existing content in this object. This operation changes the - * {@link #value} reference in this object. - * - *

Warning: Depending on the character encoding in use, this may - * introduce byte order markers into the middle of this object's byte - * array, which usually is not desired. - * - * @param b The string whose encoding will be appended. - * @return This object. - **/ - public ByteString append(ByteString b) { - String s = b.toString(); - hashCode = (toString() + s).hashCode(); - if (encoding != b.encoding) return append(s); - byte[] t = new byte[value.length + b.value.length]; - System.arraycopy(value, 0, t, 0, value.length); - System.arraycopy(b.value, 0, t, value.length, b.value.length); - value = t; - return this; - } - - - /** - * Appends the strings represented by the given byte strings onto the - * existing content in this object. This operation changes the - * {@link #value} reference in this object. - * - *

Warning: Depending on the character encoding in use, this may - * introduce byte order markers into the middle of this object's byte - * array, which usually is not desired. - * - * @param b The strings whose encodings will be appended. - * @return This object. - **/ - public ByteString append(ByteString[] b) { - int length = 0; - StringBuffer buffer = new StringBuffer(toString()); - for (int i = 0; i < b.length; ++i) { - String s = b[i].toString(); - buffer.append(s); - if (encoding != b[i].encoding) - b[i] = new ByteString(s, encoding); - length += b[i].value.length; + + /** Returns the name of the encoding method of this byte string. */ + public String getEncoding() { + return encoding; } - hashCode = buffer.toString().hashCode(); - - byte[] t = new byte[length]; - length = value.length; - System.arraycopy(value, 0, t, 0, length); - for (int i = 0; i < b.length; ++i) { - System.arraycopy(b[i].value, 0, t, length, b[i].value.length); - length += b[i].value.length; + + + /** + * Sets the value of this byte string to the byte encoding of the specified string. + * + * @param s The string to encode. + **/ + public void setValue(String s) { + try { + value = s.getBytes(encoding); + } catch (Exception e) { + handleEncodingException(e); + } + hashCode = s.hashCode(); } - value = t; - return this; - } - - - /** - * If the argument object is a byte string, this object's byte array and - * the argument object's byte array are compared lexicographically. - * Otherwise, -1 is returned. Of course, this operation is considerably - * more expensive if the two strings do not share the same encoding. 
- **/ - public int compareTo(Object o) { - if (!(o instanceof ByteString)) return -1; - ByteString b = (ByteString) o; - if (encoding != b.encoding) return toString().compareTo(b.toString()); - - int n1 = value.length; - int n2 = b.value.length; - int n = Math.min(n1, n2); - - for (int i = 0; i < n; ++i) { - byte b1 = value[i]; - byte b2 = b.value[i]; - if (b1 != b2) return b1 - b2; + + /** Returns the length of {@link #value}. */ + public int length() { + return value.length; } - return n1 - n2; - } - - - /** Returns a hash code for this object. */ - public int hashCode() { return hashCode; } - - - /** - * Two byte strings are equivalent if they encode the same string. This - * operation is more expensive if the two byte strings use different - * encodings. - **/ - public boolean equals(Object o) { - if (o instanceof String) return toString().equals(o); - if (!(o instanceof ByteString)) return false; - ByteString b = (ByteString) o; - if (encoding != b.encoding) return toString().equals(b.toString()); - if (value.length != b.value.length) return false; - - for (int i = 0; i < value.length; ++i) - if (value[i] != b.value[i]) return false; - return true; - } - - - /** - * Writes a complete binary representation of this byte string. - * - * @param out The output stream. - **/ - public void write(ExceptionlessOutputStream out) { - out.writeString(encoding); - out.writeInt(hashCode); - out.writeBytes(value); - } - - - /** - * Reads in a complete binary representation of a byte string. - * - * @param in The input stream. - **/ - public void read(ExceptionlessInputStream in) { - encoding = in.readString().intern(); - hashCode = in.readInt(); - value = in.readBytes(); - } - - - /** - * Writes a binary representation of this byte string intended for use by - * a lexicon, omitting redundant information when possible. - * - * @param out The output stream. - * @param i The assumed identifier string. 
This byte strings value, - * encoding, or both may be omitted if they are equivalent to - * i. - **/ - public void lexWrite(ExceptionlessOutputStream out, ByteString i) { - if (i != null && encoding == i.encoding && Arrays.equals(value, i.value)) - out.writeBytes(null); - else { - out.writeBytes(value); - out.writeInt(hashCode); - out.writeString(i != null && encoding == i.encoding ? null : encoding); + + /** + * Returns the byte at index i of {@link #value}. + * + * @param i The index of the requested byte. + * @return The value of the requested byte. + **/ + public byte getByte(int i) { + return value[i]; } - } - - - /** - * Reads the representation of a byte string as stored by a lexicon, - * overwriting the data in this object. - * - *

This method is appropriate for reading byte strings as written by - * {@link #lexWrite(ExceptionlessOutputStream,ByteString)}. - * - * @param in The input stream. - * @param i The assumed identifier string. - **/ - public void lexRead(ExceptionlessInputStream in, ByteString i) { - value = in.readBytes(); - if (value == null) { - value = i.value; - hashCode = i.hashCode; - encoding = i.encoding; + + + /** + * Appends the encoding of the given string onto the existing encoding in this object. This + * operation changes the {@link #value} reference in this object. + * + *

+ * Warning: Depending on the character encoding in use, this may introduce byte order + * markers into the middle of this object's byte array, which usually is not desired. + * + * @param s The string whose encoding will be appended. + * @return This object. + **/ + public ByteString append(String s) { + hashCode = (toString() + s).hashCode(); + byte[] v = null; + try { + v = s.getBytes(encoding); + } catch (Exception e) { + handleEncodingException(e); + } + + byte[] t = new byte[value.length + v.length]; + System.arraycopy(value, 0, t, 0, value.length); + System.arraycopy(v, 0, t, value.length, v.length); + value = t; + return this; } - else { - hashCode = in.readInt(); - encoding = in.readString(); - if (encoding == null) encoding = i.encoding; - else encoding = encoding.intern(); + + + /** + * Appends the encodings of all the given strings onto the existing encoding in this object. + * This operation changes the {@link #value} reference in this object. + * + *

+ * Warning: Depending on the character encoding in use, this may introduce byte order + * markers into the middle of this object's byte array, which usually is not desired. + * + * @param s The strings whose encodings will be appended. + * @return This object. + **/ + public ByteString append(String[] s) { + StringBuffer buffer = new StringBuffer(toString()); + for (int i = 0; i < s.length; ++i) + buffer.append(s[i]); + hashCode = buffer.toString().hashCode(); + + byte[][] v = new byte[s.length][]; + int length = 0; + try { + for (int i = 0; i < v.length; ++i) { + v[i] = s[i].getBytes(encoding); + length += v[i].length; + } + } catch (Exception e) { + handleEncodingException(e); + } + + byte[] t = new byte[length]; + length = value.length; + System.arraycopy(value, 0, t, 0, length); + for (int i = 0; i < v.length; ++i) { + System.arraycopy(v[i], 0, t, length, v[i].length); + length += v[i].length; + } + + value = t; + return this; } - } - - - /** Returns a decoded string. */ - public String toString() { - try { return new String(value, encoding); } - catch (Exception e) { handleEncodingException(e); } - return null; - } - - - /** - * Returns a shallow copy of this string. Note that this class does not - * provide any operations that modify the contents of the objects - * referenced by its fields, making a deep clone unnecessary. - * ({@link #append(String)}, {@link #append(ByteString)}, and - * {@link #setValue(String)} modify the {@link #value} field itself, but - * the reference is merely replaced; the contents of the original array do - * not change.) - **/ - public Object clone() { - Object result = null; - - try { result = super.clone(); } - catch (Exception e) { - System.err.println("Can't clone byte string '" + this + "':"); - e.printStackTrace(); + + + /** + * Appends the string represented by the given byte string onto the existing content in this + * object. This operation changes the {@link #value} reference in this object. + * + *

+ * Warning: Depending on the character encoding in use, this may introduce byte order + * markers into the middle of this object's byte array, which usually is not desired. + * + * @param b The string whose encoding will be appended. + * @return This object. + **/ + public ByteString append(ByteString b) { + String s = b.toString(); + hashCode = (toString() + s).hashCode(); + if (encoding != b.encoding) + return append(s); + byte[] t = new byte[value.length + b.value.length]; + System.arraycopy(value, 0, t, 0, value.length); + System.arraycopy(b.value, 0, t, value.length, b.value.length); + value = t; + return this; } - return result; - } -} + /** + * Appends the strings represented by the given byte strings onto the existing content in this + * object. This operation changes the {@link #value} reference in this object. + * + *

+ * Warning: Depending on the character encoding in use, this may introduce byte order + * markers into the middle of this object's byte array, which usually is not desired. + * + * @param b The strings whose encodings will be appended. + * @return This object. + **/ + public ByteString append(ByteString[] b) { + int length = 0; + StringBuffer buffer = new StringBuffer(toString()); + for (int i = 0; i < b.length; ++i) { + String s = b[i].toString(); + buffer.append(s); + if (encoding != b[i].encoding) + b[i] = new ByteString(s, encoding); + length += b[i].value.length; + } + hashCode = buffer.toString().hashCode(); + + byte[] t = new byte[length]; + length = value.length; + System.arraycopy(value, 0, t, 0, length); + for (int i = 0; i < b.length; ++i) { + System.arraycopy(b[i].value, 0, t, length, b[i].value.length); + length += b[i].value.length; + } + + value = t; + return this; + } + + + /** + * If the argument object is a byte string, this object's byte array and the argument object's + * byte array are compared lexicographically. Otherwise, -1 is returned. Of course, this + * operation is considerably more expensive if the two strings do not share the same encoding. + **/ + public int compareTo(Object o) { + if (!(o instanceof ByteString)) + return -1; + ByteString b = (ByteString) o; + if (encoding != b.encoding) + return toString().compareTo(b.toString()); + + int n1 = value.length; + int n2 = b.value.length; + int n = Math.min(n1, n2); + + for (int i = 0; i < n; ++i) { + byte b1 = value[i]; + byte b2 = b.value[i]; + if (b1 != b2) + return b1 - b2; + } + + return n1 - n2; + } + + + /** Returns a hash code for this object. */ + public int hashCode() { + return hashCode; + } + + + /** + * Two byte strings are equivalent if they encode the same string. This operation is more + * expensive if the two byte strings use different encodings. 
+ **/ + public boolean equals(Object o) { + if (o instanceof String) + return toString().equals(o); + if (!(o instanceof ByteString)) + return false; + ByteString b = (ByteString) o; + if (encoding != b.encoding) + return toString().equals(b.toString()); + if (value.length != b.value.length) + return false; + + for (int i = 0; i < value.length; ++i) + if (value[i] != b.value[i]) + return false; + return true; + } + + + /** + * Writes a complete binary representation of this byte string. + * + * @param out The output stream. + **/ + public void write(ExceptionlessOutputStream out) { + out.writeString(encoding); + out.writeInt(hashCode); + out.writeBytes(value); + } + + + /** + * Reads in a complete binary representation of a byte string. + * + * @param in The input stream. + **/ + public void read(ExceptionlessInputStream in) { + encoding = in.readString().intern(); + hashCode = in.readInt(); + value = in.readBytes(); + } + + + /** + * Writes a binary representation of this byte string intended for use by a lexicon, omitting + * redundant information when possible. + * + * @param out The output stream. + * @param i The assumed identifier string. This byte strings value, encoding, or both may be + * omitted if they are equivalent to i. + **/ + public void lexWrite(ExceptionlessOutputStream out, ByteString i) { + if (i != null && encoding == i.encoding && Arrays.equals(value, i.value)) + out.writeBytes(null); + else { + out.writeBytes(value); + out.writeInt(hashCode); + out.writeString(i != null && encoding == i.encoding ? null : encoding); + } + } + + + /** + * Reads the representation of a byte string as stored by a lexicon, overwriting the data in + * this object. + * + *

+ * This method is appropriate for reading byte strings as written by + * {@link #lexWrite(ExceptionlessOutputStream,ByteString)}. + * + * @param in The input stream. + * @param i The assumed identifier string. + **/ + public void lexRead(ExceptionlessInputStream in, ByteString i) { + value = in.readBytes(); + if (value == null) { + value = i.value; + hashCode = i.hashCode; + encoding = i.encoding; + } else { + hashCode = in.readInt(); + encoding = in.readString(); + if (encoding == null) + encoding = i.encoding; + else + encoding = encoding.intern(); + } + } + + + /** Returns a decoded string. */ + public String toString() { + try { + return new String(value, encoding); + } catch (Exception e) { + handleEncodingException(e); + } + return null; + } + + + /** + * Returns a shallow copy of this string. Note that this class does not provide any operations + * that modify the contents of the objects referenced by its fields, making a deep clone + * unnecessary. ({@link #append(String)}, {@link #append(ByteString)}, and + * {@link #setValue(String)} modify the {@link #value} field itself, but the reference is merely + * replaced; the contents of the original array do not change.) + **/ + public Object clone() { + Object result = null; + + try { + result = super.clone(); + } catch (Exception e) { + System.err.println("Can't clone byte string '" + this + "':"); + e.printStackTrace(); + } + + return result; + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/ClassUtils.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/ClassUtils.java index 0b4c2157..9ccfae38 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/ClassUtils.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/ClassUtils.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. 
- * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.util; @@ -19,430 +16,390 @@ /** - * Utility methods for retrieving various classes that are part of the LBJava - * class hierarchy by name. - * - * @author Nick Rizzolo + * Utility methods for retrieving various classes that are part of the LBJava class hierarchy by + * name. + * + * @author Nick Rizzolo **/ -public class ClassUtils -{ - /** - * Retrieves the Class object with the given name. If there - * is any exception thrown during retrieval, the program will print an - * error message to STDERR and terminate via - * System.exit(1). - * - * @param name The fully qualified name of the class. - * @return The class with the given name. - **/ - public static Class getClass(String name) { - return ClassUtils.getClass(name, true); - } - - - /** - * Retrieves the Class object with the given name. - * - * @param name The fully qualified name of the class. - * @param exit Whether or not to System.exit(1) on an - * exception. - * @return The class with the given name, or null if an - * exception was caught. - **/ - public static Class getClass(String name, boolean exit) { - Class clazz = null; - - try { clazz = Class.forName(name); } - catch (Exception e) { - if (exit) { - System.err.println("Can't get class for '" + name + "':"); - e.printStackTrace(); - System.exit(1); - } +public class ClassUtils { + /** + * Retrieves the Class object with the given name. If there is any exception thrown + * during retrieval, the program will print an error message to STDERR and + * terminate via System.exit(1). 
+ * + * @param name The fully qualified name of the class. + * @return The class with the given name. + **/ + public static Class getClass(String name) { + return ClassUtils.getClass(name, true); } - return clazz; - } - - - /** - * Retrieve the constructor of the given class with the given parameter - * types. If there is any exception thrown during retrieval, the program - * will print an error message to STDERR and terminate via - * System.exit(1). - * - * @param name The fully qualified name of the class. - * @param paramTypes The Classes representing the types of the - * constructor's parameters. - * @return The indicated constructor. - **/ - public static Constructor getConstructor(String name, Class[] paramTypes) { - return ClassUtils.getConstructor(name, paramTypes, true); - } - - - /** - * Retrieve the constructor of the given class with the given parameter - * type names. If there is any exception thrown during retrieval, the - * program will print an error message to STDERR and terminate - * via System.exit(1). - * - * @param name The fully qualified name of the class. - * @param paramNames The names of the types of the constructor's - * parameters. - * @return The indicated constructor. - **/ - public static Constructor getConstructor(String name, String[] paramNames) { - return ClassUtils.getConstructor(name, paramNames, true); - } - - - /** - * Retrieve the constructor of the given class with the given parameter - * type names. - * - * @param name The fully qualified name of the class. - * @param paramNames The names of the types of the constructor's - * parameters. - * @param exit Whether or not to System.exit(1) on an - * exception. - * @return The indicated constructor, or null if an exception - * was caught. 
- **/ - public static Constructor getConstructor(String name, String[] paramNames, - boolean exit) { - Class[] paramTypes = new Class[paramNames.length]; - for (int i = 0; i < paramNames.length; ++i) - paramTypes[i] = ClassUtils.getClass(paramNames[i], exit); - return getConstructor(name, paramTypes, exit); - } - - - /** - * Retrieve the constructor of the given class with the given parameter - * types. - * - * @param name The fully qualified name of the class. - * @param paramTypes The Classes representing the types of the - * constructor's parameters. - * @param exit Whether or not to System.exit(1) on an - * exception. - * @return The indicated constructor, or null if an exception - * was caught. - **/ - public static Constructor getConstructor(String name, Class[] paramTypes, - boolean exit) { - Class clazz = ClassUtils.getClass(name); - Constructor constructor = null; - - try { constructor = clazz.getConstructor(paramTypes); } - catch (Exception e) { - if (exit) { - System.err.print("Can't get the constructor with parameters (" - + paramTypes[0].getName()); - for (int i = 1; i < paramTypes.length; ++i) - System.err.print(", " + paramTypes[i].getName()); - System.err.println(") for '" + name + "':"); - e.printStackTrace(); - System.exit(1); - } + + /** + * Retrieves the Class object with the given name. + * + * @param name The fully qualified name of the class. + * @param exit Whether or not to System.exit(1) on an exception. + * @return The class with the given name, or null if an exception was caught. + **/ + public static Class getClass(String name, boolean exit) { + Class clazz = null; + + try { + clazz = Class.forName(name); + } catch (Exception e) { + if (exit) { + System.err.println("Can't get class for '" + name + "':"); + e.printStackTrace(); + System.exit(1); + } + } + + return clazz; } - return constructor; - } - - - /** - * Retrieve a Classifier by name using the no-argument - * constructor. 
If there is any exception thrown during retrieval, the - * program will print an error message to STDERR and terminate - * via System.exit(1). - * - * @param name The fully qualified name of the class. - * @return An instance of the classifier. - **/ - public static Classifier getClassifier(String name) { - return ClassUtils.getClassifier(name, true); - } - - - /** - * Retrieve a Classifier by name using the no-argument - * constructor. - * - * @param name The fully qualified name of the class. - * @param exit Whether or not to System.exit(1) on an - * exception. - * @return An instance of the classifier, or null if an - * exception was caught. - **/ - public static Classifier getClassifier(String name, boolean exit) { - Class clazz = ClassUtils.getClass(name); - Classifier classifier = null; - - try { classifier = (Classifier) clazz.newInstance(); } - catch (Exception e) { - if (exit) { - System.err.println("Can't instantiate '" + name + "':"); - e.printStackTrace(); - System.exit(1); - } + + /** + * Retrieve the constructor of the given class with the given parameter types. If there is any + * exception thrown during retrieval, the program will print an error message to + * STDERR and terminate via System.exit(1). + * + * @param name The fully qualified name of the class. + * @param paramTypes The Classes representing the types of the constructor's + * parameters. + * @return The indicated constructor. + **/ + public static Constructor getConstructor(String name, Class[] paramTypes) { + return ClassUtils.getConstructor(name, paramTypes, true); } - return classifier; - } - - - /** - * Retrieve a Classifier by name using a constructor with - * arguments. If there is any exception thrown during retrieval, the - * program will print an error message to STDERR and terminate - * via System.exit(1). - * - * @param name The fully qualified name of the class. - * @param paramTypes The Classes representing the types of the - * constructor's parameters. 
- * @param arguments The arguments to send to the constructor. - * @return An instance of the classifier. - **/ - public static Classifier getClassifier(String name, Class[] paramTypes, - Object[] arguments) { - return ClassUtils.getClassifier(name, paramTypes, arguments, true); - } - - - /** - * Retrieve a Classifier by name using a constructor with - * arguments. - * - * @param name The fully qualified name of the class. - * @param paramTypes The Classes representing the types of the - * constructor's parameters. - * @param arguments The arguments to send to the constructor. - * @param exit Whether or not to System.exit(1) on an - * exception. - * @return An instance of the classifier, or null if an - * exception was caught. - **/ - public static Classifier getClassifier(String name, Class[] paramTypes, - Object[] arguments, boolean exit) { - Constructor constructor = ClassUtils.getConstructor(name, paramTypes); - Classifier classifier = null; - - try { classifier = (Classifier) constructor.newInstance(arguments); } - catch (InvocationTargetException e) { - if (exit) { - Throwable cause = e.getCause(); - System.err.println("Can't instantiate '" + name + "':"); - cause.printStackTrace(); - System.exit(1); - } + + /** + * Retrieve the constructor of the given class with the given parameter type names. If there is + * any exception thrown during retrieval, the program will print an error message to + * STDERR and terminate via System.exit(1). + * + * @param name The fully qualified name of the class. + * @param paramNames The names of the types of the constructor's parameters. + * @return The indicated constructor. 
+ **/ + public static Constructor getConstructor(String name, String[] paramNames) { + return ClassUtils.getConstructor(name, paramNames, true); } - catch (Exception e) { - if (exit) { - System.err.println("Can't instantiate '" + name + "':"); - e.printStackTrace(); - System.exit(1); - } + + + /** + * Retrieve the constructor of the given class with the given parameter type names. + * + * @param name The fully qualified name of the class. + * @param paramNames The names of the types of the constructor's parameters. + * @param exit Whether or not to System.exit(1) on an exception. + * @return The indicated constructor, or null if an exception was caught. + **/ + public static Constructor getConstructor(String name, String[] paramNames, boolean exit) { + Class[] paramTypes = new Class[paramNames.length]; + for (int i = 0; i < paramNames.length; ++i) + paramTypes[i] = ClassUtils.getClass(paramNames[i], exit); + return getConstructor(name, paramTypes, exit); + } + + + /** + * Retrieve the constructor of the given class with the given parameter types. + * + * @param name The fully qualified name of the class. + * @param paramTypes The Classes representing the types of the constructor's + * parameters. + * @param exit Whether or not to System.exit(1) on an exception. + * @return The indicated constructor, or null if an exception was caught. 
+ **/ + public static Constructor getConstructor(String name, Class[] paramTypes, boolean exit) { + Class clazz = ClassUtils.getClass(name); + Constructor constructor = null; + + try { + constructor = clazz.getConstructor(paramTypes); + } catch (Exception e) { + if (exit) { + System.err.print("Can't get the constructor with parameters (" + + paramTypes[0].getName()); + for (int i = 1; i < paramTypes.length; ++i) + System.err.print(", " + paramTypes[i].getName()); + System.err.println(") for '" + name + "':"); + e.printStackTrace(); + System.exit(1); + } + } + + return constructor; } - return classifier; - } - - - /** - * Retrieve a Learner by name using the no-argument - * constructor. If there is any exception thrown during retrieval, the - * program will print an error message to STDERR and terminate - * via System.exit(1). - * - * @param name The fully qualified name of the class. - * @return An instance of the learner. - **/ - public static Learner getLearner(String name) { - return ClassUtils.getLearner(name, true); - } - - - /** - * Retrieve a Learner by name using the no-argument - * constructor. - * - * @param name The fully qualified name of the class. - * @param exit Whether or not to System.exit(1) on an - * exception. - * @return An instance of the learner, or null if an exception - * was caught. - **/ - public static Learner getLearner(String name, boolean exit) { - Class clazz = ClassUtils.getClass(name); - Learner learner = null; - - try { learner = (Learner) clazz.newInstance(); } - catch (Exception e) { - if (exit) { - System.err.println("Can't instantiate '" + name + "':"); - e.printStackTrace(); - System.exit(1); - } + + /** + * Retrieve a Classifier by name using the no-argument constructor. If there is any + * exception thrown during retrieval, the program will print an error message to + * STDERR and terminate via System.exit(1). + * + * @param name The fully qualified name of the class. + * @return An instance of the classifier. 
+ **/ + public static Classifier getClassifier(String name) { + return ClassUtils.getClassifier(name, true); } - return learner; - } - - - /** - * Retrieve a Learner by name using a constructor with - * arguments. If there is any exception thrown during retrieval, the - * program will print an error message to STDERR and terminate - * via System.exit(1). - * - * @param name The fully qualified name of the class. - * @param paramTypes The Classes representing the types of the - * constructor's parameters. - * @param arguments The arguments to send to the constructor. - * @return An instance of the learner. - **/ - public static Learner getLearner(String name, Class[] paramTypes, - Object[] arguments) { - return ClassUtils.getLearner(name, paramTypes, arguments, true); - } - - - /** - * Retrieve a Learner by name using a constructor with - * arguments. - * - * @param name The fully qualified name of the class. - * @param paramTypes The Classes representing the types of the - * constructor's parameters. - * @param arguments The arguments to send to the constructor. - * @param exit Whether or not to System.exit(1) on an - * exception. - * @return An instance of the learner, or null if an exception - * was caught. - **/ - public static Learner getLearner(String name, Class[] paramTypes, - Object[] arguments, boolean exit) { - Constructor constructor = ClassUtils.getConstructor(name, paramTypes); - Learner learner = null; - - try { learner = (Learner) constructor.newInstance(arguments); } - catch (InvocationTargetException e) { - if (exit) { - Throwable cause = e.getCause(); - System.err.println("Can't instantiate '" + name + "':"); - cause.printStackTrace(); - System.exit(1); - } + + /** + * Retrieve a Classifier by name using the no-argument constructor. + * + * @param name The fully qualified name of the class. + * @param exit Whether or not to System.exit(1) on an exception. + * @return An instance of the classifier, or null if an exception was caught. 
+ **/ + public static Classifier getClassifier(String name, boolean exit) { + Class clazz = ClassUtils.getClass(name); + Classifier classifier = null; + + try { + classifier = (Classifier) clazz.newInstance(); + } catch (Exception e) { + if (exit) { + System.err.println("Can't instantiate '" + name + "':"); + e.printStackTrace(); + System.exit(1); + } + } + + return classifier; } - catch (Exception e) { - if (exit) { - System.err.println("Can't instantiate '" + name + "':"); - e.printStackTrace(); - System.exit(1); - } + + + /** + * Retrieve a Classifier by name using a constructor with arguments. If there is + * any exception thrown during retrieval, the program will print an error message to + * STDERR and terminate via System.exit(1). + * + * @param name The fully qualified name of the class. + * @param paramTypes The Classes representing the types of the constructor's + * parameters. + * @param arguments The arguments to send to the constructor. + * @return An instance of the classifier. + **/ + public static Classifier getClassifier(String name, Class[] paramTypes, Object[] arguments) { + return ClassUtils.getClassifier(name, paramTypes, arguments, true); } - return learner; - } - - - /** - * Retrieve a Parser by name using the no-argument - * constructor. If there is any exception thrown during retrieval, the - * program will print an error message to STDERR and terminate - * via System.exit(1). - * - * @param name The fully qualified name of the class. - * @return An instance of the parser. - **/ - public static Parser getParser(String name) { - return ClassUtils.getParser(name, true); - } - - - /** - * Retrieve a Parser by name using the no-argument - * constructor. - * - * @param name The fully qualified name of the class. - * @param exit Whether or not to System.exit(1) on an - * exception. - * @return An instance of the parser, or null if an exception - * was caught. 
- **/ - public static Parser getParser(String name, boolean exit) { - Class clazz = ClassUtils.getClass(name); - Parser parser = null; - - try { parser = (Parser) clazz.newInstance(); } - catch (Exception e) { - if (exit) { - System.err.println("Can't instantiate '" + name + "':"); - e.printStackTrace(); - System.exit(1); - } + + /** + * Retrieve a Classifier by name using a constructor with arguments. + * + * @param name The fully qualified name of the class. + * @param paramTypes The Classes representing the types of the constructor's + * parameters. + * @param arguments The arguments to send to the constructor. + * @param exit Whether or not to System.exit(1) on an exception. + * @return An instance of the classifier, or null if an exception was caught. + **/ + public static Classifier getClassifier(String name, Class[] paramTypes, Object[] arguments, + boolean exit) { + Constructor constructor = ClassUtils.getConstructor(name, paramTypes); + Classifier classifier = null; + + try { + classifier = (Classifier) constructor.newInstance(arguments); + } catch (InvocationTargetException e) { + if (exit) { + Throwable cause = e.getCause(); + System.err.println("Can't instantiate '" + name + "':"); + cause.printStackTrace(); + System.exit(1); + } + } catch (Exception e) { + if (exit) { + System.err.println("Can't instantiate '" + name + "':"); + e.printStackTrace(); + System.exit(1); + } + } + + return classifier; } - return parser; - } - - - /** - * Retrieve a Parser by name using a constructor with - * arguments. If there is any exception thrown during retrieval, the - * program will print an error message to STDERR and terminate - * via System.exit(1). - * - * @param name The fully qualified name of the class. - * @param paramTypes The Classes representing the types of the - * constructor's parameters. - * @param arguments The arguments to send to the constructor. - * @return An instance of the parser. 
- **/ - public static Parser getParser(String name, Class[] paramTypes, - Object[] arguments) { - return ClassUtils.getParser(name, paramTypes, arguments, true); - } - - - /** - * Retrieve a Parser by name using a constructor with - * arguments. - * - * @param name The fully qualified name of the class. - * @param paramTypes The Classes representing the types of the - * constructor's parameters. - * @param arguments The arguments to send to the constructor. - * @param exit Whether or not to System.exit(1) on an - * exception. - * @return An instance of the parser, or null if an exception - * was caught. - **/ - public static Parser getParser(String name, Class[] paramTypes, - Object[] arguments, boolean exit) { - Constructor constructor = ClassUtils.getConstructor(name, paramTypes); - Parser parser = null; - - try { parser = (Parser) constructor.newInstance(arguments); } - catch (InvocationTargetException e) { - if (exit) { - Throwable cause = e.getCause(); - System.err.println("Can't instantiate '" + name + "':"); - cause.printStackTrace(); - System.exit(1); - } + + /** + * Retrieve a Learner by name using the no-argument constructor. If there is any + * exception thrown during retrieval, the program will print an error message to + * STDERR and terminate via System.exit(1). + * + * @param name The fully qualified name of the class. + * @return An instance of the learner. + **/ + public static Learner getLearner(String name) { + return ClassUtils.getLearner(name, true); } - catch (Exception e) { - if (exit) { - System.err.println("Can't instantiate '" + name + "':"); - e.printStackTrace(); - System.exit(1); - } + + + /** + * Retrieve a Learner by name using the no-argument constructor. + * + * @param name The fully qualified name of the class. + * @param exit Whether or not to System.exit(1) on an exception. + * @return An instance of the learner, or null if an exception was caught. 
+ **/ + public static Learner getLearner(String name, boolean exit) { + Class clazz = ClassUtils.getClass(name); + Learner learner = null; + + try { + learner = (Learner) clazz.newInstance(); + } catch (Exception e) { + if (exit) { + System.err.println("Can't instantiate '" + name + "':"); + e.printStackTrace(); + System.exit(1); + } + } + + return learner; } - return parser; - } -} + /** + * Retrieve a Learner by name using a constructor with arguments. If there is any + * exception thrown during retrieval, the program will print an error message to + * STDERR and terminate via System.exit(1). + * + * @param name The fully qualified name of the class. + * @param paramTypes The Classes representing the types of the constructor's + * parameters. + * @param arguments The arguments to send to the constructor. + * @return An instance of the learner. + **/ + public static Learner getLearner(String name, Class[] paramTypes, Object[] arguments) { + return ClassUtils.getLearner(name, paramTypes, arguments, true); + } + + + /** + * Retrieve a Learner by name using a constructor with arguments. + * + * @param name The fully qualified name of the class. + * @param paramTypes The Classes representing the types of the constructor's + * parameters. + * @param arguments The arguments to send to the constructor. + * @param exit Whether or not to System.exit(1) on an exception. + * @return An instance of the learner, or null if an exception was caught. 
+ **/ + public static Learner getLearner(String name, Class[] paramTypes, Object[] arguments, + boolean exit) { + Constructor constructor = ClassUtils.getConstructor(name, paramTypes); + Learner learner = null; + + try { + learner = (Learner) constructor.newInstance(arguments); + } catch (InvocationTargetException e) { + if (exit) { + Throwable cause = e.getCause(); + System.err.println("Can't instantiate '" + name + "':"); + cause.printStackTrace(); + System.exit(1); + } + } catch (Exception e) { + if (exit) { + System.err.println("Can't instantiate '" + name + "':"); + e.printStackTrace(); + System.exit(1); + } + } + + return learner; + } + + + /** + * Retrieve a Parser by name using the no-argument constructor. If there is any + * exception thrown during retrieval, the program will print an error message to + * STDERR and terminate via System.exit(1). + * + * @param name The fully qualified name of the class. + * @return An instance of the parser. + **/ + public static Parser getParser(String name) { + return ClassUtils.getParser(name, true); + } + + + /** + * Retrieve a Parser by name using the no-argument constructor. + * + * @param name The fully qualified name of the class. + * @param exit Whether or not to System.exit(1) on an exception. + * @return An instance of the parser, or null if an exception was caught. + **/ + public static Parser getParser(String name, boolean exit) { + Class clazz = ClassUtils.getClass(name); + Parser parser = null; + + try { + parser = (Parser) clazz.newInstance(); + } catch (Exception e) { + if (exit) { + System.err.println("Can't instantiate '" + name + "':"); + e.printStackTrace(); + System.exit(1); + } + } + + return parser; + } + + + /** + * Retrieve a Parser by name using a constructor with arguments. If there is any + * exception thrown during retrieval, the program will print an error message to + * STDERR and terminate via System.exit(1). + * + * @param name The fully qualified name of the class. 
+ * @param paramTypes The Classes representing the types of the constructor's + * parameters. + * @param arguments The arguments to send to the constructor. + * @return An instance of the parser. + **/ + public static Parser getParser(String name, Class[] paramTypes, Object[] arguments) { + return ClassUtils.getParser(name, paramTypes, arguments, true); + } + + + /** + * Retrieve a Parser by name using a constructor with arguments. + * + * @param name The fully qualified name of the class. + * @param paramTypes The Classes representing the types of the constructor's + * parameters. + * @param arguments The arguments to send to the constructor. + * @param exit Whether or not to System.exit(1) on an exception. + * @return An instance of the parser, or null if an exception was caught. + **/ + public static Parser getParser(String name, Class[] paramTypes, Object[] arguments, boolean exit) { + Constructor constructor = ClassUtils.getConstructor(name, paramTypes); + Parser parser = null; + + try { + parser = (Parser) constructor.newInstance(arguments); + } catch (InvocationTargetException e) { + if (exit) { + Throwable cause = e.getCause(); + System.err.println("Can't instantiate '" + name + "':"); + cause.printStackTrace(); + System.exit(1); + } + } catch (Exception e) { + if (exit) { + System.err.println("Can't instantiate '" + name + "':"); + e.printStackTrace(); + System.exit(1); + } + } + + return parser; + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/DVector.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/DVector.java deleted file mode 100644 index f0684ffa..00000000 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/DVector.java +++ /dev/null @@ -1,324 +0,0 @@ -/** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. 
- * Copyright (c) 2016 - * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign - * http://cogcomp.cs.illinois.edu/ - */ -package edu.illinois.cs.cogcomp.lbjava.util; - -import java.util.Arrays; - - -/** - * This class implements an expandable array of doubles that - * should be faster than java's Vector. - * - * @author Nick Rizzolo - **/ -public class DVector implements Cloneable, java.io.Serializable -{ - /** The default capacity of a vector upon first construction. */ - protected static final int defaultCapacity = 8; - - /** The elements of the vector. */ - protected double[] vector; - /** The number of elements in the vector. */ - protected int size; - - - /** - * Constructs a new vector with capacity equal to {@link #defaultCapacity}. - **/ - public DVector() { this(defaultCapacity); } - - /** - * Constructs a new vector with the specified capacity. - * - * @param c The initial capacity for the new vector. - **/ - public DVector(int c) { vector = new double[Math.max(defaultCapacity, c)]; } - - /** - * Constructs a new vector using the specified array as a starting point. - * - * @param v The initial array. - **/ - public DVector(double[] v) { - if (v.length == 0) vector = new double[defaultCapacity]; - else { - vector = v; - size = vector.length; - } - } - - - /** - * Throws an exception when the specified index is negative. - * - * @param i The index. - * @throws ArrayIndexOutOfBoundsException When i < 0. - **/ - protected void boundsCheck(int i) { - if (i < 0) - throw - new ArrayIndexOutOfBoundsException( - "Attempted to access negative index of DVector."); - } - - - /** - * Retrieves the value stored at the specified index of the vector, or 0 if - * the vector isn't long enough. - * - * @param i The index of the value to retrieve. - * @return The retrieved value. - * @throws ArrayIndexOutOfBoundsException When i < 0. 
- **/ - public double get(int i) { return get(i, 0); } - - /** - * Retrieves the value stored at the specified index of the vector or - * d if the vector isn't long enough. - * - * @param i The index of the value to retrieve. - * @param d The default value. - * @return The retrieved value. - * @throws ArrayIndexOutOfBoundsException When i < 0. - **/ - public double get(int i, double d) { - boundsCheck(i); - return i < size ? vector[i] : d; - } - - - /** - * Sets the value at the specified index to the given value. - * - * @param i The index of the value to set. - * @param v The new value at that index. - * @return The value that used to be at index i. - * @throws ArrayIndexOutOfBoundsException When i < 0. - **/ - public double set(int i, double v) { return set(i, v, 0); } - - /** - * Sets the value at the specified index to the given value. If the given - * index is greater than the vector's current size, the vector will expand - * to accomodate it. - * - * @param i The index of the value to set. - * @param v The new value at that index. - * @param d The default value for other new indexes that might get - * created. - * @return The value that used to be at index i. - * @throws ArrayIndexOutOfBoundsException When i < 0. - **/ - public double set(int i, double v, double d) { - boundsCheck(i); - expandFor(i, d); - double result = vector[i]; - vector[i] = v; - return result; - } - - - /** - * Adds the specified value on to the end of the vector, expanding its - * capacity as necessary. - * - * @param v The new value to appear last in the vector. - **/ - public void add(double v) { - expandFor(size, 0); - vector[size - 1] = v; - } - - - /** - * Adds all the values in the given vector to the end of this vector, - * expanding its capacity as necessary. - * - * @param v The new vector of values to appear at the end of this vector. 
- **/ - public void addAll(DVector v) { - expandFor(size + v.size - 1, 0); - System.arraycopy(v.vector, 0, vector, size - v.size, v.size); - } - - - /** - * Removes the element at the specified index of the vector. - * - * @param i The index of the element to remove. - * @return The removed element. - **/ - public double remove(int i) { - boundsCheck(i); - if (i >= size) - throw - new ArrayIndexOutOfBoundsException( - "LBJ: DVector: Can't remove element at index " + i - + " as it is larger than the size (" + size + ")"); - double result = vector[i]; - for (int j = i + 1; j < size; ++j) - vector[j - 1] = vector[j]; - --size; - return result; - } - - - /** Returns the value of {@link #size}. */ - public int size() { return size; } - - - /** Returns the value of the maximum element in the vector. */ - public double max() { - double result = -Double.MAX_VALUE; - for (int i = 0; i < size; ++i) if (vector[i] > result) result = vector[i]; - return result; - } - - - /** Sorts this vector in increasing order. */ - public void sort() { Arrays.sort(vector, 0, size); } - - - /** - * Searches this vector for the specified value using the binary search - * algorithm. This vector must be sorted (as by the - * {@link #sort()} method) prior to making this call. If it is not sorted, - * the results are undefined. If this vector contains multiple elements - * with the specified value, there is no guarantee which one will be found. - * - * @param v The value to be searched for. - * @return The index of v, if it is contained in the vector; - * otherwise, (-(insertion point) - 1). The - * insertion point is defined as the point at which - * v would be inserted into the vector: the index of - * the first element greater than v, or the size of - * the vector if all elements in the list are less than - * v. Note that this guarantees that the return value - * will be >= 0 if and only if v is found. 
- **/ - public int binarySearch(double v) { - int a = 0, b = size; - - while (b != a) { - int m = (a + b) >> 1; - if (vector[m] > v) b = m; - else if (vector[m] < v) a = m + 1; - else return m; - } - - return -a - 1; - } - - - /** - * Makes sure the capacity and size of the vector can accomodate the - * given index. The capacity of the vector is simply doubled until it can - * accomodate its size. - * - * @param index The index where a new value will be stored. - * @param d The default value for other new indexes that might get - * created. - **/ - protected void expandFor(int index, double d) { - if (index < size) return; - int oldSize = size, capacity = vector.length; - size = index + 1; - if (capacity >= size) return; - while (capacity < size) capacity *= 2; - double[] t = new double[capacity]; - System.arraycopy(vector, 0, t, 0, oldSize); - if (d != 0) Arrays.fill(t, oldSize, size, d); - vector = t; - } - - - /** - * Returns a new array of doubles containing the same data as - * this vector. - **/ - public double[] toArray() { - double[] result = new double[size]; - System.arraycopy(vector, 0, result, 0, size); - return result; - } - - - /** - * Two DVectors are considered equal if they contain the same - * elements and have the same size. - **/ - public boolean equals(Object o) { - if (!(o instanceof DVector)) return false; - DVector v = (DVector) o; - return size == v.size && Arrays.equals(vector, v.vector); - } - - - /** A hash code based on the hash code of {@link #vector}. */ - public int hashCode() { return vector.hashCode(); } - - - /** Returns a deep clone of this vector. */ - public Object clone() { - DVector clone = null; - - try { clone = (DVector) super.clone(); } - catch (Exception e) { - System.err.println("Error cloning " + getClass().getName() + ":"); - e.printStackTrace(); - System.exit(1); - } - - clone.vector = (double[]) vector.clone(); - return clone; - } - - - /** Returns a text representation of this vector. 
*/ - public String toString() { - StringBuffer result = new StringBuffer(); - result.append("["); - for (int i = 0; i < size; ++i) { - result.append(vector[i]); - if (i + 1 < size) result.append(", "); - } - result.append("]"); - return result.toString(); - } - - - /** - * Writes a binary representation of the vector. - * - * @param out The output stream. - **/ - public void write(ExceptionlessOutputStream out) { - out.writeInt(size); - for (int i = 0; i < size; ++i) out.writeDouble(vector[i]); - } - - - /** - * Reads the binary representation of a vector from the specified stream, - * overwriting the data in this object. - * - * @param in The input stream. - **/ - public void read(ExceptionlessInputStream in) { - size = in.readInt(); - if (size == 0) vector = new double[defaultCapacity]; - else { - vector = new double[size]; - for (int i = 0; i < size; ++i) vector[i] = in.readDouble(); - } - } -} - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/DVector2D.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/DVector2D.java deleted file mode 100644 index 42b1f480..00000000 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/DVector2D.java +++ /dev/null @@ -1,443 +0,0 @@ -/** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 - * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign - * http://cogcomp.cs.illinois.edu/ - */ -package edu.illinois.cs.cogcomp.lbjava.util; - -import java.util.Arrays; - - -/** - * This class implements an expandable two dimensional array of doubles that - * should be faster than java's Vector. - * - * @author Nick Rizzolo - **/ -public class DVector2D implements Cloneable, java.io.Serializable -{ - /** The default capacity of the first dimension of this 2D vector. 
*/ - protected static final int defaultCapacity1 = 8; - /** The default capacity of the second dimension of this 2D vector. */ - protected static final int defaultDefaultCapacity2 = 8; - - /** The elements of the vector. */ - protected double[][] vector; - /** The sizes of each vector in the second dimension. */ - protected IVector sizes; - /** The capacity of new vectors created in the second dimension. */ - protected int defaultCapacity2; - - - /** - * Constructs a new vector with default capacities - * {@link #defaultCapacity1} and {@link #defaultCapacity2}. - **/ - public DVector2D() { this(defaultCapacity1, defaultDefaultCapacity2); } - - /** - * Constructs a new vector with the specified capacities. - * - * @param c1 The initial capacity for the first dimension of the new - * vector. - * @param c2 The initial capacity for the second dimension of the new - * vector. - **/ - public DVector2D(int c1, int c2) { - defaultCapacity2 = Math.max(defaultDefaultCapacity2, c2); - vector = new double[Math.max(defaultCapacity1, c1)][defaultCapacity2]; - sizes = new IVector(c1); - } - - /** - * Constructs a new vector using the specified array as a starting point. - * - * @param v The initial array. - **/ - public DVector2D(double[][] v) { - defaultCapacity2 = defaultDefaultCapacity2; - - if (v.length == 0) { - vector = new double[defaultCapacity1][defaultCapacity2]; - sizes = new IVector(defaultCapacity1); - } - else { - vector = v; - sizes = new IVector(v.length); - - for (int i = 0; i < v.length; ++i) { - sizes.set(i, v[i].length); - defaultCapacity2 = Math.max(defaultCapacity2, v[i].length); - } - - for (int i = 0; i < v.length; ++i) if (v[i].length == 0) - v[i] = new double[defaultCapacity2]; - } - } - - - /** - * Throws an exception when either of the specified indexes are negative. - * - * @param i1 The index in the first dimension. - * @param i2 The index in the second dimension. - * @throws ArrayIndexOutOfBoundsException - * When i1 or i2 < 0. 
- **/ - protected void boundsCheck(int i1, int i2) { - if (i1 < 0 || i2 < 0) - throw - new ArrayIndexOutOfBoundsException( - "Attempted to access negative index of DVector2D."); - } - - - /** - * Retrieves the value stored at the specified index of the vector. - * - * @param i1 The index in the first dimension. - * @param i2 The index in the second dimension. - * @return The retrieved value. - * @throws ArrayIndexOutOfBoundsException - * When i1 or i2 < 0. - **/ - public double get(int i1, int i2) { return get(i1, i2, 0); } - - /** - * Retrieves the value stored at the specified index of the vector. - * - * @param i1 The index in the first dimension. - * @param i2 The index in the second dimension. - * @param d The default value. - * @return The retrieved value. - * @throws ArrayIndexOutOfBoundsException - * When i1 or i2 < 0. - **/ - public double get(int i1, int i2, double d) { - boundsCheck(i1, i2); - - // Because of the way IVector works, the only way i2 < sizes.get(i1) will - // be true is if i1 is in fact a valid index for the first dimension. - return i2 < sizes.get(i1) ? vector[i1][i2] : d; - } - - - /** - * Sets the value at the specified index to the given value. - * - * @param i1 The index in the first dimension. - * @param i2 The index in the second dimension. - * @param v The new value at that index. - * @return The value that used to be at index i. - * @throws ArrayIndexOutOfBoundsException - * When i1 or i2 < 0. - **/ - public double set(int i1, int i2, double v) { return set(i1, i2, v, 0); } - - /** - * Sets the value at the specified index to the given value. - * - * @param i1 The index in the first dimension. - * @param i2 The index in the second dimension. - * @param v The new value at that index. - * @param d The default value for other new indexes that might get - * created. - * @return The value that used to be at index i. - * @throws ArrayIndexOutOfBoundsException - * When i1 or i2 < 0. 
- **/ - public double set(int i1, int i2, double v, double d) { - boundsCheck(i1, i2); - expandFor(i1, i2, d); - double result = vector[i1][i2]; - vector[i1][i2] = v; - return result; - } - - - /** - * Removes the row at the specified index. - * - * @param i The index of the row to remove. - * @return The removed row. - **/ - public double[] remove(int i) { - boundsCheck(i, 0); - int rows = sizes.size(); - if (i >= rows) - throw - new ArrayIndexOutOfBoundsException( - "LBJ: DVector2D: Can't remove row at index " + i - + " as it is larger than the size (" + rows + ")"); - double[] result = vector[i]; - for (int j = i + 1; j < rows; ++j) - vector[j - 1] = vector[j]; - vector[rows - 1] = null; - sizes.remove(i); - return result; - } - - - /** - * Removes the element at the specified index of the vector. - * - * @param i1 The row index of the element to remove. - * @param i2 The column index of the element to remove. - * @return The removed element. - **/ - public double remove(int i1, int i2) { - boundsCheck(i1, i2); - int rows = sizes.size(), columns = sizes.get(i1); - if (i1 >= rows || i2 >= columns) - throw - new ArrayIndexOutOfBoundsException( - "LBJ: DVector2D: Can't remove index [" + i1 + ", " + i2 - + "] as it is out of bounds (" + rows + ", " + columns + ")"); - double result = vector[i1][i2]; - for (int j = i2 + 1; j < columns; ++j) - vector[i1][j - 1] = vector[i1][j]; - sizes.set(i1, columns - 1); - return result; - } - - - /** Returns the size of the first dimension of this vector.. */ - public int size() { return sizes.size(); } - /** - * Returns the size associated with the specified vector. - * - * @param i The index of the vector whose size will be returned. - **/ - public int size(int i) { return sizes.get(i); } - - - /** - * Returns the value of the maximum element in the - * ith vector. - * - * @param i An index into the first dimension of this vector. 
- **/ - public double max(int i) { - if (i < 0 || i >= sizes.size()) - throw - new ArrayIndexOutOfBoundsException( - "Attempted to access negative index of DVector2D."); - double result = -Double.MAX_VALUE; - int size = sizes.get(i); - for (int j = 0; j < size; ++j) - if (vector[i][j] > result) result = vector[i][j]; - return result; - } - - - /** - * Sorts the selected row in increasing order. - * - * @param i The row to sort. - **/ - public void sort(int i) { Arrays.sort(vector[i], 0, sizes.get(i)); } - - - /** - * Searches the selected row vector for the specified value using the - * binary search algorithm. The vector must be sorted (as - * by the {@link #sort(int)} method) prior to making this call. If it is - * not sorted, the results are undefined. If the vector contains multiple - * elements with the specified value, there is no guarantee which one will - * be found. - * - * @param i The selected row. - * @param v The value to be searched for. - * @return The index of v, if it is contained in the vector; - * otherwise, (-(insertion point) - 1). The - * insertion point is defined as the point at which - * v would be inserted into the vector: the index of - * the first element greater than v, or the size of - * the vector if all elements in the vector are less than - * v. Note that this guarantees that the return value - * will be >= 0 if and only if v is found. - **/ - public int binarySearch(int i, int v) { - int a = 0, b = sizes.get(i); - - while (b != a) { - int m = (a + b) >> 1; - if (vector[i][m] > v) b = m; - else if (vector[i][m] < v) a = m + 1; - else return m; - } - - return -a - 1; - } - - - /** - * Makes sure the capacities and sizes of the vectors can accomodate the - * given indexes. The capacities of the vectors are simply doubled until - * they can accomodate their sizes. - * - * @param i1 The index in the first dimension. - * @param i2 The index in the second dimension. 
- * @param d The default value for other new indexes that might get - * created. - **/ - protected void expandFor(int i1, int i2, double d) { - if (i1 >= sizes.size()) { - int oldSize = sizes.size(), capacity = vector.length; - sizes.set(i1, 0); - if (capacity < sizes.size()) { - while (capacity < sizes.size()) capacity *= 2; - double[][] t = new double[capacity][]; - System.arraycopy(vector, 0, t, 0, oldSize); - vector = t; - } - for (int i = oldSize; i < sizes.size(); ++i) - vector[i] = new double[defaultCapacity2]; - } - - if (i2 < sizes.get(i1)) return; - int oldSize = sizes.get(i1), capacity = vector[i1].length; - sizes.set(i1, i2 + 1); - if (capacity >= sizes.get(i1)) return; - while (capacity < sizes.get(i1)) capacity *= 2; - double[] t = new double[capacity]; - System.arraycopy(vector[i1], 0, t, 0, oldSize); - if (d != 0) Arrays.fill(t, oldSize, sizes.get(i1), d); - vector[i1] = t; - } - - - /** - * Returns a new 2D array of doubles containing the same data - * as this vector. - **/ - public double[][] toArray() { - double[][] result = new double[sizes.size()][]; - for (int i = 0; i < result.length; ++i) { - result[i] = new double[sizes.get(i)]; - System.arraycopy(vector[i], 0, result[i], 0, result[i].length); - } - return result; - } - - - /** - * Two DVector2Ds are considered equal if they contain all the - * same elements, sizes, and capacities. - **/ - public boolean equals(Object o) { - if (!(o instanceof DVector2D)) return false; - DVector2D v = (DVector2D) o; - if (vector.length != v.vector.length || !sizes.equals(v.sizes)) - return false; - for (int i = 0; i < vector.length; ++i) - if (!Arrays.equals(vector[i], v.vector[i])) return false; - return true; - } - - - /** - * A hash code based on the hash codes of the constituents of - * {@link #vector}. 
- **/ - public int hashCode() { - int result = vector.hashCode(); - for (int i = 0; i < vector.length; ++i) - result = 17 * result + vector[i].hashCode(); - return result; - } - - - /** - * Returns a clone of this vector that is one level deep; in particular, - * the objects in the vector themselves are not cloned, but the underlying - * array is. - **/ - public Object clone() { - DVector2D clone = null; - - try { clone = (DVector2D) super.clone(); } - catch (Exception e) { - System.err.println("Error cloning " + getClass().getName() + ":"); - e.printStackTrace(); - System.exit(1); - } - - clone.vector = (double[][]) vector.clone(); - for (int i = 0; i < vector.length; ++i) if (clone.vector[i] != null) - clone.vector[i] = (double[]) vector[i].clone(); - return clone; - } - - - /** Returns a text representation of this vector. */ - public String toString() { - StringBuffer result = new StringBuffer(); - result.append("["); - int s1 = sizes.size(); - - for (int i = 0; i < s1; ++i) { - int s2 = sizes.get(i); - result.append("["); - - for (int j = 0; j < s2; ++j) { - result.append(vector[i][j]); - if (j + 1 < s2) result.append(", "); - } - - result.append("]"); - if (i + 1 < s1) result.append(",\n "); - } - - result.append("]"); - return result.toString(); - } - - - /** - * Writes a binary representation of the vector. - * - * @param out The output stream. - **/ - public void write(ExceptionlessOutputStream out) { - sizes.write(out); - for (int i = 0; i < sizes.size(); ++i) - for (int j = 0; j < sizes.get(i); ++j) - out.writeDouble(vector[i][j]); - out.writeInt(defaultCapacity2); - } - - - /** - * Reads the binary representation of a vector from the specified stream, - * overwriting the data in this object. - * - * @param in The input stream. 
- **/ - public void read(ExceptionlessInputStream in) { - sizes = new IVector(); - sizes.read(in); - - if (sizes.size() == 0) { - defaultCapacity2 = defaultDefaultCapacity2; - vector = new double[defaultCapacity1][defaultCapacity2]; - } - else { - vector = new double[sizes.size()][]; - - for (int i = 0; i < vector.length; ++i) { - vector[i] = new double[sizes.get(i)]; - for (int j = 0; j < vector[i].length; ++j) - vector[i][j] = in.readDouble(); - } - - defaultCapacity2 = in.readInt(); - } - } -} - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/ExceptionlessInputStream.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/ExceptionlessInputStream.java deleted file mode 100644 index 3387ef5b..00000000 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/ExceptionlessInputStream.java +++ /dev/null @@ -1,520 +0,0 @@ -/** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 - * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign - * http://cogcomp.cs.illinois.edu/ - */ -package edu.illinois.cs.cogcomp.lbjava.util; - -import java.io.BufferedInputStream; -import java.io.DataInputStream; -import java.io.FileInputStream; -import java.io.FilterInputStream; -import java.io.InputStream; -import java.net.URL; -import java.util.zip.ZipFile; -import java.util.zip.ZipInputStream; - - -/** - * This class intends to operate just as a DataInputStream with - * some additional convenience methods and built-in exception handling. - * - * @author Nick Rizzolo - **/ -public class ExceptionlessInputStream extends FilterInputStream -{ - /** The entry inside any compressed file has this name. */ - public static final String zipEntryName = "LBJFile"; - - /** This buffer is used internally by {@link #readUTF(int)}. 
*/ - private byte[] buffer = null; - /** This buffer is used internally by {@link #readUTF(int)}. */ - private char[] chars = null; - /** The underlying data input stream. */ - private DataInputStream dis; - - - /** - * Opens a buffered (and uncompressed) stream for reading from the - * specified file. - * - * @param filename The file to read from. - * @return The newly opened stream. - **/ - public static ExceptionlessInputStream openBufferedStream(String filename) { - ExceptionlessInputStream eis = null; - - try { - eis = - new ExceptionlessInputStream( - new BufferedInputStream( - new FileInputStream(filename))); - } - catch (Exception e) { - System.err.println("Can't open '" + filename + "' for input:"); - e.printStackTrace(); - System.exit(1); - } - - return eis; - } - - - /** - * Opens a compressed stream for reading from the specified file. - * - * @param filename The file to read from. - * @return The newly opened stream. - **/ - public static ExceptionlessInputStream openCompressedStream(String filename) - { - ExceptionlessInputStream eis = null; - - try { - ZipFile zip = new ZipFile(filename); - eis = - new ExceptionlessInputStream( - new BufferedInputStream( - zip.getInputStream(zip.getEntry(zipEntryName)))); - } - catch (Exception e) { - System.err.println("Can't open '" + filename + "' for input:"); - e.printStackTrace(); - System.exit(1); - } - - return eis; - } - - - /** - * Opens a buffered (and uncompressed) stream for reading from the - * specified location. - * - * @param url The location to read from. - * @return The newly opened stream. 
- **/ - public static ExceptionlessInputStream openBufferedStream(URL url) { - ExceptionlessInputStream eis = null; - - try { - eis = - new ExceptionlessInputStream( - new BufferedInputStream(url.openStream())); - } - catch (Exception e) { - System.err.println("Can't open '" + url + "' for input:"); - e.printStackTrace(); - System.exit(1); - } - - return eis; - } - - - /** - * Opens a compressed stream for reading from the specified location. - * - * @param url The location to read from. - * @return The newly opened stream. - **/ - public static ExceptionlessInputStream openCompressedStream(URL url) { - if (url.getProtocol().equals("file")) - return openCompressedStream(url.getFile()); - - ExceptionlessInputStream eis = null; - - try { - ZipInputStream zip = new ZipInputStream(url.openStream()); - zip.getNextEntry(); - eis = new ExceptionlessInputStream(new BufferedInputStream(zip)); - } - catch (Exception e) { - System.err.println("Can't open '" + url + "' for input:"); - e.printStackTrace(); - System.exit(1); - } - - return eis; - } - - - /** - * Creates a new data input stream to read data from the specified - * underlying input stream. - * - * @param in The underlying input stream. - **/ - public ExceptionlessInputStream(InputStream in) { - super(new DataInputStream(in)); - dis = (DataInputStream) this.in; - } - - - /** - * Whenever an exception is caught, this method attempts to close the - * stream and exit the program. - * - * @param e The thrown exception. - **/ - private void handleException(Exception e) { - System.err.println("Can't read from input stream:"); - e.printStackTrace(); - close(); - System.exit(1); - } - - - /** - * Closes this input stream and releases any system resources associated - * with the stream. 
- **/ - public void close() { - try { dis.close(); } - catch (Exception e) { - System.err.println("Can't close input stream:"); - e.printStackTrace(); - System.exit(1); - } - } - - - /** - * Reads one input byte and returns true if that byte is - * nonzero, false if that byte is zero. This method is - * suitable for reading the byte written by - * {@link ExceptionlessOutputStream#writeBoolean(boolean)}. - * - * @return The boolean value read. - **/ - public boolean readBoolean() { - try { return dis.readBoolean(); } - catch (Exception e) { handleException(e); } - return false; - } - - - /** - * Reads and returns one input byte. The byte is treated as a signed value - * in the range -128 through 127, inclusive. - * This method is suitable for reading the byte written by - * {@link ExceptionlessOutputStream#writeByte(int)}. - * - * @return The 8-bit value read. - **/ - public byte readByte() { - try { return dis.readByte(); } - catch (Exception e) { handleException(e); } - return (byte) 0; - } - - - /** - * Reads and returns an array of bytes from the input. The input stream is - * expected to contain an integer representing the number of bytes in the - * array first, followed by the bytes in the array. Each byte is treated - * as a signed value in the range -128 through - * 127, inclusive. This method is suitable for reading the - * byte array written by - * {@link ExceptionlessOutputStream#writeBytes(byte[])}. - * - * @return The array of 8-bit bytes read. - **/ - public byte[] readBytes() { - try { - int n = dis.readInt(); - if (n < 0) return null; - byte[] result = new byte[n]; - for (int i = 0; i < n; ++i) - result[i] = dis.readByte(); - return result; - } - catch (Exception e) { handleException(e); } - return null; - } - - - /** - * Reads one input byte, zero-extends it to type int, and - * returns the result, which is therefore in the range 0 - * through 255. 
This method is suitable for reading the byte - * written by {@link ExceptionlessOutputStream#writeByte(int)} if the - * argument to writeByte was intended to be a value in the - * range 0 through 255. - * - * @return The unsigned 8-bit value read. - **/ - public int readUnsignedByte() { - try { return dis.readByte(); } - catch (Exception e) { handleException(e); } - return 0; - } - - - /** - * Reads two input bytes and returns a short value. Let - * a be the first byte read and b be the second - * byte. The value returned is: - *

(short)((a << 8) | (b & 0xff))
-    * 
- * This method is suitable for reading the bytes written by - * {@link ExceptionlessOutputStream#writeShort(int)}. - * - * @return The 16-bit value read. - **/ - public short readShort() { - try { return dis.readShort(); } - catch (Exception e) { handleException(e); } - return (short) 0; - } - - - /** - * Reads two input bytes and returns an int value in the range - * 0 through 65535. Let a be the - * first byte read and b be the second byte. The value - * returned is: - *

(((a & 0xff) << 8) | (b & 0xff))
-    * 
- * This method is suitable for reading the bytes written by - * {@link ExceptionlessOutputStream#writeShort(int)} if the argument to - * writeShort was intended to be a value in the range - * 0 through 65535. - * - * @return The unsigned 16-bit value read. - **/ - public int readUnsignedShort() { - try { return dis.readUnsignedShort(); } - catch (Exception e) { handleException(e); } - return 0; - } - - - /** - * Reads an input char and returns the char - * value. A Unicode char is made up of two bytes. Let - * a be the first byte read and b be the second - * byte. The value returned is: - *

(char)((a << 8) | (b & 0xff))
-    * 
- * This method is suitable for reading the bytes written by - * {@link ExceptionlessOutputStream#writeChar(int)}. - * - * @return The Unicode char read. - **/ - public char readChar() { - try { return dis.readChar(); } - catch (Exception e) { handleException(e); } - return 0; - } - - - /** - * Reads four input bytes and returns an int value. Let - * a be the first byte read, b be the second - * byte, c be the third byte, and d be the fourth - * byte. The value returned is: - *

-    * 
-    * (((a & 0xff) << 24) | ((b & 0xff) << 16) |
-    *  ((c & 0xff) << 8) | (d & 0xff))
-    * 
- * This method is suitable for reading the bytes written by - * {@link ExceptionlessOutputStream#writeInt(int)}. - * - * @return The int value read. - **/ - public int readInt() { - try { return dis.readInt(); } - catch (Exception e) { handleException(e); } - return 0; - } - - - /** - * Reads eight input bytes and returns a long value. Let - * a be the first byte read, b be the second - * byte, c be the third byte, d be the fourth - * byte, e be the fifth byte, f be the sixth - * byte, g be the seventh byte, and h be the - * eighth byte. The value returned is: - *

 
-    * (((long)(a & 0xff) << 56) |
-    *  ((long)(b & 0xff) << 48) |
-    *  ((long)(c & 0xff) << 40) |
-    *  ((long)(d & 0xff) << 32) |
-    *  ((long)(e & 0xff) << 24) |
-    *  ((long)(f & 0xff) << 16) |
-    *  ((long)(g & 0xff) <<  8) |
-    *  ((long)(h & 0xff)))
-    * 
- *

- * This method is suitable for reading the bytes written by - * {@link ExceptionlessOutputStream#writeLong(long)}. - * - * @return The long value read. - **/ - public long readLong() { - try { return dis.readLong(); } - catch (Exception e) { handleException(e); } - return 0; - } - - - /** - * Reads four input bytes and returns a float value. It does - * this by first constructing an int value in exactly the - * manner of the readInt method, then converting this - * int value to a float in exactly the manner of - * the method Float.intBitsToFloat. This method is suitable - * for reading the bytes written by - * {@link ExceptionlessOutputStream#writeFloat(float)}. - * - * @return The float value read. - **/ - public float readFloat() { - try { return dis.readFloat(); } - catch (Exception e) { handleException(e); } - return 0; - } - - - /** - * Reads eight input bytes and returns a double value. It - * does this by first constructing a long value in exactly the - * manner of the readlong method, then converting this - * long value to a double in exactly the manner - * of the method Double.longBitsToDouble. This method is - * suitable for reading the bytes written by - * {@link ExceptionlessOutputStream#writeDouble(double)}. - * - * @return The double value read. - **/ - public double readDouble() { - try { return dis.readDouble(); } - catch (Exception e) { handleException(e); } - return 0; - } - - - /** - * Reads a string from the underlying stream. - * - * @return The string. - **/ - public String readString() { - short utfLength = readShort(); - if (utfLength == -1) return null; - String result = readUTF(utfLength); - return result; - } - - - /** - * Reads in a string that has been encoded using a - * modified UTF-8 - * format. This method is suitable for reading the bytes written by - * {@link ExceptionlessOutputStream#writeUTF(String)}. - * - * @param utfLength The number of bytes expected in the encoding of the - * string to read. - * @return A Unicode string. 
- **/ - public String readUTF(int utfLength) { - if (buffer == null || buffer.length < utfLength) { - buffer = new byte[utfLength * 2]; - chars = new char[utfLength * 2]; - } - - int c, char2, char3; - int count = 0; - int charsCount = 0; - - try { dis.readFully(buffer, 0, utfLength); } - catch (Exception e) { handleException(e); } - - while (count < utfLength) { - c = (int) buffer[count] & 0xff; - if (c > 127) break; - count++; - chars[charsCount++] = (char) c; - } - - while (count < utfLength) { - c = (int) buffer[count] & 0xff; - switch (c >> 4) { - case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7: - /* 0xxxxxxx*/ - count++; - chars[charsCount++] = (char) c; - break; - case 12: case 13: - /* 110x xxxx 10xx xxxx*/ - count += 2; - if (count > utfLength) { - System.err.println( - "Error in UTF formatting: partial character at end"); - new Exception().printStackTrace(); - close(); - System.exit(1); - } - - char2 = (int) buffer[count - 1]; - if ((char2 & 0xC0) != 0x80) { - System.err.println( - "Error in UTF formatting: malformed input around byte " - + count); - new Exception().printStackTrace(); - close(); - System.exit(1); - } - - chars[charsCount++] = (char) (((c & 0x1F) << 6) | (char2 & 0x3F)); - break; - case 14: - /* 1110 xxxx 10xx xxxx 10xx xxxx */ - count += 3; - if (count > utfLength) { - System.err.println( - "Error in UTF formatting: partial character at end"); - new Exception().printStackTrace(); - close(); - System.exit(1); - } - - char2 = (int) buffer[count - 2]; - char3 = (int) buffer[count - 1]; - if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80)) { - System.err.println( - "Error in UTF formatting: malformed input around byte " - + (count - 1)); - new Exception().printStackTrace(); - close(); - System.exit(1); - } - - chars[charsCount++] = (char) (((c & 0x0F) << 12) | - ((char2 & 0x3F) << 6) | - ((char3 & 0x3F) << 0)); - break; - default: - /* 10xx xxxx, 1111 xxxx */ - System.err.println( - "Error in UTF formatting: malformed 
input around byte " - + count); - new Exception().printStackTrace(); - close(); - System.exit(1); - } - } - - // The number of chars produced may be less than utfLength - return new String(chars, 0, charsCount); - } -} - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/ExceptionlessOutputStream.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/ExceptionlessOutputStream.java deleted file mode 100644 index 5ca8a9dc..00000000 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/ExceptionlessOutputStream.java +++ /dev/null @@ -1,376 +0,0 @@ -/** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 - * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign - * http://cogcomp.cs.illinois.edu/ - */ -package edu.illinois.cs.cogcomp.lbjava.util; - -import java.io.BufferedOutputStream; -import java.io.DataOutputStream; -import java.io.FileOutputStream; -import java.io.FilterOutputStream; -import java.io.OutputStream; -import java.net.URL; -import java.util.zip.ZipEntry; -import java.util.zip.ZipOutputStream; - - -/** - * This class intends to operate just as a DataOutputStream with - * some additional convenience methods and built-in exception handling. - * - * @author Nick Rizzolo - **/ -public class ExceptionlessOutputStream extends FilterOutputStream -{ - /** This buffer is used internally by {@link #writeUTF(String)}. */ - private byte[] buffer = null; - /** The underlying data output stream. */ - private DataOutputStream dos; - - - /** - * Opens a buffered (and uncompressed) stream for writing to the specified - * file. - * - * @param filename The file to write to. - * @return The newly opened stream. 
- **/ - public static ExceptionlessOutputStream openBufferedStream(String filename) - { - ExceptionlessOutputStream eos = null; - - try { - eos = - new ExceptionlessOutputStream( - new BufferedOutputStream( - new FileOutputStream(filename))); - } - catch (Exception e) { - System.err.println("Can't open '" + filename + "' for output:"); - e.printStackTrace(); - System.exit(1); - } - - return eos; - } - - - /** - * Opens a compressed stream for writing to the specified file. - * - * @param filename The file to write to. - * @return The newly opened stream. - **/ - public static ExceptionlessOutputStream - openCompressedStream(String filename) { - ExceptionlessOutputStream eos = null; - - try { - ZipOutputStream zip = - new ZipOutputStream( - new FileOutputStream(filename)); - zip.putNextEntry(new ZipEntry(ExceptionlessInputStream.zipEntryName)); - eos = - new ExceptionlessOutputStream( - new BufferedOutputStream(zip)); - } - catch (Exception e) { - System.err.println("Can't open '" + filename + "' for output:"); - e.printStackTrace(); - System.exit(1); - } - - return eos; - } - - - /** - * Opens a buffered (and uncompressed) stream for writing to the specified - * file. If the specified URL does not reference a file on the local file - * system, an error message will be displayed, and the program will exit. - * - * @param url The location of the file to write to. - * @return The newly opened stream. - **/ - public static ExceptionlessOutputStream openBufferedStream(URL url) { - if (!url.getProtocol().equals("file")) { - System.err.println("Can't open URL with protocol '" + url.getProtocol() - + "' for output."); - new Exception().printStackTrace(); - System.exit(1); - } - - return openBufferedStream(url.getFile()); - } - - - /** - * Opens a buffered stream for writing to the specified file. If the - * specified URL does not reference a file on the local file system, an - * error message will be displayed, and the program will exit. 
- * - * @param url The location of the file to write to. - * @return The newly opened stream. - **/ - public static ExceptionlessOutputStream openCompressedStream(URL url) { - if (!url.getProtocol().equals("file")) { - System.err.println("Can't open URL with protocol '" + url.getProtocol() - + "' for output."); - new Exception().printStackTrace(); - System.exit(1); - } - - return openCompressedStream(url.getFile()); - } - - - /** - * Creates a new data output stream to write data to the specified - * underlying output stream. - * - * @param out The underlying output stream. - **/ - public ExceptionlessOutputStream(OutputStream out) { - super(new DataOutputStream(out)); - dos = (DataOutputStream) this.out; - } - - - /** - * Whenever an exception is caught, this method attempts to close the - * stream and exit the program. - * - * @param e The thrown exception. - **/ - private void handleException(Exception e) { - System.err.println("Can't write to output stream:"); - e.printStackTrace(); - close(); - System.exit(1); - } - - - /** - * Closes this output stream and releases any system resources associated - * with the stream. - **/ - public void close() { - try { dos.close(); } - catch (Exception e) { - System.err.println("Can't close output stream:"); - e.printStackTrace(); - System.exit(1); - } - } - - - /** - * Writes a boolean to the underlying output stream as - * a 1-byte value. The value true is written out as the - * value (byte)1; the value false is - * written out as the value (byte)0. - * - * @param v A boolean value to be written. - **/ - public void writeBoolean(boolean v) { - try { dos.writeBoolean(v); } - catch (Exception e) { handleException(e); } - } - - - /** - * Writes out a byte to the underlying output stream as - * a 1-byte value. - * - * @param v A byte value to be written. 
- **/ - public void writeByte(int v) { - try { dos.writeByte(v); } - catch (Exception e) { handleException(e); } - } - - - /** - * Writes an array of bytes to the underlying output stream. First, an - * integer representing the number of bytes in the array is written, - * followed by the bytes in the array. - * - * @param ba The array of 8-bit bytes. - **/ - public void writeBytes(byte[] ba) { - try { - if (ba == null) { - dos.writeInt(-1); - return; - } - - int n = ba.length; - dos.writeInt(n); - for (int i = 0; i < n; ++i) - dos.writeByte(ba[i]); - } - catch (Exception e) { handleException(e); } - } - - - /** - * Writes a short to the underlying output stream as two - * bytes, high byte first. - * - * @param v A short to be written. - **/ - public void writeShort(int v) { - try { dos.writeShort(v); } - catch (Exception e) { handleException(e); } - } - - - /** - * Writes a char to the underlying output stream as a - * 2-byte value, high byte first. - * - * @param v A char value to be written. - **/ - public void writeChar(int v) { - try { dos.writeChar(v); } - catch (Exception e) { handleException(e); } - } - - - /** - * Writes an int to the underlying output stream as four - * bytes, high byte first. - * - * @param v An int to be written. - **/ - public void writeInt(int v) { - try { dos.writeInt(v); } - catch (Exception e) { handleException(e); } - } - - - /** - * Writes a long to the underlying output stream as eight - * bytes, high byte first. - * - * @param v A long to be written. - */ - public void writeLong(long v) { - try { dos.writeLong(v); } - catch (Exception e) { handleException(e); } - } - - - /** - * Converts the float argument to an int using the - * floatToIntBits method in class Float, - * and then writes that int value to the underlying - * output stream as a 4-byte quantity, high byte first. - * - * @param v A float value to be written. 
- **/ - public void writeFloat(float v) { - try { dos.writeFloat(v); } - catch (Exception e) { handleException(e); } - } - - - /** - * Converts the double argument to a long using the - * doubleToLongBits method in class Double, - * and then writes that long value to the underlying - * output stream as an 8-byte quantity, high byte first. - * - * @param v A double value to be written. - **/ - public void writeDouble(double v) { - try { dos.writeDouble(v); } - catch (Exception e) { handleException(e); } - } - - - /** - * Writes a string to the underlying stream in such a way that it can be - * read back in. In particular, the length of the string is written first. - * - * @param s The string to write. - **/ - public void writeString(String s) { - if (s == null) writeShort((short) -1); - else writeUTF(s); - } - - - /** - * Writes a string using - * modified UTF-8 - * encoding in a machine-independent manner. - * - *

First, two bytes are written to out via the {@link #writeShort(int)} - * method giving the number of bytes to follow. This value is the number - * of bytes actually written out, not the length of the string. Following - * the length, each character of the string is output, in sequence, using - * the modified UTF-8 encoding for the character. - * - * @param str A string to be written. - * @return The number of bytes written out. - **/ - public int writeUTF(String str) { - int strlen = str.length(); - int utfLength = 0; - int c, count = 0; - - /* use charAt instead of copying String to char array */ - for (int i = 0; i < strlen; i++) { - c = str.charAt(i); - if ((c >= 0x0001) && (c <= 0x007F)) utfLength++; - else if (c > 0x07FF) utfLength += 3; - else utfLength += 2; - } - - if (utfLength > 32767) { - System.err.println( - "Error in ExceptionlessOutputStream: String too long"); - new Exception().printStackTrace(); - close(); - System.exit(1); - } - - if (buffer == null || buffer.length < utfLength) - buffer = new byte[utfLength * 2]; - - writeShort((short) utfLength); - - int i = 0; - for (i = 0; i < strlen; i++) { - c = str.charAt(i); - if (!((c >= 0x0001) && (c <= 0x007F))) break; - buffer[count++] = (byte) c; - } - - for (; i < strlen; i++) { - c = str.charAt(i); - if ((c >= 0x0001) && (c <= 0x007F)) buffer[count++] = (byte) c; - else if (c > 0x07FF) { - buffer[count++] = (byte) (0xE0 | ((c >> 12) & 0x0F)); - buffer[count++] = (byte) (0x80 | ((c >> 6) & 0x3F)); - buffer[count++] = (byte) (0x80 | ((c >> 0) & 0x3F)); - } - else { - buffer[count++] = (byte) (0xC0 | ((c >> 6) & 0x1F)); - buffer[count++] = (byte) (0x80 | ((c >> 0) & 0x3F)); - } - } - - try { dos.write(buffer, 0, utfLength); } - catch (Exception e) { handleException(e); } - return utfLength + 2; - } -} - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/FVector.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/FVector.java index ec0d2286..b530eb9a 100644 --- 
a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/FVector.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/FVector.java @@ -1,329 +1,373 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.util; +import java.lang.reflect.Array; import java.util.Arrays; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream; +import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream; import edu.illinois.cs.cogcomp.lbjava.classify.Feature; /** - * This class implements an expandable array of features that should be - * faster than java's Vector. - * - * @author Nick Rizzolo + * This class implements an expandable array of features that should be faster than java's + * Vector. + * + * @author Nick Rizzolo **/ -public class FVector implements Cloneable, java.io.Serializable -{ - /** The default capacity of a vector upon first construction. */ - protected static final int defaultCapacity = 8; - - /** The elements of the vector. */ - protected Feature[] vector; - /** The number of elements in the vector. */ - protected int size; - - - /** - * Constructs a new vector with capacity equal to {@link #defaultCapacity}. - **/ - public FVector() { this(defaultCapacity); } - - /** - * Constructs a new vector with the specified capacity. - * - * @param c The initial capacity for the new vector. 
- **/ - public FVector(int c) { - vector = new Feature[Math.max(defaultCapacity, c)]; - } - - /** - * Constructs a new vector using the specified array as a starting point. - * - * @param v The initial array. - **/ - public FVector(Feature[] v) { - if (v.length == 0) vector = new Feature[defaultCapacity]; - else { - vector = v; - size = vector.length; +public class FVector implements Cloneable, java.io.Serializable { + /** The default capacity of a vector upon first construction. */ + protected static final int defaultCapacity = 8; + + /** The elements of the vector. */ + protected Feature[] vector; + /** The number of elements in the vector. */ + protected int size; + + + /** + * Constructs a new vector with capacity equal to {@link #defaultCapacity}. + **/ + public FVector() { + this(defaultCapacity); } - } - - /** - * Constructs a copy of a vector starting with capacity equal to that - * vector's size. - * - * @param v The vector to copy. - **/ - public FVector(FVector v) { - int N = v.size(); - if (N == 0) vector = new Feature[defaultCapacity]; - else { - vector = new Feature[N]; - size = N; - System.arraycopy(v.vector, 0, vector, 0, N); + + /** + * Constructs a new vector with the specified capacity. + * + * @param c The initial capacity for the new vector. + **/ + public FVector(int c) { + vector = new Feature[Math.max(defaultCapacity, c)]; } - } - - - /** - * Throws an exception when the specified index is negative. - * - * @param i The index. - * @throws ArrayIndexOutOfBoundsException When i < 0. - **/ - protected void boundsCheck(int i) { - if (i < 0) - throw - new ArrayIndexOutOfBoundsException( - "Attempted to access negative index of FVector."); - } - - - /** - * Retrieves the value stored at the specified index of the vector, or - * null if the vector isn't long enough. - * - * @param i The index of the value to retrieve. - * @return The retrieved value. - * @throws ArrayIndexOutOfBoundsException When i < 0. 
- **/ - public Feature get(int i) { return get(i, null); } - - /** - * Retrieves the value stored at the specified index of the vector or - * d if the vector isn't long enough. - * - * @param i The index of the value to retrieve. - * @param d The default value. - * @return The retrieved value. - * @throws ArrayIndexOutOfBoundsException When i < 0. - **/ - public Feature get(int i, Feature d) { - boundsCheck(i); - return i < size ? vector[i] : d; - } - - - /** - * Sets the value at the specified index to the given value. - * - * @param i The index of the value to set. - * @param v The new value at that index. - * @return The value that used to be at index i. - * @throws ArrayIndexOutOfBoundsException When i < 0. - **/ - public Feature set(int i, Feature v) { return set(i, v, null); } - - /** - * Sets the value at the specified index to the given value. If the given - * index is greater than the vector's current size, the vector will expand - * to accomodate it. - * - * @param i The index of the value to set. - * @param v The new value at that index. - * @param d The default value for other new indexes that might get - * created. - * @return The value that used to be at index i. - * @throws ArrayIndexOutOfBoundsException When i < 0. - **/ - public Feature set(int i, Feature v, Feature d) { - boundsCheck(i); - expandFor(i, d); - Feature result = vector[i]; - vector[i] = v; - return result; - } - - - /** - * Adds the specified value on to the end of the vector, expanding its - * capacity as necessary. - * - * @param v The new value to appear last in the vector. - **/ - public void add(Feature v) { - expandFor(size, null); - vector[size - 1] = v; - } - - - /** - * Adds all the values in the given vector to the end of this vector, - * expanding its capacity as necessary. - * - * @param v The new vector of values to appear at the end of this vector. 
- **/ - public void addAll(FVector v) { - expandFor(size + v.size - 1, null); - System.arraycopy(v.vector, 0, vector, size - v.size, v.size); - } - - - /** - * Removes the element at the specified index of the vector. - * - * @param i The index of the element to remove. - * @return The removed element. - **/ - public Feature remove(int i) { - boundsCheck(i); - if (i >= size) - throw - new ArrayIndexOutOfBoundsException( - "LBJ: FVector: Can't remove element at index " + i - + " as it is larger than the size (" + size + ")"); - Feature result = vector[i]; - for (int j = i + 1; j < size; ++j) - vector[j - 1] = vector[j]; - vector[--size] = null; - return result; - } - - - /** Returns the value of {@link #size}. */ - public int size() { return size; } - - - /** Sorts this vector in increasing order. */ - public void sort() { Arrays.sort(vector, 0, size); } - - - /** - * After calling this method, the new size and capacity of this vector will - * be equal to the number of non-null elements; all such - * elements will be retained in the same relative order. - **/ - public void consolidate() { - int n = 0; while (n < size && vector[n] != null) ++n; - int i = n; while (i < size && vector[i] == null) ++i; - while (i < size) { - vector[n++] = vector[i++]; - while (i < size && vector[i] == null) ++i; + + /** + * Constructs a new vector using the specified array as a starting point. + * + * @param v The initial array. + **/ + public FVector(Feature[] v) { + if (v.length == 0) + vector = new Feature[defaultCapacity]; + else { + vector = v; + size = vector.length; + } } - if (n < vector.length) { - size = n; - Feature[] newVector = new Feature[size]; - System.arraycopy(vector, 0, newVector, 0, size); - vector = newVector; + /** + * Constructs a copy of a vector starting with capacity equal to that vector's size. + * + * @param v The vector to copy. 
+ **/ + public FVector(FVector v) { + int N = v.size(); + if (N == 0) + vector = new Feature[defaultCapacity]; + else { + vector = new Feature[N]; + size = N; + System.arraycopy(v.vector, 0, vector, 0, N); + } } - } - - - /** - * Makes sure the capacity and size of the vector can accomodate the - * given index. The capacity of the vector is simply doubled until it can - * accomodate its size. - * - * @param index The index where a new value will be stored. - * @param d The default value for other new indexes that might get - * created. - **/ - protected void expandFor(int index, Feature d) { - if (index < size) return; - int oldSize = size, capacity = vector.length; - size = index + 1; - if (capacity >= size) return; - while (capacity < size) capacity *= 2; - Feature[] t = new Feature[capacity]; - System.arraycopy(vector, 0, t, 0, oldSize); - if (d != null) Arrays.fill(t, oldSize, size, d); - vector = t; - } - - - /** - * Returns a new array of features containing the same data as this vector. - **/ - public Feature[] toArray() { - Feature[] result = new Feature[size]; - System.arraycopy(vector, 0, result, 0, size); - return result; - } - - - /** - * Two FVectors are considered equal if they contain - * equivalent elements and have the same size. - **/ - public boolean equals(Object o) { - if (!(o instanceof FVector)) return false; - FVector v = (FVector) o; - return size == v.size && Arrays.equals(vector, v.vector); - } - - - /** A hash code based on the hash code of {@link #vector}. */ - public int hashCode() { return vector.hashCode(); } - - - /** - * Writes a binary representation of this vector to the given stream. - * - * @param out The output stream. - **/ - public void write(ExceptionlessOutputStream out) { - out.writeInt(size); - for (int i = 0; i < size; ++i) vector[i].write(out); - } - - - /** - * Reads the binary representation of a vector from the specified stream, - * overwriting the data in this object. - * - * @param in The input stream. 
- **/ - public void read(ExceptionlessInputStream in) { - size = in.readInt(); - if (size == 0) vector = new Feature[defaultCapacity]; - else { - vector = new Feature[size]; - for (int i = 0; i < size; ++i) vector[i] = Feature.readFeature(in); + + + /** + * Throws an exception when the specified index is negative. + * + * @param i The index. + * @throws ArrayIndexOutOfBoundsException When i < 0. + **/ + protected void boundsCheck(int i) { + if (i < 0) + throw new ArrayIndexOutOfBoundsException( + "Attempted to access negative index of FVector."); } - } - /** - * Returns a shallow clone of this vector; the vector itself is cloned, but - * the element objects aren't. - **/ - public Object clone() { - FVector clone = null; + /** + * Retrieves the value stored at the specified index of the vector, or null if the + * vector isn't long enough. + * + * @param i The index of the value to retrieve. + * @return The retrieved value. + * @throws ArrayIndexOutOfBoundsException When i < 0. + **/ + public Feature get(int i) { + return get(i, null); + } - try { clone = (FVector) super.clone(); } - catch (Exception e) { - System.err.println("Error cloning " + getClass().getName() + ":"); - e.printStackTrace(); - System.exit(1); + /** + * Retrieves the value stored at the specified index of the vector or d if the + * vector isn't long enough. + * + * @param i The index of the value to retrieve. + * @param d The default value. + * @return The retrieved value. + * @throws ArrayIndexOutOfBoundsException When i < 0. + **/ + public Feature get(int i, Feature d) { + boundsCheck(i); + return i < size ? vector[i] : d; } - clone.vector = (Feature[]) vector.clone(); - return clone; - } + /** + * Sets the value at the specified index to the given value. + * + * @param i The index of the value to set. + * @param v The new value at that index. + * @return The value that used to be at index i. + * @throws ArrayIndexOutOfBoundsException When i < 0. 
+ **/ + public Feature set(int i, Feature v) { + return set(i, v, null); + } - /** Returns a text representation of this vector. */ - public String toString() { - StringBuffer result = new StringBuffer(); - result.append("["); - for (int i = 0; i < size; ++i) { - result.append(vector[i]); - if (i + 1 < size) result.append(", "); + /** + * Sets the value at the specified index to the given value. If the given index is greater than + * the vector's current size, the vector will expand to accomodate it. + * + * @param i The index of the value to set. + * @param v The new value at that index. + * @param d The default value for other new indexes that might get created. + * @return The value that used to be at index i. + * @throws ArrayIndexOutOfBoundsException When i < 0. + **/ + public Feature set(int i, Feature v, Feature d) { + boundsCheck(i); + expandFor(i, d); + Feature result = vector[i]; + vector[i] = v; + return result; + } + + + /** + * Adds the specified value on to the end of the vector, expanding its capacity as necessary. + * + * @param v The new value to appear last in the vector. + **/ + public void add(Feature v) { + expandFor(size, null); + vector[size - 1] = v; + } + + + /** + * Adds all the values in the given vector to the end of this vector, expanding its capacity as + * necessary. + * + * @param v The new vector of values to appear at the end of this vector. + **/ + public void addAll(FVector v) { + expandFor(size + v.size - 1, null); + System.arraycopy(v.vector, 0, vector, size - v.size, v.size); + } + + + /** + * Removes the element at the specified index of the vector. + * + * @param i The index of the element to remove. + * @return The removed element. 
+ **/ + public Feature remove(int i) { + boundsCheck(i); + if (i >= size) + throw new ArrayIndexOutOfBoundsException("LBJ: FVector: Can't remove element at index " + + i + " as it is larger than the size (" + size + ")"); + Feature result = vector[i]; + for (int j = i + 1; j < size; ++j) + vector[j - 1] = vector[j]; + vector[--size] = null; + return result; + } + + + /** + * Remove all the features specfied by the indices. This is MUCH faster + * than removing them one at a time. + * + * @param indexes The indexes of the elements to remove. + **/ + public void remove(int[] indexes) { + Arrays.sort(indexes); + int sourceindex = 0; + int discardindex = 0; + for (int targetindex = 0; targetindex < size; targetindex++) { + if (discardindex < indexes.length && targetindex == indexes[discardindex]) { + // skip this one (by simply not coping it and not inc the sourceindex), inc discardindex + discardindex++; + } else { + vector[sourceindex] = vector[targetindex]; + sourceindex++; + } + } + if (discardindex != indexes.length) + // this should nver happen. + throw new RuntimeException("There was a problem removing some of the indexes!"); + size -= indexes.length; + } + + + /** Returns the value of {@link #size}. */ + public int size() { + return size; + } + + + /** Sorts this vector in increasing order. */ + public void sort() { + Arrays.sort(vector, 0, size); } - result.append("]"); - return result.toString(); - } -} + + /** + * After calling this method, the new size and capacity of this vector will be equal to the + * number of non-null elements; all such elements will be retained in the same + * relative order. 
+ **/ + public void consolidate() { + int n = 0; + while (n < size && vector[n] != null) + ++n; + int i = n; + while (i < size && vector[i] == null) + ++i; + while (i < size) { + vector[n++] = vector[i++]; + while (i < size && vector[i] == null) + ++i; + } + + if (n < vector.length) { + size = n; + Feature[] newVector = new Feature[size]; + System.arraycopy(vector, 0, newVector, 0, size); + vector = newVector; + } + } + + + /** + * Makes sure the capacity and size of the vector can accomodate the given index. The capacity + * of the vector is simply doubled until it can accomodate its size. + * + * @param index The index where a new value will be stored. + * @param d The default value for other new indexes that might get created. + **/ + protected void expandFor(int index, Feature d) { + if (index < size) + return; + int oldSize = size, capacity = vector.length; + size = index + 1; + if (capacity >= size) + return; + while (capacity < size) + capacity *= 2; + Feature[] t = new Feature[capacity]; + System.arraycopy(vector, 0, t, 0, oldSize); + if (d != null) + Arrays.fill(t, oldSize, size, d); + vector = t; + } + + + /** + * Returns a new array of features containing the same data as this vector. + **/ + public Feature[] toArray() { + Feature[] result = new Feature[size]; + System.arraycopy(vector, 0, result, 0, size); + return result; + } + + + /** + * Two FVectors are considered equal if they contain equivalent elements and have + * the same size. + **/ + public boolean equals(Object o) { + if (!(o instanceof FVector)) + return false; + FVector v = (FVector) o; + return size == v.size && Arrays.equals(vector, v.vector); + } + + + /** A hash code based on the hash code of {@link #vector}. */ + public int hashCode() { + return vector.hashCode(); + } + + + /** + * Writes a binary representation of this vector to the given stream. + * + * @param out The output stream. 
+ **/ + public void write(ExceptionlessOutputStream out) { + out.writeInt(size); + for (int i = 0; i < size; ++i) + vector[i].write(out); + } + + + /** + * Reads the binary representation of a vector from the specified stream, overwriting the data + * in this object. + * + * @param in The input stream. + **/ + public void read(ExceptionlessInputStream in) { + size = in.readInt(); + if (size == 0) + vector = new Feature[defaultCapacity]; + else { + vector = new Feature[size]; + for (int i = 0; i < size; ++i) + vector[i] = Feature.readFeature(in); + } + } + + + /** + * Returns a shallow clone of this vector; the vector itself is cloned, but the element objects + * aren't. + **/ + public Object clone() { + FVector clone = null; + + try { + clone = (FVector) super.clone(); + } catch (Exception e) { + System.err.println("Error cloning " + getClass().getName() + ":"); + e.printStackTrace(); + System.exit(1); + } + + clone.vector = (Feature[]) vector.clone(); + return clone; + } + + + /** Returns a text representation of this vector. */ + public String toString() { + StringBuffer result = new StringBuffer(); + result.append("["); + for (int i = 0; i < size; ++i) { + result.append(vector[i]); + if (i + 1 < size) + result.append(", "); + } + result.append("]"); + return result.toString(); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/FileUtils.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/FileUtils.java new file mode 100644 index 00000000..78819f63 --- /dev/null +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/FileUtils.java @@ -0,0 +1,51 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.util; + +import java.io.File; + +/** + * Utility methods for handling file paths. + * + **/ +public class FileUtils { + + /** + * Update FilePath separator for file paths read from pom.xml config. + * + * @param originalFilePath Path to a file in UNIX separator convention.s + * @return Platform Independent File Path + */ + public static String getPlatformIndependentFilePath(String originalFilePath) { + if (File.separatorChar == '/') { + return originalFilePath; + } + + return originalFilePath.replace('/', File.separatorChar); + } + + + /** + * Escapes the forward slash in Windows. Currently used in the generate code where a model's + * location string is generated. + * + * CAVEAT: This function might break other escaped characters in the originalFilePath string. + * Use with care. + * + * @param originalFilePath Original File Path + * @return File Path with the forward slash escaped on Windows. + */ + public static String escapeFilePath(String originalFilePath) { + if (File.separatorChar == '/') { + return originalFilePath; + } + + // Only update this on Windows. + return originalFilePath.replace("\\", "\\\\"); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/IVector.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/IVector.java deleted file mode 100644 index 28143a57..00000000 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/IVector.java +++ /dev/null @@ -1,388 +0,0 @@ -/** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. 
- * Copyright (c) 2016 - * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign - * http://cogcomp.cs.illinois.edu/ - */ -package edu.illinois.cs.cogcomp.lbjava.util; - -import java.util.Arrays; - - -/** - * This class implements an expandable array of ints that should - * be faster than java's Vector. - * - * @author Nick Rizzolo - **/ -public class IVector implements Cloneable, java.io.Serializable -{ - /** The default capacity of a vector upon first construction. */ - protected static final int defaultCapacity = 8; - - /** The elements of the vector. */ - protected int[] vector; - /** The number of elements in the vector. */ - protected int size; - - - /** - * Constructs a new vector with capacity equal to {@link #defaultCapacity}. - **/ - public IVector() { this(defaultCapacity); } - - /** - * Constructs a new vector with the specified capacity. - * - * @param c The initial capacity for the new vector. - **/ - public IVector(int c) { vector = new int[Math.max(defaultCapacity, c)]; } - - /** - * Constructs a new vector using the specified array as a starting point. - * - * @param v The initial array. - **/ - public IVector(int[] v) { - if (v.length == 0) vector = new int[defaultCapacity]; - else { - vector = v; - size = vector.length; - } - } - - /** - * Copy constructor. - * - * @param v The vector to copy. - **/ - public IVector(IVector v) { this(v.toArray()); } - - - /** - * Throws an exception when the specified index is negative. - * - * @param i The index. - * @throws ArrayIndexOutOfBoundsException When i < 0. - **/ - protected void boundsCheck(int i) { - if (i < 0) - throw - new ArrayIndexOutOfBoundsException( - "Attempted to access negative index of IVector."); - } - - - /** - * Retrieves the value stored at the specified index of the vector, or 0 if - * the vector isn't long enough. - * - * @param i The index of the value to retrieve. - * @return The retrieved value. 
- * @throws ArrayIndexOutOfBoundsException When i < 0. - **/ - public int get(int i) { return get(i, 0); } - - /** - * Retrieves the value stored at the specified index of the vector or - * d if the vector isn't long enough. - * - * @param i The index of the value to retrieve. - * @param d The default value. - * @return The retrieved value. - * @throws ArrayIndexOutOfBoundsException When i < 0. - **/ - public int get(int i, int d) { - boundsCheck(i); - return i < size ? vector[i] : d; - } - - - /** - * Sets the value at the specified index to the given value. - * - * @param i The index of the value to set. - * @param v The new value at that index. - * @return The value that used to be at index i. - * @throws ArrayIndexOutOfBoundsException When i < 0. - **/ - public int set(int i, int v) { return set(i, v, 0); } - - /** - * Sets the value at the specified index to the given value. If the given - * index is greater than the vector's current size, the vector will expand - * to accomodate it. - * - * @param i The index of the value to set. - * @param v The new value at that index. - * @param d The default value for other new indexes that might get - * created. - * @return The value that used to be at index i. - * @throws ArrayIndexOutOfBoundsException When i < 0. - **/ - public int set(int i, int v, int d) { - boundsCheck(i); - expandFor(i, d); - int result = vector[i]; - vector[i] = v; - return result; - } - - - /** - * Increments the integer at the given index by 1. - * - * @param i The index of the value to increment. - * @throws ArrayIndexOutOfBoundsException When i < 0. - **/ - public void increment(int i) { increment(i, 0); } - - /** - * Increments the integer at the given index by 1. If the given index is - * greater than the vector's current size, the vector will expand to - * accomodate it. - * - * @param i The index of the value to increment. - * @param d The default value for other new indexes that might get - * created. 
- * @throws ArrayIndexOutOfBoundsException When i < 0. - **/ - public void increment(int i, int d) { - boundsCheck(i); - expandFor(i, d); - vector[i]++; - } - - - /** - * Decrements the integer at the given index by 1. - * - * @param i The index of the value to decrement. - * @throws ArrayIndexOutOfBoundsException When i < 0. - **/ - public void decrement(int i) { decrement(i, 0); } - - /** - * Decrements the integer at the given index by 1. If the given index is - * greater than the vector's current size, the vector will expand to - * accomodate it. - * - * @param i The index of the value to decrement. - * @param d The default value for other new indexes that might get - * created. - * @throws ArrayIndexOutOfBoundsException When i < 0. - **/ - public void decrement(int i, int d) { - boundsCheck(i); - expandFor(i, d); - vector[i]--; - } - - - /** - * Adds the specified value on to the end of the vector, expanding its - * capacity as necessary. - * - * @param v The new value to appear last in the vector. - **/ - public void add(int v) { - expandFor(size, 0); - vector[size - 1] = v; - } - - - /** - * Adds all the values in the given vector to the end of this vector, - * expanding its capacity as necessary. - * - * @param v The new vector of values to appear at the end of this vector. - **/ - public void addAll(IVector v) { - expandFor(size + v.size - 1, 0); - System.arraycopy(v.vector, 0, vector, size - v.size, v.size); - } - - - /** - * Removes the element at the specified index of the vector. - * - * @param i The index of the element to remove. - * @return The removed element. - **/ - public int remove(int i) { - boundsCheck(i); - if (i >= size) - throw - new ArrayIndexOutOfBoundsException( - "LBJ: IVector: Can't remove element at index " + i - + " as it is larger than the size (" + size + ")"); - int result = vector[i]; - for (int j = i + 1; j < size; ++j) - vector[j - 1] = vector[j]; - --size; - return result; - } - - - /** Returns the value of {@link #size}. 
*/ - public int size() { return size; } - - - /** Returns the value of the maximum element in the vector. */ - public int max() { - int result = Integer.MIN_VALUE; - for (int i = 0; i < size; ++i) if (vector[i] > result) result = vector[i]; - return result; - } - - - /** Sorts this vector in increasing order. */ - public void sort() { Arrays.sort(vector, 0, size); } - - - /** - * Searches this vector for the specified value using the binary search - * algorithm. This vector must be sorted (as by the - * {@link #sort()} method) prior to making this call. If it is not sorted, - * the results are undefined. If this vector contains multiple elements - * with the specified value, there is no guarantee which one will be found. - * - * @param v The value to be searched for. - * @return The index of v, if it is contained in the vector; - * otherwise, (-(insertion point) - 1). The - * insertion point is defined as the point at which - * v would be inserted into the vector: the index of - * the first element greater than v, or the size of - * the vector if all elements in the list are less than - * v. Note that this guarantees that the return value - * will be >= 0 if and only if v is found. - **/ - public int binarySearch(int v) { - int a = 0, b = size; - - while (b != a) { - int m = (a + b) >> 1; - if (vector[m] > v) b = m; - else if (vector[m] < v) a = m + 1; - else return m; - } - - return -a - 1; - } - - - /** - * Makes sure the capacity and size of the vector can accomodate the - * given index. The capacity of the vector is simply doubled until it can - * accomodate its size. - * - * @param index The index where a new value will be stored. - * @param d The default value for other new indexes that might get - * created. 
- **/ - protected void expandFor(int index, int d) { - if (index < size) return; - int oldSize = size, capacity = vector.length; - size = index + 1; - if (capacity >= size) return; - while (capacity < size) capacity *= 2; - int[] t = new int[capacity]; - System.arraycopy(vector, 0, t, 0, oldSize); - if (d != 0) Arrays.fill(t, oldSize, size, d); - vector = t; - } - - - /** - * Returns a new array of ints containing the same data as - * this vector. - **/ - public int[] toArray() { - int[] result = new int[size]; - System.arraycopy(vector, 0, result, 0, size); - return result; - } - - public double[] toArrayDouble() { - double[] result = new double[size]; - for (int i = 0; i < size; i++) - result[i] = (double) vector[i]; - return result; - } - - - /** - * Two IVectors are considered equal if they contain the same - * elements and have the same size. - **/ - public boolean equals(Object o) { - if (!(o instanceof IVector)) return false; - IVector v = (IVector) o; - return size == v.size && Arrays.equals(vector, v.vector); - } - - - /** A hash code based on the hash code of {@link #vector}. */ - public int hashCode() { return vector.hashCode(); } - - - /** Returns a deep clone of this vector. */ - public Object clone() { - IVector clone = null; - - try { clone = (IVector) super.clone(); } - catch (Exception e) { - System.err.println("Error cloning " + getClass().getName() + ":"); - e.printStackTrace(); - System.exit(1); - } - - clone.vector = (int[]) vector.clone(); - return clone; - } - - - /** Returns a text representation of this vector. */ - public String toString() { - StringBuffer result = new StringBuffer(); - result.append("["); - for (int i = 0; i < size; ++i) { - result.append(vector[i]); - if (i + 1 < size) result.append(", "); - } - result.append("]"); - return result.toString(); - } - - - /** - * Writes a binary representation of the vector. - * - * @param out The output stream. 
- **/ - public void write(ExceptionlessOutputStream out) { - out.writeInt(size); - for (int i = 0; i < size; ++i) out.writeInt(vector[i]); - } - - - /** - * Reads the binary representation of a vector from the specified stream, - * overwriting the data in this object. - * - * @param in The input stream. - **/ - public void read(ExceptionlessInputStream in) { - size = in.readInt(); - if (size == 0) vector = new int[defaultCapacity]; - else { - vector = new int[size]; - for (int i = 0; i < size; ++i) vector[i] = in.readInt(); - } - } -} - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/IVector2D.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/IVector2D.java deleted file mode 100644 index 32ad7a2f..00000000 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/IVector2D.java +++ /dev/null @@ -1,469 +0,0 @@ -/** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 - * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign - * http://cogcomp.cs.illinois.edu/ - */ -package edu.illinois.cs.cogcomp.lbjava.util; - -import java.util.Arrays; - - -/** - * This class implements an expandable two dimensional array of ints that - * should be faster than java's Vector. - * - * @author Nick Rizzolo - **/ -public class IVector2D implements Cloneable, java.io.Serializable -{ - /** The default capacity of the first dimension of this 2D vector. */ - protected static final int defaultCapacity1 = 8; - /** The default capacity of the second dimension of this 2D vector. */ - protected static final int defaultDefaultCapacity2 = 8; - - /** The elements of the vector. */ - protected int[][] vector; - /** The sizes of each vector in the second dimension. */ - protected IVector sizes; - /** The capacity of new vectors created in the second dimension. 
*/ - protected int defaultCapacity2; - - - /** - * Constructs a new vector with default capacities - * {@link #defaultCapacity1} and {@link #defaultCapacity2}. - **/ - public IVector2D() { this(defaultCapacity1, defaultDefaultCapacity2); } - - /** - * Constructs a new vector with the specified capacities. - * - * @param c1 The initial capacity for the first dimension of the new - * vector. - * @param c2 The initial capacity for the second dimension of the new - * vector. - **/ - public IVector2D(int c1, int c2) { - defaultCapacity2 = Math.max(defaultDefaultCapacity2, c2); - vector = new int[Math.max(defaultCapacity1, c1)][defaultCapacity2]; - sizes = new IVector(c1); - } - - /** - * Constructs a new vector using the specified array as a starting point. - * - * @param v The initial array. - **/ - public IVector2D(int[][] v) { - defaultCapacity2 = defaultDefaultCapacity2; - - if (v.length == 0) { - vector = new int[defaultCapacity1][defaultCapacity2]; - sizes = new IVector(defaultCapacity1); - } - else { - vector = v; - sizes = new IVector(v.length); - - for (int i = 0; i < v.length; ++i) { - sizes.set(i, v[i].length); - defaultCapacity2 = Math.max(defaultCapacity2, v[i].length); - } - - for (int i = 0; i < v.length; ++i) if (v[i].length == 0) - v[i] = new int[defaultCapacity2]; - } - } - - - /** - * Throws an exception when either of the specified indexes are negative. - * - * @param i1 The index in the first dimension. - * @param i2 The index in the second dimension. - * @throws ArrayIndexOutOfBoundsException - * When i1 or i2 < 0. - **/ - protected void boundsCheck(int i1, int i2) { - if (i1 < 0 || i2 < 0) - throw - new ArrayIndexOutOfBoundsException( - "Attempted to access negative index of IVector2D."); - } - - - /** - * Retrieves the value stored at the specified index of the vector. - * - * @param i1 The index in the first dimension. - * @param i2 The index in the second dimension. - * @return The retrieved value. 
- * @throws ArrayIndexOutOfBoundsException - * When i1 or i2 < 0. - **/ - public int get(int i1, int i2) { return get(i1, i2, 0); } - - /** - * Retrieves the value stored at the specified index of the vector. - * - * @param i1 The index in the first dimension. - * @param i2 The index in the second dimension. - * @param d The default value. - * @return The retrieved value. - * @throws ArrayIndexOutOfBoundsException - * When i1 or i2 < 0. - **/ - public int get(int i1, int i2, int d) { - boundsCheck(i1, i2); - - // Because of the way IVector works, the only way i2 < sizes.get(i1) will - // be true is if i1 is in fact a valid index for the first dimension. - return i2 < sizes.get(i1) ? vector[i1][i2] : d; - } - - - /** - * Sets the value at the specified index to the given value. - * - * @param i1 The index in the first dimension. - * @param i2 The index in the second dimension. - * @param v The new value at that index. - * @return The value that used to be at index i. - * @throws ArrayIndexOutOfBoundsException - * When i1 or i2 < 0. - **/ - public int set(int i1, int i2, int v) { return set(i1, i2, v, 0); } - - /** - * Sets the value at the specified index to the given value. - * - * @param i1 The index in the first dimension. - * @param i2 The index in the second dimension. - * @param v The new value at that index. - * @param d The default value for other new indexes that might get - * created. - * @return The value that used to be at index i. - * @throws ArrayIndexOutOfBoundsException - * When i1 or i2 < 0. - **/ - public int set(int i1, int i2, int v, int d) { - boundsCheck(i1, i2); - expandFor(i1, i2, d); - int result = vector[i1][i2]; - vector[i1][i2] = v; - return result; - } - - - /** - * Removes the row at the specified index. - * - * @param i The index of the row to remove. - * @return The removed row. 
- **/ - public int[] remove(int i) { - boundsCheck(i, 0); - int rows = sizes.size(); - if (i >= rows) - throw - new ArrayIndexOutOfBoundsException( - "LBJ: IVector2D: Can't remove row at index " + i - + " as it is larger than the size (" + rows + ")"); - int[] result = vector[i]; - for (int j = i + 1; j < rows; ++j) - vector[j - 1] = vector[j]; - vector[rows - 1] = null; - sizes.remove(i); - return result; - } - - - /** - * Removes the element at the specified index of the vector. - * - * @param i1 The row index of the element to remove. - * @param i2 The column index of the element to remove. - * @return The removed element. - **/ - public int remove(int i1, int i2) { - boundsCheck(i1, i2); - int rows = sizes.size(), columns = sizes.get(i1); - if (i1 >= rows || i2 >= columns) - throw - new ArrayIndexOutOfBoundsException( - "LBJ: IVector2D: Can't remove index [" + i1 + ", " + i2 - + "] as it is out of bounds (" + rows + ", " + columns + ")"); - int result = vector[i1][i2]; - for (int j = i2 + 1; j < columns; ++j) - vector[i1][j - 1] = vector[i1][j]; - sizes.set(i1, columns - 1); - return result; - } - - - /** - * Increments the integer at the given index by 1. - * - * @param i1 The index in the first dimension. - * @param i2 The index in the second dimension. - * @throws ArrayIndexOutOfBoundsException - * When i1 or i2 < 0. - **/ - public void increment(int i1, int i2) { increment(i1, i2, 0); } - - /** - * Increments the integer at the given index by 1. - * - * @param i1 The index in the first dimension. - * @param i2 The index in the second dimension. - * @param d The default value for other new indexes that might get - * created. - * @throws ArrayIndexOutOfBoundsException - * When i1 or i2 < 0. - **/ - public void increment(int i1, int i2, int d) { - boundsCheck(i1, i2); - expandFor(i1, i2, d); - vector[i1][i2]++; - } - - - /** Returns the size of the first dimension of this vector.. 
*/ - public int size() { return sizes.size(); } - /** - * Returns the size associated with the specified vector. - * - * @param i The index of the vector whose size will be returned. - **/ - public int size(int i) { return sizes.get(i); } - - - /** - * Returns the value of the maximum element in the - * ith vector. - * - * @param i An index into the first dimension of this vector. - **/ - public int max(int i) { - if (i < 0 || i >= sizes.size()) - throw - new ArrayIndexOutOfBoundsException( - "Attempted to access negative index of IVector2D."); - int result = Integer.MIN_VALUE, size = sizes.get(i); - for (int j = 0; j < size; ++j) - if (vector[i][j] > result) result = vector[i][j]; - return result; - } - - - /** - * Sorts the selected row in increasing order. - * - * @param i The row to sort. - **/ - public void sort(int i) { Arrays.sort(vector[i], 0, sizes.get(i)); } - - - /** - * Searches the selected row vector for the specified value using the - * binary search algorithm. The vector must be sorted (as - * by the {@link #sort(int)} method) prior to making this call. If it is - * not sorted, the results are undefined. If the vector contains multiple - * elements with the specified value, there is no guarantee which one will - * be found. - * - * @param i The selected row. - * @param v The value to be searched for. - * @return The index of v, if it is contained in the vector; - * otherwise, (-(insertion point) - 1). The - * insertion point is defined as the point at which - * v would be inserted into the vector: the index of - * the first element greater than v, or the size of - * the vector if all elements in the vector are less than - * v. Note that this guarantees that the return value - * will be >= 0 if and only if v is found. 
- **/ - public int binarySearch(int i, int v) { - int a = 0, b = sizes.get(i); - - while (b != a) { - int m = (a + b) >> 1; - if (vector[i][m] > v) b = m; - else if (vector[i][m] < v) a = m + 1; - else return m; - } - - return -a - 1; - } - - - /** - * Makes sure the capacities and sizes of the vectors can accomodate the - * given indexes. The capacities of the vectors are simply doubled until - * they can accomodate their sizes. - * - * @param i1 The index in the first dimension. - * @param i2 The index in the second dimension. - * @param d The default value for other new indexes that might get - * created. - **/ - protected void expandFor(int i1, int i2, int d) { - if (i1 >= sizes.size()) { - int oldSize = sizes.size(), capacity = vector.length; - sizes.set(i1, 0); - if (capacity < sizes.size()) { - while (capacity < sizes.size()) capacity *= 2; - int[][] t = new int[capacity][]; - System.arraycopy(vector, 0, t, 0, oldSize); - vector = t; - } - for (int i = oldSize; i < sizes.size(); ++i) - vector[i] = new int[defaultCapacity2]; - } - - if (i2 < sizes.get(i1)) return; - int oldSize = sizes.get(i1), capacity = vector[i1].length; - sizes.set(i1, i2 + 1); - if (capacity >= sizes.get(i1)) return; - while (capacity < sizes.get(i1)) capacity *= 2; - int[] t = new int[capacity]; - System.arraycopy(vector[i1], 0, t, 0, oldSize); - if (d != 0) Arrays.fill(t, oldSize, sizes.get(i1), d); - vector[i1] = t; - } - - - /** - * Returns a new 2D array of ints containing the same data as - * this vector. - **/ - public int[][] toArray() { - int[][] result = new int[sizes.size()][]; - for (int i = 0; i < result.length; ++i) { - result[i] = new int[sizes.get(i)]; - System.arraycopy(vector[i], 0, result[i], 0, result[i].length); - } - return result; - } - - - /** - * Two IVector2Ds are considered equal if they contain all the - * same elements, sizes, and capacities. 
- **/ - public boolean equals(Object o) { - if (!(o instanceof IVector2D)) return false; - IVector2D v = (IVector2D) o; - if (vector.length != v.vector.length || !sizes.equals(v.sizes)) - return false; - for (int i = 0; i < vector.length; ++i) - if (!Arrays.equals(vector[i], v.vector[i])) return false; - return true; - } - - - /** - * A hash code based on the hash codes of the constituents of - * {@link #vector}. - **/ - public int hashCode() { - int result = vector.hashCode(); - for (int i = 0; i < vector.length; ++i) - result = 17 * result + vector[i].hashCode(); - return result; - } - - - /** - * Returns a clone of this vector that is one level deep; in particular, - * the objects in the vector themselves are not cloned, but the underlying - * array is. - **/ - public Object clone() { - IVector2D clone = null; - - try { clone = (IVector2D) super.clone(); } - catch (Exception e) { - System.err.println("Error cloning " + getClass().getName() + ":"); - e.printStackTrace(); - System.exit(1); - } - - clone.vector = (int[][]) vector.clone(); - for (int i = 0; i < vector.length; ++i) if (clone.vector[i] != null) - clone.vector[i] = (int[]) vector[i].clone(); - return clone; - } - - - /** Returns a text representation of this vector. */ - public String toString() { - StringBuffer result = new StringBuffer(); - result.append("["); - int s1 = sizes.size(); - - for (int i = 0; i < s1; ++i) { - int s2 = sizes.get(i); - result.append("["); - - for (int j = 0; j < s2; ++j) { - result.append(vector[i][j]); - if (j + 1 < s2) result.append(", "); - } - - result.append("]"); - if (i + 1 < s1) result.append(",\n "); - } - - result.append("]"); - return result.toString(); - } - - - /** - * Writes a binary representation of the vector. - * - * @param out The output stream. 
- **/ - public void write(ExceptionlessOutputStream out) { - sizes.write(out); - for (int i = 0; i < sizes.size(); ++i) - for (int j = 0; j < sizes.get(i); ++j) - out.writeInt(vector[i][j]); - out.writeInt(defaultCapacity2); - } - - - /** - * Reads the binary representation of a vector from the specified stream, - * overwriting the data in this object. - * - * @param in The input stream. - **/ - public void read(ExceptionlessInputStream in) { - sizes = new IVector(); - sizes.read(in); - - if (sizes.size() == 0) { - defaultCapacity2 = defaultDefaultCapacity2; - vector = new int[defaultCapacity1][defaultCapacity2]; - } - else { - vector = new int[sizes.size()][]; - - for (int i = 0; i < vector.length; ++i) { - vector[i] = new int[sizes.get(i)]; - for (int j = 0; j < vector[i].length; ++j) - vector[i][j] = in.readInt(); - } - - defaultCapacity2 = in.readInt(); - } - } -} - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/OVector.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/OVector.java deleted file mode 100644 index 49666e3c..00000000 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/OVector.java +++ /dev/null @@ -1,282 +0,0 @@ -/** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 - * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign - * http://cogcomp.cs.illinois.edu/ - */ -package edu.illinois.cs.cogcomp.lbjava.util; - -import java.util.Arrays; -import java.util.Comparator; - - -/** - * This class implements an expandable array of objects that should be faster - * than java's Vector. - * - * @author Nick Rizzolo - **/ -public class OVector implements Cloneable, java.io.Serializable -{ - /** The default capacity of a vector upon first construction. */ - protected static final int defaultCapacity = 8; - - /** The elements of the vector. 
*/ - protected Object[] vector; - /** The number of elements in the vector. */ - protected int size; - - - /** - * Constructs a new vector with capacity equal to {@link #defaultCapacity}. - **/ - public OVector() { this(defaultCapacity); } - - /** - * Constructs a new vector with the specified capacity. - * - * @param c The initial capacity for the new vector. - **/ - public OVector(int c) { vector = new Object[Math.max(defaultCapacity, c)]; } - - /** - * Constructs a new vector using the specified array as a starting point. - * - * @param v The initial array. - **/ - public OVector(Object[] v) { - if (v.length == 0) vector = new Object[defaultCapacity]; - else { - vector = v; - size = vector.length; - } - } - - /** - * Constructs a copy of a vector starting with capacity equal to that - * vector's size. - * - * @param v The vector to copy. - **/ - public OVector(OVector v) { - int N = v.size(); - if (N == 0) vector = new Object[defaultCapacity]; - else { - vector = new Object[N]; - size = N; - System.arraycopy(v.vector, 0, vector, 0, N); - } - } - - - /** - * Throws an exception when the specified index is negative. - * - * @param i The index. - * @throws ArrayIndexOutOfBoundsException When i < 0. - **/ - protected void boundsCheck(int i) { - if (i < 0) - throw - new ArrayIndexOutOfBoundsException( - "Attempted to access negative index of OVector."); - } - - - /** - * Retrieves the value stored at the specified index of the vector, or - * null if the vector isn't long enough. - * - * @param i The index of the value to retrieve. - * @return The retrieved value. - * @throws ArrayIndexOutOfBoundsException When i < 0. - **/ - public Object get(int i) { return get(i, null); } - - /** - * Retrieves the value stored at the specified index of the vector or - * d if the vector isn't long enough. - * - * @param i The index of the value to retrieve. - * @param d The default value. - * @return The retrieved value. - * @throws ArrayIndexOutOfBoundsException When i < 0. 
- **/ - public Object get(int i, Object d) { - boundsCheck(i); - return i < size ? vector[i] : d; - } - - - /** - * Sets the value at the specified index to the given value. - * - * @param i The index of the value to set. - * @param v The new value at that index. - * @return The value that used to be at index i. - * @throws ArrayIndexOutOfBoundsException When i < 0. - **/ - public Object set(int i, Object v) { return set(i, v, null); } - - /** - * Sets the value at the specified index to the given value. If the given - * index is greater than the vector's current size, the vector will expand - * to accomodate it. - * - * @param i The index of the value to set. - * @param v The new value at that index. - * @param d The default value for other new indexes that might get - * created. - * @return The value that used to be at index i. - * @throws ArrayIndexOutOfBoundsException When i < 0. - **/ - public Object set(int i, Object v, Object d) { - boundsCheck(i); - expandFor(i, d); - Object result = vector[i]; - vector[i] = v; - return result; - } - - - /** - * Adds the specified value on to the end of the vector, expanding its - * capacity as necessary. - * - * @param v The new value to appear last in the vector. - **/ - public void add(Object v) { - expandFor(size, null); - vector[size - 1] = v; - } - - - /** - * Adds all the values in the given vector to the end of this vector, - * expanding its capacity as necessary. - * - * @param v The new vector of values to appear at the end of this vector. - **/ - public void addAll(OVector v) { - expandFor(size + v.size - 1, null); - System.arraycopy(v.vector, 0, vector, size - v.size, v.size); - } - - - /** - * Removes the element at the specified index of the vector. - * - * @param i The index of the element to remove. - * @return The removed element. 
- **/ - public Object remove(int i) { - boundsCheck(i); - if (i >= size) - throw - new ArrayIndexOutOfBoundsException( - "LBJ: OVector: Can't remove element at index " + i - + " as it is larger than the size (" + size + ")"); - Object result = vector[i]; - for (int j = i + 1; j < size; ++j) - vector[j - 1] = vector[j]; - vector[--size] = null; - return result; - } - - - /** Returns the value of {@link #size}. */ - public int size() { return size; } - - - /** - * Sorts this vector in increasing order according to the given comparator. - * - * @param c A comparator for the elements of this vector. - **/ - public void sort(Comparator c) { Arrays.sort(vector, 0, size, c); } - - - /** - * Makes sure the capacity and size of the vector can accomodate the - * given index. The capacity of the vector is simply doubled until it can - * accomodate its size. - * - * @param index The index where a new value will be stored. - * @param d The default value for other new indexes that might get - * created. - **/ - protected void expandFor(int index, Object d) { - if (index < size) return; - int oldSize = size, capacity = vector.length; - size = index + 1; - if (capacity >= size) return; - while (capacity < size) capacity *= 2; - Object[] t = new Object[capacity]; - System.arraycopy(vector, 0, t, 0, oldSize); - if (d != null) Arrays.fill(t, oldSize, size, d); - vector = t; - } - - - /** - * Returns a new array of objectss containing the same data as - * this vector. - **/ - public Object[] toArray() { - Object[] result = new Object[size]; - System.arraycopy(vector, 0, result, 0, size); - return result; - } - - - /** - * Two OVectors are considered equal if they contain - * equivalent elements and have the same size. - **/ - public boolean equals(Object o) { - if (!(o instanceof OVector)) return false; - OVector v = (OVector) o; - return size == v.size && Arrays.equals(vector, v.vector); - } - - - /** A hash code based on the hash code of {@link #vector}. 
*/ - public int hashCode() { return vector.hashCode(); } - - - /** - * Returns a shallow clone of this vector; the vector itself is cloned, but - * the element objects aren't. - **/ - public Object clone() { - OVector clone = null; - - try { clone = (OVector) super.clone(); } - catch (Exception e) { - System.err.println("Error cloning " + getClass().getName() + ":"); - e.printStackTrace(); - System.exit(1); - } - - clone.vector = (Object[]) vector.clone(); - return clone; - } - - - /** Returns a text representation of this vector. */ - public String toString() { - StringBuffer result = new StringBuffer(); - result.append("["); - for (int i = 0; i < size; ++i) { - result.append(vector[i]); - if (i + 1 < size) result.append(", "); - } - result.append("]"); - return result.toString(); - } -} - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/Sort.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/Sort.java deleted file mode 100644 index 34255dd7..00000000 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/Sort.java +++ /dev/null @@ -1,133 +0,0 @@ -/** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 - * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign - * http://cogcomp.cs.illinois.edu/ - */ -package edu.illinois.cs.cogcomp.lbjava.util; - - -/** - * This class provides methods for sorting primitive arrays with user - * specified comparators. - * - * @author Nick Rizzolo - **/ -public class Sort -{ - /** - * Sorts the given array of ints according to the given - * comparator. - * - * @param x An array of values. - * @param c The comparator used to compare the values. 
- **/ - public static void sort(int[] x, IntComparator c) { - quickSort(x, 0, x.length, c); - } - - - /** - * Sorts the specified portion (including fromIndex and - * excluding toIndex) of the given array of ints - * according to the given comparator. - * - * @param x An array of values. - * @param fromIndex The index of the first element to be sorted. - * @param toIndex One past the index of the last element to be sorted. - * @param c The comparator used to compare the values. - **/ - public static void sort(int[] x, int fromIndex, int toIndex, - IntComparator c) { - quickSort(x, fromIndex, toIndex, c); - } - - - /** - * Sorts the specified portion of the given array of ints - * according to the given comparator. - * - * @param x An array of values. - * @param fromIndex The index of the first element to sort. - * @param toIndex One past the index of the last element to sort. - * @param cmp The comparator used to compare the values. - **/ - private static void quickSort(int[] x, int fromIndex, int toIndex, - IntComparator cmp) { - if (toIndex - fromIndex < 7) { - for (int i = fromIndex + 1; i < toIndex; ++i) - for (int j = i; j > fromIndex && cmp.compare(x[j - 1], x[j]) > 0; --j) - swap(x, j, j - 1); - return; - } - - swap(x, fromIndex, getMedianIndex(x, fromIndex, toIndex, cmp)); - int median = x[fromIndex]; - - int i = fromIndex + 1, j = toIndex - 1; - while (j >= i) { - while (i <= j && cmp.compare(x[i], median) <= 0) ++i; - while (j >= i && cmp.compare(x[j], median) >= 0) --j; - if (j > i) swap(x, i++, j--); - } - - swap(x, fromIndex, i - 1); - - if (i - 2 > fromIndex) quickSort(x, fromIndex, i - 1, cmp); - if (toIndex - i > 1) quickSort(x, i, toIndex, cmp); - } - - - /** - * Swaps the element at x[i1] with the element at - * x[i2]. - * - * @param i1 One index involved in the swap. - * @param i2 The other index involved in the swap. 
- **/ - private static void swap(int x[], int i1, int i2) { - int t = x[i1]; - x[i1] = x[i2]; - x[i2] = t; - } - - - /** - * Picks three elements from the array and finds the median value according - * to the given comparator. - * - * @param x The array. - * @param fromIndex The index of the first element to sort. - * @param toIndex One past the index of the last element to sort. - * @param cmp The comparator. - * @return The median of the three selected values. - **/ - private static int getMedianIndex(int[] x, int fromIndex, int toIndex, - IntComparator cmp) { - int last = toIndex - 1; - int m = (fromIndex + toIndex) / 2; - return - cmp.compare(x[fromIndex], x[last]) < 0 - ? (cmp.compare(x[last], x[m]) < 0 - ? last : cmp.compare(x[fromIndex], x[m]) > 0 ? fromIndex : m) - : (cmp.compare(x[fromIndex], x[m]) < 0 - ? fromIndex : cmp.compare(x[last], x[m]) > 0 ? last : m); - } - - - /** - * Allows a user to implement their own comparison function for integers. - * - * @author Nick Rizzolo - **/ - public static interface IntComparator - { - /** The comparison function. */ - public int compare(int i1, int i2); - } -} - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/StudentT.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/StudentT.java index 00f0c280..e4acd5ef 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/StudentT.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/StudentT.java @@ -1,331 +1,329 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.util; /** - * A collection of statistical methods supporting computations related to the - * Student's T distribution. - * - * - * - * - * - * - * - * - * - * - * - * - * - * - *
Date:2002 as part of Fmath
Amended: - * 12 May 2003 Statistics separated out from Fmath as a new class - *
Update: - * 18 June 2005, 5 January 2006, 25 April 2006, 12, 21 November 2006, - * 4 December 2006 (renaming of cfd and pdf methods - older version - * also retained), 31 December 2006, March 2007, 14 April 2007 - *
- * - *

Documentation

- *

See Michael Thomas Flanagan's Java library on-line web page:
- * http://www.ee.ucl.ac.uk/~mflanaga/java/Stat.html - * http://www.ee.ucl.ac.uk/~mflanaga/java/ - * - *

Copyright © April 2004, June 2005, January 2006, December 2006, - * April 2007 - * - *

Permission to Copy

- *

Permission to use, copy and modify this software and its documentation - * for NON-COMMERCIAL purposes is granted, without fee, provided that an - * acknowledgement to the author, Michael Thomas Flanagan at - * www.ee.ucl.ac.uk/~mflanaga, - * appears in all copies. - * - *

Dr. Michael Thomas Flanagan makes no representations about the - * suitability or fitness of the software for any or for a particular - * purpose. Michael Thomas Flanagan shall not be liable for any damages - * suffered as a result of using, modifying or distributing this software or - * its derivatives. - * - * @author Dr. Michael Thomas Flanagan + * A collection of statistical methods supporting computations related to the Student's T + * distribution. + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
Date:2002 as part of Fmath
Amended: + * 12 May 2003 Statistics separated out from Fmath as a new class
Update: + * 18 June 2005, 5 January 2006, 25 April 2006, 12, 21 November 2006, 4 December 2006 (renaming of + * cfd and pdf methods - older version also retained), 31 December 2006, March 2007, 14 April 2007
+ * + *

Documentation

+ *

+ * See Michael Thomas Flanagan's Java library on-line web page:
+ * http://www.ee.ucl.ac.uk/~mflanaga + * /java/Stat.html http://www.ee.ucl.ac.uk/~mflanaga/java/ + * + *

+ * Copyright © April 2004, June 2005, January 2006, December 2006, April 2007 + * + *

Permission to Copy

+ *

+ * Permission to use, copy and modify this software and its documentation for NON-COMMERCIAL + * purposes is granted, without fee, provided that an acknowledgement to the author, Michael Thomas + * Flanagan at www.ee.ucl.ac.uk/~mflanaga, appears in all copies. + * + *

+ * Dr. Michael Thomas Flanagan makes no representations about the suitability or fitness of the + * software for any or for a particular purpose. Michael Thomas Flanagan shall not be liable for any + * damages suffered as a result of using, modifying or distributing this software or its + * derivatives. + * + * @author Dr. Michael Thomas Flanagan **/ -public class StudentT -{ - /** - * A small number close to the smallest representable floating point - * number. - **/ - public static final double FPMIN = 1e-300; - /** Lanczos Gamma Function approximation - N (number of coefficients -1) */ - private static int lgfN = 6; - /** Lanczos Gamma Function approximation - Coefficients */ - private static double[] lgfCoeff = {1.000000000190015, 76.18009172947146, -86.50532032941677, 24.01409824083091, -1.231739572450155, 0.1208650973866179E-2, -0.5395239384953E-5}; - /** Lanczos Gamma Function approximation - small gamma */ - private static double lgfGamma = 5.0; +public class StudentT { + /** + * A small number close to the smallest representable floating point number. + **/ + public static final double FPMIN = 1e-300; + /** Lanczos Gamma Function approximation - N (number of coefficients -1) */ + private static int lgfN = 6; + /** Lanczos Gamma Function approximation - Coefficients */ + private static double[] lgfCoeff = {1.000000000190015, 76.18009172947146, -86.50532032941677, + 24.01409824083091, -1.231739572450155, 0.1208650973866179E-2, -0.5395239384953E-5}; + /** Lanczos Gamma Function approximation - small gamma */ + private static double lgfGamma = 5.0; - /** returns -1 if x < 0 else returns 1 (double version) */ - public static double sign(double x){ - if (x<0.0){ - return -1.0; - } - else{ - return 1.0; + /** returns -1 if x < 0 else returns 1 (double version) */ + public static double sign(double x) { + if (x < 0.0) { + return -1.0; + } else { + return 1.0; + } } - } - /** - * factorial of n. 
Argument is of type double but must be, numerically, an - * integer factorial returned as double but is, numerically, should be an - * integer numerical rounding may makes this an approximation after n = 21 - **/ - public static double factorial(double n){ - if(n<0 || (n-Math.floor(n))!=0)throw new IllegalArgumentException("\nn must be a positive integer\nIs a Gamma funtion [Fmath.gamma(x)] more appropriate?"); - double f = 1.0D; - double iCount = 2.0D; - while(iCount<=n){ - f*=iCount; - iCount += 1.0D; + /** + * factorial of n. Argument is of type double but must be, numerically, an integer factorial + * returned as double but is, numerically, should be an integer numerical rounding may makes + * this an approximation after n = 21 + **/ + public static double factorial(double n) { + if (n < 0 || (n - Math.floor(n)) != 0) + throw new IllegalArgumentException( + "\nn must be a positive integer\nIs a Gamma funtion [Fmath.gamma(x)] more appropriate?"); + double f = 1.0D; + double iCount = 2.0D; + while (iCount <= n) { + f *= iCount; + iCount += 1.0D; + } + return f; } - return f; - } - /** - * log to base e of the factorial of n. Argument is of type double but - * must be, numerically, an integer log[e](factorial) returned as double - * numerical rounding may makes this an approximation - **/ - public static double logFactorial(double n){ - if(n<0 || (n-Math.floor(n))!=0)throw new IllegalArgumentException("\nn must be a positive integer\nIs a Gamma funtion [Fmath.gamma(x)] more appropriate?"); - double f = 0.0D; - double iCount = 2.0D; - while(iCount<=n){ - f+=Math.log(iCount); - iCount += 1.0D; + /** + * log to base e of the factorial of n. 
Argument is of type double but must be, numerically, an + * integer log[e](factorial) returned as double numerical rounding may makes this an + * approximation + **/ + public static double logFactorial(double n) { + if (n < 0 || (n - Math.floor(n)) != 0) + throw new IllegalArgumentException( + "\nn must be a positive integer\nIs a Gamma funtion [Fmath.gamma(x)] more appropriate?"); + double f = 0.0D; + double iCount = 2.0D; + while (iCount <= n) { + f += Math.log(iCount); + iCount += 1.0D; + } + return f; } - return f; - } - /** Gamma function, Lanczos approximation (6 terms) */ - public static double gamma(double x){ + /** Gamma function, Lanczos approximation (6 terms) */ + public static double gamma(double x) { - double xcopy = x; - double first = x + lgfGamma + 0.5; - double second = lgfCoeff[0]; - double fg = 0.0D; + double xcopy = x; + double first = x + lgfGamma + 0.5; + double second = lgfCoeff[0]; + double fg = 0.0D; - if(x>=0.0){ - if(x>=1.0D && x-(int)x==0.0D){ - fg = factorial(x)/x; - } - else{ - first = Math.pow(first, x + 0.5)*Math.exp(-first); - for(int i=1; i<=lgfN; i++)second += lgfCoeff[i]/++xcopy; - fg = first*Math.sqrt(2.0*Math.PI)*second/x; - } - } - else{ - fg = -Math.PI/(x*gamma(-x)*Math.sin(Math.PI*x)); + if (x >= 0.0) { + if (x >= 1.0D && x - (int) x == 0.0D) { + fg = factorial(x) / x; + } else { + first = Math.pow(first, x + 0.5) * Math.exp(-first); + for (int i = 1; i <= lgfN; i++) + second += lgfCoeff[i] / ++xcopy; + fg = first * Math.sqrt(2.0 * Math.PI) * second / x; + } + } else { + fg = -Math.PI / (x * gamma(-x) * Math.sin(Math.PI * x)); + } + return fg; } - return fg; - } - /** - * log to base e of the Gamma function, Lanczos approximation (6 terms). - * Retained for backward compatibility. - **/ - public static double logGamma(double x){ - double xcopy = x; - double fg = 0.0D; - double first = x + lgfGamma + 0.5; - double second = lgfCoeff[0]; + /** + * log to base e of the Gamma function, Lanczos approximation (6 terms). 
Retained for backward + * compatibility. + **/ + public static double logGamma(double x) { + double xcopy = x; + double fg = 0.0D; + double first = x + lgfGamma + 0.5; + double second = lgfCoeff[0]; - if(x>=0.0){ - if(x>=1.0 && x-(int)x==0.0){ - fg = logFactorial(x)-Math.log(x); - } - else{ - first -= (x + 0.5)*Math.log(first); - for(int i=1; i<=lgfN; i++)second += lgfCoeff[i]/++xcopy; - fg = Math.log(Math.sqrt(2.0*Math.PI)*second/x) - first; - } - } - else{ - fg = Math.PI/(gamma(1.0D-x)*Math.sin(Math.PI*x)); + if (x >= 0.0) { + if (x >= 1.0 && x - (int) x == 0.0) { + fg = logFactorial(x) - Math.log(x); + } else { + first -= (x + 0.5) * Math.log(first); + for (int i = 1; i <= lgfN; i++) + second += lgfCoeff[i] / ++xcopy; + fg = Math.log(Math.sqrt(2.0 * Math.PI) * second / x) - first; + } + } else { + fg = Math.PI / (gamma(1.0D - x) * Math.sin(Math.PI * x)); - if(fg!=1.0/0.0 && fg!=-1.0/0.0){ - if(fg<0){ - throw new IllegalArgumentException("\nThe gamma function is negative"); - } - else{ - fg = Math.log(fg); + if (fg != 1.0 / 0.0 && fg != -1.0 / 0.0) { + if (fg < 0) { + throw new IllegalArgumentException("\nThe gamma function is negative"); + } else { + fg = Math.log(fg); + } + } } - } + return fg; } - return fg; - } - /** - * Incomplete fraction summation used in the method - * {@link #regularisedBetaFunction(double,double,double)}. modified - * Lentz's method - **/ - public static double contFract(double a, double b, double x){ - int maxit = 500; - double eps = 3.0e-7; - double aplusb = a + b; - double aplus1 = a + 1.0D; - double aminus1 = a - 1.0D; - double c = 1.0D; - double d = 1.0D - aplusb*x/aplus1; - if(Math.abs(d)maxit){ - test=false; - System.out.println("Maximum number of iterations ("+maxit+") exceeded in Stat.contFract in Stat.incomplete Beta"); - } + /** + * Incomplete fraction summation used in the method + * {@link #regularisedBetaFunction(double,double,double)}. 
modified Lentz's method + **/ + public static double contFract(double a, double b, double x) { + int maxit = 500; + double eps = 3.0e-7; + double aplusb = a + b; + double aplus1 = a + 1.0D; + double aminus1 = a - 1.0D; + double c = 1.0D; + double d = 1.0D - aplusb * x / aplus1; + if (Math.abs(d) < FPMIN) + d = FPMIN; + d = 1.0D / d; + double h = d; + double aa = 0.0D; + double del = 0.0D; + int i = 1, i2 = 0; + boolean test = true; + while (test) { + i2 = 2 * i; + aa = i * (b - i) * x / ((aminus1 + i2) * (a + i2)); + d = 1.0D + aa * d; + if (Math.abs(d) < FPMIN) + d = FPMIN; + c = 1.0D + aa / c; + if (Math.abs(c) < FPMIN) + c = FPMIN; + d = 1.0D / d; + h *= d * c; + aa = -(a + i) * (aplusb + i) * x / ((a + i2) * (aplus1 + i2)); + d = 1.0D + aa * d; + if (Math.abs(d) < FPMIN) + d = FPMIN; + c = 1.0D + aa / c; + if (Math.abs(c) < FPMIN) + c = FPMIN; + d = 1.0D / d; + del = d * c; + h *= del; + i++; + if (Math.abs(del - 1.0D) < eps) + test = false; + if (i > maxit) { + test = false; + System.out.println("Maximum number of iterations (" + maxit + + ") exceeded in Stat.contFract in Stat.incomplete Beta"); + } + } + return h; } - return h; - } - /** - * Regularised Incomplete Beta function. Continued Fraction approximation - * (see Numerical recipies for details of method) - **/ - public static double regularisedBetaFunction(double z, double w, double x){ - if(x<0.0D || x>1.0D)throw new IllegalArgumentException("Argument x, "+x+", must be lie between 0 and 1 (inclusive)"); - double ibeta = 0.0D; - if(x==0.0D){ - ibeta=0.0D; - } - else{ - if(x==1.0D){ - ibeta=1.0D; - } - else{ - // Term before continued fraction - ibeta = Math.exp(logGamma(z+w) - logGamma(z) - logGamma(w) + z*Math.log(x) + w*Math.log(1.0D-x)); - // Continued fraction - if(x < (z+1.0D)/(z+w+2.0D)){ - ibeta = ibeta*contFract(z, w, x)/z; - } - else{ - // Use symmetry relationship - ibeta = 1.0D - ibeta*contFract(w, z, 1.0D-x)/w; + /** + * Regularised Incomplete Beta function. 
Continued Fraction approximation (see Numerical + * recipies for details of method) + **/ + public static double regularisedBetaFunction(double z, double w, double x) { + if (x < 0.0D || x > 1.0D) + throw new IllegalArgumentException("Argument x, " + x + + ", must be lie between 0 and 1 (inclusive)"); + double ibeta = 0.0D; + if (x == 0.0D) { + ibeta = 0.0D; + } else { + if (x == 1.0D) { + ibeta = 1.0D; + } else { + // Term before continued fraction + ibeta = + Math.exp(logGamma(z + w) - logGamma(z) - logGamma(w) + z * Math.log(x) + w + * Math.log(1.0D - x)); + // Continued fraction + if (x < (z + 1.0D) / (z + w + 2.0D)) { + ibeta = ibeta * contFract(z, w, x) / z; + } else { + // Use symmetry relationship + ibeta = 1.0D - ibeta * contFract(w, z, 1.0D - x) / w; + } + } } - } + return ibeta; } - return ibeta; - } - // STUDENT'S T DISTRIBUTION + // STUDENT'S T DISTRIBUTION - /** Returns the Student's t cumulative distribution function probability */ - public static double studentTcdf(double tValue, int df){ - double ddf = (double)df; - double x = ddf/(ddf+tValue*tValue); - return 0.5D*(1.0D + (regularisedBetaFunction(ddf/2.0D, 0.5D, 1) - regularisedBetaFunction(ddf/2.0D, 0.5D, x))*sign(tValue)); - } + /** Returns the Student's t cumulative distribution function probability */ + public static double studentTcdf(double tValue, int df) { + double ddf = (double) df; + double x = ddf / (ddf + tValue * tValue); + return 0.5D * (1.0D + (regularisedBetaFunction(ddf / 2.0D, 0.5D, 1) - regularisedBetaFunction( + ddf / 2.0D, 0.5D, x)) * sign(tValue)); + } - /** - * Computes the multiplier for the standard error of the mean when finding - * a (1 - alpha) * 100% confidence interval. - * - * @param df The degrees of freedom. - * @param alpha The fraction of the distribution to leave outside the - * interval. - * @return m such that mu +- m s represents a - * (1 - alpha) * 100% confidence interval, where mu - * is the sample mean and s is the sample's standard - * deviation. 
- **/ - public static double tTable(int df, double alpha) { - double c = 1 - alpha / 2.0; - double max = 700, min = -700; - boolean same = false; + /** + * Computes the multiplier for the standard error of the mean when finding a (1 - alpha) * + * 100% confidence interval. + * + * @param df The degrees of freedom. + * @param alpha The fraction of the distribution to leave outside the interval. + * @return m such that mu +- m s represents a (1 - alpha) * 100% confidence + * interval, where mu is the sample mean and s is the sample's standard + * deviation. + **/ + public static double tTable(int df, double alpha) { + double c = 1 - alpha / 2.0; + double max = 700, min = -700; + boolean same = false; - while (!same) { - double mid = (max + min) / 2.0; - if (studentTcdf(mid, df) < c) { - same = min == mid; - min = mid; - } - else { - same = max == mid; - max = mid; - } + while (!same) { + double mid = (max + min) / 2.0; + if (studentTcdf(mid, df) < c) { + same = min == mid; + min = mid; + } else { + same = max == mid; + max = mid; + } + } + + return (max + min) / 2.0; } - return (max + min) / 2.0; - } + /** + * Computes the confidence interval of the specified precision over a set of data points. + * + * @param x The data points. + * @param alpha The fraction of the distribution to leave outside the interval. + * @return An array containing the mean of the elements in x and half of the size + * of the confidence interval over x. If this array is named r + * , then the confidence interval can be stated as r[0] +/- r[1]. + **/ + public static double[] confidenceInterval(double[] x, double alpha) { + double mean = 0; + // Compute the average. + for (int i = 0; i < x.length; ++i) + mean += x[i]; + mean /= (double) x.length; - /** - * Computes the confidence interval of the specified precision over a set - * of data points. - * - * @param x The data points. - * @param alpha The fraction of the distribution to leave outside the - * interval. 
- * @return An array containing the mean of the elements in x - * and half of the size of the confidence interval over - * x. If this array is named r, then the - * confidence interval can be stated as r[0] +/- r[1]. - **/ - public static double[] confidenceInterval(double[] x, double alpha) { - double mean = 0; - // Compute the average. - for (int i = 0; i < x.length; ++i) mean += x[i]; - mean /= (double) x.length; + // Compute standard deviation and confidence interval. + // s: the standard deviation of the testing results + double s = 0.0; + for (int i = 0; i < x.length; ++i) { + double d = x[i] - mean; + s += d * d; + } + s /= (double) (x.length - 1); + s = Math.sqrt(s); - // Compute standard deviation and confidence interval. - // s: the standard deviation of the testing results - double s = 0.0; - for (int i = 0; i < x.length; ++i) { - double d = x[i] - mean; - s += d * d; + // sem: estimated standard error of the mean + double sem = s / Math.sqrt(x.length); + double t = tTable(x.length - 1, alpha); + return new double[] {mean, t * sem}; } - s /= (double) (x.length - 1); - s = Math.sqrt(s); - - // sem: estimated standard error of the mean - double sem = s / Math.sqrt(x.length); - double t = tTable(x.length - 1, alpha); - return new double[]{ mean, t * sem }; - } } - diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/TableFormat.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/TableFormat.java index 2766fb58..d7c0cbc7 100644 --- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/TableFormat.java +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/util/TableFormat.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. 
See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.util; @@ -15,734 +12,687 @@ /** - * A library of routines for taking tabular data and producing a human - * readable string representation of it. The resulting string(s) can be pure - * ASCII or latex compatible. - * - * @author Nick Rizzolo + * A library of routines for taking tabular data and producing a human readable string + * representation of it. The resulting string(s) can be pure ASCII or latex compatible. + * + * @author Nick Rizzolo **/ -public class TableFormat -{ - /** - * The default number of significant digits to which table entries will be - * rounded. - **/ - private static final int defaultSignificantDigits = 3; - - - /** - * Simply converts the type of the given matrix from double to - * Double. - * - * @param m The matrix. - * @return The type converted matrix. - **/ - public static Double[][] wrapDouble(double[][] m) { - Double[][] result = new Double[m.length][]; - - for (int i = 0; i < m.length; ++i) - if (m[i] != null) { - result[i] = new Double[m[i].length]; - for (int j = 0; j < m[i].length; ++j) - result[i][j] = new Double(m[i][j]); - } - - return result; - } - - - /** - * Simply prints each element of the given array of strings to the given - * stream in its own line. - * - * @param out The stream. - * @param table The strings. - **/ - public static void printTable(PrintStream out, String[] table) { - for (int i = 0; i < table.length; ++i) out.println(table[i]); - } - - - /** - * Formats the given data into an ASCII table and prints it to the given - * stream. All non-null entries will be rounded to three - * significant digits after the decimal place. - * - * @param out The stream. 
- * @param data The numerical data. - **/ - public static void printTableFormat(PrintStream out, double[][] data) { - printTable(out, tableFormat(data)); - } - - - /** - * Formats the given data into an ASCII table. All non-null - * entries will be rounded to three significant digits after the decimal - * place. - * - * @param data The numerical data. - * @return The string respresentation of the data. - **/ - public static String[] tableFormat(double[][] data) { - if (data == null || data.length == 0) return new String[0]; - return tableFormat(null, null, wrapDouble(data)); - } - - - /** - * Formats the given data into an ASCII table and prints it to the given - * stream. All non-null entries will be rounded to three - * significant digits after the decimal place. - * - * @param out The stream. - * @param data The numerical data. - **/ - public static void printTableFormat(PrintStream out, Double[][] data) { - printTable(out, tableFormat(data)); - } - - - /** - * Formats the given data into an ASCII table. All non-null - * entries will be rounded to three significant digits after the decimal - * place. - * - * @param data The numerical data. - * @return The string respresentation of the data. - **/ - public static String[] tableFormat(Double[][] data) { - if (data == null || data.length == 0) return new String[0]; - return tableFormat(null, null, data); - } - - - /** - * Formats the given data into an ASCII table and prints it to the given - * stream. All non-null entries will be rounded to three - * significant digits after the decimal place. - * - * @param out The stream. - * @param columnLabels One label for each column in the output, starting - * with the column of row labels if one exists. If - * null, no column label row will be - * returned. - * @param rowLabels One label for each non-null row in - * data. If null, no row - * label column will be returned. - * @param data The numerical data. 
- **/ - public static void printTableFormat(PrintStream out, String[] columnLabels, - String[] rowLabels, double[][] data) { - printTable(out, tableFormat(columnLabels, rowLabels, data)); - } - - - /** - * Formats the given data into an ASCII table. All non-null - * entries will be rounded to three significant digits after the decimal - * place. - * - * @param columnLabels One label for each column in the output, starting - * with the column of row labels if one exists. If - * null, no column label row will be - * returned. - * @param rowLabels One label for each non-null row in - * data. If null, no row - * label column will be returned. - * @param data The numerical data. - * @return The string respresentation of the data. - **/ - public static String[] tableFormat(String[] columnLabels, - String[] rowLabels, double[][] data) { - if (data == null || data.length == 0) return new String[0]; - return tableFormat(columnLabels, rowLabels, wrapDouble(data)); - } - - - /** - * Formats the given data into an ASCII table and prints it to the given - * stream. Any entry in any row of data set to - * null will become a single dash in the output. All - * non-null entries will be rounded to three significant - * digits after the decimal place. - * - * @param out The stream. - * @param columnLabels One label for each column in the output, starting - * with the column of row labels if one exists. If - * null, no column label row will be - * returned. - * @param rowLabels One label for each non-null row in - * data. If null, no row - * label column will be returned. - * @param data The numerical data. - **/ - public static void printTableFormat(PrintStream out, String[] columnLabels, - String[] rowLabels, Double[][] data) { - printTable(out, tableFormat(columnLabels, rowLabels, data)); - } - - - /** - * Formats the given data into an ASCII table. Any entry in any row of - * data set to null will become a single dash in - * the output. 
All non-null entries will be rounded to three - * significant digits after the decimal place. - * - * @param columnLabels One label for each column in the output, starting - * with the column of row labels if one exists. If - * null, no column label row will be - * returned. - * @param rowLabels One label for each non-null row in - * data. If null, no row - * label column will be returned. - * @param data The numerical data. - * @return The string respresentation of the data. - **/ - public static String[] tableFormat(String[] columnLabels, - String[] rowLabels, Double[][] data) { - if (data == null || data.length == 0) return new String[0]; - int dataColumns = 0; - - for (int i = 0; i < data.length; ++i) { - int current = data[i] == null ? 0 : data[i].length; - dataColumns = Math.max(dataColumns, current); +public class TableFormat { + /** + * The default number of significant digits to which table entries will be rounded. + **/ + private static final int defaultSignificantDigits = 3; + + + /** + * Simply converts the type of the given matrix from double to Double. + * + * @param m The matrix. + * @return The type converted matrix. + **/ + public static Double[][] wrapDouble(double[][] m) { + Double[][] result = new Double[m.length][]; + + for (int i = 0; i < m.length; ++i) + if (m[i] != null) { + result[i] = new Double[m[i].length]; + for (int j = 0; j < m[i].length; ++j) + result[i][j] = new Double(m[i][j]); + } + + return result; + } + + + /** + * Simply prints each element of the given array of strings to the given stream in its own line. + * + * @param out The stream. + * @param table The strings. + **/ + public static void printTable(PrintStream out, String[] table) { + for (int i = 0; i < table.length; ++i) + out.println(table[i]); + } + + + /** + * Formats the given data into an ASCII table and prints it to the given stream. All non- + * null entries will be rounded to three significant digits after the decimal + * place. + * + * @param out The stream. 
+ * @param data The numerical data. + **/ + public static void printTableFormat(PrintStream out, double[][] data) { + printTable(out, tableFormat(data)); + } + + + /** + * Formats the given data into an ASCII table. All non-null entries will be rounded + * to three significant digits after the decimal place. + * + * @param data The numerical data. + * @return The string respresentation of the data. + **/ + public static String[] tableFormat(double[][] data) { + if (data == null || data.length == 0) + return new String[0]; + return tableFormat(null, null, wrapDouble(data)); } - if (dataColumns == 0) return new String[0]; - - int[] sigDigits = new int[dataColumns]; - for (int i = 0; i < dataColumns; ++i) - sigDigits[i] = defaultSignificantDigits; - return tableFormat(columnLabels, rowLabels, data, sigDigits); - } - - - /** - * Formats the given data into an ASCII table and prints it to the given - * stream. Any entry in any row of data set to - * null will become a single dash in the output. - * - * @param out The stream. - * @param columnLabels One label for each column in the output, starting - * with the column of row labels if one exists. If - * null, no column label row will be - * returned. - * @param rowLabels One label for each non-null row in - * data. If null, no row - * label column will be returned. - * @param data The numerical data. - * @param sigDigits Significant digits specified on a column by column - * basis, only for data columns. - **/ - public static void printTableFormat(PrintStream out, String[] columnLabels, - String[] rowLabels, double[][] data, - int[] sigDigits) { - printTable(out, tableFormat(columnLabels, rowLabels, data, sigDigits)); - } - - - /** - * Formats the given data into an ASCII table. Any entry in any row of - * data set to null will become a single dash in - * the output. - * - * @param columnLabels One label for each column in the output, starting - * with the column of row labels if one exists. 
If - * null, no column label row will be - * returned. - * @param rowLabels One label for each non-null row in - * data. If null, no row - * label column will be returned. - * @param data The numerical data. - * @param sigDigits Significant digits specified on a column by column - * basis, only for data columns. - * @return The string respresentation of the data. - **/ - public static String[] tableFormat(String[] columnLabels, - String[] rowLabels, double[][] data, - int[] sigDigits) { - if (data == null || data.length == 0) return new String[0]; - return tableFormat(columnLabels, rowLabels, wrapDouble(data), sigDigits); - } - - - /** - * Formats the given data into an ASCII table and prints it to the given - * stream. Any entry in any row of data set to - * null will become a single dash in the output. - * - * @param out The stream. - * @param columnLabels One label for each column in the output, starting - * with the column of row labels if one exists. If - * null, no column label row will be - * returned. - * @param rowLabels One label for each non-null row in - * data. If null, no row - * label column will be returned. - * @param data The numerical data. - * @param sigDigits Significant digits specified on a column by column - * basis, only for data columns. - **/ - public static void printTableFormat(PrintStream out, String[] columnLabels, - String[] rowLabels, Double[][] data, - int[] sigDigits) { - printTable(out, tableFormat(columnLabels, rowLabels, data, sigDigits)); - } - - - /** - * Formats the given data into an ASCII table. Any entry in any row of - * data set to null will become a single dash in - * the output. - * - * @param columnLabels One label for each column in the output, starting - * with the column of row labels if one exists. If - * null, no column label row will be - * returned. - * @param rowLabels One label for each non-null row in - * data. If null, no row - * label column will be returned. - * @param data The numerical data. 
- * @param sigDigits Significant digits specified on a column by column - * basis, only for data columns. - * @return The string respresentation of the data. - **/ - public static String[] tableFormat(String[] columnLabels, - String[] rowLabels, Double[][] data, - int[] sigDigits) { - return tableFormat(columnLabels, rowLabels, data, sigDigits, new int[0]); - } - - - /** - * Formats the given data into an ASCII table and prints it to the given - * stream. Any entry in any row of data set to - * null will become a single dash in the output. - * - * @param out The stream. - * @param columnLabels One label for each column in the output, starting - * with the column of row labels if one exists. If - * null, no column label row will be - * returned. - * @param rowLabels One label for each non-null row in - * data. If null, no row - * label column will be returned. - * @param data The numerical data. - * @param sigDigits Significant digits specified on a column by column - * basis, only for data columns. - * @param dashRows The indexes of rows in data which - * should be preceded by a row of dashes in the output. - **/ - public static void printTableFormat(PrintStream out, String[] columnLabels, - String[] rowLabels, double[][] data, - int[] sigDigits, int[] dashRows) { - printTable(out, - tableFormat(columnLabels, rowLabels, data, sigDigits, - dashRows)); - } - - - /** - * Formats the given data into an ASCII table. Any entry in any row of - * data set to null will become a single dash in - * the output. - * - * @param columnLabels One label for each column in the output, starting - * with the column of row labels if one exists. If - * null, no column label row will be - * returned. - * @param rowLabels One label for each non-null row in - * data. If null, no row - * label column will be returned. - * @param data The numerical data. - * @param sigDigits Significant digits specified on a column by column - * basis, only for data columns. 
- * @param dashRows The indexes of rows in data which - * should be preceded by a row of dashes in the output. - * @return The string respresentation of the data. - **/ - public static String[] tableFormat(String[] columnLabels, - String[] rowLabels, double[][] data, - int[] sigDigits, int[] dashRows) { - if (data == null || data.length == 0) return new String[0]; - return - tableFormat(columnLabels, rowLabels, wrapDouble(data), sigDigits, - dashRows); - } - - - /** - * Formats the given data into an ASCII table and prints it to the given - * stream. Any entry in any row of data set to - * null will become a single dash in the output. - * - * @param out The stream. - * @param columnLabels One label for each column in the output, starting - * with the column of row labels if one exists. If - * null, no column label row will be - * returned. - * @param rowLabels One label for each non-null row in - * data. If null, no row - * label column will be returned. - * @param data The numerical data. - * @param sigDigits Significant digits specified on a column by column - * basis, only for data columns. - * @param dashRows The indexes of rows in data which - * should be preceded by a row of dashes in the output. - **/ - public static void printTableFormat(PrintStream out, String[] columnLabels, - String[] rowLabels, Double[][] data, - int[] sigDigits, int[] dashRows) { - printTable(out, - tableFormat(columnLabels, rowLabels, data, sigDigits, - dashRows)); - } - - - /** - * Formats the given data into an ASCII table. Any entry in any row of - * data set to null will become a single dash in - * the output. - * - * @param columnLabels One label for each column in the output, starting - * with the column of row labels if one exists. If - * null, no column label row will be - * returned. - * @param rowLabels One label for each non-null row in - * data. If null, no row - * label column will be returned. - * @param data The numerical data. 
- * @param sigDigits Significant digits specified on a column by column - * basis, only for data columns. - * @param dashRows The indexes of rows in data which - * should be preceded by a row of dashes in the output. - * @return The string respresentation of the data. - **/ - public static String[] tableFormat(String[] columnLabels, - String[] rowLabels, Double[][] data, - int[] sigDigits, int[] dashRows) { - if (data == null || data.length == 0) return new String[0]; - if (sigDigits == null) return tableFormat(columnLabels, rowLabels, data); - - int dataColumns = 0; - - for (int i = 0; i < data.length; ++i) { - int current = data[i] == null ? 0 : data[i].length; - dataColumns = Math.max(dataColumns, current); + + /** + * Formats the given data into an ASCII table and prints it to the given stream. All non- + * null entries will be rounded to three significant digits after the decimal + * place. + * + * @param out The stream. + * @param data The numerical data. + **/ + public static void printTableFormat(PrintStream out, Double[][] data) { + printTable(out, tableFormat(data)); + } + + + /** + * Formats the given data into an ASCII table. All non-null entries will be rounded + * to three significant digits after the decimal place. + * + * @param data The numerical data. + * @return The string respresentation of the data. + **/ + public static String[] tableFormat(Double[][] data) { + if (data == null || data.length == 0) + return new String[0]; + return tableFormat(null, null, data); + } + + + /** + * Formats the given data into an ASCII table and prints it to the given stream. All non- + * null entries will be rounded to three significant digits after the decimal + * place. + * + * @param out The stream. + * @param columnLabels One label for each column in the output, starting with the column of row + * labels if one exists. If null, no column label row will be returned. + * @param rowLabels One label for each non-null row in data. 
If + * null, no row label column will be returned. + * @param data The numerical data. + **/ + public static void printTableFormat(PrintStream out, String[] columnLabels, String[] rowLabels, + double[][] data) { + printTable(out, tableFormat(columnLabels, rowLabels, data)); + } + + + /** + * Formats the given data into an ASCII table. All non-null entries will be rounded + * to three significant digits after the decimal place. + * + * @param columnLabels One label for each column in the output, starting with the column of row + * labels if one exists. If null, no column label row will be returned. + * @param rowLabels One label for each non-null row in data. If + * null, no row label column will be returned. + * @param data The numerical data. + * @return The string respresentation of the data. + **/ + public static String[] tableFormat(String[] columnLabels, String[] rowLabels, double[][] data) { + if (data == null || data.length == 0) + return new String[0]; + return tableFormat(columnLabels, rowLabels, wrapDouble(data)); + } + + + /** + * Formats the given data into an ASCII table and prints it to the given stream. Any entry in + * any row of data set to null will become a single dash in the + * output. All non-null entries will be rounded to three significant digits after + * the decimal place. + * + * @param out The stream. + * @param columnLabels One label for each column in the output, starting with the column of row + * labels if one exists. If null, no column label row will be returned. + * @param rowLabels One label for each non-null row in data. If + * null, no row label column will be returned. + * @param data The numerical data. + **/ + public static void printTableFormat(PrintStream out, String[] columnLabels, String[] rowLabels, + Double[][] data) { + printTable(out, tableFormat(columnLabels, rowLabels, data)); + } + + + /** + * Formats the given data into an ASCII table. 
Any entry in any row of data set to + * null will become a single dash in the output. All non-null entries + * will be rounded to three significant digits after the decimal place. + * + * @param columnLabels One label for each column in the output, starting with the column of row + * labels if one exists. If null, no column label row will be returned. + * @param rowLabels One label for each non-null row in data. If + * null, no row label column will be returned. + * @param data The numerical data. + * @return The string respresentation of the data. + **/ + public static String[] tableFormat(String[] columnLabels, String[] rowLabels, Double[][] data) { + if (data == null || data.length == 0) + return new String[0]; + int dataColumns = 0; + + for (int i = 0; i < data.length; ++i) { + int current = data[i] == null ? 0 : data[i].length; + dataColumns = Math.max(dataColumns, current); + } + + if (dataColumns == 0) + return new String[0]; + + int[] sigDigits = new int[dataColumns]; + for (int i = 0; i < dataColumns; ++i) + sigDigits[i] = defaultSignificantDigits; + return tableFormat(columnLabels, rowLabels, data, sigDigits); + } + + + /** + * Formats the given data into an ASCII table and prints it to the given stream. Any entry in + * any row of data set to null will become a single dash in the + * output. + * + * @param out The stream. + * @param columnLabels One label for each column in the output, starting with the column of row + * labels if one exists. If null, no column label row will be returned. + * @param rowLabels One label for each non-null row in data. If + * null, no row label column will be returned. + * @param data The numerical data. + * @param sigDigits Significant digits specified on a column by column basis, only for + * data columns. 
+ **/ + public static void printTableFormat(PrintStream out, String[] columnLabels, String[] rowLabels, + double[][] data, int[] sigDigits) { + printTable(out, tableFormat(columnLabels, rowLabels, data, sigDigits)); + } + + + /** + * Formats the given data into an ASCII table. Any entry in any row of data set to + * null will become a single dash in the output. + * + * @param columnLabels One label for each column in the output, starting with the column of row + * labels if one exists. If null, no column label row will be returned. + * @param rowLabels One label for each non-null row in data. If + * null, no row label column will be returned. + * @param data The numerical data. + * @param sigDigits Significant digits specified on a column by column basis, only for + * data columns. + * @return The string respresentation of the data. + **/ + public static String[] tableFormat(String[] columnLabels, String[] rowLabels, double[][] data, + int[] sigDigits) { + if (data == null || data.length == 0) + return new String[0]; + return tableFormat(columnLabels, rowLabels, wrapDouble(data), sigDigits); } - if (dataColumns == 0) return new String[0]; - int columns = dataColumns; - if (rowLabels != null) ++columns; + /** + * Formats the given data into an ASCII table and prints it to the given stream. Any entry in + * any row of data set to null will become a single dash in the + * output. + * + * @param out The stream. + * @param columnLabels One label for each column in the output, starting with the column of row + * labels if one exists. If null, no column label row will be returned. + * @param rowLabels One label for each non-null row in data. If + * null, no row label column will be returned. + * @param data The numerical data. + * @param sigDigits Significant digits specified on a column by column basis, only for + * data columns. 
+ **/ + public static void printTableFormat(PrintStream out, String[] columnLabels, String[] rowLabels, + Double[][] data, int[] sigDigits) { + printTable(out, tableFormat(columnLabels, rowLabels, data, sigDigits)); + } + - if (sigDigits.length < dataColumns) { - int[] temp = new int[dataColumns]; - System.arraycopy(sigDigits, 0, temp, 0, sigDigits.length); - for (int i = sigDigits.length; i < dataColumns; ++i) - temp[i] = defaultSignificantDigits; - sigDigits = temp; + /** + * Formats the given data into an ASCII table. Any entry in any row of data set to + * null will become a single dash in the output. + * + * @param columnLabels One label for each column in the output, starting with the column of row + * labels if one exists. If null, no column label row will be returned. + * @param rowLabels One label for each non-null row in data. If + * null, no row label column will be returned. + * @param data The numerical data. + * @param sigDigits Significant digits specified on a column by column basis, only for + * data columns. + * @return The string respresentation of the data. + **/ + public static String[] tableFormat(String[] columnLabels, String[] rowLabels, Double[][] data, + int[] sigDigits) { + return tableFormat(columnLabels, rowLabels, data, sigDigits, new int[0]); } - int dataRows = data.length; - int rows = dataRows; - if (columnLabels != null) { - if (columnLabels.length < columns) { - String[] temp = new String[columns]; - System.arraycopy(columnLabels, 0, temp, - columns - columnLabels.length, columnLabels.length); - columnLabels = temp; - } + /** + * Formats the given data into an ASCII table and prints it to the given stream. Any entry in + * any row of data set to null will become a single dash in the + * output. + * + * @param out The stream. + * @param columnLabels One label for each column in the output, starting with the column of row + * labels if one exists. If null, no column label row will be returned. 
+ * @param rowLabels One label for each non-null row in data. If + * null, no row label column will be returned. + * @param data The numerical data. + * @param sigDigits Significant digits specified on a column by column basis, only for + * data columns. + * @param dashRows The indexes of rows in data which should be preceded by a row of + * dashes in the output. + **/ + public static void printTableFormat(PrintStream out, String[] columnLabels, String[] rowLabels, + double[][] data, int[] sigDigits, int[] dashRows) { + printTable(out, tableFormat(columnLabels, rowLabels, data, sigDigits, dashRows)); + } - for (int i = 0; i < columnLabels.length; ++i) - if (columnLabels[i] == null) columnLabels[i] = ""; - ++rows; + + /** + * Formats the given data into an ASCII table. Any entry in any row of data set to + * null will become a single dash in the output. + * + * @param columnLabels One label for each column in the output, starting with the column of row + * labels if one exists. If null, no column label row will be returned. + * @param rowLabels One label for each non-null row in data. If + * null, no row label column will be returned. + * @param data The numerical data. + * @param sigDigits Significant digits specified on a column by column basis, only for + * data columns. + * @param dashRows The indexes of rows in data which should be preceded by a row of + * dashes in the output. + * @return The string respresentation of the data. 
+ **/ + public static String[] tableFormat(String[] columnLabels, String[] rowLabels, double[][] data, + int[] sigDigits, int[] dashRows) { + if (data == null || data.length == 0) + return new String[0]; + return tableFormat(columnLabels, rowLabels, wrapDouble(data), sigDigits, dashRows); } - if (rowLabels != null) { - String[] temp = new String[dataRows]; - int length = Math.min(rowLabels.length, dataRows); - System.arraycopy(rowLabels, 0, temp, 0, length); - rowLabels = temp; - for (int i = 0; i < rowLabels.length; ++i) - if (rowLabels[i] == null) rowLabels[i] = ""; + /** + * Formats the given data into an ASCII table and prints it to the given stream. Any entry in + * any row of data set to null will become a single dash in the + * output. + * + * @param out The stream. + * @param columnLabels One label for each column in the output, starting with the column of row + * labels if one exists. If null, no column label row will be returned. + * @param rowLabels One label for each non-null row in data. If + * null, no row label column will be returned. + * @param data The numerical data. + * @param sigDigits Significant digits specified on a column by column basis, only for + * data columns. + * @param dashRows The indexes of rows in data which should be preceded by a row of + * dashes in the output. + **/ + public static void printTableFormat(PrintStream out, String[] columnLabels, String[] rowLabels, + Double[][] data, int[] sigDigits, int[] dashRows) { + printTable(out, tableFormat(columnLabels, rowLabels, data, sigDigits, dashRows)); } - return asciiTableFormat(columns, dataColumns, rows, dataRows, - columnLabels, rowLabels, data, sigDigits, - dashRows); - } - - - /** - * Formats the given data into an ASCII table. Any row of - * data set to null will become a row with one - * dash in each cell of the output table's row. Any entry in any row of - * data set to null will also become a single - * dash in the output. 
- * - * @param columns The total number of columns in the table. - * @param dataColumns The number of columns containing data in the table. - * @param rows The total number of rows in the table. - * @param dataRows The number of rows containing data in the table. - * @param columnLabels One label for each column in the output, starting - * with the column of row labels if one exists. If - * null, no column label row will be - * returned. - * @param rowLabels One label for each non-null row in - * data. If null, no row - * label column will be returned. - * @param data The numerical data. - * @param sigDigits Significant digits specified on a column by column - * basis, only for data columns. - * @param dashRows The indexes of rows in data which - * should be preceded by a row of dashes in the output. - * @return The string respresentation of the data. - **/ - protected static String[] asciiTableFormat( - int columns, int dataColumns, int rows, int dataRows, - String[] columnLabels, String[] rowLabels, Double[][] data, - int[] sigDigits, int[] dashRows) { - String[][] table = new String[rows][columns]; - int dataRowStart = 0; - int dataColumnStart = 0; - - if (columnLabels != null) { - dataRowStart = 1; - table[0] = columnLabels; + + /** + * Formats the given data into an ASCII table. Any entry in any row of data set to + * null will become a single dash in the output. + * + * @param columnLabels One label for each column in the output, starting with the column of row + * labels if one exists. If null, no column label row will be returned. + * @param rowLabels One label for each non-null row in data. If + * null, no row label column will be returned. + * @param data The numerical data. + * @param sigDigits Significant digits specified on a column by column basis, only for + * data columns. + * @param dashRows The indexes of rows in data which should be preceded by a row of + * dashes in the output. + * @return The string respresentation of the data. 
+ **/ + public static String[] tableFormat(String[] columnLabels, String[] rowLabels, Double[][] data, + int[] sigDigits, int[] dashRows) { + if (data == null || data.length == 0) + return new String[0]; + if (sigDigits == null) + return tableFormat(columnLabels, rowLabels, data); + + int dataColumns = 0; + + for (int i = 0; i < data.length; ++i) { + int current = data[i] == null ? 0 : data[i].length; + dataColumns = Math.max(dataColumns, current); + } + + if (dataColumns == 0) + return new String[0]; + + int columns = dataColumns; + if (rowLabels != null) + ++columns; + + if (sigDigits.length < dataColumns) { + int[] temp = new int[dataColumns]; + System.arraycopy(sigDigits, 0, temp, 0, sigDigits.length); + for (int i = sigDigits.length; i < dataColumns; ++i) + temp[i] = defaultSignificantDigits; + sigDigits = temp; + } + + int dataRows = data.length; + int rows = dataRows; + + if (columnLabels != null) { + if (columnLabels.length < columns) { + String[] temp = new String[columns]; + System.arraycopy(columnLabels, 0, temp, columns - columnLabels.length, + columnLabels.length); + columnLabels = temp; + } + + for (int i = 0; i < columnLabels.length; ++i) + if (columnLabels[i] == null) + columnLabels[i] = ""; + ++rows; + } + + if (rowLabels != null) { + String[] temp = new String[dataRows]; + int length = Math.min(rowLabels.length, dataRows); + System.arraycopy(rowLabels, 0, temp, 0, length); + rowLabels = temp; + + for (int i = 0; i < rowLabels.length; ++i) + if (rowLabels[i] == null) + rowLabels[i] = ""; + } + + return asciiTableFormat(columns, dataColumns, rows, dataRows, columnLabels, rowLabels, + data, sigDigits, dashRows); } - if (rowLabels != null) { - dataColumnStart = 1; - for (int i = 0; i < dataRows; ++i) - table[dataRowStart + i][0] = rowLabels[i]; + + /** + * Formats the given data into an ASCII table. Any row of data set to + * null will become a row with one dash in each cell of the output table's row. 
Any + * entry in any row of data set to null will also become a single dash + * in the output. + * + * @param columns The total number of columns in the table. + * @param dataColumns The number of columns containing data in the table. + * @param rows The total number of rows in the table. + * @param dataRows The number of rows containing data in the table. + * @param columnLabels One label for each column in the output, starting with the column of row + * labels if one exists. If null, no column label row will be returned. + * @param rowLabels One label for each non-null row in data. If null, + * no row label column will be returned. + * @param data The numerical data. + * @param sigDigits Significant digits specified on a column by column basis, only for + * data columns. + * @param dashRows The indexes of rows in data which should be preceded by a row of + * dashes in the output. + * @return The string respresentation of the data. + **/ + protected static String[] asciiTableFormat(int columns, int dataColumns, int rows, + int dataRows, String[] columnLabels, String[] rowLabels, Double[][] data, + int[] sigDigits, int[] dashRows) { + String[][] table = new String[rows][columns]; + int dataRowStart = 0; + int dataColumnStart = 0; + + if (columnLabels != null) { + dataRowStart = 1; + table[0] = columnLabels; + } + + if (rowLabels != null) { + dataColumnStart = 1; + for (int i = 0; i < dataRows; ++i) + table[dataRowStart + i][0] = rowLabels[i]; + } + + for (int i = 0; i < dataRows; ++i) + for (int j = 0; j < dataColumns; ++j) + table[dataRowStart + i][dataColumnStart + j] = + data[i] == null || data[i][j] == null ? 
"-" : format( + data[i][j].doubleValue(), sigDigits[j]); + + int[] columnWidths = new int[columns]; + + for (int i = 0; i < rows; ++i) + for (int j = 0; j < columns; ++j) { + columnWidths[j] = Math.max(columnWidths[j], table[i][j].length()); + } + + if (columnLabels != null) + for (int j = 0; j < columns; ++j) + table[0][j] = center(table[0][j], columnWidths[j]); + if (rowLabels != null) + for (int i = 0; i < dataRows; ++i) + table[dataRowStart + i][0] = ljust(table[dataRowStart + i][0], columnWidths[0]); + + for (int i = 0; i < dataRows; ++i) + for (int j = 0; j < dataColumns; ++j) + table[dataRowStart + i][dataColumnStart + j] = + table[dataRowStart + i][dataColumnStart + j].equals("-") ? center("-", + columnWidths[dataColumnStart + j]) : rjust( + table[dataRowStart + i][dataColumnStart + j], + columnWidths[dataColumnStart + j]); + + Arrays.sort(dashRows); + int d = 0; + while (d < dashRows.length && dashRows[d] < 0) + ++d; + + String[] result = new String[rows + dashRows.length - d]; + String dashes = ""; + for (int i = 0; i < columnWidths.length - 1; ++i) + dashes += "-"; + for (int j = 0; j < columnWidths.length; ++j) + for (int i = 0; i < columnWidths[j]; ++i) + dashes += "-"; + int r = 0; + + for (int i = 0; i < result.length; ++i) + if (d < dashRows.length && (r == dashRows[d] + 1 || r == rows)) { + result[i] = dashes; + ++d; + } else { + result[i] = table[r][0]; + for (int j = 1; j < columns; ++j) + result[i] += " " + table[r][j]; + ++r; + } + + return result; } - for (int i = 0; i < dataRows; ++i) - for (int j = 0; j < dataColumns; ++j) - table[dataRowStart + i][dataColumnStart + j] = - data[i] == null || data[i][j] == null - ? 
"-" : format(data[i][j].doubleValue(), sigDigits[j]); - - int[] columnWidths = new int[columns]; - - for (int i = 0; i < rows; ++i) - for (int j = 0; j < columns; ++j) { - columnWidths[j] = Math.max(columnWidths[j], table[i][j].length()); - } - - if (columnLabels != null) - for (int j = 0; j < columns; ++j) - table[0][j] = center(table[0][j], columnWidths[j]); - if (rowLabels != null) - for (int i = 0; i < dataRows; ++i) - table[dataRowStart + i][0] = - ljust(table[dataRowStart + i][0], columnWidths[0]); - - for (int i = 0; i < dataRows; ++i) - for (int j = 0; j < dataColumns; ++j) - table[dataRowStart + i][dataColumnStart + j] = - table[dataRowStart + i][dataColumnStart + j].equals("-") - ? center("-", columnWidths[dataColumnStart + j]) - : rjust(table[dataRowStart + i][dataColumnStart + j], - columnWidths[dataColumnStart + j]); - - Arrays.sort(dashRows); - int d = 0; - while (d < dashRows.length && dashRows[d] < 0) ++d; - - String[] result = new String[rows + dashRows.length - d]; - String dashes = ""; - for (int i = 0; i < columnWidths.length - 1; ++i) dashes += "-"; - for (int j = 0; j < columnWidths.length; ++j) - for (int i = 0; i < columnWidths[j]; ++i) - dashes += "-"; - int r = 0; - - for (int i = 0; i < result.length; ++i) - if (d < dashRows.length && (r == dashRows[d] + 1 || r == rows)) { - result[i] = dashes; - ++d; - } - else { - result[i] = table[r][0]; - for (int j = 1; j < columns; ++j) - result[i] += " " + table[r][j]; - ++r; - } - - return result; - } - - - /** - * Transposes the given matrix so that the rows become the columns and the - * columns become the rows. - * - * @param m The matrix to transpose. - * @return The transposed matrix. 
- **/ - public static double[][] transpose(double[][] m) { - if (m == null) return null; - if (m.length == 0) return new double[0][]; - int columns = 0; - int rows = 0; - - for (int i = 0; i < m.length; ++i) - if (m[i] != null) { - rows = Math.max(rows, m[i].length); - ++columns; - } - - double[][] result = new double[rows][]; - - for (int i = 0; i < result.length; ++i) { - result[i] = new double[columns]; - int c = 0; - - for (int j = 0; j < m.length; ++j) - if (m[j] != null) { - if (m[j].length > i) result[i][c] = m[j][i]; - ++c; + + /** + * Transposes the given matrix so that the rows become the columns and the columns become the + * rows. + * + * @param m The matrix to transpose. + * @return The transposed matrix. + **/ + public static double[][] transpose(double[][] m) { + if (m == null) + return null; + if (m.length == 0) + return new double[0][]; + int columns = 0; + int rows = 0; + + for (int i = 0; i < m.length; ++i) + if (m[i] != null) { + rows = Math.max(rows, m[i].length); + ++columns; + } + + double[][] result = new double[rows][]; + + for (int i = 0; i < result.length; ++i) { + result[i] = new double[columns]; + int c = 0; + + for (int j = 0; j < m.length; ++j) + if (m[j] != null) { + if (m[j].length > i) + result[i][c] = m[j][i]; + ++c; + } } + + return result; } - return result; - } - - - /** - * Transposes the given matrix so that the rows become the columns and the - * columns become the rows. - * - * @param m The matrix to transpose. - * @return The transposed matrix. 
- **/ - public static Double[][] transpose(Double[][] m) { - if (m == null) return null; - if (m.length == 0) return new Double[0][]; - int columns = 0; - int rows = 0; - - for (int i = 0; i < m.length; ++i) - if (m[i] != null) { - rows = Math.max(rows, m[i].length); - ++columns; - } - - Double[][] result = new Double[rows][]; - - for (int i = 0; i < result.length; ++i) { - result[i] = new Double[columns]; - int c = 0; - - for (int j = 0; j < m.length; ++j) - if (m[j] != null) { - if (m[j].length > i) result[i][c] = m[j][i]; - ++c; + + /** + * Transposes the given matrix so that the rows become the columns and the columns become the + * rows. + * + * @param m The matrix to transpose. + * @return The transposed matrix. + **/ + public static Double[][] transpose(Double[][] m) { + if (m == null) + return null; + if (m.length == 0) + return new Double[0][]; + int columns = 0; + int rows = 0; + + for (int i = 0; i < m.length; ++i) + if (m[i] != null) { + rows = Math.max(rows, m[i].length); + ++columns; + } + + Double[][] result = new Double[rows][]; + + for (int i = 0; i < result.length; ++i) { + result[i] = new Double[columns]; + int c = 0; + + for (int j = 0; j < m.length; ++j) + if (m[j] != null) { + if (m[j].length > i) + result[i][c] = m[j][i]; + ++c; + } } + + return result; } - return result; - } - - - /** - * Formats a floating point number so that it is rounded and zero-padded to - * the given number of significant digits after the decimal point. - * - * @param f The floating point value to format. - * @param d The number of significant digits to round to. If less than 0, - * this method assumes 0; if greater than 18, this method assumes - * 18. - * @return The formatted result. 
- **/ - protected static String format(double f, int d) { - if (d > 18) d = 18; - long m = 1; - for (int j = 0; j < d; ++j) m *= 10; - - String unformatted = "" + f; - String sign = ""; - - if (unformatted.startsWith("-")) { - sign = "-"; - unformatted = unformatted.substring(1); - f *= -1; + + /** + * Formats a floating point number so that it is rounded and zero-padded to the given number of + * significant digits after the decimal point. + * + * @param f The floating point value to format. + * @param d The number of significant digits to round to. If less than 0, this method assumes 0; + * if greater than 18, this method assumes 18. + * @return The formatted result. + **/ + protected static String format(double f, int d) { + if (d > 18) + d = 18; + long m = 1; + for (int j = 0; j < d; ++j) + m *= 10; + + String unformatted = "" + f; + String sign = ""; + + if (unformatted.startsWith("-")) { + sign = "-"; + unformatted = unformatted.substring(1); + f *= -1; + } + + int leftDigits = 0; + int p = unformatted.indexOf('.'); + if (p == -1) + leftDigits = unformatted.length(); + else + leftDigits = p; + + String base = unformatted; + String exponent = ""; + int e = unformatted.indexOf('E'); + + if (e != -1) { + base = unformatted.substring(0, e); + exponent = unformatted.substring(e); + f = Double.parseDouble(base); + } + + StringBuffer buffer = new StringBuffer("" + Math.round(f * m)); + while (buffer.length() < d + leftDigits) + buffer.insert(0, '0'); + if (d > 0) + buffer.insert(buffer.length() - d, '.'); + return sign + buffer + exponent; } - int leftDigits = 0; - int p = unformatted.indexOf('.'); - if (p == -1) leftDigits = unformatted.length(); - else leftDigits = p; - String base = unformatted; - String exponent = ""; - int e = unformatted.indexOf('E'); + /** + * Returns a space-padded string of at least the specified width such that the argument string + * is left-justified within the returned string. + * + * @param original The string to justify. 
+ * @param width The width within which the original string should be padded. + * @return The padded string. If the original is longer than the specified width, the original + * is simply returned. + **/ + protected static String ljust(String original, int width) { + while (original.length() < width) + original += " "; + return original; + } + - if (e != -1) { - base = unformatted.substring(0, e); - exponent = unformatted.substring(e); - f = Double.parseDouble(base); + /** + * Returns a space-padded string of at least the specified width such that the argument string + * is right-justified within the returned string. + * + * @param original The string to justify. + * @param width The width within which the original string should be padded. + * @return The padded string. If the original is longer than the specified width, the original + * is simply returned. + **/ + protected static String rjust(String original, int width) { + while (original.length() < width) + original = " " + original; + return original; } - StringBuffer buffer = new StringBuffer("" + Math.round(f * m)); - while (buffer.length() < d + leftDigits) buffer.insert(0, '0'); - if (d > 0) buffer.insert(buffer.length() - d, '.'); - return sign + buffer + exponent; - } - - - /** - * Returns a space-padded string of at least the specified width such that - * the argument string is left-justified within the returned string. - * - * @param original The string to justify. - * @param width The width within which the original string should be - * padded. - * @return The padded string. If the original is longer than the specified - * width, the original is simply returned. - **/ - protected static String ljust(String original, int width) { - while (original.length() < width) original += " "; - return original; - } - - - /** - * Returns a space-padded string of at least the specified width such that - * the argument string is right-justified within the returned string. 
- * - * @param original The string to justify. - * @param width The width within which the original string should be - * padded. - * @return The padded string. If the original is longer than the specified - * width, the original is simply returned. - **/ - protected static String rjust(String original, int width) { - while (original.length() < width) original = " " + original; - return original; - } - - - /** - * Returns a space-padded string of at least the specified width such that - * the argument string is centered within the returned string. - * - * @param original The string to justify. - * @param width The width within which the original string should be - * padded. - * @return The padded string. If the original is longer than the specified - * width, the original is simply returned. - **/ - protected static String center(String original, int width) { - int toAdd = width - original.length(), i; - for (i = 0; i < toAdd / 2; ++i) original = " " + original; - for (; i < toAdd; ++i) original += " "; - return original; - } -} + /** + * Returns a space-padded string of at least the specified width such that the argument string + * is centered within the returned string. + * + * @param original The string to justify. + * @param width The width within which the original string should be padded. + * @return The padded string. If the original is longer than the specified width, the original + * is simply returned. + **/ + protected static String center(String original, int width) { + int toAdd = width - original.length(), i; + for (i = 0; i < toAdd / 2; ++i) + original = " " + original; + for (; i < toAdd; ++i) + original += " "; + return original; + } +} diff --git a/lbjava/src/main/lex/scanner.jlex b/lbjava/src/main/lex/scanner.jlex index 67a54f87..4f1d4ca0 100644 --- a/lbjava/src/main/lex/scanner.jlex +++ b/lbjava/src/main/lex/scanner.jlex @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. 
See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ /// --- scanner.jlex ------------------------------------------ vim:syntax=lex diff --git a/lbjava/src/main/resources/eclipse-java-google-style.xml b/lbjava/src/main/resources/eclipse-java-google-style.xml new file mode 100644 index 00000000..7dd90403 --- /dev/null +++ b/lbjava/src/main/resources/eclipse-java-google-style.xml @@ -0,0 +1,337 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/lbjava/src/main/resources/license-header.txt b/lbjava/src/main/resources/license-header.txt index 91750bdd..dfdc8453 100644 --- a/lbjava/src/main/resources/license-header.txt +++ b/lbjava/src/main/resources/license-header.txt @@ -1,8 +1,5 @@ -This software is released under the University of Illinois/Research and - Academic Use License. See the LICENSE file in the root folder for details. 
-Copyright (c) 2016 +This software is released under the University of Illinois/Research and Academic Use License. See +the LICENSE file in the root folder for details. Copyright (c) 2016 -Developed by: -The Cognitive Computations Group -University of Illinois at Urbana-Champaign +Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign http://cogcomp.cs.illinois.edu/ diff --git a/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/AdaGradTest.java b/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/AdaGradTest.java index da5068ac..60e5eab3 100644 --- a/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/AdaGradTest.java +++ b/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/AdaGradTest.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava; @@ -69,7 +66,7 @@ public void testDefaultLossFunction() { } /** - * Test user specified loss function, i.e. "lms" -> least-mean-square, to use + * Test user specified loss function, i.e. 
"lms" -> least-mean-square, to use */ @Test public void testCustomLossFunction() { @@ -94,8 +91,9 @@ public void testHingeLossLearn() { p.learningRateP = 1; learner.setParameters(p); - /* Example 1: features {1, 1}, label {+1} - * + /* + * Example 1: features {1, 1}, label {+1} + * * Made a mistake - need to update weight vector */ int[] exampleFeatures1 = {0, 1}; @@ -110,8 +108,9 @@ public void testHingeLossLearn() { assertArrayEquals(exp_w1, w1, 0); - /* Example 2: features {1, 0}, label {-1} - * + /* + * Example 2: features {1, 0}, label {-1} + * * Made a mistake - need to update weight vector */ @@ -127,8 +126,9 @@ public void testHingeLossLearn() { assertArrayEquals(exp_w2, w2, 0.000001); - /* Example 3: features {0, 1}, label {1} - * + /* + * Example 3: features {0, 1}, label {1} + * * No mistake - no update on weight vector */ @@ -148,29 +148,18 @@ public void testHingeLossLearn() { /** * This is a simple test to test for overfitting * - * The AdaGrad is given with simple data set with 2 features and a label. - * Train the data set for 30 iterations and - * see if the algorithm can classify the same data set correctly. + * The AdaGrad is given with simple data set with 2 features and a label. Train the + * data set for 30 iterations and see if the algorithm can classify the same data set correctly. 
*/ @Test public void overfittingSimpleTest() { /** - * static data set; - * the first 2 numbers are 2 features and the last one is the label; - * this data set is linearly separable + * static data set; the first 2 numbers are 2 features and the last one is the label; this + * data set is linearly separable */ - double [][] dataSet = new double[][]{ - {-2, -4, 1}, - {-2, 0, 1}, - {0, 2, 1}, - {-2, 2, 1}, - {0, 4, 1}, - {2, 2, -1}, - {2, -2, -1}, - {0, -4, -1}, - {2, -4, -1}, - {4, -2, -1} - }; + double[][] dataSet = + new double[][] { {-2, -4, 1}, {-2, 0, 1}, {0, 2, 1}, {-2, 2, 1}, {0, 4, 1}, + {2, 2, -1}, {2, -2, -1}, {0, -4, -1}, {2, -4, -1}, {4, -2, -1}}; int[] exampleFeatures = {0, 1}; int[] exampleLabels = {0}; @@ -180,9 +169,9 @@ public void overfittingSimpleTest() { /* train AdaGrad for 30 iterations */ for (int i = 0; i < 30; i++) { - exampleValues[0] = dataSet[i%10][0]; - exampleValues[1] = dataSet[i%10][1]; - labelValues[0] = dataSet[i%10][2]; + exampleValues[0] = dataSet[i % 10][0]; + exampleValues[1] = dataSet[i % 10][1]; + labelValues[0] = dataSet[i % 10][2]; learner.learn(exampleFeatures, exampleValues, exampleLabels, labelValues); } @@ -196,7 +185,7 @@ public void overfittingSimpleTest() { double result = learner.realValue(exampleFeatures, exampleValues); if (result * dataSet[i][2] > 0) { - correctNumber ++; + correctNumber++; } } assertEquals(10, correctNumber); @@ -205,14 +194,13 @@ public void overfittingSimpleTest() { /** * This is a complete test to test overfitting in AdaGrad * - * Data set consists of 10 examples, each with 2 features; - * Each feature value is randomly generated from range [0, 10]; + * Data set consists of 10 examples, each with 2 features; Each feature value is randomly + * generated from range [0, 10]; * - * A "correct" weight vector is randomly generated; - * Each value is from range [0, 10]; + * A "correct" weight vector is randomly generated; Each value is from range [0, 10]; * - * The hyperplane is set by taking the 
medium of w*x; - * Almost half of examples are labeled as +1; the rest are labeled -1; + * The hyperplane is set by taking the medium of w*x; Almost half of examples are labeled as +1; + * the rest are labeled -1; * * Thus, the data set is linearly separable, while being random * @@ -231,12 +219,12 @@ public void overfittingCompleteTest() { /* give a seed to rand */ Random rand = new Random(0); - /** create 10 examples, each with 2 features, - * with values randomly generated from [0, 10] + /** + * create 10 examples, each with 2 features, with values randomly generated from [0, 10] */ ArrayList> dataSet = new ArrayList>(); - for(int i = 0; i < 10; i++) { + for (int i = 0; i < 10; i++) { ArrayList eachExample = new ArrayList(); eachExample.add((double) randInt(rand, 0, 10)); eachExample.add((double) randInt(rand, 0, 10)); @@ -263,8 +251,7 @@ public void overfittingCompleteTest() { for (int i = 0; i < 10; i++) { if (computeDotProduct(dataSet.get(i), weightVector) >= medium) { dataSet.get(i).set(2, 1.0); - } - else { + } else { dataSet.get(i).set(2, -1.0); } } @@ -293,7 +280,7 @@ public void overfittingCompleteTest() { double result = learner.realValue(exampleFeatures, exampleValues); if (result * dataSet.get(i % 10).get(2) > 0) { - correctNumber ++; + correctNumber++; } } @@ -303,13 +290,14 @@ public void overfittingCompleteTest() { /** * Compute the dot product of weight vector and feature vector + * * @param x feature vector * @param w weight vector * @return dot product result */ private double computeDotProduct(ArrayList x, ArrayList w) { double result = 0.0; - for (int i = 0; i < x.size()-1; i++) { + for (int i = 0; i < x.size() - 1; i++) { result += x.get(i) * w.get(i); } result += w.get(w.size() - 1); @@ -317,19 +305,20 @@ private double computeDotProduct(ArrayList x, ArrayList w) { } /** - * Returns a pseudo-random number between min and max, inclusive. - * The difference between min and max can be at most - * Integer.MAX_VALUE - 1. 
+ * Returns a pseudo-random number between min and max, inclusive. The difference between min and + * max can be at most Integer.MAX_VALUE - 1. * * @param rand random instance * @param min minimim value - * @param max maximim value. Must be greater than min. + * @param max maximim value. Must be greater than min. * @return integer between min and max, inclusive. * @see java.util.Random#nextInt(int) * - * Reference: http://stackoverflow.com/questions/20389890/generating-a-random-number-between-1-and-10-java + * Reference: + * http://stackoverflow.com/questions/20389890/generating-a-random-number-between + * -1-and-10-java */ private int randInt(Random rand, int min, int max) { return rand.nextInt((max - min) + 1) + min; } -} \ No newline at end of file +} diff --git a/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/JalgoHookTest.java b/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/JalgoHookTest.java deleted file mode 100644 index 9bc99293..00000000 --- a/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/JalgoHookTest.java +++ /dev/null @@ -1,302 +0,0 @@ -/** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. 
- * Copyright (c) 2016 - * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign - * http://cogcomp.cs.illinois.edu/ - */ -package edu.illinois.cs.cogcomp.lbjava; - -import edu.illinois.cs.cogcomp.lbjava.infer.OJalgoHook; -import org.junit.Test; - -import static org.junit.Assert.assertTrue; - -public class JalgoHookTest { - @Test - public void testProgram1() throws Exception { - OJalgoHook ojaHook = new OJalgoHook(); - int[] varInds = new int[2]; - - int i = 0; - while (i< 2) { - int x = ojaHook.addBooleanVariable(-1.0); - varInds[i] = x; - i++; - } - - double[] coefs = { 1, 2 }; - ojaHook.addGreaterThanConstraint(varInds, coefs, -3); - ojaHook.addLessThanConstraint(varInds, coefs, 4); - - ojaHook.setMaximize(false); - - try { - ojaHook.solve(); - } catch (Exception e) { - e.printStackTrace(); - } - - ojaHook.printModelInfo(); - - assertTrue(ojaHook.objectiveValue() == -2.0); - assertTrue(ojaHook.getBooleanValue(0)); - assertTrue(ojaHook.getBooleanValue(1)); - } - - @Test - public void testProgram2() throws Exception { - OJalgoHook ojaHook = new OJalgoHook(); - int[] varInds = new int[2]; - - int i = 0; - while (i< 2) { - int x = ojaHook.addBooleanVariable(-1.0); - varInds[i] = x; - i++; - } - - double[] coefs = { 1, 2 }; - ojaHook.addGreaterThanConstraint(varInds, coefs, -3); - ojaHook.addLessThanConstraint(varInds, coefs, 4); - - ojaHook.setMaximize(true); - - try { - ojaHook.solve(); - } catch (Exception e) { - e.printStackTrace(); - } - - ojaHook.printModelInfo(); - - assertTrue(ojaHook.objectiveValue() == 0); - assertTrue(!ojaHook.getBooleanValue(0)); - assertTrue(!ojaHook.getBooleanValue(1)); - } - - - @Test - public void testProgram3() throws Exception { - OJalgoHook ojaHook = new OJalgoHook(); - int[] varInds = new int[2]; - - int i = 0; - while (i< 2) { - int x = ojaHook.addBooleanVariable(1.5); - varInds[i] = x; - i++; - } - - double[] coefs = { 1, 2 }; - ojaHook.addGreaterThanConstraint(varInds, coefs, -3); 
- ojaHook.addLessThanConstraint(varInds, coefs, 4); - - ojaHook.setMaximize(true); - - try { - ojaHook.solve(); - } catch (Exception e) { - e.printStackTrace(); - } - - ojaHook.printModelInfo(); - - assertTrue(ojaHook.objectiveValue() == 3); - assertTrue(ojaHook.getBooleanValue(0)); - assertTrue(ojaHook.getBooleanValue(1)); - } - - @Test - public void testProgram4() throws Exception { - OJalgoHook ojaHook = new OJalgoHook(); - int[] varInds = new int[2]; - - int i = 0; - while (i< 2) { - int x = ojaHook.addBooleanVariable(1.5); - varInds[i] = x; - i++; - } - - double[] coefs = { 1, 2 }; - ojaHook.addGreaterThanConstraint(varInds, coefs, -3); - ojaHook.addLessThanConstraint(varInds, coefs, 4); - - ojaHook.setMaximize(false); - - try { - ojaHook.solve(); - } catch (Exception e) { - e.printStackTrace(); - } - - ojaHook.printModelInfo(); - - assertTrue(ojaHook.objectiveValue() == 0); - assertTrue(!ojaHook.getBooleanValue(0)); - assertTrue(!ojaHook.getBooleanValue(1)); - } - - @Test - public void testProgram5() throws Exception { - OJalgoHook ojaHook = new OJalgoHook(); - int[] varInds = new int[2]; - - double[] objCoefs = {1.5, 2.5}; - int i = 0; - while (i< 2) { - int x = ojaHook.addBooleanVariable(objCoefs[i]); - varInds[i] = x; - i++; - } - - double[] coefs = { 1, 2 }; - ojaHook.addGreaterThanConstraint(varInds, coefs, 1); - ojaHook.addLessThanConstraint(varInds, coefs, 4); - - ojaHook.setMaximize(true); - - try { - ojaHook.solve(); - } catch (Exception e) { - e.printStackTrace(); - } - - ojaHook.printModelInfo(); - - assertTrue(ojaHook.objectiveValue() == 4); - assertTrue(ojaHook.getBooleanValue(0)); - assertTrue(ojaHook.getBooleanValue(1)); - } - - @Test - public void testProgram6() throws Exception { - OJalgoHook ojaHook = new OJalgoHook(); - int[] varInds = new int[2]; - - double[] objCoefs = {1.5, 2.5}; - int i = 0; - while (i< 2) { - int x = ojaHook.addBooleanVariable(objCoefs[i]); - varInds[i] = x; - i++; - } - - double[] coefs = { 1, 2 }; - 
ojaHook.addGreaterThanConstraint(varInds, coefs, 1); - ojaHook.addLessThanConstraint(varInds, coefs, 2); - - ojaHook.setMaximize(false); - - try { - ojaHook.solve(); - } catch (Exception e) { - e.printStackTrace(); - } - - ojaHook.printModelInfo(); - - assertTrue(ojaHook.objectiveValue() == 1.5); - assertTrue(ojaHook.getBooleanValue(0)); - assertTrue(!ojaHook.getBooleanValue(1)); - } - - @Test - public void testProgram7() throws Exception { - OJalgoHook ojaHook = new OJalgoHook(); - int[] varInds = new int[2]; - - double[] objCoefs = {1.5, 2.5}; - int i = 0; - while (i< 2) { - int x = ojaHook.addBooleanVariable(objCoefs[i]); - varInds[i] = x; - i++; - } - - double[] coefs = { 1, 2 }; - ojaHook.addGreaterThanConstraint(varInds, coefs, 1); - ojaHook.addLessThanConstraint(varInds, coefs, 2); - - ojaHook.setMaximize(true); - - try { - ojaHook.solve(); - } catch (Exception e) { - e.printStackTrace(); - } - - ojaHook.printModelInfo(); - - assertTrue(ojaHook.objectiveValue() == 2.5); - assertTrue(!ojaHook.getBooleanValue(0)); - assertTrue(ojaHook.getBooleanValue(1)); - } - - @Test - public void testProgram8() throws Exception { - OJalgoHook ojaHook = new OJalgoHook(); - int[] varInds = new int[3]; - - double[] objCoefs = {-1, -1, -1}; - int i = 0; - while (i< 3) { - int x = ojaHook.addBooleanVariable(objCoefs[i]); - varInds[i] = x; - i++; - } - - double[] coefs = { 1, 1, 1}; - ojaHook.addEqualityConstraint(varInds, coefs, 3); - ojaHook.setMaximize(true); - - try { - ojaHook.solve(); - } catch (Exception e) { - e.printStackTrace(); - } - - ojaHook.printModelInfo(); - - assertTrue(ojaHook.objectiveValue() == -3); - assertTrue(ojaHook.getBooleanValue(0)); - assertTrue(ojaHook.getBooleanValue(1)); - assertTrue(ojaHook.getBooleanValue(2)); - } - - @Test - public void testProgram9() throws Exception { - OJalgoHook ojaHook = new OJalgoHook(); - - double[] objCoefs = {0, -1}; - ojaHook.addDiscreteVariable(objCoefs); - ojaHook.addDiscreteVariable(objCoefs); - 
ojaHook.addDiscreteVariable(objCoefs); - - double[] coefs = { 1, 1, 1}; - int[] varInds = {1, 3, 5}; - ojaHook.addEqualityConstraint(varInds, coefs, 3); - ojaHook.setMaximize(true); - - try { - ojaHook.solve(); - } catch (Exception e) { - e.printStackTrace(); - } - - ojaHook.printModelInfo(); - - assertTrue(ojaHook.objectiveValue() == -3); - assertTrue(!ojaHook.getBooleanValue(0)); - assertTrue(ojaHook.getBooleanValue(1)); - assertTrue(!ojaHook.getBooleanValue(2)); - assertTrue(ojaHook.getBooleanValue(3)); - assertTrue(!ojaHook.getBooleanValue(4)); - assertTrue(ojaHook.getBooleanValue(5)); - } - -} diff --git a/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/MainTest.java b/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/MainTest.java index 044de150..ac3e7b98 100644 --- a/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/MainTest.java +++ b/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/MainTest.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava; @@ -13,6 +10,7 @@ import edu.illinois.cs.cogcomp.lbjava.IR.*; import edu.illinois.cs.cogcomp.lbjava.frontend.Yylex; import edu.illinois.cs.cogcomp.lbjava.frontend.parser; +import edu.illinois.cs.cogcomp.lbjava.util.FileUtils; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -31,10 +29,10 @@ /** * Tests the main functions of LBJava: *

    - *
  • {@link SemanticAnalysis}
  • - *
  • {@link ClassifierCSE}
  • - *
  • {@link RevisionAnalysis}
  • - *
  • {@link TranslateToJava}
  • + *
  • {@link SemanticAnalysis}
  • + *
  • {@link ClassifierCSE}
  • + *
  • {@link RevisionAnalysis}
  • + *
  • {@link TranslateToJava}
  • *
* * @author Christos Christodoulopoulos @@ -42,27 +40,25 @@ public class MainTest { private String generateLBJavaScript(String learnerName, String extractor, String featImport) { - return "import java.util.Vector;\n" + - "import edu.illinois.cs.cogcomp.lbjava.VectorParser;\n" + - featImport + "\n" + - "import edu.illinois.cs.cogcomp.lbjava.PredefinedLabel;\n" + - "\n" + - "discrete "+learnerName+"(Vector v) <-\n" + - "learn PredefinedLabel\n" + - "\tusing "+extractor+"\n" + - "\tfrom new VectorParser(\"target/test-classes/test1.train\")\n" + - "\twith new NaiveBayes()\n" + - "\ttestFrom new VectorParser(\"target/test-classes/test2.train\")\n" + - "end"; + return "import java.util.Vector;\n" + + "import edu.illinois.cs.cogcomp.lbjava.VectorParser;\n" + featImport + "\n" + + "import edu.illinois.cs.cogcomp.lbjava.PredefinedLabel;\n" + "\n" + "discrete " + + learnerName + "(Vector v) <-\n" + "learn PredefinedLabel\n" + "\tusing " + + extractor + "\n" + + "\tfrom new VectorParser(\"target/test-classes/test1.train\")\n" + + "\twith new NaiveBayes()\n" + + "\ttestFrom new VectorParser(\"target/test-classes/test2.train\")\n" + "end"; } @Before public void setUp() throws Exception { Main.fileNames = new HashSet<>(); - Main.generatedSourceDirectory = "target/test-classes/lbj"; - Main.classDirectory = "target/test-classes"; - Main.classPackageDirectory = "target/test-classes/lbj"; - Main.sourceDirectory = "target/test-classes/lbj"; + Main.generatedSourceDirectory = + FileUtils.getPlatformIndependentFilePath("target/test-classes/lbj"); + Main.classDirectory = FileUtils.getPlatformIndependentFilePath("target/test-classes"); + Main.classPackageDirectory = + FileUtils.getPlatformIndependentFilePath("target/test-classes/lbj"); + Main.sourceDirectory = FileUtils.getPlatformIndependentFilePath("target/test-classes/lbj"); // The auto-generated code directory needs to be added to classpath URLClassLoader urlClassLoader = (URLClassLoader) ClassLoader.getSystemClassLoader(); @@ -73,8 
+69,9 @@ public void setUp() throws Exception { @Test public void testOneFeature() throws Exception { - String input = generateLBJavaScript("OneFeatLearner", "testFeature1", - "import edu.illinois.cs.cogcomp.lbjava.features.PredefinedFeature;"); + String input = + generateLBJavaScript("OneFeatLearner", "testFeature1", + "import edu.illinois.cs.cogcomp.lbjava.features.PredefinedFeature;"); Yylex scanner = new Yylex(new ByteArrayInputStream(input.getBytes())); AST ast = (AST) new parser(scanner).parse().value; @@ -91,7 +88,8 @@ public void testOneFeature() throws Exception { LearningClassifierExpression lce = (LearningClassifierExpression) astNodes[2]; assertEquals("testFeature1", lce.extractor.getName()); assertTrue(((ClassifierName) lce.extractor).isField); - assertEquals("PredefinedFeature", AST.globalSymbolTable.classForName(lce.extractor.getName()).getSimpleName()); + assertEquals("PredefinedFeature", + AST.globalSymbolTable.classForName(lce.extractor.getName()).getSimpleName()); new RevisionAnalysis(ast).run(ast); new ClassifierCSE(ast).run(ast); @@ -101,8 +99,9 @@ public void testOneFeature() throws Exception { @Test public void testTwoFeatures() throws Exception { - String input = generateLBJavaScript("TwoFeatLearner", "testFeature1, testFeature2", - "import edu.illinois.cs.cogcomp.lbjava.features.PredefinedFeature;"); + String input = + generateLBJavaScript("TwoFeatLearner", "testFeature1, testFeature2", + "import edu.illinois.cs.cogcomp.lbjava.features.PredefinedFeature;"); Yylex scanner = new Yylex(new ByteArrayInputStream(input.getBytes())); AST ast = (AST) new parser(scanner).parse().value; @@ -122,7 +121,8 @@ public void testTwoFeatures() throws Exception { assertEquals(2, components.size()); ClassifierName component1 = (ClassifierName) components.iterator().next(); assertTrue(component1.isField); - assertEquals("PredefinedFeature", AST.globalSymbolTable.classForName(component1.getName()).getSimpleName()); + assertEquals("PredefinedFeature", 
AST.globalSymbolTable.classForName(component1.getName()) + .getSimpleName()); new RevisionAnalysis(ast).run(ast); new ClassifierCSE(ast).run(ast); @@ -132,8 +132,9 @@ public void testTwoFeatures() throws Exception { @Test public void testPackageFeature() throws Exception { - String input = generateLBJavaScript("PackageFeatLearner", "testFeature1", - "import edu.illinois.cs.cogcomp.lbjava.features.*;"); + String input = + generateLBJavaScript("PackageFeatLearner", "testFeature1", + "import edu.illinois.cs.cogcomp.lbjava.features.*;"); Yylex scanner = new Yylex(new ByteArrayInputStream(input.getBytes())); AST ast = (AST) new parser(scanner).parse().value; @@ -150,7 +151,8 @@ public void testPackageFeature() throws Exception { LearningClassifierExpression lce = (LearningClassifierExpression) astNodes[2]; assertEquals("testFeature1", lce.extractor.getName()); assertTrue(((ClassifierName) lce.extractor).isField); - assertEquals("PredefinedFeature", AST.globalSymbolTable.classForName(lce.extractor.getName()).getSimpleName()); + assertEquals("PredefinedFeature", + AST.globalSymbolTable.classForName(lce.extractor.getName()).getSimpleName()); new RevisionAnalysis(ast).run(ast); new ClassifierCSE(ast).run(ast); @@ -160,7 +162,7 @@ public void testPackageFeature() throws Exception { @After public void cleanup() { - //Make sure we don't leave our auto-generated files behind + // Make sure we don't leave our auto-generated files behind File lbjDir = new File(Main.generatedSourceDirectory); File[] dirFiles = lbjDir.listFiles(new FilenameFilter() { @@ -168,6 +170,11 @@ public boolean accept(File dir, String name) { return !name.endsWith(".lbj") && !name.startsWith(".nfs"); } }); - for (File file: dirFiles) assert file.delete() : "Could not delete file " + file; + + for (File file : dirFiles) { + if (!file.delete()) { + System.out.println("Could not delete file " + file); + } + } } -} \ No newline at end of file +} diff --git 
a/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/NaiveBayesTest.java b/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/NaiveBayesTest.java index 816d3378..b9eca7df 100644 --- a/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/NaiveBayesTest.java +++ b/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/NaiveBayesTest.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava; @@ -23,22 +20,22 @@ public class NaiveBayesTest { - @Test - public void test() throws Exception { - File lbjFile = new File(getClass().getResource("/lbj/naive-bayes.lbj").getFile()); - - // The auto-generated code directory needs to be added to classpath - addPath(getClass().getResource("/lbj/.")); - - assertTrue(lbjFile.exists()); - - String[] args = { "-d", lbjFile.getParent(), lbjFile.getPath() }; - Main.main(args); - } + @Test + public void test() throws Exception { + File lbjFile = new File(getClass().getResource("/lbj/naive-bayes.lbj").getFile()); + + // The auto-generated code directory needs to be added to classpath + addPath(getClass().getResource("/lbj/.")); + + assertTrue(lbjFile.exists()); + + String[] args = {"-d", lbjFile.getParent(), lbjFile.getPath()}; + Main.main(args); + } @After public void cleanup() { - //Make sure we don't leave our auto-generated files behind + // Make sure we don't leave our auto-generated files behind File lbjDir = new File(Main.generatedSourceDirectory); 
File[] dirFiles = lbjDir.listFiles(new FilenameFilter() { @@ -46,15 +43,20 @@ public boolean accept(File dir, String name) { return !name.endsWith(".lbj"); } }); - for (File file: dirFiles) assert file.delete() : "Could not delete file " + file; + + for (File file : dirFiles) { + if (!file.delete()) { + System.out.println("Could not delete file " + file); + } + } + } + + @SuppressWarnings({"unchecked", "rawtypes"}) + private static void addPath(URL u) throws Exception { + URLClassLoader urlClassLoader = (URLClassLoader) ClassLoader.getSystemClassLoader(); + Class urlClass = URLClassLoader.class; + Method method = urlClass.getDeclaredMethod("addURL", URL.class); + method.setAccessible(true); + method.invoke(urlClassLoader, u); } - - @SuppressWarnings({ "unchecked", "rawtypes" }) - private static void addPath(URL u) throws Exception { - URLClassLoader urlClassLoader = (URLClassLoader) ClassLoader.getSystemClassLoader(); - Class urlClass = URLClassLoader.class; - Method method = urlClass.getDeclaredMethod("addURL", URL.class); - method.setAccessible(true); - method.invoke(urlClassLoader, u); - } } diff --git a/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/PredefinedLabel.java b/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/PredefinedLabel.java index af67baea..fa129b06 100644 --- a/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/PredefinedLabel.java +++ b/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/PredefinedLabel.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava; @@ -37,8 +34,8 @@ public String discreteValue(Object o) { @Override public Feature featureValue(Object o) { String value = discreteValue(o); - return new DiscretePrimitiveStringFeature("", "", "", value, - valueIndexOf(value), (short) allowableValues().length); + return new DiscretePrimitiveStringFeature("", "", "", value, valueIndexOf(value), + (short) allowableValues().length); } @Override diff --git a/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/SparseNetworkLearningPruneTest.java b/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/SparseNetworkLearningPruneTest.java new file mode 100644 index 00000000..e5202d51 --- /dev/null +++ b/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/SparseNetworkLearningPruneTest.java @@ -0,0 +1,20 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava; + +import static org.junit.Assert.*; + +import org.junit.Test; + +public class SparseNetworkLearningPruneTest { + + @Test + public void test() { + } + +} diff --git a/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/VectorParser.java b/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/VectorParser.java index 61bd8c2f..1b065b16 100644 --- a/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/VectorParser.java +++ b/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/VectorParser.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava; @@ -16,29 +13,34 @@ /** * File reader used for testing. 
+ * * @author Nick Rizzolo * @author Christos Christodoulopoulos * */ public class VectorParser extends LineByLine { - public VectorParser(String file) { super(file); } - - public Object next() { - String line = readLine(); - if (line == null) return null; - - String[] features = line.split(", "); - Vector result = new Vector(); - for (int i = 0; i < 101; ++i) result.add(null); - - for (String feature1 : features) { - int paren = feature1.indexOf('('); - int feature = Integer.parseInt(feature1.substring(0, paren)); - double value = Double.parseDouble(feature1.substring(paren + 1, feature1.length() - 1)); - result.set(feature, value); - } - - return result; - } -} \ No newline at end of file + public VectorParser(String file) { + super(file); + } + + public Object next() { + String line = readLine(); + if (line == null) + return null; + + String[] features = line.split(", "); + Vector result = new Vector(); + for (int i = 0; i < 101; ++i) + result.add(null); + + for (String feature1 : features) { + int paren = feature1.indexOf('('); + int feature = Integer.parseInt(feature1.substring(0, paren)); + double value = Double.parseDouble(feature1.substring(paren + 1, feature1.length() - 1)); + result.set(feature, value); + } + + return result; + } +} diff --git a/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/features/PredefinedFeature.java b/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/features/PredefinedFeature.java index c76d898c..8a0d6c98 100644 --- a/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/features/PredefinedFeature.java +++ b/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/features/PredefinedFeature.java @@ -1,11 +1,8 @@ /** - * This software is released under the University of Illinois/Research and - * Academic Use License. See the LICENSE file in the root folder for details. - * Copyright (c) 2016 + * This software is released under the University of Illinois/Research and Academic Use License. 
See + * the LICENSE file in the root folder for details. Copyright (c) 2016 * - * Developed by: - * The Cognitive Computations Group - * University of Illinois at Urbana-Champaign + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign * http://cogcomp.cs.illinois.edu/ */ package edu.illinois.cs.cogcomp.lbjava.features; @@ -46,9 +43,12 @@ public FeatureVector classify(Object o) { FeatureVector featureVector = new FeatureVector(); for (int i = 0; i < 100; ++i) { String id = "" + (i + 1); - if (instance.get(i + 1) == null) continue; + if (instance.get(i + 1) == null) + continue; String value = getFeature((Double) instance.get(i + 1)); - Feature feature = new DiscretePrimitiveStringFeature("", "", id, value, valueIndexOf(value), (short) 0); + Feature feature = + new DiscretePrimitiveStringFeature("", "", id, value, valueIndexOf(value), + (short) 0); featureVector.addFeature(feature); } diff --git a/pom.xml b/pom.xml index 8e2ae375..db0a0881 100644 --- a/pom.xml +++ b/pom.xml @@ -7,7 +7,7 @@ edu.illinois.cs.cogcomp lbjava-project pom - 1.2.16 + 1.3.1 lbjava @@ -18,6 +18,7 @@ UTF-8 UTF-8 + ${project.basedir}/../lbjava/src/main/resources/eclipse-java-google-style.xml @@ -32,11 +33,11 @@ CogcompSoftware CogcompSoftware - scp://bilbo.cs.illinois.edu:/mounts/bilbo/disks/0/www/cogcomp/html/m2repo + scp://legolas.cs.illinois.edu:/srv/data/cogcomp/html/m2repo CogcompSoftwareDoc - scp://bilbo.cs.illinois.edu:/mounts/bilbo/disks/0/www/cogcomp/html/software/doc/${project.artifactId} + scp://legolas.cs.illinois.edu:/srv/data/cogcomp/html/software/doc/ @@ -101,6 +102,7 @@ **/*.sh **/*.lbj **/*.xml + **/*.yml **/*.md src/test/resources/** src/main/resources/** @@ -117,6 +119,18 @@ + + net.revelc.code + formatter-maven-plugin + 0.5.2 + + ${googleEclipseFormatterStyle} + + lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/frontend/parser.java + lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/frontend/sym.java + + +

- * The precedence values for Java operators were taken from - * Java-FAQ.com. - * @author Nick Rizzolo + * LBJava supports every Java operator. This class stores information about those operators, such as + * their symbols and precedences, as well as information about LBJava's new operators. The static + * constant fields defined in this class serve as indeces into its static array fields. + * + *