diff --git a/.classpath b/.classpath deleted file mode 100644 index 66f3001..0000000 --- a/.classpath +++ /dev/null @@ -1,22 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - diff --git a/.gitignore b/.gitignore deleted file mode 100644 index 3558c56..0000000 --- a/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -bin/ -build/ diff --git a/.project b/.project deleted file mode 100644 index 4c732cd..0000000 --- a/.project +++ /dev/null @@ -1,17 +0,0 @@ - - - org.openscience.chemojava - - - - - - org.eclipse.jdt.core.javabuilder - - - - - - org.eclipse.jdt.core.javanature - - diff --git a/ProjectHome.md b/ProjectHome.md new file mode 100644 index 0000000..ce4a1ed --- /dev/null +++ b/ProjectHome.md @@ -0,0 +1 @@ +ChemoJava is a project based on the Chemistry Development Kit (LGPL). It aggregates chemoinformatics functionality which itself is license GPL, but using the CDK APIs. It is not aimed at replacing the CDK, but complementing it with unique functionality, possibly from originally other projects, which is not available from the CDK itself. \ No newline at end of file diff --git a/README.md b/README.md deleted file mode 100644 index 31a1489..0000000 --- a/README.md +++ /dev/null @@ -1,26 +0,0 @@ -# CDK-GPL The Chemistry Development Kit GPL extension - -Copyright 2008-2011 The CDK Development Team -License: GPL v2, see doc/gpl.license - -## Introduction - -You are currently reading the README file for the Chemistry Development Kit GPL extension (CDK-GPL). -This project is hosted under http://cdk.sourceforge.net -Please refer to these pages for updated information and the latest version of the CDK-GPL. - -The CDK is an open-source library of algorithms for structural chemo- and bioinformatics, implemented in -the programming language Java(tm). The library is published under terms of the the -GNU General Public License. This has implications on what you can do with sources and -binaries of the CDK library. 
For details, please refer to the file LICENSE, which should have been -provided with this distribution. - -IMPORTANT: Unlike the CDK itself, this library is not LGPL, but GPL. This project is also not the -CDK library itself. This is a separate library, which is based on the CDK and extends it with -code which uses GPL libraries. - -## Compile - -```shell -ant dist-all -``` diff --git a/build.properties b/build.properties deleted file mode 100644 index ff33983..0000000 --- a/build.properties +++ /dev/null @@ -1 +0,0 @@ -version=1.4.2.1 diff --git a/build.xml b/build.xml deleted file mode 100755 index 9d0136a..0000000 --- a/build.xml +++ /dev/null @@ -1,757 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Not skipping doclet run... - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - -
-
- - - -
-
- - - - - - - - - - - - - - - -
- - - - - - -
-
- - - -
-
- - - - - - - - - - - - - - - - - - -
- - - - - - -
-
- - - -
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
diff --git a/develjar/ant-contrib-1.0b3.jar b/develjar/ant-contrib-1.0b3.jar deleted file mode 100644 index 0625376..0000000 Binary files a/develjar/ant-contrib-1.0b3.jar and /dev/null differ diff --git a/develjar/com-sun-javadoc.jar b/develjar/com-sun-javadoc.jar deleted file mode 100644 index bde0623..0000000 Binary files a/develjar/com-sun-javadoc.jar and /dev/null differ diff --git a/develjar/com-sun-tools-doclets-Taglet.jar b/develjar/com-sun-tools-doclets-Taglet.jar deleted file mode 100644 index 340e50b..0000000 Binary files a/develjar/com-sun-tools-doclets-Taglet.jar and /dev/null differ diff --git a/develjar/junit-4.3.1.jar b/develjar/junit-4.3.1.jar deleted file mode 100644 index ff5d188..0000000 Binary files a/develjar/junit-4.3.1.jar and /dev/null differ diff --git a/jar/JRI.jar b/jar/JRI.jar deleted file mode 100644 index 3c18de2..0000000 Binary files a/jar/JRI.jar and /dev/null differ diff --git a/jar/JRI.jar.meta b/jar/JRI.jar.meta deleted file mode 100644 index 5f27a04..0000000 --- a/jar/JRI.jar.meta +++ /dev/null @@ -1,9 +0,0 @@ -[JRI.jar] -Library=JRI -#Version= -#Copyright= -#License= -#LicenseURL= -#Download= -#SourceCode= -Homepage=http://www.rforge.net/JRI/ diff --git a/jar/cdk-core.jar b/jar/cdk-core.jar deleted file mode 100644 index b3482b8..0000000 Binary files a/jar/cdk-core.jar and /dev/null differ diff --git a/jar/cdk-interfaces.jar b/jar/cdk-interfaces.jar deleted file mode 100644 index 9f49997..0000000 Binary files a/jar/cdk-interfaces.jar and /dev/null differ diff --git a/jar/cdk-qsar.jar b/jar/cdk-qsar.jar deleted file mode 100644 index 3283223..0000000 Binary files a/jar/cdk-qsar.jar and /dev/null differ diff --git a/jar/cdk-standard.jar b/jar/cdk-standard.jar deleted file mode 100644 index 6172239..0000000 Binary files a/jar/cdk-standard.jar and /dev/null differ diff --git a/jar/log4j.jar b/jar/log4j.jar deleted file mode 100644 index dde9972..0000000 Binary files a/jar/log4j.jar and /dev/null differ diff --git 
a/jar/sjava-0.68.jar b/jar/sjava-0.68.jar deleted file mode 100644 index d81e8eb..0000000 Binary files a/jar/sjava-0.68.jar and /dev/null differ diff --git a/jar/sjava-0.68.jar.meta b/jar/sjava-0.68.jar.meta deleted file mode 100644 index 6ac3968..0000000 --- a/jar/sjava-0.68.jar.meta +++ /dev/null @@ -1,9 +0,0 @@ -[sjava-0.68.jar] -Library=SJava -Version=0.68 -Copyright=(c) 1998 1999, The Omega Project for Statistical Computing. All rights reserved. -License=BSD -LicenseURL=http://www.omegahat.org/License.html -Download=Binary downlaod on request from omega-bugs@www.omegahat.org -SourceCode=http://www.omegahat.org/RSJava/SJava_0.68-0.tar.gz -Homepage=http://www.omegahat.org/RSJava/ diff --git a/jar/vecmath1.2-1.14.jar b/jar/vecmath1.2-1.14.jar deleted file mode 100644 index bf60357..0000000 Binary files a/jar/vecmath1.2-1.14.jar and /dev/null differ diff --git a/jar/weka.jar b/jar/weka.jar deleted file mode 100644 index 030517a..0000000 Binary files a/jar/weka.jar and /dev/null differ diff --git a/jar/xom-1.1.jar b/jar/xom-1.1.jar deleted file mode 100644 index 3d1fcc8..0000000 Binary files a/jar/xom-1.1.jar and /dev/null differ diff --git a/src/META-INF/libio-openbabel.cdkdepends b/src/META-INF/libio-openbabel.cdkdepends deleted file mode 100644 index 2969007..0000000 --- a/src/META-INF/libio-openbabel.cdkdepends +++ /dev/null @@ -1,5 +0,0 @@ -cdk-interfaces.jar -cdk-data.jar -cdk-core.jar -cdk-standard.jar -cdk-io.jar diff --git a/src/META-INF/qsarweka.cdkdepends b/src/META-INF/qsarweka.cdkdepends deleted file mode 100644 index 3f34056..0000000 --- a/src/META-INF/qsarweka.cdkdepends +++ /dev/null @@ -1,5 +0,0 @@ -cdk-annotation.jar -cdk-interfaces.jar -cdk-core.jar -cdk-standard.jar -cdk-qsar.jar diff --git a/src/META-INF/qsarweka.libdepends b/src/META-INF/qsarweka.libdepends deleted file mode 100644 index 7002eb7..0000000 --- a/src/META-INF/qsarweka.libdepends +++ /dev/null @@ -1 +0,0 @@ -weka.jar diff --git a/src/main/net/sf/cdk/tools/MakeJavafilesFiles.java 
b/src/main/net/sf/cdk/tools/MakeJavafilesFiles.java deleted file mode 100644 index f32ab63..0000000 --- a/src/main/net/sf/cdk/tools/MakeJavafilesFiles.java +++ /dev/null @@ -1,294 +0,0 @@ -/* $Revision: 6707 $ $Author: egonw $ $Date: 2006-07-30 16:38:18 -0400 (Sun, 30 Jul 2006) $ - * - * Copyright (C) 2006 Egon Willighagen - * - * Contact: cdk-devel@lists.sourceforge.net - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * All we ask is that proper credit is given for our work, which includes - * - but is not limited to - adding the above copyright notice to the beginning - * of your source code files, and to any copyright notice that you may distribute - * with programs based on this work. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - */ -package net.sf.cdk.tools; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.io.PrintWriter; -import java.util.ArrayList; -import java.util.Hashtable; -import java.util.Iterator; -import java.util.List; -import java.util.Map; - -/** - * Class that creates the ${build}/*.javafiles. 
- * - * @author egonw - */ -public class MakeJavafilesFiles { - - private Map> cdkPackages; - private Map> cdkSets; - - /** Java files that should not be processed because they are - * listed in a *.autogenerated file. - */ - private List blacklist; - - private String sourceDir = null; - private String outputDir = null; - private String metainfDir = null; - - public MakeJavafilesFiles(String metainfDir, String sourceDir, String outputDir) { - cdkPackages = new Hashtable>(); - cdkSets = new Hashtable>(); - this.sourceDir = sourceDir; - this.outputDir = outputDir; - this.metainfDir = metainfDir; - readBlackList(); - } - - private void readBlackList() { - blacklist = new ArrayList(); - String metainfDirPath = this.metainfDir; - File metinfDir = new File(metainfDirPath); - File[] files = metinfDir.listFiles(); - for (int i=0; i 0) - blacklist.add(line); - line = reader.readLine(); - } - } catch (Exception e) { - System.out.println("Error reading a *.autogenerated file: " + e.getMessage()); - e.printStackTrace(); - System.exit(1); - } - } - } - - } - - public void outputResults() { - // output information in .javafiles and .classes files - try { - Iterator keys = cdkPackages.keySet().iterator(); - while (keys.hasNext()) { - String key = (String)keys.next(); - - // create one file for each cdk package = key - PrintWriter outJava = new PrintWriter( - new FileWriter(outputDir + "/" + key + ".javafiles") - ); - PrintWriter outClass = new PrintWriter( - new FileWriter(outputDir + "/" + key + ".classes") - ); - List packageClasses = cdkPackages.get(key); - Iterator classes = packageClasses.iterator(); - while (classes.hasNext()) { - String packageClass = toAPIPath(classes.next()); - if (!blacklist.contains(packageClass + ".java")) { - outJava.println(packageClass + ".java"); - outClass.println(packageClass + "*.class"); - } - } - outJava.flush(); outJava.close(); - outClass.flush(); outClass.close(); - } - // output information in .set files - keys = 
cdkSets.keySet().iterator(); - while (keys.hasNext()) { - String key = (String)keys.next(); - - // create one file for each cdk package = key - PrintWriter outJava = new PrintWriter( - new FileWriter(outputDir + "/" + key + ".set") - ); - List packageClasses = cdkSets.get(key); - Iterator classes = packageClasses.iterator(); - while (classes.hasNext()) { - String packageClass = (String)classes.next(); - outJava.println(packageClass); - } - outJava.flush(); outJava.close(); - } - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - } - - public void processJavaSourceFiles(File path) { - if (path.isDirectory()) { - File[] files = path.listFiles(); - for (int i=files.length;i>0;i--) { - processJavaSourceFiles(files[i-1]); - } - } else if (path.isFile() && path.getPath().endsWith(".java") && - !(path.getPath().indexOf("net/sf") != -1 || - path.getPath().indexOf("net\\sf") != -1)) { - String[] moduleAndSet = getModuleAndSet(path); - if (moduleAndSet == null) { - System.out.println("Something wrong with the Java source file: " + path); - } else { - if (moduleAndSet[0] != null) { - addClassToCDKPackage(getSourceName(path), moduleAndSet[0]); - } - if (moduleAndSet[1] != null) { - addClassToCDKSet(getClassName(path), moduleAndSet[1]); - } - } - } - } - - public String[] getModuleAndSet(File file) { - try { - String[] results = new String[2]; - results[0] = "extra"; - results[1] = null; - BufferedReader reader = new BufferedReader( - new FileReader(file) - ); - String line = null; - boolean inComment = false; - while ((line = reader.readLine()) != null) { - int index = line.indexOf("/**"); - if (index != -1) { - inComment = true; - if (line.substring(index).indexOf("**/") != -1) inComment = false; - } else { - if (line.indexOf("*/") != -1) inComment = false; - } - - if (!inComment && (line.indexOf("public class") != -1 || - line.indexOf("public interface") != -1 || - line.indexOf("public @interface") != -1 || - line.indexOf("abstract 
class") != -1 || - line.indexOf("final class") != -1)) { - // Nothing specified: return the default 'extra' - reader.close(); - return results; - } - - index = line.indexOf("@cdk.module"); - String name = ""; - if (index != -1) { - index += 11; - // skip the first chars - while (Character.isWhitespace(line.charAt(index))) index++; - while (index < line.length() && - !Character.isWhitespace(line.charAt(index))) { - name += line.charAt(index); - index++; - } - results[0] = name; - } else { - index = line.indexOf("@cdk.set"); - String set = ""; - if (index != -1) { - index += 11; - // skip the first chars - while (Character.isWhitespace(line.charAt(index))) index++; - while (index < line.length() && - !Character.isWhitespace(line.charAt(index))) { - set += line.charAt(index); - index++; - } - results[1] = set; - } - } - } - } catch (Exception e) { - e.printStackTrace(); - } - return null; - } - - public static void main(String[] args) { - if (args.length != 3) { - System.out.println("Syntax: MakeJavafilesFiles "); - System.exit(-1); - } - - MakeJavafilesFiles processor = new MakeJavafilesFiles(args[0], args[1], args[2]); - - processor.processJavaSourceFiles(new File(args[1])); - processor.outputResults(); - - } - - private String toAPIPath(String className) { - StringBuffer sb = new StringBuffer(); - for (int i=0; i packageClasses = cdkPackages.get(cdkPackageName); - if (packageClasses == null) { - packageClasses = new ArrayList(); - cdkPackages.put(cdkPackageName, packageClasses); - } - packageClasses.add(packageClass); - } - - private void addClassToCDKSet(String packageClass, String cdkPackageName) { - List packageClasses = cdkSets.get(cdkPackageName); - if (packageClasses == null) { - packageClasses = new ArrayList(); - cdkSets.put(cdkPackageName, packageClasses); - } - packageClasses.add(packageClass); - } - -} diff --git a/src/main/net/sf/cdk/tools/bibtex/BibTeXMLEntry.java b/src/main/net/sf/cdk/tools/bibtex/BibTeXMLEntry.java deleted file mode 100644 index 
60104da..0000000 --- a/src/main/net/sf/cdk/tools/bibtex/BibTeXMLEntry.java +++ /dev/null @@ -1,106 +0,0 @@ -/* $Revision: 6707 $ $Author: egonw $ $Date: 2006-07-30 16:38:18 -0400 (Sun, 30 Jul 2006) $ - * - * Copyright (C) 2007 Egon Willighagen - * - * Contact: cdk-devel@lists.sourceforge.net - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * All we ask is that proper credit is given for our work, which includes - * - but is not limited to - adding the above copyright notice to the beginning - * of your source code files, and to any copyright notice that you may distribute - * with programs based on this work. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - */ -package net.sf.cdk.tools.bibtex; - -import nu.xom.Element; -import nu.xom.Node; -import nu.xom.Nodes; -import nu.xom.XPathContext; - -/** - * This is a tool that creates HTML for a subset of the entry types - * defined in BibTeXML. It expects the document to be valid according to - * the BibTeXML schema, or will fail horribly. 
- * - * @author egonw - */ -public class BibTeXMLEntry { - - private Node entry; - private XPathContext context; - - public BibTeXMLEntry(Node entry) { - this.entry = entry; - context = new XPathContext("bibtex", BibTeXMLFile.BIBTEXML_NAMESPACE); - context.addNamespace("b", BibTeXMLFile.BIBTEXML_NAMESPACE); - } - - /** - * The style is undefined and just made to look nice. - */ - public String toHTML() { - // b:article - Nodes results = entry.query("./b:article", context); - for (int i=0; i").append(title).append(", "); - buffer.append(journal).append(", ").append(year).append(", "); - buffer.append(volume).append(":").append(pages); - return buffer.toString(); - } - - protected String formatMisc(String authors, String title) { - StringBuffer buffer = new StringBuffer(); - buffer.append(authors).append(", ").append(title).append(""); - return buffer.toString(); - } - - /** - * @param node Parent for the child. - * @param childElement Localname of the child element. - * @param def String to default to if no child element is found. - * @return String value for the child node. - */ - private String getString(Node node, String childElement, String def) { - Nodes result = node.query("./b:" + childElement, context); - return result.size() > 0 ? 
((Element)result.get(0)).getValue() : def; - } - -} diff --git a/src/main/net/sf/cdk/tools/bibtex/BibTeXMLFile.java b/src/main/net/sf/cdk/tools/bibtex/BibTeXMLFile.java deleted file mode 100644 index 61e8548..0000000 --- a/src/main/net/sf/cdk/tools/bibtex/BibTeXMLFile.java +++ /dev/null @@ -1,75 +0,0 @@ -/* $Revision: 6707 $ $Author: egonw $ $Date: 2006-07-30 16:38:18 -0400 (Sun, 30 Jul 2006) $ - * - * Copyright (C) 2007 Egon Willighagen - * - * Contact: cdk-devel@lists.sourceforge.net - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * All we ask is that proper credit is given for our work, which includes - * - but is not limited to - adding the above copyright notice to the beginning - * of your source code files, and to any copyright notice that you may distribute - * with programs based on this work. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - */ -package net.sf.cdk.tools.bibtex; - -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - -import nu.xom.Node; -import nu.xom.Nodes; -import nu.xom.XPathContext; - -/** - * Wrapper for a BibTeXML file. 
- * - * @author egonw - */ -public class BibTeXMLFile { - - public final static String BIBTEXML_NAMESPACE = "http://bibtexml.sf.net/"; - - private Node root; - private XPathContext context; - - public BibTeXMLFile(Node root) { - this.root = root; - context = new XPathContext("bibtex", BIBTEXML_NAMESPACE); - context.addNamespace("b", BIBTEXML_NAMESPACE); - } - - /** - * Returns an Iterator<BibTeXMLEntry>. - * - * @return The BibTeXMLEntry - */ - public Iterator getEntries() { - List entries = new ArrayList(); - Nodes results = root.query("//b:entry", context); - for (int i=0; i 0) { - return new BibTeXMLEntry(results.get(0)); - } - return null; - } - -} diff --git a/src/main/net/sf/cdk/tools/doclets/CDKBugTaglet.java b/src/main/net/sf/cdk/tools/doclets/CDKBugTaglet.java deleted file mode 100644 index 5ba055b..0000000 --- a/src/main/net/sf/cdk/tools/doclets/CDKBugTaglet.java +++ /dev/null @@ -1,100 +0,0 @@ -/* $Revision: 10432 $ $Author: miguelrojasch $ $Date: 2008-03-18 10:48:13 +0100 (Tue, 18 Mar 2008) $ - * - * Copyright (C) 2004-2007 Egon Willighagen - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -package net.sf.cdk.tools.doclets; - -import com.sun.tools.doclets.Taglet; -import com.sun.javadoc.*; -import java.util.Map; - -/** - * Taglet that expands @cdk.bug tag into a weblink to CDK's - * SourceForge bug track system. It's typically used as: - *
- *   @cdk.bug 1095690
- * 
- */ -public class CDKBugTaglet implements Taglet { - - private static final String NAME = "cdk.bug"; - - public String getName() { - return NAME; - } - - public boolean inField() { - return false; - } - - public boolean inConstructor() { - return false; - } - - public boolean inMethod() { - return false; - } - - public boolean inOverview() { - return false; - } - - public boolean inPackage() { - return false; - } - - public boolean inType() { - return true; - } - - public boolean isInlineTag() { - return false; - } - - public static void register(Map tagletMap) { - CDKBugTaglet tag = new CDKBugTaglet(); - Taglet t = (Taglet) tagletMap.get(tag.getName()); - if (t != null) { - tagletMap.remove(tag.getName()); - } - tagletMap.put(tag.getName(), tag); - } - - public String toString(Tag tag) { - return "
This class is affected by these bug(s):
" - + expand(tag) + "
\n"; - } - - public String toString(Tag[] tags) { - if (tags.length == 0) { - return null; - } else { - StringBuffer list = new StringBuffer(); - list.append("
This class is affected by these bug(s):
"); - for (int i=0; i\n"); - return list.toString(); - } - } - - private String expand(Tag tag) { - return "" + tag.text() + ""; - } -} diff --git a/src/main/net/sf/cdk/tools/doclets/CDKCiteTaglet.java b/src/main/net/sf/cdk/tools/doclets/CDKCiteTaglet.java deleted file mode 100644 index 01223c2..0000000 --- a/src/main/net/sf/cdk/tools/doclets/CDKCiteTaglet.java +++ /dev/null @@ -1,158 +0,0 @@ -/* $Revision: 7327 $ $Author: egonw $ $Date: 2006-11-20 20:22:51 +0100 (Mon, 20 Nov 2006) $ - * - * Copyright (C) 2004-2007 Egon Willighagen - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -package net.sf.cdk.tools.doclets; - -import java.io.File; -import java.util.Map; -import java.util.StringTokenizer; - -import net.sf.cdk.tools.bibtex.BibTeXMLEntry; -import net.sf.cdk.tools.bibtex.BibTeXMLFile; -import nu.xom.Builder; -import nu.xom.Document; - -import com.sun.javadoc.Tag; -import com.sun.tools.doclets.Taglet; - -/** - * Taglet that expands inline cdk.cite tags into a weblink to the CDK - * bibliography webpage. Like all inline tags it's used in the JavaDoc - * text as: - *
- * This class does nothing {@cdk.cite NULL}.
- * 
- * For this code a reference is created like this: - *
- * NULL
- * 
- * - *

Citations can be singular, like {@cdk.cite BLA}, - * and multiple, like {@cdk.cite BLA,BLA2,FOO}. - */ -public class CDKCiteTaglet implements Taglet { - - private static final String NAME = "cdk.cite"; - - private static BibTeXMLFile bibtex = null; - - static { - try { - Builder parser = new Builder(); - Document doc = parser.build(new File("doc/refs/cheminf.bibx")); - bibtex = new BibTeXMLFile(doc.getRootElement()); - } catch (Exception exc) { - System.out.println("Horrible problem: " + exc.getMessage()); - exc.printStackTrace(); - } - } - - public String getName() { - return NAME; - } - - public boolean inField() { - return true; - } - - public boolean inConstructor() { - return true; - } - - public boolean inMethod() { - return true; - } - - public boolean inOverview() { - return true; - } - - public boolean inPackage() { - return true; - } - - public boolean inType() { - return true; - } - - public boolean isInlineTag() { - return true; - } - - public static void register(Map tagletMap) { - CDKCiteTaglet tag = new CDKCiteTaglet(); - Taglet t = (Taglet) tagletMap.get(tag.getName()); - if (t != null) { - tagletMap.remove(tag.getName()); - } - tagletMap.put(tag.getName(), tag); - } - - public String toString(Tag tag) { - return "[" + expandCitation(tag.text()) + "]"; - } - - public String toString(Tag[] tags) { - String result = null; - if (tags.length > 0) { - result = "["; - for (int i=0; i"; - if (citation.indexOf(separator) != -1) { - StringTokenizer tokenizer = new StringTokenizer(citation, separator); - while (tokenizer.hasMoreTokens()) { - String token = tokenizer.nextToken().trim(); - BibTeXMLEntry entry = bibtex.getEntry(token); - if (entry != null) { - result += entry.toHTML(); - } else { - result += token + " (not found in db)"; - System.out.println("CDKCiteERROR: entry not found in database: " + token); - } - if (tokenizer.hasMoreTokens()) { - result += ", "; - } - } - } else { - citation = citation.trim(); - BibTeXMLEntry entry = 
bibtex.getEntry(citation); - if (entry != null) { - result += entry.toHTML(); - } else { - result += citation + " (not found in db)"; - System.out.println("CDKCiteERROR: entry not found in database: " + citation); - } - } - return result; - } - -} diff --git a/src/main/net/sf/cdk/tools/doclets/CDKDictRefTaglet.java b/src/main/net/sf/cdk/tools/doclets/CDKDictRefTaglet.java deleted file mode 100644 index 2b39813..0000000 --- a/src/main/net/sf/cdk/tools/doclets/CDKDictRefTaglet.java +++ /dev/null @@ -1,134 +0,0 @@ -/* $Revision: 10432 $ $Author: miguelrojasch $ $Date: 2008-03-18 10:48:13 +0100 (Tue, 18 Mar 2008) $ - * - * Copyright (C) 2004-2007 Egon Willighagen - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -package net.sf.cdk.tools.doclets; - -import com.sun.tools.doclets.Taglet; -import com.sun.javadoc.*; -import java.util.Map; -import java.util.StringTokenizer; -import java.util.Hashtable; - -/** - * Taglet that expands inline cdk.dictref tags into a weblink to the appropriate - * dictionary. For example. - *

- * @cdk.dictref blue-obelisk:graphPartitioning
- * 
- * - *

The known dictionaries are: - *

- */ -public class CDKDictRefTaglet implements Taglet { - - private static final String NAME = "cdk.dictref"; - - private static final Map dictURLs; - private static final Map dictNames; - - static { - dictURLs = new Hashtable(5); - dictNames = new Hashtable(5); - - dictURLs.put("bodf", "http://qsar.sourceforge.net/ontologies/data-features/index.xhtml"); - dictNames.put("bodf", "Blue Obelisk Data Features Dictionary"); - - dictURLs.put("blue-obelisk", "http://qsar.sourceforge.net/dicts/blue-obelisk/index.xhtml"); - dictNames.put("blue-obelisk", "Blue Obelisk Chemoinformatics Dictionary"); - - dictURLs.put("qsar-descriptors", "http://qsar.sourceforge.net/dicts/qsar-descriptors/index.xhtml"); - dictNames.put("qsar-descriptors", "QSAR.sf.net Descriptors Dictionary"); - } - - public String getName() { - return NAME; - } - - public boolean inField() { - return true; - } - - public boolean inConstructor() { - return true; - } - - public boolean inMethod() { - return true; - } - - public boolean inOverview() { - return true; - } - - public boolean inPackage() { - return true; - } - - public boolean inType() { - return true; - } - - public boolean isInlineTag() { - return false; - } - - public static void register(Map tagletMap) { - CDKDictRefTaglet tag = new CDKDictRefTaglet(); - Taglet t = (Taglet) tagletMap.get(tag.getName()); - if (t != null) { - tagletMap.remove(tag.getName()); - } - tagletMap.put(tag.getName(), tag); - } - - public String toString(Tag tag) { - String tagText = tag.text(); - String separator = ":"; - if (tagText.indexOf(separator) != -1) { - StringTokenizer tokenizer = new StringTokenizer(tagText, separator); - String dictCode = tokenizer.nextToken(); - String dictRef = tokenizer.nextToken(); - String output = "
Dictionary pointer(s):
"; - if (dictURLs.containsKey(dictCode)) { - String url = dictURLs.get(dictCode) + "#" + dictRef; - output += "" + dictRef + - " in the " + dictNames.get(dictCode) + " [" + - tagText + "]
\n"; - } else { - output += "Unknown code: " + tagText + "\n"; - } - return output; - } else { - return "
A pointer to a dictionary:
Unknown code: " + tagText + "
\n"; - } - } - - public String toString(Tag[] tags) { - if (tags.length == 0) { - return null; - } else { - return toString(tags[0]); - } - } - -} diff --git a/src/main/net/sf/cdk/tools/doclets/CDKInChITaglet.java b/src/main/net/sf/cdk/tools/doclets/CDKInChITaglet.java deleted file mode 100644 index 5f0377c..0000000 --- a/src/main/net/sf/cdk/tools/doclets/CDKInChITaglet.java +++ /dev/null @@ -1,99 +0,0 @@ -/* $Revision: 7973 $ $Author: egonw $ $Date: 2007-02-19 13:16:03 +0100 (Mon, 19 Feb 2007) $ - * - * Copyright (C) 2007 Egon Willighagen - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -package net.sf.cdk.tools.doclets; - -import com.sun.tools.doclets.Taglet; -import com.sun.javadoc.*; -import java.util.Map; - -/** - * Taglet that expands @cdk.inchi tag into a RDFa marked up HTML - * fragment. It's typically used as: - *
- *   @cdk.inchi InChI=1/CH4/h1H4
- * 
- */ -public class CDKInChITaglet implements Taglet { - - private static final String NAME = "cdk.inchi"; - - public String getName() { - return NAME; - } - - public boolean inField() { - return false; - } - - public boolean inConstructor() { - return false; - } - - public boolean inMethod() { - return true; - } - - public boolean inOverview() { - return false; - } - - public boolean inPackage() { - return false; - } - - public boolean inType() { - return true; - } - - public boolean isInlineTag() { - return false; - } - - public static void register(Map tagletMap) { - CDKInChITaglet tag = new CDKInChITaglet(); - Taglet t = (Taglet) tagletMap.get(tag.getName()); - if (t != null) { - tagletMap.remove(tag.getName()); - } - tagletMap.put(tag.getName(), tag); - } - - public String toString(Tag tag) { - return "
InChI:
" - + expand(tag) + "
\n"; - } - - public String toString(Tag[] tags) { - if (tags.length == 0) { - return null; - } else { - StringBuffer list = new StringBuffer(); - list.append("
InChI(s):
"); - for (int i=0; i\n"); - return list.toString(); - } - } - - private String expand(Tag tag) { - return "" + tag.text() + ""; - } -} diff --git a/src/main/net/sf/cdk/tools/doclets/CDKModuleTaglet.java b/src/main/net/sf/cdk/tools/doclets/CDKModuleTaglet.java deleted file mode 100644 index 56ab3a1..0000000 --- a/src/main/net/sf/cdk/tools/doclets/CDKModuleTaglet.java +++ /dev/null @@ -1,88 +0,0 @@ -/* $Revision: 10432 $ $Author: miguelrojasch $ $Date: 2008-03-18 10:48:13 +0100 (Tue, 18 Mar 2008) $ - * - * Copyright (C) 2004 Egon Willighagen - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -package net.sf.cdk.tools.doclets; - -import com.sun.tools.doclets.Taglet; -import com.sun.javadoc.*; -import java.util.Map; - -/** - * Taglet that expands @cdk.module tag into a weblink to the CDK - * webpage. 
- */ -public class CDKModuleTaglet implements Taglet { - - private static final String NAME = "cdk.module"; - - public String getName() { - return NAME; - } - - public boolean inField() { - return true; - } - - public boolean inConstructor() { - return true; - } - - public boolean inMethod() { - return true; - } - - public boolean inOverview() { - return true; - } - - public boolean inPackage() { - return true; - } - - public boolean inType() { - return true; - } - - public boolean isInlineTag() { - return false; - } - - public static void register(Map tagletMap) { - CDKModuleTaglet tag = new CDKModuleTaglet(); - Taglet t = (Taglet) tagletMap.get(tag.getName()); - if (t != null) { - tagletMap.remove(tag.getName()); - } - tagletMap.put(tag.getName(), tag); - } - - public String toString(Tag tag) { - return "
Belongs to CDK module:
" - + "" + tag.text() + "
\n"; - } - - public String toString(Tag[] tags) { - if (tags.length == 0) { - return null; - } else { - return toString(tags[0]); - } - } - -} diff --git a/src/main/net/sf/cdk/tools/doclets/CDKSVNTaglet.java b/src/main/net/sf/cdk/tools/doclets/CDKSVNTaglet.java deleted file mode 100644 index 34f1aa4..0000000 --- a/src/main/net/sf/cdk/tools/doclets/CDKSVNTaglet.java +++ /dev/null @@ -1,137 +0,0 @@ -/* $Revision: 7973 $ $Author: egonw $ $Date: 2007-02-19 13:16:03 +0100 (Mon, 19 Feb 2007) $ - * - * Copyright (C) 2007 Egon Willighagen - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -package net.sf.cdk.tools.doclets; - -import java.util.Map; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import com.sun.javadoc.SourcePosition; -import com.sun.javadoc.Tag; -import com.sun.tools.doclets.Taglet; - -/** - * Taglet that expands @cdk.svnrev tag into a link to the SVN - * source tree. The syntax must be as follows: - *
- *   @cdk.svnrev $Revision: 7973 $
- * 
- * - *

The actual version number is automatically updated by the - * SVN repository. - */ -public class CDKSVNTaglet implements Taglet { - - private static final String NAME = "cdk.svnrev"; - private final static Pattern svnrevPattern = Pattern.compile("\\$Revision:\\s*(\\d*)\\s*\\$"); - private final static Pattern pathPattern = Pattern.compile(".*/(src/.*\\.java)"); - - public String getName() { - return NAME; - } - - public boolean inField() { - return false; - } - - public boolean inConstructor() { - return false; - } - - public boolean inMethod() { - return false; - } - - public boolean inOverview() { - return false; - } - - public boolean inPackage() { - return false; - } - - public boolean inType() { - return true; - } - - public boolean isInlineTag() { - return false; - } - - public static void register(Map tagletMap) { - CDKSVNTaglet tag = new CDKSVNTaglet(); - Taglet t = (Taglet) tagletMap.get(tag.getName()); - if (t != null) { - tagletMap.remove(tag.getName()); - } - tagletMap.put(tag.getName(), tag); - } - - public String toString(Tag tag) { - return "

Source code:
" - + expand(tag) + "
\n"; - } - - public String toString(Tag[] tags) { - if (tags.length == 0) { - return null; - } else { - return toString(tags[0]); - } - } - - private String expand(Tag tag) { - // get the version number - String text = tag.text(); - Matcher matcher = svnrevPattern.matcher(text); - String revision = "HEAD"; - if (matcher.matches()) { - revision = matcher.group(1); - } else { - System.out.println("Malformed @cdk.svnrev content: " + text); - return ""; - } - // create the URL - SourcePosition file = tag.position(); - String path = correctSlashes(file.file().getAbsolutePath()); - matcher = pathPattern.matcher(path); - if (matcher.matches()) { - String url = "http://cdk.svn.sourceforge.net/viewvc/cdk/trunk/cdk/" + - matcher.group(1) + "?revision=" + - revision + "&view=markup"; - return "revision " + revision + ""; - } else { - System.out.println("Could not resolve class name from: " + path); - } - return ""; - } - - private String correctSlashes(String absolutePath) { - StringBuffer buffer = new StringBuffer(); - for (int i=0; i - * - * Contact: cdk-devel@lists.sourceforge.net - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - */ - -package org.openscience.cdk.qsar.model; - -/** - * Classes that implement this interface will build statistical models. 
- * - * Currently the design of the modeling system is that classes based on - * a given backend should be based of an abtract class that implements - * this interface. See RModel as an example. - * - * @author Rajarshi Guha - * @cdk.module qsar - * @cdk.githash - */ -public interface IModel { - - /** - * Builds (trains) the model. - * - * @throws QSARModelException if errors occur in data types, calls to the R session. See - * the corresponding method in subclasses of this class for further details. - */ - public void build() throws QSARModelException; - /** - * Makes predictions using a previously built model. - * - * @throws QSARModelException if errors occur in data types, calls to the R session. See - * the corresponding method in subclasses of this class for further details. - */ - public void predict() throws QSARModelException; -} - diff --git a/src/main/org/openscience/cdk/qsar/model/QSARModelException.java b/src/main/org/openscience/cdk/qsar/model/QSARModelException.java deleted file mode 100644 index 011a692..0000000 --- a/src/main/org/openscience/cdk/qsar/model/QSARModelException.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (C) 2004-2007 Rajarshi Guha - * - * Contact: cdk-devel@lists.sourceforge.net - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
- */ - -package org.openscience.cdk.qsar.model; - -import org.openscience.cdk.exception.CDKException; - -/** - * Exception that is thrown by model routines when a problem has occured. - * - * @author Rajarshi Guha - * @cdk.module qsar - * @cdk.githash - */ -public class QSARModelException extends CDKException { - - private static final long serialVersionUID = 4931287199065879144L; - - /** - * Constructs a new QSARModelException with the given message. - * - * @param message for the constructed exception - */ - public QSARModelException(String message) { - super( message ); - } -} - diff --git a/src/main/org/openscience/cdk/qsar/model/R/CNNClassificationModel.java b/src/main/org/openscience/cdk/qsar/model/R/CNNClassificationModel.java deleted file mode 100644 index feb88c3..0000000 --- a/src/main/org/openscience/cdk/qsar/model/R/CNNClassificationModel.java +++ /dev/null @@ -1,577 +0,0 @@ -/* $RCSfile$ - * $Author$ - * $Date$ - * $Revision$ - * - * Copyright (C) 2004-2008 Rajarshi Guha - * - * Contact: cdk-devel@lists.sourceforge.net - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
- */ - -package org.openscience.cdk.qsar.model.R; - -import org.openscience.cdk.qsar.model.QSARModelException; - -import java.util.HashMap; - -/** - * A modeling class that provides a computational neural network classification model. - * - * When instantiated this class ensures that the R/Java interface has been - * initialized. The response and independent variables can be specified at construction - * time or via the setParameters method. - * The actual fitting procedure is carried out by build after which - * the model may be used to make predictions, via predict. An example of the use - * of this class is shown below: - *
- * double[][] x;
- * String[] y;
- * Double[] wts;
- * Double[][] newx;
- * ...
- * try {
- *     CNNClassificationModel cnnrm = new CNNClassificationModel(x,y,3);
- *     cnnrm.setParameters("Wts",wts);
- *     cnnrm.build();
- *     
- *     double fitValue = cnnrm.getFitValue();
- *     
- *     cnnrm.setParameters("newdata", newx);
- *     cnnrm.setParameters("type", "raw");
- *     cnnrm.predict();
- *
- *     double[][] preds = cnnrm.getPredictPredictedRaw();
- * } catch (QSARModelException qme) {
- *     System.out.println(qme.toString());
- * }
- * 
- * The above code snippet builds a 3-3-1 CNN model. - * Multiple output neurons are easily - * specified by supplying a matrix for y (i.e., double[][]) with the output variables - * in the columns. - *

- * Nearly all the arguments to - * nnet() are - * supported via the setParameters method. The table below lists the names of the arguments, - * the expected type of the argument and the default setting for the arguments supported by this wrapper class. - *

- * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - *
NameJava TypeDefaultNotes
xDouble[][]NoneThis must be set by the caller via the constructors or via setParameters
yString[][]NoneThis must be set by the caller via the constructors or via setParameters
weightsDouble[]rep(1,nobs)The default case weights is a vector of 1's equal in length to the number of observations, nobs
sizeIntegerNoneThis must be set by the caller via the constructors or via setParameters
subsetInteger[]1:nobsThis is supposed to be an index vector specifying which observations are to be used in building the model. The default indicates that all should be used
WtsDouble[]runif(1,nwt)The initial weight vector is set to a random vector of length equal to the number of weights if not set by the user
maskBoolean[]rep(TRUE,nwt)All weights are to be optimized unless otherwise specified by the user
linoutBooleanFALSESince this class performs classification this need not be changed
entropyBooleanTRUE
softmaxBooleanFALSE
censoredBooleanFALSE
skipBooleanFALSE
rangDouble0.7
decayDouble0.0
maxitInteger100
HessBooleanFALSE
traceBooleanFALSE
MaxNWtsInteger1000
abstolDouble1.0e-4
reltolDouble1.0e-8
- *
- *

- * In general the getFit* methods provide access to results from the fit - * and getPredict* methods provide access to results from the prediction (i.e., - * prediction using the model on new data). The values returned correspond to the various - * values returned by the nnet and - * predict.nnet functions - * in R - *

- * See {@link RModel} for details regarding the R and SJava environment. - * - * @author Rajarshi Guha - * @cdk.require r-project - * @cdk.module qsar - * @cdk.githash - * - * @cdk.keyword neural network - * @cdk.keyword classification - * @deprecated - */ -public class CNNClassificationModel extends RModel { - - static int globalID = 0; - private int currentID; - private CNNClassificationModelFit modelfit = null; - private CNNClassificationModelPredict modelpredict = null; - - private HashMap params = null; - private int noutput = 0; - private int nvar = 0; - - private void setDefaults() { - // lets set the default values of the arguments that are specified - // to have default values in ?nnet - - // these params are vectors that depend on user defined stuff - // so as a default we set them to FALSE so R can check if these - // were not set - this.params.put("subset", false); - this.params.put("mask", false ); - this.params.put("Wts", false); - this.params.put("weights", false); - - this.params.put("linout", false); // we want only classification - this.params.put("entropy", true); - this.params.put("softmax",false); - this.params.put("censored", false); - this.params.put("skip", false); - this.params.put("rang", Double.valueOf(0.7)); - this.params.put("decay", Double.valueOf(0.0)); - this.params.put("maxit", Integer.valueOf(100)); - this.params.put("Hess", false); - this.params.put("trace", false); // no need to see output - this.params.put("MaxNWts", Integer.valueOf(1000)); - this.params.put("abstol", Double.valueOf(1.0e-4)); - this.params.put("reltol", Double.valueOf(1.0e-8)); - } - - /** - * Constructs a CNNClassificationModel object. - * - * This constructor allows the user to simply set up the modeling class. It is - * expected that parameters such as training data, architecture will be set at a - * later point. The result of this constructor is to simply create a name for the - * current instance of the modeling object. - *

- * Other parameters that are required to be set should be done via - * calls to setParameters. A number of parameters are set to the - * defaults as specified in the manpage for - * nnet. - * - */ - public CNNClassificationModel() { - super(); - this.params = new HashMap(); - this.currentID = CNNClassificationModel.globalID; - CNNClassificationModel.globalID++; - this.setModelName("cdkCNNCModel"+this.currentID); - this.setDefaults(); - } - - /** - * Constructs a CNNClassificationModel object. - * - * This constructor allows the user to specify the dependent and - * independent variables along with the number of hidden layer neurons. - * This constructor is suitable for cases when there is a single output - * neuron. If the number of rows of the design matrix is not equal to - * the number of observations in y an exception will be thrown. - *

- * Other parameters that are required to be set should be done via - * calls to setParameters. A number of parameters are set to the - * defaults as specified in the manpage for - * nnet. - * - * @param x An array of independent variables. Observations should be in - * the rows and variables in the columns. - * @param y An array (single column) of observed class assignments - * @param size The number of hidden layer neurons - * @throws QSARModelException if the number of observations in x and y are not the same - */ - public CNNClassificationModel(double[][] x, String[] y, int size) throws QSARModelException { - super(); - this.params = new HashMap(); - this.currentID = CNNClassificationModel.globalID; - CNNClassificationModel.globalID++; - this.setModelName("cdkCNNCModel"+this.currentID); - - int nrow = y.length; - int ncol = x[0].length; - - if (nrow != x.length) { - throw new QSARModelException("The number of values for the dependent variable does not match the number of rows of the design matrix"); - } - - this.nvar = ncol; - this.noutput = 1; - - Double[][] xx = new Double[nrow][ncol]; - String[][] yy = new String[nrow][1]; - - for (int i = 0; i < nrow; i++) { - yy[i][0] = new String(y[i]); - for (int j = 0; j < ncol; j++) { - xx[i][j] = new Double(x[i][j]); - } - } - this.params.put("x", xx); - this.params.put("y", yy); - this.params.put("size", Integer.valueOf(size)); - this.setDefaults(); - } - - /** - * Constructs a CNNClassificationModel object. - * - * This constructor allows the user to specify the dependent and - * independent variables along with the number of hidden layer neurons. - * This constructor is suitable for cases when there are multiple output - * neuron. If the number of rows of the design matrix is not equal to - * the number of observations in y an exception will be thrown. - *

- * Other parameters that are required to be set should be done via - * calls to setParameters. A number of parameters are set to the - * defaults as specified in the manpage for - * nnet. - * - * @param x An array of independent variables. Observations should be in - * the rows and variables in the columns. - * @param y An array (multiple columns) of observed values - * @param size The number of hidden layer neurons - * @throws QSARModelException if the number of observations in x and y are not the same - */ - public CNNClassificationModel(double[][] x, String[][] y, int size) throws QSARModelException{ - super(); - this.params = new HashMap(); - this.currentID = CNNClassificationModel.globalID; - CNNClassificationModel.globalID++; - this.setModelName("cdkCNNCModel"+this.currentID); - - int nrow = y.length; - int ncol = x[0].length; - - if (nrow != x.length) { - throw new QSARModelException("The number of values for the dependent variable does not match the number of rows of the design matrix"); - } - - this.nvar = ncol; - this.noutput = y[0].length; - - Double[][] xx = new Double[nrow][ncol]; - String[][] yy = new String[nrow][this.noutput]; - - for (int i = 0; i < nrow; i++) { - for (int j = 0; j < ncol; j++) { - xx[i][j] = new Double(x[i][j]); - } - } - for (int i = 0; i < nrow; i++) { - for (int j = 0; j < this.noutput; j++) { - yy[i][j] = new String(y[i][j]); - } - } - this.params.put("x", xx); - this.params.put("y", yy); - this.params.put("size", Integer.valueOf(size)); - this.setDefaults(); - } - - - /** - * Sets parameters required for building a linear model or using one for prediction. - * - * This function allows the caller to set the various parameters available - * for the - * nnet - * and - * predict.nnet - * R routines. See the R help pages for the details of the available - * parameters. 
- * - * @param key A String containing the name of the parameter as described in the - * R help pages - * @param obj An Object containing the value of the parameter - * @throws QSARModelException if the type of the supplied value does not match the expected type - */ - public void setParameters(String key, Object obj) throws QSARModelException { - // since we know the possible values of key we should check the coresponding - // objects and throw errors if required. Note that this checking can't really check - // for values (such as number of variables in the X matrix to build the model and the - // X matrix to make new predictions) - these should be checked in functions that will - // use these parameters. The main checking done here is for the class of obj and - // some cases where the value of obj is not dependent on what is set before it - - if (key.equals("y")) { - if (!(obj instanceof String[][])) { - throw new QSARModelException("The class of the 'y' object must be String[][]"); - } else { - noutput = ((String[][])obj)[0].length; - } - } - if (key.equals("x")) { - if (!(obj instanceof Double[][])) { - throw new QSARModelException("The class of the 'x' object must be Double[][]"); - } else { - nvar = ((Double[][])obj)[0].length; - } - } - if (key.equals("weights")) { - if (!(obj instanceof Double[])) { - throw new QSARModelException("The class of the 'weights' object must be Double[]"); - } - } - if (key.equals("size")) { - if (!(obj instanceof Integer)) { - throw new QSARModelException("The class of the 'size' object must be Integer"); - } - } - if (key.equals("subset")) { - if (!(obj instanceof Integer[])) { - throw new QSARModelException("The class of the 'size' object must be Integer[]"); - } - } - if (key.equals("Wts")) { - if (!(obj instanceof Double[])) { - throw new QSARModelException("The class of the 'Wts' object must be Double[]"); - } - } - if (key.equals("mask")) { - if (!(obj instanceof Boolean[])) { - throw new QSARModelException("The class of 
the 'mask' object must be Boolean[]"); - } - } - if (key.equals("linout") || - key.equals("entropy") || - key.equals("softmax") || - key.equals("censored") || - key.equals("skip") || - key.equals("Hess") || - key.equals("trace")) { - if (!(obj instanceof Boolean)) { - throw new QSARModelException("The class of the 'trace|skip|Hess|linout|entropy|softmax|censored' object must be Boolean"); - } - } - if (key.equals("rang") || - key.equals("decay") || - key.equals("abstol") || - key.equals("reltol")) { - if (!(obj instanceof Double)) { - throw new QSARModelException("The class of the 'reltol|abstol|decay|rang' object must be Double"); - } - } - if (key.equals("maxit") || - key.equals("MaxNWts")) { - if (!(obj instanceof Integer)) { - throw new QSARModelException("The class of the 'maxit|MaxNWts' object must be Integer"); - } - } - - if (key.equals("newdata")) { - if ( !(obj instanceof Double[][])) { - throw new QSARModelException("The class of the 'newdata' object must be Double[][]"); - } - } - this.params.put(key,obj); - } - - /** - * Fits a CNN classification model. - * - * This method calls the R function to fit a CNN classification model - * to the specified dependent and independent variables. If an error - * occurs in the R session, an exception is thrown. - *

- * Note that, this method should be called prior to calling the various get - * methods to obtain information regarding the fit. - */ - public void build() throws QSARModelException { - try { - this.modelfit = (CNNClassificationModelFit)revaluator.call("buildCNNClass", - new Object[]{ getModelName(), this.params }); - } catch (Exception re) { - throw new QSARModelException(re.toString()); - } - } - - /** - * Uses a fitted model to predict the response for new observations. - * - * This function uses a previously fitted model to obtain predicted values - * for a new set of observations. If the model has not been fitted prior to this - * call an exception will be thrown. Use setParameters - * to set the values of the independent variable for the new observations. You can also - * set the type argument (see here). - * However, since this class performs CNN classification, the default setting (type='raw') is sufficient. - *x - */ - public void predict() throws QSARModelException { - if (this.modelfit == null) - throw new QSARModelException("Before calling predict() you must fit the model using build()"); - - Double[][] newx = (Double[][])this.params.get("newdata"); - if (newx[0].length != this.nvar) { - throw new QSARModelException("Number of independent variables used for prediction must match those used for fitting"); - } - - try { - this.modelpredict = (CNNClassificationModelPredict)revaluator.call("predictCNNClass", - new Object[]{ getModelName(), this.params }); - } catch (Exception re) { - throw new QSARModelException(re.toString()); - } - } - - /** - * Loads a CNNRegresionModel object from disk in to the current session. 
- * - * - * @param fileName The disk file containing the model - * @throws QSARModelException if the model that was loaded was not a CNNClassification - * model - */ - public void loadModel(String fileName) throws QSARModelException { - // should probably check that the filename does exist - Object model = (Object)revaluator.call("loadModel", new Object[]{ (Object)fileName }); - String modelName = (String)revaluator.call("loadModel.getName", new Object[] { (Object)fileName }); - - if (model.getClass().getName().equals("org.openscience.cdk.qsar.model.R.CNNClassificationModelFit")) { - this.modelfit = (CNNClassificationModelFit)model; - this.setModelName(modelName); - Double tmp = (Double)revaluator.eval(modelName+"$n[1]"); - nvar = (int)tmp.doubleValue(); - } else throw new QSARModelException("The loaded model was not a CNNClassificationModel"); - } - /** - * Loads an CNNClassificationModel object from a serialized string into the current session. - * - * @param serializedModel A String containing the serialized version of the model - * @param modelName A String indicating the name of the model in the R session - * @throws QSARModelException if the model being loaded is not a CNN classification model - * object - */ - public void loadModel(String serializedModel, String modelName) throws QSARModelException { - // should probably check that the fileName does exist - Object model = (Object)revaluator.call("unserializeModel", new Object[]{ (Object)serializedModel, (Object)modelName }); - String modelname = modelName; - - if (model.getClass().getName().equals("org.openscience.cdk.qsar.model.R.CNNClassificationModelFit")) { - this.modelfit =(CNNClassificationModelFit)model; - this.setModelName(modelname); - Double tmp = (Double)revaluator.eval(modelname+"$n[1]"); - nvar = (int)tmp.doubleValue(); - } else throw new QSARModelException("The loaded model was not a CNNClassificationModel"); - } - - /** - * Gets final value of the fitting criteria. 
- * - * This method only returns meaningful results if the build - * method of this class has been previously called. - * - * @return A double indicating the value of the fitting criterion plus weight decay term. - */ - public double getFitValue() { - return(this.modelfit.getValue()); - } - - /** - * Gets optimized weights for the model. - * - * This method only returns meaningful results if the build - * method of this class has been previously called. - * - * @return A double[] containing the weights. The number of weights will be - * equal to

(Ni * Nh) + (Nh * No) + Nh + No
where Ni, Nh and No - * are the number of input, hidden and output neurons. - */ - public double[] getFitWeights() { - return(this.modelfit.getWeights()); - } - /** - * Gets fitted values from the final model. - * - * This method only returns meaningful results if the build - * method of this class has been previously called. - * - * @return A double[][] containing the fitted values for each output neuron - * in the columns. Note that even if a single output neuron was specified during - * model building the return value is still a 2D array (with a single column). - */ - public double[][] getFitFitted() { - return(this.modelfit.getFitted()); - } - /** - * Gets residuals for the fitted values from the final model. - * - * This method only returns meaningful results if the build - * method of this class has been previously called. - * - * @return A double[][] containing the residuals for each output neuron - * in the columns. Note that even if a single output neuron was specified during - * model building the return value is still a 2D array (with a single column). - */ - public double[][] getFitResiduals() { - return(this.modelfit.getResiduals()); - } - /** - * Gets the Hessian of the measure of fit. - * - * If the Hess option was set to TRUE before the call to build - * then the CNN routine will return the Hessian of the measure of fit at the best set of - * weights found. * This method only returns meaningful results if the build - * method of this class has been previously called. - * - * @return A double[][] containing the Hessian. It will be a square array - * with dimensions equal to the Nwt x Nwt, where Nwt is the total number of weights - * in the CNN model. - */ - public double[][] getFitHessian() { - return(this.modelfit.getHessian()); - } - - /** - * Gets predicted values for new data using a previously built model. - * - * This method only returns meaningful results if the build - * method of this class has been previously called. 
Since this is a classification - * model the values represent the probability that an observation belongs to the given - * class. - * - * @return A double[][] containing the predicted for each output neuron - * in the columns. Note that even if a single output neuron was specified during - * model building the return value is still a 2D array (with a single column). - * - */ - public double[][] getPredictPredictedRaw() { - return(this.modelpredict.getPredictedRaw()); - } - - /** - * Gets predicted values for new data using a previously built model. - * - * This method only returns meaningful results if the build - * method of this class has been previously called. This function returns an - * array of Strings indicating the class assignments of the observations, rather than - * the raw probabilities. - * - * @return A String[] containing the class assigned to each observation. - * - */ - - public String[] getPredictPredictedClass() { - return(this.modelpredict.getPredictedClass()); - } - - -} diff --git a/src/main/org/openscience/cdk/qsar/model/R/CNNClassificationModelFit.java b/src/main/org/openscience/cdk/qsar/model/R/CNNClassificationModelFit.java deleted file mode 100644 index de38762..0000000 --- a/src/main/org/openscience/cdk/qsar/model/R/CNNClassificationModelFit.java +++ /dev/null @@ -1,256 +0,0 @@ -/* $RCSfile$ - * $Author$ - * $Date$ - * $Revision$ - * - * Copyright (C) 2004-2008 Rajarshi Guha - * - * Contact: cdk-devel@lists.sourceforge.net - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - */ - -package org.openscience.cdk.qsar.model.R; - -/** - * A class that wraps the return value from R function, nnet(). - * - * This is an internal class used by R to return the result of - * the call to nnet. - * As a result it should not be instantiated by the user. The actual modeling - * class, CNNClassificationModel, provides acess to the various - * fields of this object. - * - * - * @author Rajarshi Guha - * @cdk.require r-project - * @cdk.module qsar - * @cdk.githash - * @deprecated - */ - -public class CNNClassificationModelFit { - private int noutput; // number of output neurons (== number of classes) - private double[] weights; - private double[][] fitted; - private double[][] residuals; - private double value; - private double[][] hessian = null; - - private double[][] vectorToMatrix(double[] v, int nrow, int ncol) { - double[][] m = new double[nrow][ncol]; - for (int i = 0; i < ncol; i++) { - for (int j = 0; j < nrow; j++) { - m[j][i] = v[j + i*nrow]; - } - } - return(m); - } - /** - * Constructs an object to contain a CNN classification fit. 
- * - * This class should not be instantiated directly and is only - * required withtin R - * - * @param noutput The number of output neurons (ie the number of predicted variables) - * @param nobs The number of observations - * @param weights A 1-dimensional array containing the weights and biases - * @param fitted A 1-dimensional array containing the fitted values - * @param residual A 1-dimensional array containing the residuals of the fitted values - * @param value The final value of the cost function - * @param hessian A 1-dimensional array containing the Hessian - */ - public CNNClassificationModelFit( - int noutput, - int nobs, - double[] weights, - double[] fitted, double[] residual, - double value, - double[] hessian) { - - // dimensions of hessian = nwt x nwt - // dimensions of fitted, residual = nobs x noutput - // also note that matrices come in as columnwise 1D arrays - - this.noutput = noutput; - setWeights(weights); - setResiduals(vectorToMatrix(residual, nobs,noutput)); - setFitted(vectorToMatrix(fitted, nobs,noutput)); - setValue(value); - setHessian(vectorToMatrix(hessian,weights.length,weights.length)); - } - /** - * Constructs an object to contain a CNN classification fit. 
- * - * This class should not be instantiated directly and is only - * required withtin R - * - * @param noutput The number of output neurons (ie the number of predicted variables) - * @param nobs The number of observations - * @param weights A 1-dimensional array containing the weights and biases - * @param fitted A 1-dimensional array containing the fitted values - * @param residual A 1-dimensional array containing the residuals of the fitted values - * @param value The final value of the cost function - */ - public CNNClassificationModelFit( - int noutput, - int nobs, - double[] weights, - double[] fitted, double[] residual, - double value) { - this.noutput = noutput; - setWeights(weights); - setResiduals(vectorToMatrix(residual, nobs,noutput)); - setFitted(vectorToMatrix(fitted, nobs,noutput)); - setValue(value); - } - /** - * Get the final value of the cost function. - * - * This method should not be called outside this class - * - * @return The final value of the cost function - * @see #setValue - */ - public double getValue() { - return(this.value); - } - /** - * Set the final value of the cost function. - * - * This method should not be called outside this class - * - * @param value The value of the cost function at convergence - * @see #getValue - */ - public void setValue(double value) { - this.value = value; - } - - - /** - * Get the Hessian for the final network. - * - * This method should not be called outside this class - * - * @return A 2-dimensional array containing the hessian - * @see #setHessian - */ - public double[][] getHessian() { return(this.hessian); } - - /** - * Set the Hessian for the final network. 
- * - * This method should not be called outside this class - * - * @param theHessian A 2-dimensional array containing the hessian - * @see #getHessian - */ - public void setHessian(double[][] theHessian) { - if (theHessian == null) return; - this.hessian = new double[theHessian.length][this.noutput]; - for (int i = 0; i < theHessian.length; i++) { - for (int j = 0; j < this.noutput; j++) { - this.hessian[i][j] = theHessian[i][j]; - } - } - } - - /** - * Get the weights and biases of the final network. - * - * This method should not be called outside this class - * - * @return A 1-dimensional array of weights and biases - * @see #setWeights - */ - public double[] getWeights() { return(this.weights); } - /** - * Set the weights and biases of the final network. - * - * This method should not be called outside this class - * - * @param weights A 1-dimensional array of weights and biases - * @see #getWeights - */ - public void setWeights(double[] weights) { - this.weights = new double[weights.length]; - for (int i = 0; i < weights.length; i++) { - this.weights[i] = weights[i]; - } - } - - - /** - * Get the residuals of the fit. - * - * This method should not be called outside this class - * - * @return A 2-dimensional array of residuals. The rows contain the - * observations and the columns contain the predicted variables - * @see #setResiduals - */ - public double[][] getResiduals() { return(this.residuals); } - /** - * Set the residuals of the fit. - * - * This method should not be called outside this class - * - * @param residuals A 2-dimensional array of residuals. 
The rows contain the - * observations and the columns contain the predicted variables - * @see #getResiduals - */ - public void setResiduals(double[][] residuals) { - this.residuals = new double[residuals.length][this.noutput]; - for (int i = 0; i < residuals.length; i++) { - for (int j = 0; j < this.noutput; j++) { - this.residuals[i][j] = residuals[i][j]; - } - } - } - - - - /** - * Get the fitted values. - * - * This method should not be called outside this class - * - * @return A 2-dimensional array of residuals. The rows contain the - * observations and the columns contain the predicted variables - * @see #setFitted - */ - public double[][] getFitted() { return(this.fitted); } - /** - * Set the fitted values. - * - * This method should not be called outside this class - * - * @param fitted A 2-dimensional array of residuals. The rows contain the - * observations and the columns contain the predicted variables - * @see #getFitted - */ - public void setFitted(double[][] fitted) { - this.fitted = new double[fitted.length][this.noutput]; - for (int i = 0; i < fitted.length; i++) { - for (int j = 0; j < this.noutput; j++) { - this.fitted[i][j] = fitted[i][j]; - } - } - } -} - - - diff --git a/src/main/org/openscience/cdk/qsar/model/R/CNNClassificationModelPredict.java b/src/main/org/openscience/cdk/qsar/model/R/CNNClassificationModelPredict.java deleted file mode 100644 index 1ac007b..0000000 --- a/src/main/org/openscience/cdk/qsar/model/R/CNNClassificationModelPredict.java +++ /dev/null @@ -1,148 +0,0 @@ -/* $RCSfile$ - * $Author$ - * $Date$ - * $Revision$ - * - * Copyright (C) 2004-2008 Rajarshi Guha - * - * Contact: cdk-devel@lists.sourceforge.net - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - */ - -package org.openscience.cdk.qsar.model.R; - -/** - * A class that wraps the return value from the R function, predict.nnet for classification models. - * - * This is an internal class used by R to return the result of - * the call to predict.nnet. - * As a result it should not be instantiated by the user. The actual modeling - * class, CNNClassificationModel, provides acess to the various - * fields of this object. - * - * - * @author Rajarshi Guha - * @cdk.require r-project - * @cdk.module qsar - * @cdk.githash - * @deprecated - */ -public class CNNClassificationModelPredict { - private int noutput; - private double[][] predvalraw; - private String[] predvalclass; - - private double[][] vectorToMatrix(double[] v, int nrow, int ncol) { - double[][] m = new double[nrow][ncol]; - for (int i = 0; i < ncol; i++) { - for (int j = 0; j < nrow; j++) { - m[j][i] = v[j + i*nrow]; - } - } - return(m); - } - /** - * Create an object to hold predictions from a previously built CNN model. - * - * This class should not be accessed directly - * - * @param noutput The number of predicted variables - * @param values The predicted probabilities - */ - public CNNClassificationModelPredict(int noutput, double[] values) { - this.noutput = noutput; - int nrow = values.length / noutput; - setPredictedRaw(vectorToMatrix(values,nrow,noutput)); - } - /** - * Create an object to hold predictions from a previously built CNN model. - * - * This class should not be accessed directly. 
Required for the case of a single - * predicted value. - * - * @param noutput The number of predicted variables - * @param values The predicted probabilities - */ - public CNNClassificationModelPredict(int noutput, double values) { - this.noutput = noutput; - setPredictedRaw(new double[][] { {values} }); - } - - /** - * Create an object to hold predictions from a previously built CNN model. - * - * This class should not be accessed directly - * - * @param values An array of String containing the predicted class - */ - public CNNClassificationModelPredict(String[] values) { - this.predvalclass = new String[values.length]; - for (int i = 0; i < values.length; i++) { - this.predvalclass[i] = values[i]; - } - } - /** - * Create an object to hold predictions from a previously built CNN model. - * - * This class should not be accessed directly. Required for the - * case of a single predicted value - * - * @param values An array of String containing the predicted class - */ - public CNNClassificationModelPredict(String values) { - this.predvalclass = new String[1]; - this.predvalclass[1] = values; - } - - /** - * Get the raw probabilities of the classification result. - * - * This class should not be accessed directly - * - * @return A 2-dimensional array containing the predicted probabilities. The rows - * contain the observations and the columns contain the predicted variables - * @see #setPredictedRaw - */ - public double[][] getPredictedRaw() { return(this.predvalraw); } - /** - * Get the raw probabilities of the classification result. - * - * This class should not be accessed directly - * - * @param predicted A 2-dimensional array containing the predicted probabilities. 
The rows - * contain the observations and the columns contain the predicted variables - * @see #getPredictedRaw - */ - public void setPredictedRaw(double[][] predicted) { - this.predvalraw = new double[predicted.length][this.noutput]; - for (int i = 0; i < predicted.length; i++) { - for (int j = 0; j < this.noutput; j++) { - this.predvalraw[i][j] = predicted[i][j]; - } - } - } - - /** - * Get the predicted classes. - * - * This class should not be accessed directly - * - * @return An array of String containing the predicted classes - */ - public String[] getPredictedClass() { return(this.predvalclass); }; -} - - diff --git a/src/main/org/openscience/cdk/qsar/model/R/CNNRegressionModel.java b/src/main/org/openscience/cdk/qsar/model/R/CNNRegressionModel.java deleted file mode 100644 index 6c5138a..0000000 --- a/src/main/org/openscience/cdk/qsar/model/R/CNNRegressionModel.java +++ /dev/null @@ -1,583 +0,0 @@ -/* $RCSfile$ - * $Author$ - * $Date$ - * $Revision$ - * - * Copyright (C) 2004-2008 Rajarshi Guha - * - * Contact: cdk-devel@lists.sourceforge.net - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
- */ - -package org.openscience.cdk.qsar.model.R; - -import org.openscience.cdk.qsar.model.QSARModelException; - -import java.util.HashMap; - -/** - * A modeling class that provides a computational neural network regression model. - * - * When instantiated this class ensures that the R/Java interface has been - * initialized. The response and independent variables can be specified at construction - * time or via the setParameters method. - * The actual fitting procedure is carried out by build after which - * the model may be used to make predictions, via predict. An example of the use - * of this class is shown below: - *
- * double[][] x;
- * double[] y;
- * Double[] wts;
- * Double[][] newx;
- * ...
- * try {
- *     CNNRegressionModel cnnrm = new CNNRegressionModel(x,y,3);
- *     cnnrm.setParameters("Wts",wts);
- *     cnnrm.build();
- *     
- *     double fitValue = cnnrm.getFitValue();
- *     
- *     cnnrm.setParameters("newdata", newx);
- *     cnnrm.setParameters("type", "raw");
- *     cnnrm.predict();
- *
- *     double[][] preds = cnnrm.getPredictPredicted();
- * } catch (QSARModelException qme) {
- *     System.out.println(qme.toString());
- * }
- * 
- * The above code snippet builds a 3-3-1 CNN model. - * Multiple output neurons are easily - * specified by supplying a matrix for y (i.e., double[][]) with the output variables - * in the columns. - *

- * Nearly all the arguments to - * nnet() are - * supported via the setParameters method. The table below lists the names of the arguments, - * the expected type of the argument and the default setting for the arguments supported by this wrapper class. - *

- * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - *
NameJava TypeDefaultNotes
xDouble[][]NoneThis must be set by the caller via the constructors or via setParameters
yDouble[][]NoneThis must be set by the caller via the constructors or via setParameters
weightsDouble[]rep(1,nobs)The default case weights is a vector of 1's equal in length to the number of observations, nobs
sizeIntegerNoneThis must be set by the caller via the constructors or via setParameters
subsetInteger[]1:nobsThis is supposed to be an index vector specifying which observations are to be used in building the model. The default indicates that all should be used
WtsDouble[]runif(1,nwt)The initial weight vector is set to a random vector of length equal to the number of weights if not set by the user
maskBoolean[]rep(TRUE,nwt)All weights are to be optimized unless otherwise specified by the user
linoutBooleanTRUESince this class performs regression this need not be changed
entropyBooleanFALSE
softmaxBooleanFALSE
censoredBooleanFALSE
skipBooleanFALSE
rangDouble0.7
decayDouble0.0
maxitInteger100
HessBooleanFALSE
traceBooleanTRUE
MaxNWtsInteger1000
abstolDouble1.0e-4
reltolDouble1.0e-8
- *
- *

- * In general the getFit* methods provide access to results from the fit - * and getPredict* methods provide access to results from the prediction (i.e., - * prediction using the model on new data). The values returned correspond to the various - * values returned by the nnet and - * predict.nnet functions - * in R - *

- * See {@link RModel} for details regarding the R and SJava environment. - * - * @author Rajarshi Guha - * @cdk.require r-project - * @cdk.module qsar - * @cdk.githash - * - * @cdk.keyword neural network - * @cdk.keyword regression - * @deprecated - */ -public class CNNRegressionModel extends RModel { - - public static int globalID = 0; - private int currentID; - private CNNRegressionModelFit modelfit = null; - private CNNRegressionModelPredict modelpredict = null; - - private HashMap params = null; - private int noutput = 0; - private int nvar = 0; - - private void setDefaults() { - // lets set the default values of the arguments that are specified - // to have default values in ?nnet - - // these params are vectors that depend on user defined stuff - // so as a default we set them to FALSE so R can check if these - // were not set - this.params.put("subset", new Boolean(false)); - this.params.put("mask", new Boolean(false) ); - this.params.put("Wts", new Boolean(false)); - this.params.put("weights", new Boolean(false)); - - this.params.put("linout", new Boolean(true)); // we want only regression - this.params.put("entropy", new Boolean(false)); - this.params.put("softmax",new Boolean(false)); - this.params.put("censored", new Boolean(false)); - this.params.put("skip", new Boolean(false)); - this.params.put("rang", new Double(0.7)); - this.params.put("decay", new Double(0.0)); - this.params.put("maxit", Integer.valueOf(100)); - this.params.put("Hess", new Boolean(false)); - this.params.put("trace", new Boolean(false)); // no need to see output - this.params.put("MaxNWts", Integer.valueOf(1000)); - this.params.put("abstol", new Double(1.0e-4)); - this.params.put("reltol", new Double(1.0e-8)); - } - - /** - * Constructs a CNNRegressionModel object. - * - * This constructor allows the user to simply set up an instance of a CNN - * regression modeling class. This constructor simply sets the name for this - * instance. 
It is expected all the relevent parameters for modeling will be - * set at a later point. - *

- * Other parameters that are required to be set should be done via - * calls to setParameters. A number of parameters are set to the - * defaults as specified in the manpage for - * nnet. - * - */ - public CNNRegressionModel() { - super(); - this.params = new HashMap(); - this.currentID = CNNClassificationModel.globalID; - CNNClassificationModel.globalID++; - this.setModelName("cdkCNNModel"+this.currentID); - this.setDefaults(); - } - - /** - * Constructs a CNNRegressionModel object. - * - * This constructor allows the user to specify the dependent and - * independent variables along with the number of hidden layer neurons. - * This constructor is suitable for cases when there is a single output - * neuron. If the number of rows of the design matrix is not equal to - * the number of observations in y an exception will be thrown. - *

- * Other parameters that are required to be set should be done via - * calls to setParameters. A number of parameters are set to the - * defaults as specified in the manpage for - * nnet. - * - * @param x An array of independent variables. Observations should be in - * the rows and variables in the columns. - * @param y An array (single column) of observed values - * @param size The number of hidden layer neurons - * @throws QSARModelException if the number of observations in x and y do not match - */ - public CNNRegressionModel(double[][] x, double[] y, int size) throws QSARModelException { - super(); - this.params = new HashMap(); - this.currentID = CNNRegressionModel.globalID; - CNNRegressionModel.globalID++; - this.setModelName("cdkCNNModel"+this.currentID); - - int nrow = y.length; - int ncol = x[0].length; - - if (nrow != x.length) { - throw new QSARModelException("The number of values for the dependent variable does not match the number of rows of the design matrix"); - } - - this.nvar = ncol; - this.noutput = 1; - - Double[][] xx = new Double[nrow][ncol]; - Double[][] yy = new Double[nrow][1]; - - for (int i = 0; i < nrow; i++) { - yy[i][0] = new Double(y[i]); - for (int j = 0; j < ncol; j++) { - xx[i][j] = new Double(x[i][j]); - } - } - this.params.put("x", xx); - this.params.put("y", yy); - this.params.put("size", Integer.valueOf(size)); - this.setDefaults(); - } - - /** - * Constructs a CNNRegressionModel object. - * - * This constructor allows the user to specify the dependent and - * independent variables along with the number of hidden layer neurons. - * This constructor is suitable for cases when there are multiple output - * neuron. If the number of rows of the design matrix is not equal to - * the number of observations in y an exception will be thrown. - *

- * Other parameters that are required to be set should be done via - * calls to setParameters. A number of parameters are set to the - * defaults as specified in the manpage for - * nnet. - * - * @param x An array of independent variables. Observations should be in - * the rows and variables in the columns. - * @param y An array (multiple columns) of observed values - * @param size The number of hidden layer neurons - * @throws QSARModelException if the number of observations in x and y do not match - */ - public CNNRegressionModel(double[][] x, double[][] y, int size) throws QSARModelException{ - super(); - this.params = new HashMap(); - this.currentID = CNNRegressionModel.globalID; - CNNRegressionModel.globalID++; - this.setModelName("cdkCNNModel"+this.currentID); - - int nrow = y.length; - int ncol = x[0].length; - - if (nrow != x.length) { - throw new QSARModelException("The number of values for the dependent variable does not match the number of rows of the design matrix"); - } - - this.nvar = ncol; - this.noutput = y[0].length; - - Double[][] xx = new Double[nrow][ncol]; - Double[][] yy = new Double[nrow][this.noutput]; - - for (int i = 0; i < nrow; i++) { - for (int j = 0; j < ncol; j++) { - xx[i][j] = new Double(x[i][j]); - } - } - for (int i = 0; i < nrow; i++) { - for (int j = 0; j < this.noutput; j++) { - yy[i][j] = new Double(y[i][j]); - } - } - this.params.put("x", xx); - this.params.put("y", yy); - this.params.put("size", Integer.valueOf(size)); - this.setDefaults(); - } - - - /** - * Sets parameters required for building a linear model or using one for prediction. - * - * This function allows the caller to set the various parameters available - * for the - * nnet - * and - * predict.nnet - * R routines. See the R help pages for the details of the available - * parameters. 
- * - * @param key A String containing the name of the parameter as described in the - * R help pages - * @param obj An Object containing the value of the parameter - * @throws QSARModelException if the type of the supplied value does not match the - * expected type - */ - public void setParameters(String key, Object obj) throws QSARModelException { - // since we know the possible values of key we should check the coresponding - // objects and throw errors if required. Note that this checking can't really check - // for values (such as number of variables in the X matrix to build the model and the - // X matrix to make new predictions) - these should be checked in functions that will - // use these parameters. The main checking done here is for the class of obj and - // some cases where the value of obj is not dependent on what is set before it - - if (key.equals("y")) { - if (!(obj instanceof Double[][])) { - throw new QSARModelException("The class of the 'y' object must be Double[][]"); - } else { - noutput = ((Double[][])obj)[0].length; - } - } - if (key.equals("x")) { - if (!(obj instanceof Double[][])) { - throw new QSARModelException("The class of the 'x' object must be Double[][]"); - } else { - nvar = ((Double[][])obj)[0].length; - } - } - if (key.equals("weights")) { - if (!(obj instanceof Double[])) { - throw new QSARModelException("The class of the 'weights' object must be Double[]"); - } - } - if (key.equals("size")) { - if (!(obj instanceof Integer)) { - throw new QSARModelException("The class of the 'size' object must be Integer"); - } - } - if (key.equals("subset")) { - if (!(obj instanceof Integer[])) { - throw new QSARModelException("The class of the 'size' object must be Integer[]"); - } - } - if (key.equals("Wts")) { - if (!(obj instanceof Double[])) { - throw new QSARModelException("The class of the 'Wts' object must be Double[]"); - } - } - if (key.equals("mask")) { - if (!(obj instanceof Boolean[])) { - throw new QSARModelException("The class 
of the 'mask' object must be Boolean[]"); - } - } - if (key.equals("linout") || - key.equals("entropy") || - key.equals("softmax") || - key.equals("censored") || - key.equals("skip") || - key.equals("Hess") || - key.equals("trace")) { - if (!(obj instanceof Boolean)) { - throw new QSARModelException("The class of the 'trace|skip|Hess|linout|entropy|softmax|censored' object must be Boolean"); - } - } - if (key.equals("rang") || - key.equals("decay") || - key.equals("abstol") || - key.equals("reltol")) { - if (!(obj instanceof Double)) { - throw new QSARModelException("The class of the 'reltol|abstol|decay|rang' object must be Double"); - } - } - if (key.equals("maxit") || - key.equals("MaxNWts")) { - if (!(obj instanceof Integer)) { - throw new QSARModelException("The class of the 'maxit|MaxNWts' object must be Integer"); - } - } - - if (key.equals("newdata")) { - if ( !(obj instanceof Double[][])) { - throw new QSARModelException("The class of the 'newdata' object must be Double[][]"); - } - } - this.params.put(key,obj); - } - - /** - * Fits a CNN regression model. - * - * This method calls the R function to fit a CNN regression model - * to the specified dependent and independent variables. If an error - * occurs in the R session, an exception is thrown. - *

- * Note that, this method should be called prior to calling the various get - * methods to obtain information regarding the fit. - */ - public void build() throws QSARModelException { - try { - this.modelfit = (CNNRegressionModelFit)revaluator.call("buildCNN", - new Object[]{ getModelName(), this.params }); - } catch (Exception re) { - throw new QSARModelException(re.toString()); - } - } - - /** - * Uses a fitted model to predict the response for new observations. - * - * This function uses a previously fitted model to obtain predicted values - * for a new set of observations. If the model has not been fitted prior to this - * call an exception will be thrown. Use setParameters - * to set the values of the independent variable for the new observations. You can also - * set the type argument (see here). - * However, since this class performs CNN regression, the default setting (type='raw') is sufficient. - */ - public void predict() throws QSARModelException { - if (this.modelfit == null) - throw new QSARModelException("Before calling predict() you must fit the model using build()"); - - Double[][] newx = (Double[][])this.params.get("newdata"); - if (newx[0].length != this.nvar) { - throw new QSARModelException("Number of independent variables used for prediction must match those used for fitting"); - } - - try { - this.modelpredict = (CNNRegressionModelPredict)revaluator.call("predictCNN", - new Object[]{ getModelName(), this.params }); - } catch (Exception re) { - throw new QSARModelException(re.toString()); - } - } - - /** - * Returns an object summarizing the CNN regression model. - * - * The return object simply wraps the fields from the summary.nnet - * return value. Various details can be extracted from the return object, - * See {@link CNNRegressionModelSummary} for more details. - * - * @return A summary for the CNN regression model - * @throws QSARModelException if the model has not been built prior to a call - * to this method. 
- */ - public CNNRegressionModelSummary summary() throws QSARModelException { - if (this.modelfit == null) - throw new QSARModelException("Before calling summary() you must fit the model using build()"); - - CNNRegressionModelSummary s = null; - try { - s = (CNNRegressionModelSummary)revaluator.call("summaryModel", - new Object[]{ getModelName() }); - } catch (Exception re) { - throw new QSARModelException(re.toString()); - } - return(s); - } - - /** - * Loads a CNNRegresionModel object from disk in to the current session. - * - * - * @param fileName The disk file containing the model - * @throws QSARModelException if the model being loaded is not a CNN regression model - * object - */ - public void loadModel(String fileName) throws QSARModelException { - // should probably check that the filename does exist - Object model = (Object)revaluator.call("loadModel", new Object[]{ (Object)fileName }); - String modelName = (String)revaluator.call("loadModel.getName", new Object[] { (Object)fileName }); - - if (model.getClass().getName().equals("org.openscience.cdk.qsar.model.R.CNNRegressionModelFit")) { - this.modelfit = (CNNRegressionModelFit)model; - this.setModelName(modelName); - Integer tmp = (Integer)revaluator.eval(modelName+"$n[1]"); - nvar = tmp.intValue(); - } else throw new QSARModelException("The loaded model was not a CNNRegressionModel"); - } - /** - * Loads an CNNRegressionModel object from a serialized string into the current session. 
- * - * @param serializedModel A String containing the serialized version of the model - * @param modelName A String indicating the name of the model in the R session - * @throws QSARModelException if the model being loaded is not a CNN regression model - * object - */ - public void loadModel(String serializedModel, String modelName) throws QSARModelException { - // should probably check that the fileName does exist - Object model = (Object)revaluator.call("unserializeModel", new Object[]{ (Object)serializedModel, (Object)modelName }); - String modelname = modelName; - - if (model.getClass().getName().equals("org.openscience.cdk.qsar.model.R.CNNRegressionModelFit")) { - this.modelfit =(CNNRegressionModelFit)model; - this.setModelName(modelname); - Double tmp = (Double)revaluator.eval(modelName+"$n[1]"); - nvar = (int)tmp.doubleValue(); - } else throw new QSARModelException("The loaded model was not a CNNRegressionModel"); - } - - /** - * Gets final value of the fitting criteria. - * - * This method only returns meaningful results if the build - * method of this class has been previously called. - * - * @return A double indicating the value of the fitting criterion plus weight decay term. - */ - public double getFitValue() { - return(this.modelfit.getValue()); - } - - /** - * Gets optimized weights for the model. - * - * This method only returns meaningful results if the build - * method of this class has been previously called. - * - * @return A double[] containing the weights. The number of weights will be - * equal to

(Ni * Nh) + (Nh * No) + Nh + No
where Ni, Nh and No - * are the number of input, hidden and output neurons. - */ - public double[] getFitWeights() { - return(this.modelfit.getWeights()); - } - /** - * Gets fitted values from the final model. - * - * This method only returns meaningful results if the build - * method of this class has been previously called. - * - * @return A double[][] containing the fitted values for each output neuron - * in the columns. Note that even if a single output neuron was specified during - * model building the return value is still a 2D array (with a single column). - */ - public double[][] getFitFitted() { - return(this.modelfit.getFitted()); - } - /** - * Gets residuals for the fitted values from the final model. - * - * This method only returns meaningful results if the build - * method of this class has been previously called. - * - * @return A double[][] containing the residuals for each output neuron - * in the columns. Note that even if a single output neuron was specified during - * model building the return value is still a 2D array (with a single column). - */ - public double[][] getFitResiduals() { - return(this.modelfit.getResiduals()); - } - /** - * Gets the Hessian of the measure of fit. - * - * If the Hess option was set to TRUE before the call to build - * then the CNN routine will return the Hessian of the measure of fit at the best set of - * weights found. * This method only returns meaningful results if the build - * method of this class has been previously called. - * - * @return A double[][] containing the Hessian. It will be a square array - * with dimensions equal to the Nwt x Nwt, where Nwt is the total number of weights - * in the CNN model. - */ - public double[][] getFitHessian() { - return(this.modelfit.getHessian()); - } - - /** - * Gets predicted values for new data using a previously built model. - * - * This method only returns meaningful results if the build - * method of this class has been previously called. 
- * - * @return A double[][] containing the predicted for each output neuron - * in the columns. Note that even if a single output neuron was specified during - * model building the return value is still a 2D array (with a single column). - */ - public double[][] getPredictPredicted() { - return(this.modelpredict.getPredicted()); - } - - -} diff --git a/src/main/org/openscience/cdk/qsar/model/R/CNNRegressionModelFit.java b/src/main/org/openscience/cdk/qsar/model/R/CNNRegressionModelFit.java deleted file mode 100644 index ac9da3e..0000000 --- a/src/main/org/openscience/cdk/qsar/model/R/CNNRegressionModelFit.java +++ /dev/null @@ -1,251 +0,0 @@ -/* $RCSfile$ - * $Author$ - * $Date$ - * $Revision$ - * - * Copyright (C) 2004-2008 Rajarshi Guha - * - * Contact: cdk-devel@lists.sourceforge.net - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - */ - -package org.openscience.cdk.qsar.model.R; - -/** - * A class that wraps the return value from R function, nnet(). - * - * This is an internal class used by R to return the result of - * the call to nnet. - * As a result it should not be instantiated by the user. The actual modeling - * class, CNNRegressionModel, provides acess to the various - * fields of this object. 
- * - * - * @author Rajarshi Guha - * @cdk.require r-project - * @cdk.module qsar - * @cdk.githash - * @deprecated - */ - -public class CNNRegressionModelFit { - private int noutput; // number of output neurons (== number of classes) - private double[] weights; - private double[][] fitted; - private double[][] residuals; - private double value; - private double[][] hessian = null; - - private double[][] vectorToMatrix(double[] v, int nrow, int ncol) { - double[][] m = new double[nrow][ncol]; - for (int i = 0; i < ncol; i++) { - for (int j = 0; j < nrow; j++) { - m[j][i] = v[j + i*nrow]; - } - } - return(m); - } - - /** - * Constructs an object to contain a CNN regression fit. - * - * This class should not be instantiated directly and is only - * required withtin R - * - * @param noutput The number of output neurons (ie the number of predicted variables) - * @param nobs The number of observations - * @param weights A 1-dimensional array containing the weights and biases - * @param fitted A 1-dimensional array containing the fitted values - * @param residual A 1-dimensional array containing the residuals of the fitted values - * @param value The final value of the cost function - * @param hessian A 1-dimensional array containing the Hessian - */ - public CNNRegressionModelFit( - int noutput, - int nobs, - double[] weights, - double[] fitted, double[] residual, - double value, - double[] hessian) { - - // dimensions of hessian = nwt x nwt - // dimensions of fitted, residual = nobs x noutput - // also note that matrices come in as columnwise 1D arrays - - this.noutput = noutput; - setWeights(weights); - setResiduals(vectorToMatrix(residual, nobs,noutput)); - setFitted(vectorToMatrix(fitted, nobs,noutput)); - setValue(value); - setHessian(vectorToMatrix(hessian,weights.length,weights.length)); - } - /** - * Constructs an object to contain a CNN regression fit. 
- * - * This class should not be instantiated directly and is only - * required withtin R - * - * @param noutput The number of output neurons (ie the number of predicted variables) - * @param nobs The number of observations - * @param weights A 1-dimensional array containing the weights and biases - * @param fitted A 1-dimensional array containing the fitted values - * @param residual A 1-dimensional array containing the residuals of the fitted values - * @param value The final value of the cost function - */ - public CNNRegressionModelFit( - int noutput, - int nobs, - double[] weights, - double[] fitted, double[] residual, - double value) { - this.noutput = noutput; - setWeights(weights); - setResiduals(vectorToMatrix(residual, nobs,noutput)); - setFitted(vectorToMatrix(fitted, nobs,noutput)); - setValue(value); - } - - /** - * Get the final value of the cost function. - * - * This method should not be called outside this class - * - * @return The final value of the cost function - * @see #setValue - */ - public double getValue() { - return(this.value); - } - /** - * Set the final value of the cost function. - * - * This method should not be called outside this class - * - * @param value The value of the cost function at convergence - * @see #getValue - */ - public void setValue(double value) { - this.value = value; - } - - /** - * Get the Hessian for the final network. - * - * This method should not be called outside this class - * - * @return A 2-dimensional array containing the hessian - * @see #setHessian - */ - public double[][] getHessian() { return(this.hessian); } - /** - * Set the Hessian for the final network. 
- * - * This method should not be called outside this class - * - * @param theHessian A 2-dimensional array containing the hessian - * @see #getHessian - */ - public void setHessian(double[][] theHessian) { - if (theHessian == null) return; - this.hessian = new double[theHessian.length][this.noutput]; - for (int i = 0; i < theHessian.length; i++) { - for (int j = 0; j < this.noutput; j++) { - this.hessian[i][j] = theHessian[i][j]; - } - } - } - - /** - * Get the weights and biases of the final network. - * - * This method should not be called outside this class - * - * @return A 1-dimensional array of weights and biases - * @see #setWeights - */ - public double[] getWeights() { return(this.weights); } - /** - * Set the weights and biases of the final network. - * - * This method should not be called outside this class - * - * @param weights A 1-dimensional array of weights and biases - * @see #getWeights - */ - public void setWeights(double[] weights) { - this.weights = new double[weights.length]; - for (int i = 0; i < weights.length; i++) { - this.weights[i] = weights[i]; - } - } - /** - * Get the residuals of the fit. - * - * This method should not be called outside this class - * - * @return A 2-dimensional array of residuals. The rows contain the - * observations and the columns contain the predicted variables - * @see #setResiduals - */ - public double[][] getResiduals() { return(this.residuals); } - /** - * Set the residuals of the fit. - * - * This method should not be called outside this class - * - * @param residuals A 2-dimensional array of residuals. The rows contain the - * observations and the columns contain the predicted variables - * @see #getResiduals - */ - public void setResiduals(double[][] residuals) { - this.residuals = new double[residuals.length][this.noutput]; - for (int i = 0; i < residuals.length; i++) { - for (int j = 0; j < this.noutput; j++) { - this.residuals[i][j] = residuals[i][j]; - } - } - } - /** - * Get the fitted values. 
- * - * This method should not be called outside this class - * - * @return A 2-dimensional array of residuals. The rows contain the - * observations and the columns contain the predicted variables - * @see #setFitted - */ - public double[][] getFitted() { return(this.fitted); } - /** - * Set the fitted values. - * - * This method should not be called outside this class - * - * @param fitted A 2-dimensional array of residuals. The rows contain the - * observations and the columns contain the predicted variables - * @see #getFitted - */ - public void setFitted(double[][] fitted) { - this.fitted = new double[fitted.length][this.noutput]; - for (int i = 0; i < fitted.length; i++) { - for (int j = 0; j < this.noutput; j++) { - this.fitted[i][j] = fitted[i][j]; - } - } - } -} - - - diff --git a/src/main/org/openscience/cdk/qsar/model/R/CNNRegressionModelPredict.java b/src/main/org/openscience/cdk/qsar/model/R/CNNRegressionModelPredict.java deleted file mode 100644 index e00b958..0000000 --- a/src/main/org/openscience/cdk/qsar/model/R/CNNRegressionModelPredict.java +++ /dev/null @@ -1,108 +0,0 @@ -/* $RCSfile$ - * $Author$ - * $Date$ - * $Revision$ - * - * Copyright (C) 2004-2008 Rajarshi Guha - * - * Contact: cdk-devel@lists.sourceforge.net - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - */ -package org.openscience.cdk.qsar.model.R; - -/** - * A class that wraps the return value from the R function, predict.cnn. - * - * This is an internal class used by R to return the result of - * the call to predict.nnet. - * As a result it should not be instantiated by the user. The actual modeling - * class, CNNRegressionModel, provides acess to the various - * fields of this object. - * - * - * @author Rajarshi Guha - * @cdk.require r-project - * @cdk.module qsar - * @cdk.githash - * @deprecated - */ -public class CNNRegressionModelPredict { - private int noutput; - private double[][] predval; - - private double[][] vectorToMatrix(double[] v, int nrow, int ncol) { - double[][] m = new double[nrow][ncol]; - for (int i = 0; i < ncol; i++) { - for (int j = 0; j < nrow; j++) { - m[j][i] = v[j + i*nrow]; - } - } - return(m); - } - - /** - * Create an object to hold predictions from a previously built CNN model. - * - * This class should not be accessed directly - * - * @param noutput The number of predicted variables - * @param values The predicted values - */ - public CNNRegressionModelPredict(int noutput, double[] values) { - this.noutput = noutput; - int nrow = values.length / noutput; - setPredicted(vectorToMatrix(values,nrow,noutput)); - } - /** - * Create an object to hold predictions from a previously built CNN model. - * - * This class should not be accessed directly. Required for the case - * of a single predicted value. - * - * @param noutput The number of predicted variables - * @param values The predicted value - */ - public CNNRegressionModelPredict(int noutput, double values) { - this.noutput = noutput; - setPredicted(new double[][] { {values} }); - } - - /** - * Get the predicted values. 
- * - * @return A 2-dimensional array containing the predicted values. The rows - * contain the observations and the columns contain the predicted variables - * @see #setPredicted - */ - public double[][] getPredicted() { return(this.predval); } - /** - * Set the predicted values. - * - * @param predicted A 2-dimensional array containing the predicted values. The rows - * contain the observations and the columns contain the predicted variables - * @see #getPredicted - */ - public void setPredicted(double[][] predicted) { - this.predval = new double[predicted.length][this.noutput]; - for (int i = 0; i < predicted.length; i++) { - for (int j = 0; j < this.noutput; j++) { - this.predval[i][j] = predicted[i][j]; - } - } - } -} - - diff --git a/src/main/org/openscience/cdk/qsar/model/R/CNNRegressionModelSummary.java b/src/main/org/openscience/cdk/qsar/model/R/CNNRegressionModelSummary.java deleted file mode 100644 index a29e069..0000000 --- a/src/main/org/openscience/cdk/qsar/model/R/CNNRegressionModelSummary.java +++ /dev/null @@ -1,172 +0,0 @@ -/* $RCSfile$ - * $Author$ - * $Date$ - * $Revision$ - * - * Copyright (C) 2004-2008 Rajarshi Guha - * - * Contact: cdk-devel@lists.sourceforge.net - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
- */ - -package org.openscience.cdk.qsar.model.R; - - -/** A class that represents a summary of a CNN regression model. - * - * This class essentially wraps the result of summary.nnet. As with other - * backend classes this class should not be instantiated directly by the - * user, though the various fields may be accessed with the provided - * methods. - * - * @author Rajarshi Guha - * @cdk.require r-project - * @cdk.module qsar - * @cdk.githash - * @deprecated - */ -public class CNNRegressionModelSummary { - - double[] residuals; - boolean entropy, softmax, censored; - double value; - int[] n; - - /** - * Constructor for an object that wraps the return value from summary.lm. - * - * This should not be instantiated directly. The class is meant to be instantiated - * from an R session - * - * @param n A 3 element array containing the number of neurons in the - * input, hidden and output layer respectively - * @param entropy A boolean indicating whether the entropy setting was used - * @param softmax A boolean indicating whether the softmax setting was used - * @param censored A boolean indicating whether the censored setting was used - * @param value The final value of the convergenc criterion - * @param residuals A 1-dimensional array of residual values - */ - public CNNRegressionModelSummary( int[] n, boolean entropy, - boolean softmax, boolean censored, double value, - double[] residuals) { - - - this.residuals = new double[residuals.length]; - for (int i = 0; i < residuals.length; i++) - this.residuals[i] = residuals[i]; - - this.n = new int[n.length]; - for (int i = 0; i < n.length; i++) - this.n[i] = n[i]; - - this.softmax = softmax; - this.censored = censored; - this.entropy = entropy; - this.value = value; - } - /** - * Constructor for an object that wraps the return value from summary.lm. - * - * This should not be instantiated directly. 
The class is meant to be instantiated - * from an R session - * - * @param n A 3 element array containing the number of neurons in the - * input, hidden and output layer respectively - * @param entropy A boolean indicating whether the entropy setting was used - * @param softmax A boolean indicating whether the softmax setting was used - * @param censored A boolean indicating whether the censored setting was used - * @param value The final value of the convergenc criterion - * @param residuals A 1-dimensional array of residual values - */ - public CNNRegressionModelSummary( double[] n, boolean entropy, - boolean softmax, boolean censored, double value, - double[] residuals) { - - - this.residuals = new double[residuals.length]; - for (int i = 0; i < residuals.length; i++) - this.residuals[i] = residuals[i]; - - this.n = new int[n.length]; - for (int i = 0; i < n.length; i++) - this.n[i] = (int)n[i]; - - this.softmax = softmax; - this.censored = censored; - this.entropy = entropy; - this.value = value; - } - - - /** - * Return the residuals of the fit. - * - * @return A 1-dimensional array of doubles containing the - * residuals of the fit - */ - public double[] getResiduals() { - return(this.residuals); - } - - - /** - * Return the number of neurons in the CNN layers. - * - * This method returns a 3-element array containing the number - * of neurons in the input, hidden and output layer - * respectively. - * - * @return A 3-element int array - */ - public int[] getNumNeurons() { - return(this.n); - } - - /** - * Return the final value of the convergence criterion. - * - * @return The final value of the convergence criterion - */ - public double getValue(){ - return(this.value); - } - - /** - * Return whether softmax was used. - * - * @return A boolean indicating whether softmax was used or not - */ - public boolean getSoftmax() { - return(this.softmax); - } - /** - * Return whether entropy was used. 
- * - * @return A boolean indicating whether entropy was used or not - */ - public boolean getEntropy() { - return(this.entropy); - } - /** - * Return whether censored was used. - * - * @return A boolean indicating whether censored was used or not - */ - public boolean getCensored() { - return(this.censored); - } -} - diff --git a/src/main/org/openscience/cdk/qsar/model/R/LinearRegressionModel.java b/src/main/org/openscience/cdk/qsar/model/R/LinearRegressionModel.java deleted file mode 100644 index 6729563..0000000 --- a/src/main/org/openscience/cdk/qsar/model/R/LinearRegressionModel.java +++ /dev/null @@ -1,552 +0,0 @@ -/* $RCSfile$ - * $Author$ - * $Date$ - * $Revision$ - * - * Copyright (C) 2004-2008 Rajarshi Guha - * - * Contact: cdk-devel@lists.sourceforge.net - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - */ - -package org.openscience.cdk.qsar.model.R; - -import org.openscience.cdk.qsar.model.QSARModelException; - -import java.util.HashMap; - -/** - * A modeling class that provides a linear least squares regression model. - * - * When instantiated this class ensures that the R/Java interface has been - * initialized. The response and independent variables can be specified at construction - * time or via the setParameters method. 
The actual fitting procedure is carried out by build after which - * the model may be used to make predictions. - *

- * Currently, the design of the class is quite sparse as it does not allow subsetting, - * variable names, setting of contrasts and so on. - * It is also assumed that the values of all the variables are defined (i.e., not such that - * they are NA - * in an R session). - * The use of - * this class is shown in the following code snippet - *

- * try {
- *     LinearRegressionModel lrm = new LinearRegressionModel(x,y);
- *     lrm.build();
- *     lrm.setParameters("newdata", newx);
- *     lrm.setParameters("interval", "confidence");
- *     lrm.predict();
- *     double[] fitted = lrm.getFitFitted();
- *     double[] predictedvalues = lrm.getPredictPredicted();
- * } catch (QSARModelException qme) {
- *     System.out.println(qme.toString());
- * }
- * 
- * Note that when making predictions, the new X matrix and interval type can be set by calls - * to setParameters(). In general, the arguments for lm() and predict.lm() can be set via - * calls to setParameters(). The following table lists the parameters that can be set and their - * expected types. More detailed information is available in the R documentation. - *
- * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - *
Name | Java Type | Notes
x | Double[][] |
y | Double[] | Length should be equal to the number of rows of x
weights | Double[] | Length should be equal to the number of rows of x
newdata | Double[][] | Number of columns should be the same as in x
interval | String | Can be 'confidence' or 'prediction'
- *
- * In general the getFit* methods provide access to results from the fit - * and getPredict* methods provide access to results from the prediction (i.e., - * prediction using the model on new data). The values returned correspond to the various - * values returned by the lm - * and predict.lm - * functions in R. - *

- * See {@link RModel} for details regarding the R and SJava environment. - * - * @author Rajarshi Guha - * @cdk.require r-project - * @cdk.module qsar - * @cdk.githash - * - * @cdk.keyword regression, linear - * @deprecated - */ -public class LinearRegressionModel extends RModel { - - private static int globalID = 0; - private int currentID; - private LinearRegressionModelFit modelfit = null; - private LinearRegressionModelPredict modelpredict = null; - - private HashMap params = null; - private int nvar = 0; - - /** - * Constructs a LinearRegressionModel object. - * - * The constructor simply instantiates the model ID. Dependent and independent variables - * should be set via setParameters(). - *

- * An important feature of the current implementation is that all the - * independent variables are used during the fit. Furthermore no subsetting is possible. - * As a result when setting these via setParameters() the caller should specify only - * the variables and observations that will be used for the fit. - */ - public LinearRegressionModel(){ - super(); - - this.params = new HashMap(); - - this.currentID = LinearRegressionModel.globalID; - LinearRegressionModel.globalID++; - this.setModelName("cdkLMModel"+this.currentID); - } - - /** - * Constructs a LinearRegressionModel object. - * - * The constructor allows the user to specify the - * dependent and independent variables. The length of the dependent variable - * array should equal the number of rows of the independent variable matrix. If this - * is not the case an exception will be thrown. - *

- * An important feature of the current implementation is that all the - * independent variables are used during the fit. Furthermore no subsetting is possible. - * As a result when creating an instance of this object the caller should specify only - * the variables and observations that will be used for the fit. - * - * @param xx An array of independent variables. The observations should be in the rows - * and the variables should be in the columns - * @param yy an array containing the dependent variable - * @throws QSARModelException if the number of observations in x and y do not match - */ - public LinearRegressionModel(double[][] xx, double[] yy) throws QSARModelException{ - super(); - - this.params = new HashMap(); - - this.currentID = LinearRegressionModel.globalID; - LinearRegressionModel.globalID++; - this.setModelName("cdkLMModel"+this.currentID); - - int nrow = yy.length; - this.nvar = xx[0].length; - - if (nrow != xx.length) { - throw new QSARModelException("The number of values for the dependent variable does not match the number of rows of the design matrix"); - } - - Double[][] x = new Double[nrow][this.nvar]; - Double[] y = new Double[nrow]; - Double[] weights = new Double[nrow]; - - for (int i = 0; i < nrow; i++) { - y[i] = new Double(yy[i]); - weights[i] = new Double(1.0); - } - for (int i = 0; i < nrow; i++) { - for (int j = 0; j < this.nvar; j++) - x[i][j] = new Double(xx[i][j]); - } - - params.put("x", x); - params.put("y", y); - params.put("weights", weights); - } - - - /** - * Constructs a LinearRegressionModel object. - * - * The constructor allows the user to specify the - * dependent and independent variables as well as weightings for - * the observations. - *

- * The length of the dependent variable - * array should equal the number of rows of the independent variable matrix. If this - * is not the case an exception will be thrown. - *

- * An important feature of the current implementation is that all the - * independent variables are used during the fit. Furthermore no subsetting is possible. - * As a result when creating an instance of this object the caller should specify only - * the variables and observations that will be used for the fit. - * - * @param xx An array of independent variables. The observations should be in the rows - * and the variables should be in the columns - * @param yy an array containing the dependent variable - * @param weights Specifies the weights for each observation. Unit weights are equivilant - * to OLS - * @throws QSARModelException if the number of observations in x and y do not match - */ - public LinearRegressionModel(double[][] xx, double[] yy, double[] weights) throws QSARModelException{ - super(); - - this.params = new HashMap(); - - this.currentID = LinearRegressionModel.globalID; - LinearRegressionModel.globalID++; - this.setModelName("cdkLMModel"+this.currentID); - - int nrow = yy.length; - this.nvar = xx[0].length; - - if (nrow != xx.length) { - throw new QSARModelException("The number of values for the dependent variable does not match the number of rows of the design matrix"); - } - if (nrow != weights.length) { - throw new QSARModelException("The length of the weight vector does not match the number of rows of the design matrix"); - } - - Double[][] x = new Double[nrow][this.nvar]; - Double[] y = new Double[nrow]; - Double[] wts = new Double[nrow]; - - for (int i = 0; i < nrow; i++) { - y[i] = new Double(yy[i]); - wts[i] = new Double(weights[i]); - } - for (int i = 0; i < nrow; i++) { - for (int j = 0; j < this.nvar; j++) - x[i][j] = new Double(xx[i][j]); - } - params.put("x", x); - params.put("y", y); - params.put("weights", wts); - } - - protected void finalize() { - revaluator.voidEval("rm("+this.getModelName()+",pos=1)"); - } - - - /** - * Fits a linear regression model. 
- * - * This method calls the R function to fit a linear regression model - * to the specified dependent and independent variables. If an error - * occurs in the R session, an exception is thrown. - *

- * Note that, this method should be called prior to calling the various get - * methods to obtain information regarding the fit. - */ - public void build() throws QSARModelException { - // lets do some checks in case stuff was set via setParameters() - Double[][] x; - Double[] y,weights; - x = (Double[][])this.params.get("x"); - y = (Double[])this.params.get("y"); - weights = (Double[])this.params.get("weights"); - if (this.nvar == 0) this.nvar = x[0].length; - else { - if (y.length != x.length) { - throw new QSARModelException("Number of observations does no match number of rows in the design matrix"); - } - if (weights.length != y.length) { - throw new QSARModelException("The weight vector must have the same length as the number of observations"); - } - } - - // lets build the model - try { - this.modelfit = (LinearRegressionModelFit)revaluator.call("buildLM", - new Object[]{ getModelName(), this.params }); - } catch (Exception re) { - throw new QSARModelException(re.toString()); - } - } - - - /** - * Sets parameters required for building a linear model or using one for prediction. - * - * This function allows the caller to set the various parameters available - * for the lm() and predict.lm() R routines. See the R help pages for the details of the available - * parameters. - * - * @param key A String containing the name of the parameter as described in the - * R help pages - * @param obj An Object containing the value of the parameter - * @throws QSARModelException if the type of the supplied value does not match the - * expected type - * - */ - public void setParameters(String key, Object obj) throws QSARModelException { - // since we know the possible values of key we should check the coresponding - // objects and throw errors if required. 
Note that this checking can't really check - // for values (such as number of variables in the X matrix to build the model and the - // X matrix to make new predictions) - these should be checked in functions that will - // use these parameters. The main checking done here is for the class of obj and - // some cases where the value of obj is not dependent on what is set before it - - if (key.equals("y")) { - if (!(obj instanceof Double[])) { - throw new QSARModelException("The class of the 'y' object must be Double[]"); - } - } - if (key.equals("x")) { - if (!(obj instanceof Double[][])) { - throw new QSARModelException("The class of the 'x' object must be Double[][]"); - } - } - if (key.equals("weights")) { - if (!(obj instanceof Double[])) { - throw new QSARModelException("The class of the 'weights' object must be Double[]"); - } - } - if (key.equals("interval")) { - if (!(obj instanceof String)) { - throw new QSARModelException("The class of the 'interval' object must be String"); - } - if (!(obj.equals("confidence") || obj.equals("prediction"))) { - throw new QSARModelException("The type of interval must be: prediction or confidence"); - } - } - if (key.equals("newdata")) { - if ( !(obj instanceof Double[][])) { - throw new QSARModelException("The class of the 'newdata' object must be Double[][]"); - } - } - this.params.put(key,obj); - } - - - /** - * Uses a fitted model to predict the response for new observations. - * - * This function uses a previously fitted model to obtain predicted values - * for a new set of observations. If the model has not been fitted prior to this - * call an exception will be thrown. Use setParameters - * to set the values of the independent variable for the new observations and the - * interval type. - * @throws QSARModelException if the model has not been built prior to a call - * to this method. 
Also if the number of independent variables specified for prediction - * is not the same as specified during model building - */ - public void predict() throws QSARModelException { - if (this.modelfit == null) - throw new QSARModelException("Before calling predict() you must fit the model using build()"); - - Double[][] newx = (Double[][])this.params.get(new String("newdata")); - if (newx[0].length != this.nvar) { - throw new QSARModelException("Number of independent variables used for prediction must match those used for fitting"); - } - - try { - this.modelpredict = (LinearRegressionModelPredict)revaluator.call("predictLM", - new Object[]{ getModelName(), this.params }); - } catch (Exception re) { - throw new QSARModelException(re.toString()); - } - } - - /** - * Returns an object summarizing the linear regression model. - * - * The return object simply wraps the fields from the summary.lm - * return value. Various details can be extracted from the return object, - * See {@link LinearRegressionModelSummary} for more details. - * - * @return A summary for the linear regression model - * @throws QSARModelException if the model has not been built prior to a call - * to this method - */ - public LinearRegressionModelSummary summary() throws QSARModelException { - if (this.modelfit == null) - throw new QSARModelException("Before calling summary() you must fit the model using build()"); - - LinearRegressionModelSummary s = null; - try { - s = (LinearRegressionModelSummary)revaluator.call("summaryModel", - new Object[]{ getModelName() }); - } catch (Exception re) { - throw new QSARModelException(re.toString()); - } - return(s); - } - - - /** - * Loads an LinearRegressionModel object from disk in to the current session. 
- * - * @param fileName The disk file containing the model - * @throws QSARModelException if the model being loaded is not a linear regression model - * object - */ - public void loadModel(String fileName) throws QSARModelException { - // should probably check that the fileName does exist - Object model = (Object)revaluator.call("loadModel", new Object[]{ (Object)fileName }); - String modelName = (String)revaluator.call("loadModel.getName", new Object[] { (Object)fileName }); - - if (model.getClass().getName().equals("org.openscience.cdk.qsar.model.R.LinearRegressionModelFit")) { - this.modelfit =(LinearRegressionModelFit)model; - this.setModelName(modelName); - Double tmp = (Double)revaluator.eval("length("+modelName+"$coefficients)-1"); - nvar = (int)tmp.doubleValue(); - } else throw new QSARModelException("The loaded model was not a LinearRegressionModel"); - } - /** - * Loads an LinearRegressionModel object from a serialized string into the current session. - * - * @param serializedModel A String containing the serialized version of the model - * @param modelName A String indicating the name of the model in the R session - * @throws QSARModelException if the model being loaded is not a linear regression model - * object - */ - public void loadModel(String serializedModel, String modelName) throws QSARModelException { - // should prxbably check that the fileName does exist - Object model = (Object)revaluator.call("unserializeModel", new Object[]{ (Object)serializedModel, (Object)modelName }); - String modelname = modelName; - - if (model.getClass().getName().equals("org.openscience.cdk.qsar.model.R.LinearRegressionModelFit")) { - this.modelfit =(LinearRegressionModelFit)model; - this.setModelName(modelname); - Double tmp = (Double)revaluator.eval("length("+modelName+"$coefficients)-1"); - nvar = (int)tmp.doubleValue(); - } else throw new QSARModelException("The loaded model was not a LinearRegressionModel"); - } - - /* interface to fit object */ - - /** - * Gets 
the rank of the fitted linear model. - * - * This method only returns meaningful results if the build - * method of this class has been previously called. - * - * @return An integer indicating the rank - */ - public int getFitRank() { return(this.modelfit.getRank()); } - - /** - * Returns the residuals. - * - * The residuals are the response minus the fitted values. - * This method only returns meaningful results if the build - * method of this class has been previously called. - * @return A double[] containing the residuals for each observation - */ - public double[] getFitResiduals() { return(this.modelfit.getResiduals()); } - - /** - * Returns the estimated coefficients. - * - * This method only returns meaningful results if the build - * method of this class has been previously called. - * @return A double[] containing the coefficients - */ - public double[] getFitCoefficients() { return(this.modelfit.getCoefficients()); } - - /** - * Returns the residual degrees of freedom. - * - * This method only returns meaningful results if the build - * method of this class has been previously called. - * @return An integer indicating the residual degrees of freedom - */ - public int getFitDFResidual() { return(this.modelfit.getdfResidual()); } - - /** - * Returns the fitted mean values. - * - * This method only returns meaningful results if the build - * method of this class has been previously called. - * @return A double[] containing the fitted values - */ - public double[] getFitFitted() { return(this.modelfit.getFitted()); } - - - - - - /* interface to predict object */ - - /** - * Returns the degrees of freedom for residual. - * - * @return An integer indicating degrees of freedom - */ - public int getPredictDF() { return(this.modelpredict.getDF()); } - - /** - * Returns the residual standard deviations. 
- * - * @return A double indicating residual standard deviations - */ - public double getPredictResidualScale() { return(this.modelpredict.getResidualScale()); } - - /** - * Returns the predicted values for the prediction set. - * - * This function only returns meaningful results if the predict - * method of this class has been called. - * - * @return A double[] containing the predicted values - */ - public double[] getPredictPredicted() { return(this.modelpredict.getPredicted()); } - - /** - * Returns the lower prediction bounds. - * - * By default the bounds (both lower and upper) are confidence bounds. However - * the call to predict can specify prediction bounds. - * This function only returns meaningful results if the predict - * method of this class has been called. - * - * @return A double[] containing the lower bounds for the predictions - */ - public double[] getPredictLowerBound() { return(this.modelpredict.getLower()); } - - /** - * Returns the upper prediction bounds. - * - * By default the bounds (both lower and upper) are confidence bounds. However - * the call to predict can specify prediction bounds. - * This function only returns meaningful results if the predict - * method of this class has been called. - * - * @return A double[] containing the upper bounds for the predictions - */ - public double[] getPredictUpperBound() { return(this.modelpredict.getUpper()); } - - /** - * Returns the standard error of predictions. - * - * This function only returns meaningful results if the predict - * method of this class has been called. - * - * @return A double[] containing the standard error of predictions. 
- */ - public double[] getPredictSEPredictions() { return(this.modelpredict.getSEFit()); } -} diff --git a/src/main/org/openscience/cdk/qsar/model/R/LinearRegressionModelFit.java b/src/main/org/openscience/cdk/qsar/model/R/LinearRegressionModelFit.java deleted file mode 100644 index 82147e8..0000000 --- a/src/main/org/openscience/cdk/qsar/model/R/LinearRegressionModelFit.java +++ /dev/null @@ -1,178 +0,0 @@ -/* $RCSfile$ - * $Author$ - * $Date$ - * $Revision$ - * - * Copyright (C) 2004-2008 Rajarshi Guha - * - * Contact: cdk-devel@lists.sourceforge.net - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - */ - -package org.openscience.cdk.qsar.model.R; - -/** - * A class that wraps the return value from the R function, lm. - * - * This is an internal class used by R to return the result of - * the call to lm. - * As a result it should not be instantiated by the user. The actual modeling - * class, LinearRegressionModel, provides access to the various - * fields of this object. - * - * - * @author Rajarshi Guha - * @cdk.require r-project - * @cdk.module qsar - * @cdk.githash - * @deprecated - */ - -public class LinearRegressionModelFit { - double[] coeff, res, fitted; - int rank, dfResidual; - - /** - * Construct the object to contain a linear regression fit. 
- * - * @param coeff A 1-dimensional array of coefficients - * @param res A 1-dimensional array of residuals - * @param fitted A 1-dimensional array of fitted values - * @param rank An integer indicating the rank of the fit - * @param degreesOfFreedom The degrees of freedom - */ - public LinearRegressionModelFit(double[] coeff, double[] res, double[] fitted, int rank, int degreesOfFreedom) { - setCoefficients(coeff); - setResiduals(res); - setFitted(fitted); - setRank(rank); - setdfResidual(degreesOfFreedom); - } - - /** - * Get the rank of the fit. - * - * @return The rank of the fit - * @see #setRank - */ - public int getRank() { return(this.rank); } - - /** - * Set the rank of the fit. - * - * This method should not be called outside this class - * - * @param rank The rank of the fit - * @see #getRank - */ - public void setRank(int rank) { this.rank = rank; }; - - /** - * Get the residuals of the fit. - * - * The number of residuals equals the number of observations used - * to build the model - * - * @return A 1-dimensional array containing the residuals. - * @see #setResiduals - */ - public double[] getResiduals() { return(this.res); } - - /** - * Set the residuals of the fit. - * - * This method should not be called outside this class - * - * @param residuals A 1-dimensional array of residual values - * @see #getResiduals - */ - public void setResiduals(double[] residuals) { - this.res = new double[residuals.length]; - for (int i = 0; i < residuals.length; i++) this.res[i] = residuals[i]; - } - - /** - * Get the fitted coefficients. - * - * The number of coefficients equals the number of independent - * variables used to build the model - * - * @return A 1-dimensional array containing the coefficients. - * @see #setCoefficients - */ - public double[] getCoefficients() { return(this.coeff); } - - /** - * Set the fitted coefficients. 
- * - * - * This method should not be called outside this class - * - * @param coeff A 1-dimensional array containing the coefficients. - * @see #getCoefficients - */ - public void setCoefficients(double[] coeff) { - this.coeff = new double[coeff.length]; - for (int i = 0; i < coeff.length; i++) this.coeff[i] = coeff[i]; - } - - /** - * Get the DOF of the residuals. - * - * @return An integer indicating the D.O.F - * @see #setdfResidual - */ - public int getdfResidual() { return(this.dfResidual); } - - /** - * Set the DOF of the residuals. - * - * This method should not be called outside this class - * - * @param degreesOfFreedom The degrees of freedom - * @see #getdfResidual - */ - public void setdfResidual(int degreesOfFreedom) { this.dfResidual = degreesOfFreedom; } - - - /** - * Get the fitted values. - * - * Returns the predicted values for the observations used to - * build the model. The number of fitted values equals the number - * observations used to build the model. - * - * @return A 1-dimensional array containing the fitted values - * @see #setFitted - */ - public double[] getFitted() { return(this.fitted); } - - /** - * Set the fitted values. 
- * - * This method should not be called outside this class - * - * @param fitted A 1-dimensional array of fitted values - * @see #getFitted - */ - public void setFitted(double[] fitted) { - this.fitted = new double[fitted.length]; - for (int i = 0; i < fitted.length; i++) this.fitted[i] = fitted[i]; - } -} - - - diff --git a/src/main/org/openscience/cdk/qsar/model/R/LinearRegressionModelPredict.java b/src/main/org/openscience/cdk/qsar/model/R/LinearRegressionModelPredict.java deleted file mode 100644 index 14faab3..0000000 --- a/src/main/org/openscience/cdk/qsar/model/R/LinearRegressionModelPredict.java +++ /dev/null @@ -1,222 +0,0 @@ -/* $RCSfile$ - * $Author$ - * $Date$ - * $Revision$ - * - * Copyright (C) 2004-2008 Rajarshi Guha - * - * Contact: cdk-devel@lists.sourceforge.net - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - */ - -package org.openscience.cdk.qsar.model.R; - -/** - * A class that wraps the return value from the R function, predict.lm. - * - * This is an internal class used by R to return the result of - * the call to predict.lm. - * As a result it should not be instantiated by the user. The actual modeling - * class, LinearRegressionModel, provides access to the various - * fields of this object. 
- * - * - * @author Rajarshi Guha - * @cdk.require r-project - * @cdk.module qsar - * @cdk.githash - * @deprecated - */ -public class LinearRegressionModelPredict { - double[] pred, lwr, upr, sefit; - int degreesOfFreedom; - double residualScale; - - /** - * Construct the object to contain linear regression predictions. - * - * @param predicted A 1-dimensional array of predicted values - * @param standardErrors A 1-dimensional array of standard errors of prediction - * @param lowerBounds A 1-dimensional array of lower confidence bounds - * @param upperBounds A 1-dimensional array of upper confidence bounds - * @param degreesOfFreedom The degrees of freedom of the predictions - * @param residualScale The scale of the residuals - */ - public LinearRegressionModelPredict(double[] predicted, double[] standardErrors, - double[] lowerBounds, double[] upperBounds, - int degreesOfFreedom, double residualScale) { - setPredicted(predicted); - setSEFit(standardErrors); - setLower(lowerBounds); - setUpper(upperBounds); - setDF(degreesOfFreedom); - setResidualScale(residualScale); - } - /** - * Construct the object to contain linear regression predictions. - * - * This is required if a single prediction was requested in which case - * R will pass a single double value rather than an array. 
- * - * @param predicted The predicted values - * @param standardErrors The standard errors of prediction - * @param lowerBounds The lower confidence bounds - * @param upperBounds The upper confidence bounds - * @param degreesOfFreedom The degrees of freedom of the predictions - * @param residualScale The scale of the residuals - */ - public LinearRegressionModelPredict(double predicted, double standardErrors, - double lowerBounds, double upperBounds, - int degreesOfFreedom, double residualScale) { - setPredicted(new double[] {predicted}); - setSEFit(new double[] {standardErrors}); - setLower(new double[] {lowerBounds}); - setUpper(new double[] {upperBounds}); - setDF(degreesOfFreedom); - setResidualScale(residualScale); - } - - /** - * Get the degrees of freedom. - * - * @return An integer indicating the degrees of freedom - * @see #setDF - */ - public int getDF() { return(this.degreesOfFreedom); } - - /** - * Set the degrees of freedom. - * - * This method should not be called outside this class - * - * @param degreesOfFreedom An integer indicating the degrees of freedom - * @see #getDF - */ - public void setDF(int degreesOfFreedom) { this.degreesOfFreedom = degreesOfFreedom; } - - /** - * Get the scale of residuals. - * - * @return A double indicating the residual scale - * @see #setResidualScale - */ - public double getResidualScale() { return(this.residualScale); } - - /** - * Set the scale of the residuals. - * - * This method should not be called outside this class - * - * @param scale The scale of the residuals - * @see #getResidualScale - */ - public void setResidualScale(double scale) { this.residualScale = scale; } - - /** - * Get predicted values. - * - * Get the predictions for a set of observations from the current linear - * regression fit - * - * @return A 1-dimensional array containing the predicted values - * @see #setPredicted - */ - public double[] getPredicted() { return(this.pred); } - - /** - * Set the predicted values. 
- * - * This method should not be called outside this class - * - * @param predicted A 1-dimensional array of predicted values - * @see #getPredicted - */ - public void setPredicted(double[] predicted) { - this.pred = new double[predicted.length]; - for (int i = 0; i < predicted.length; i++) this.pred[i] = predicted[i]; - } - - /** - * Get the lower confidence bounds. - * - * Gets the lower confidence bounds for the predicted values of - * the observations - * - * @return A 1-dimensional array of lower confidence bounds - * @see #setLower - */ - public double[] getLower() { return(this.lwr); } - - /** - * Set the lower confidence bounds. - * - * This method should not be called outside this class - * - * @param lowerBounds A 1-dimensional array of lower confidence bounds - * @see #getLower - */ - public void setLower(double[] lowerBounds) { - this.lwr = new double[lowerBounds.length]; - for (int i = 0; i < lowerBounds.length; i++) this.lwr[i] = lowerBounds[i]; - } - - /** - * Get the upper confidence bounds. - * - * Gets the upper confidence bounds for the predicted values of - * the observations - * - * @return A 1-dimensional array of upper confidence bounds - * @see #setUpper - */ - public double[] getUpper() { return(this.upr); } - - /** - * Set the upper confidence bounds. - * - * This method should not be called outside this class - * - * @param upperBounds A 1-dimensional array of upper confidence bounds - * @see #getUpper - */ - public void setUpper(double[] upperBounds) { - this.upr = new double[upperBounds.length]; - for (int i = 0; i < upperBounds.length; i++) this.upr[i] = upperBounds[i]; - } - - - /** - * Get the standard errors of prediction. - * - * @return A 1-dimensional array of standard errors - * @see #setSEFit - */ - public double[] getSEFit() { return(this.sefit); } - - /** - * Set the standard errors of predictions. 
- * - * @param standardErrors A 1-dimensional array of standard errors - * @see #getSEFit - */ - public void setSEFit(double[] standardErrors) { - this.sefit = new double[standardErrors.length]; - for (int i = 0; i < standardErrors.length; i++) this.sefit[i] = standardErrors[i]; - } - -} - - diff --git a/src/main/org/openscience/cdk/qsar/model/R/LinearRegressionModelSummary.java b/src/main/org/openscience/cdk/qsar/model/R/LinearRegressionModelSummary.java deleted file mode 100644 index 317319e..0000000 --- a/src/main/org/openscience/cdk/qsar/model/R/LinearRegressionModelSummary.java +++ /dev/null @@ -1,192 +0,0 @@ -/* $RCSfile$ - * $Author$ - * $Date$ - * $Revision$ - * - * Copyright (C) 2004-2008 Rajarshi Guha - * - * Contact: cdk-devel@lists.sourceforge.net - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - */ - -package org.openscience.cdk.qsar.model.R; - - -/** A class that represents a summary of a linear regression model. - * - * This class essentially wraps the result of summary.lm. As with other - * backend classes this class should not be instantiated directly by the - * user, though the various fields may be accessed with the provided - * methods. 
- * - * @author Rajarshi Guha - * @cdk.require r-project - * @cdk.module qsar - * @cdk.githash - * @deprecated - */ -public class LinearRegressionModelSummary { - - double[] residuals; - double[][] coeff; // rows - vars, cols - stats - double rsq, adjrsq, sigma; - int df; - int numdf, dendf; - double fstat; - - String[] colNames; - String[] rowNames; - - private double[][] vectorToMatrix(double[] v, int nrow, int ncol) { - double[][] m = new double[nrow][ncol]; - for (int i = 0; i < ncol; i++) { - for (int j = 0; j < nrow; j++) { - m[j][i] = v[j + i*nrow]; - } - } - return(m); - } - - /** - * Constructor for an object that wraps the return value from summary.lm. - * - * This should not be instantiated directly. The class is meant to be instantiated - * from an R session - * - * @param residuals An array of residuals - * @param coeff An array of coeffs and associated statistics - * @param coeffColNames The names of the columns for the coefficient matrix - * @param coeffRowNames The names of the rows for the coefficient matrix - * @param sigma The residual error - * @param df The degrees of freedom - * @param rsq The R^2 value - * @param adjrsq The adjusted R^2 value - * @param fstat The value of the F-statistic - */ - public LinearRegressionModelSummary( double[] residuals , double coeff[], - double sigma, double rsq, double adjrsq, int df, - double[] fstat, String[] coeffRowNames, String[] coeffColNames) { - - - this.residuals = new double[residuals.length]; - for (int i = 0; i < residuals.length; i++) - this.residuals[i] = residuals[i]; - - this.coeff = vectorToMatrix(coeff, coeff.length/4, 4); - - - this.colNames = new String[coeffColNames.length]; - this.rowNames = new String[coeffRowNames.length]; - for (int i = 0; i < coeffColNames.length; i++) this.colNames[i] = coeffColNames[i]; - for (int i = 0; i < coeffRowNames.length; i++) this.rowNames[i] = coeffRowNames[i]; - - - this.sigma = sigma; - this.df = df; - this.rsq = rsq; - this.adjrsq = adjrsq; - this.numdf 
= (int)fstat[1]; - this.dendf = (int)fstat[2]; - this.fstat = fstat[0]; - - } - - /** - * Return the residuals of the fit. - * - * @return A 1-dimensional array of doubles containing the - * residuals of the fit - */ - public double[] getResiduals() { - return(this.residuals); - } - - - /** - * Returns the coefficients and associated statistics. - * - * This method will return the coefficients as well as the standard - * error in the coefficients, t-values and p-values corresponding to the - * t-values. Thus the return value is a 2D array of doubles, with rows equal - * to the number of coefficients (ie 1+num predictor variables) and 4 columns - * containing the estimated coefficients and the above statistics, in the - * order mentioned above. - * - * @return A 2-D array of doubles containing the estimated coefficients and - * associated statistics - */ - public double[][] getCoeff() { - return(this.coeff); - } - - /** - * Returns the R^2 value. - * - * @return The R^2 value - */ - public double getRSQ() { - return(this.rsq); - } - - /** - * Return the adjusted R^2 value. - * - * This statistic is generally a better indicator than plain R^2 - * - * @return The adjusted R^2 value - */ - public double getAdjRSQ() { - return(this.adjrsq); - } - - /** - * Return the residual standard error. - * - * This method returns the residual standard error and the associated degrees - * of freedom, in a 2 element array of doubles. - * - * @return A 2 element array of doubles containing the residual error and - * DoF - */ - public double[] getSigma() { - double[] ret = {this.sigma, this.df}; - return( ret ); - } - - /** - * Returns the value of the F-statistic. - * - * @return The F-statistic - * @see #getFStatisticDF - */ - public double getFStatistic() { - return(this.fstat); - } - - /** - * Returns the degrees of freedom (DoF) for which the F-statistic was calculated. - * - * @return A 2 element int[]. 
The first element is the DoF of the numerator - * and the second element is the DoF of the denominator - * @see #getFStatistic - */ - public int[] getFStatisticDF() { - int[] ret = {this.numdf, this.dendf}; - return( ret ); - } - -} - diff --git a/src/main/org/openscience/cdk/qsar/model/R/PLSRegressionModel.java b/src/main/org/openscience/cdk/qsar/model/R/PLSRegressionModel.java deleted file mode 100644 index 407c106..0000000 --- a/src/main/org/openscience/cdk/qsar/model/R/PLSRegressionModel.java +++ /dev/null @@ -1,585 +0,0 @@ -/* $RCSfile$ - * $Author$ - * $Date$ - * $Revision$ - * - * Copyright (C) 2004-2008 Rajarshi Guha - * - * Contact: cdk-devel@lists.sourceforge.net - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - */ - -package org.openscience.cdk.qsar.model.R; - -import org.openscience.cdk.qsar.model.QSARModelException; - -import java.util.HashMap; - -/** - * A modeling class that provides a PLS regression model. - * - * When instantiated this class ensures that the R/Java interface has been - * initialized. The response and independent variables can be specified at construction - * time or via the setParameters method. - * The actual fitting procedure is carried out by build. - *

NOTE: For this class to work, you must have the - * pls.pcr - * package installed in your R library. - *

- * When building the PLS model, parameters such as whether cross validation is to be used, the type of - * PLS algorithm etc can be specified by making calls to setParameters. This method can also - * be used to set a new X matrix for prediction. - * The following table lists the parameters that can be set and their - * expected types. More detailed information is available in the R documentation. - *

- * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - *
Name | Java Type | Default | Notes
X | Double[][] | None | Variables should be in the columns, observations in the rows
Y | Double[][] | None | Length should be equal to the rows of X. Variables should be in the columns, observations in the rows
newX | Double[][] | None | A 2D array of values to make predictions for. Variables should be in the columns, observations in the rows
ncomp | Integer[] | {1,rank(X)} | This can be an array of length 1 or 2. If there is only one element - * then only the specified number of latent variables will be assessed during modeling. If 2 values are specified - * then the model will use N1 to N2 latent variables where N1 and N2 are the first and second elements respectively
method | String | "SIMPLS" | The type of PLS algorithm to use (can be SIMPLS or kernelPLS)
validation | String | "none" | Indicates whether cross validation should be used. To enable cross validation set this to "CV"
grpsize | Integer | 0 | The group size for the "CV" validation. By default this is ignored and niter is used to determine the value of this argument
niter | Integer | 10 | The number of iterations in the cross-validation. Note that if grpsize is set to a non-zero value then the value of niter will be calculated from the value of grpsize
nlv | Integer | None | The number of latent variables to use during prediction. By default this does not need to be specified and will be obtained from the fitted model
- *
- *

- * In general the getFit* methods provide access to results from the fit and - * getPredict* methods provide access to results from the prediction. In case validation is specified - * then the results from the CV can be obtained via the getValidation* methods. - * The values returned correspond to the various - * values returned by the pls and - * predict.mvr - * functions in R. - *

- * See {@link RModel} for details regarding the R and SJava environment. - * - * @author Rajarshi Guha - * @cdk.require r-project - * @cdk.module qsar - * @cdk.githash - * - * @cdk.keyword partial least squares - * @cdk.keyword PLS - * @cdk.keyword regression - * @deprecated - */ -public class PLSRegressionModel extends RModel { - - private static int globalID = 0; - private int currentID; - private PLSRegressionModelFit modelfit = null; - private PLSRegressionModelPredict modelpredict = null; - - private HashMap params = null; - private int nvar = 0; - - private void setDefaults() { - this.params.put("ncomp", new Boolean(false)); - this.params.put("method", "SIMPLS"); - this.params.put("validation", "none"); - this.params.put("grpsize", Integer.valueOf(0)); - this.params.put("niter", Integer.valueOf(10)); - this.params.put("nlv", new Boolean(false)); - } - /** - * Constructs a PLSRegressionModel object. - * - * The constructor simply instantiates the model ID. Dependent and independent variables - * should be set via setParameters(). - */ - public PLSRegressionModel(){ - super(); - - this.params = new HashMap(); - - this.currentID = PLSRegressionModel.globalID; - PLSRegressionModel.globalID++; - this.setModelName("cdkPLSRegressionModel"+this.currentID); - this.setDefaults(); - } - - /** - * Constructs a PLSRegressionModel object. - * - * The constructor allows the user to specify the - * dependent and independent variables. The length of the dependent variable - * array should equal the number of rows of the independent variable matrix. If this - * is not the case an exception will be thrown. - * - * @param xx An array of independent variables. 
The observations should be in the rows - * and the variables should be in the columns - * @param yy An array containing the dependent variable - * @throws QSARModelException if the number of observations in x and y do not match - */ - public PLSRegressionModel(double[][] xx, double[] yy) throws QSARModelException{ - super(); - - this.params = new HashMap(); - - this.currentID = PLSRegressionModel.globalID; - PLSRegressionModel.globalID++; - this.setModelName("cdkPLSRegressionModel"+this.currentID); - this.setDefaults(); - - int nrow = yy.length; - this.nvar = xx[0].length; - - if (nrow != xx.length) { - throw new QSARModelException("The number of values for the dependent variable does not match the number of rows of the design matrix"); - } - - Double[][] x = new Double[nrow][this.nvar]; - Double[][] y = new Double[nrow][1]; - - for (int i = 0; i < nrow; i++) { - y[i][1] = new Double(yy[i]); - for (int j = 0; j < this.nvar; j++) - x[i][j] = new Double(xx[i][j]); - } - - params.put("X", x); - params.put("Y", y); - } - - - /** - * Constructs a PLSRegressionModel object. - * - * The constructor allows the user to specify the - * dependent and independent variables. This constructor will accept a matrix - * of Y values. - *

- * The length of the dependent variable - * array should equal the number of rows of the independent variable matrix. If this - * is not the case an exception will be thrown. - * - * @param xx An array of independent variables. The observations should be in the rows - * and the variables should be in the columns - * @param yy A 2D array containing the dependent variable - * @throws QSARModelException if the number of observations in x and y do not match - */ - public PLSRegressionModel(double[][] xx, double[][] yy) throws QSARModelException{ - super(); - - this.params = new HashMap(); - - this.currentID = PLSRegressionModel.globalID; - PLSRegressionModel.globalID++; - this.setModelName("cdkPLSRegressionModel"+this.currentID); - this.setDefaults(); - - int nrow = yy.length; - int ncoly = yy[0].length; - this.nvar = xx[0].length; - - if (nrow != xx.length) { - throw new QSARModelException("The number of values for the dependent variable does not match the number of rows of the design matrix"); - } - - Double[][] x = new Double[nrow][this.nvar]; - Double[][] y = new Double[nrow][ncoly]; - //Double[] wts = new Double[nrow]; - - for (int i = 0; i < nrow; i++) { - for (int j = 0; j < ncoly; j++) { - y[i][j] = new Double(yy[i][j]); - } - } - for (int i = 0; i < nrow; i++) { - for (int j = 0; j < this.nvar; j++) - x[i][j] = new Double(xx[i][j]); - } - params.put("X", x); - params.put("Y", y); - } - - protected void finalize() { - revaluator.voidEval("rm("+this.getModelName()+",pos=1)"); - } - - - - /** - * Fits a PLS model. - * - * This method calls the R function to fit a PLS model - * using the specified dependent and independent variables. If an error - * occurs in the R session, an exception is thrown. 
- */ - public void build() throws QSARModelException { - // lets do some checks in case stuff was set via setParameters() - Double[][] x,y; - x = (Double[][])this.params.get("X"); - y = (Double[][])this.params.get("Y"); - if (this.nvar == 0) this.nvar = x[0].length; - else { - if (y.length != x.length) { - throw new QSARModelException("Number of observations does no match number of rows in the design matrix"); - } - } - - // lets build the model - try { - this.modelfit = (PLSRegressionModelFit)revaluator.call("buildPLS", - new Object[]{ getModelName(), this.params }); - } catch (Exception re) { - throw new QSARModelException(re.toString()); - } - } - /** - * Uses a fitted model to predict the response for new observations. - * - * This function uses a previously fitted model to obtain predicted values - * for a new set of observations. If the model has not been fitted prior to this - * call an exception will be thrown. Use setParameters - * to set the values of the independent variable for the new observations. - */ - public void predict() throws QSARModelException { - if (this.modelfit == null) - throw new QSARModelException("Before calling predict() you must fit the model using build()"); - - Double[][] newx = (Double[][])this.params.get(new String("newX")); - if (newx[0].length != this.nvar) { - throw new QSARModelException("Number of independent variables used for prediction must match those used for fitting"); - } - - try { - this.modelpredict = (PLSRegressionModelPredict)revaluator.call("predictPLS", - new Object[]{ getModelName(), this.params }); - } catch (Exception re) { - throw new QSARModelException(re.toString()); - } - } - - /** - * Loads a PLSRegressionModel object from disk in to the current session. 
- * - * @param fileName The disk file containing the model - * @throws QSARModelException if the model being loaded is not a PLS regression model - * object - */ - public void loadModel(String fileName) throws QSARModelException { - // should probably check that the filename does exist - Object model = (Object)revaluator.call("loadModel", new Object[]{ (Object)fileName }); - String modelName = (String)revaluator.call("loadModel.getName", new Object[] { (Object)fileName }); - - if (model.getClass().getName().equals("org.openscience.cdk.qsar.model.R.PLSRegressionModelFit")) { - this.modelfit = (PLSRegressionModelFit)model; - this.setModelName(modelName); - } else throw new QSARModelException("The loaded model was not a PLSRegressionModel"); - } - /** - * Loads an PLSRegressionModel object from a serialized string into the current session. - * - * @param serializedModel A String containing the serialized version of the model - * @param modelName A String indicating the name of the model in the R session - * @throws QSARModelException if the model being loaded is not a PLS regression model - * object - */ - public void loadModel(String serializedModel, String modelName) throws QSARModelException { - // should probably check that the fileName does exist - Object model = (Object)revaluator.call("unserializeModel", new Object[]{ (Object)serializedModel, (Object)modelName }); - String modelname = modelName; - - if (model.getClass().getName().equals("org.openscience.cdk.qsar.model.R.PLSRegressionModelFit")) { - this.modelfit =(PLSRegressionModelFit)model; - this.setModelName(modelname); - } else throw new QSARModelException("The loaded model was not a PLSRegressionModel"); - } - - - - /** - * Sets parameters required for building a PLS model or using one for prediction. - * - * This function allows the caller to set the various parameters available - * for the pls() and predict.mvr() R routines. See the R help pages for the details of the available - * parameters. 
- * - * @param key A String containing the name of the parameter as described in the - * R help pages - * @param obj An Object containing the value of the parameter - * @throws QSARModelException if the type of the supplied value does not match the - * expected type - */ - public void setParameters(String key, Object obj) throws QSARModelException { - // since we know the possible values of key we should check the coresponding - // objects and throw errors if required. Note that this checking can't really check - // for values (such as number of variables in the X matrix to build the model and the - // X matrix to make new predictions) - these should be checked in functions that will - // use these parameters. The main checking done here is for the class of obj and - // some cases where the value of obj is not dependent on what is set before it - - if (key.equals("Y")) { - if (!(obj instanceof Double[])) { - throw new QSARModelException("The class of the 'Y' object must be Double[][]"); - } - } - if (key.equals("X")) { - if (!(obj instanceof Double[][])) { - throw new QSARModelException("The class of the 'X' object must be Double[][]"); - } - } - if (key.equals("method")) { - if (!(obj instanceof String)) { - throw new QSARModelException("The class of the 'method' object must be String"); - } - if (!(obj.equals("SIMPLS") || obj.equals("kernelPLS"))) { - throw new QSARModelException("The value of method must be: SIMPLS or kernelPLS "); - } - } - if (key.equals("validation")) { - if (!(obj instanceof String)) { - throw new QSARModelException("The class of the 'validation' object must be String"); - } - if (!(obj.equals("none") || obj.equals("CV"))) { - throw new QSARModelException("The value of validation must be: none or CV"); - } - } - - if (key.equals("newX")) { - if ( !(obj instanceof Double[][])) { - throw new QSARModelException("The class of the 'newX' object must be Double[][]"); - } - } - if (key.equals("grpsize")) { - if (!(obj instanceof Integer)) { - throw 
new QSARModelException("The class of the 'grpsize' object must be Integer"); - } - } - if (key.equals("niter")) { - if (!(obj instanceof Integer)) { - throw new QSARModelException("The class of the 'niter' object must be Integer"); - } - } - if (key.equals("nlv")) { - if (!(obj instanceof Integer)) { - throw new QSARModelException("The class of the 'nlv' object must be Integer"); - } - } - - if (key.equals("ncomp")) { - if (!(obj instanceof Integer[])) { - throw new QSARModelException("The class of the 'ncomp' object must be Integer[]"); - } - Integer[] tmp = (Integer[])obj; - if (tmp.length != 1 && tmp.length != 2) { - throw new QSARModelException("The 'ncomp' array can have a length of 1 or 2. See documentation"); - } - } - - this.params.put(key,obj); - } - - - - /* interface to fit object */ - - /** - * The method used to build the PLS model. - * - * @return String containing 'SIMPLS' or 'kernelPLS' - */ - public String getFitMethod() { - return(this.modelfit.getMethod()); - } - - - /** - * Returns the fit NComp value. - * - * @return An array of integers indicating the number of components - * (latent variables) - */ - public int[] getFitNComp() { - return(this.modelfit.getNComp()); - } - - /** - * Gets the coefficents. - * - * The return value is a 3D array. The first dimension corresponds - * to the specific number of LV's (1 or 2 or 3 and so on). The second - * dimension corresponds to the independent variables and the third - * dimension corresponds to the Y variables. - * - * @return double[][][] containing the coefficients - */ - public double[][][] getFitB() { - return(this.modelfit.getB()); - } - - /** - * Get the Root Mean Square (RMS) error for the fit. - * - * @return A 2-dimensional array of RMS errors. - */ - public double[][] getFitRMS() { - return(this.modelfit.getTrainingRMS()); - } - - /** - * Get the predicted Y's. - * - * Each set of latent variables is used to make predictions for all the - * Y variables. 
- * - * @return A 3-dimensional array of doubles. The first dimension corresponds - * to the set of latent variables and the remaining two correspond to the - * Y's themselves. - */ - public double[][][] getFitYPred() { - return(this.modelfit.getTrainingYPred()); - } - - /** - * Get the X loadings. - * - * @return A 2-dimensional array of doubles containing the X loadings - */ - public double[][] getFitXLoading() { - return(this.modelfit.getXLoading()); - } - /** - * Get the Y loadings. - * - * @return A 2-dimensional array of doubles containing the Y loadings - */ - public double[][] getFitYLoading() { - return(this.modelfit.getYLoading()); - } - /** - * Get the X scores. - * - * @return A 2-dimensional array of doubles containing the X scores - */ - public double[][] getFitXScores() { - return(this.modelfit.getXScores()); - } - /** - * Get the Y scores. - * - * @return A 2-dimensional array of doubles containing the Y scores - */ - public double[][] getFitYScores() { - return(this.modelfit.getYScores()); - } - /** - * Indicates whether CV was used to build the model. - * - * @return A boolean indicating whether CV was used - */ - public boolean getFitWasValidated() { - return(this.modelfit.wasValidated()); - } - - - /** - * The number of iterations used during CV. - * - * @return An int value indicating the number of iterations in CV - */ - public int getValidationIter() { - return(this.modelfit.getValidationIter()); - } - /** - * The number of latent variables suggested by CV. - * - * @return An int value indicating the number of LV's - */ - public int getValidationLV() { - return(this.modelfit.getValidationLV()); - } - - /** - * Get the R^2 value for validation. - * - * @return A 2-dimensional array of doubles - */ - public double[][] getValidationR2() { - return(this.modelfit.getValidationR2()); - } - /** - * Get the RMS value for validation. 
- * - * @return A 2-dimensional array of doubles - */ - public double[][] getValidationRMS() { - return(this.modelfit.getValidationRMS()); - } - /** - * Get the standard deviation of the RMS errrors for validation. - * - * @return A 2-dimensional array of doubles - */ - public double[][] getValidationRMSsd() { - return(this.modelfit.getValidationRMSSD()); - } - /** - * Get the predicted Y values from validation. - * - * @return A 2-dimensional array of doubles - */ - public double[][][] getValidationYPred() { - return(this.modelfit.getValidationYPred()); - } - - - - - /* interface to predict object */ - - /** - * Returns the predicted values for the prediction set. - * - * This function only returns meaningful results if the predict - * method of this class has been called. - * - * @return A double[][] containing the predicted values - */ - public double[][] getPredictPredicted() { - return(this.modelpredict.getPredictions()); - } -} diff --git a/src/main/org/openscience/cdk/qsar/model/R/PLSRegressionModelFit.java b/src/main/org/openscience/cdk/qsar/model/R/PLSRegressionModelFit.java deleted file mode 100644 index cff1dab..0000000 --- a/src/main/org/openscience/cdk/qsar/model/R/PLSRegressionModelFit.java +++ /dev/null @@ -1,211 +0,0 @@ -/* $RCSfile$ - * $Author$ - * $Date$ - * $Revision$ - * - * Copyright (C) 2004-2008 Rajarshi Guha - * - * Contact: cdk-devel@lists.sourceforge.net - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - */ - -package org.openscience.cdk.qsar.model.R; - -/** - * A class that wraps the return value from R function, pls. - * - * This is an internal class used by R to return the result of - * the call to - * pls. - * As a result it should not be instantiated by the user. The actual modeling - * class, PLSRegressionModel, provides acess to the various - * fields of this object. - * - * - * @author Rajarshi Guha - * @cdk.require r-project - * @cdk.module qsar - * @cdk.githash - * @deprecated - */ - -class V2M { - static double[][] VectorToMatrix(double[] v, int nrow, int ncol) { - double[][] m = new double[nrow][ncol]; - for (int i = 0; i < ncol; i++) { - for (int j = 0; j < nrow; j++) { - m[j][i] = v[j + i*nrow]; - } - } - return(m); - } - static double[][][] VectorToCube(double[] v, int d1, int d2, int d3) { - // d2 ~ nrow, d3 ~ ncol - double[][][] m = new double[d1][d2][d3]; - for (int k = 0; k < d1; k++) { - for (int i = 0; i < d3; i++) { - for (int j = 0; j < d2; j++) { - m[k][j][i] = v[j + i*d2 + k*d2*d3]; - } - } - } - return(m); - } -} -/* - ncase tells us how many latent variable cases are being considered. - So if ncase == 1, the model only considered 1 LV. - if ncase == 2, the model considered the cases of 1 LV and 2 LV - - ncomp will contain the number of latent variables for each case. So - if pls() was called with 2:3 there are 2 cases, the first case considered - 2 LV's, the second case considered 3 LV's. - - But note that xscores, yscores, xload and yload will have the number - of columns equal to the max value of ncomp. 
So even if ncomp contains 2:3 - these arrays will have 3 columns (for the three cases - 1LV, 2LV & 3LV) - - npvar is the number of Y variables - - rms - ncase x npvar - yscores - nobs x max(ncomp) - xscores - nobs x max(nncomp) - yload - npvar x max(ncomp) - xload - nvar x max(ncomp) - ypred - ncase x nobs x npvar - B - ncase x nvar x npvar -*/ -class PLSTraining { - double[][] rms = null; - double[][] xscores = null; - double[][] xload = null; - double[][] yscores = null; - double[][] yload = null; - double[][][] B = null; - double[][][] ypred = null; - - PLSTraining(int[] ncomp, double[] B, double[] ypred, - double[] rms, - double[] xscores, double[] xload, - double[] yscores, double[] yload) { - - int ncase = ncomp.length; - int nobs = xscores.length / ncase; - int nvar = xload.length / ncase; - int npvar = yload.length / ncase; - - int maxncomp = -999999; - for (int i = 0; i < ncomp.length; i++) { - if (ncomp[i] > maxncomp) maxncomp = ncomp[i]; - } - - this.rms = V2M.VectorToMatrix(rms, ncase, npvar); - this.xscores = V2M.VectorToMatrix(xscores, nobs, maxncomp); - this.yscores = V2M.VectorToMatrix(yscores, nobs, maxncomp); - this.yload = V2M.VectorToMatrix(yload, npvar, maxncomp); - this.xload = V2M.VectorToMatrix(xload, nvar, maxncomp); - this.ypred = V2M.VectorToCube(ypred, ncase, nobs, npvar); - this.B = V2M.VectorToCube(B, ncase, nvar, npvar); - } -} - -/* - * npvar is the number of Y variables - * rms - ncase x npvar - * rmssd - ncase x npvar - * r2 - ncase x npvar - * ypred - ncase x nobs x npvar - */ -class PLSValidation { - double[][][] ypred = null; - int niter, nlv; - double[][] rms = null; - double[][] rmssd = null; - double[][] r2 = null; - - PLSValidation(int[] ncomp, int nobs, int niter, int nlv, - double[] ypred, double[] rms, double[] rmssd, double[] r2) { - - int ncase = ncomp.length; - int npvar = rms.length / ncase; - - this.niter = niter; - this.nlv = nlv; - this.rms = V2M.VectorToMatrix(rms, ncase, npvar); - this.rmssd = 
V2M.VectorToMatrix(rmssd, ncase, npvar); - this.r2 = V2M.VectorToMatrix(r2, ncase, npvar);; - this.ypred = V2M.VectorToCube(ypred, ncase, nobs, npvar); - } -} - -public class PLSRegressionModelFit { - int nobs, nvar, npvar, ncase; - int[] ncomp = null; - String method; - PLSTraining train = null; - PLSValidation valid = null; - - public PLSRegressionModelFit(int nobs, int nvar, int npred, - int[] ncomp, String method) { - - this.nobs = nobs; - this.nvar = nvar; - this.npvar = npred; - this.ncase = ncomp.length; - this.method = method; - - this.ncomp = new int[this.ncase]; - for (int i = 0; i < this.ncase; i++) this.ncomp[i] = ncomp[i]; - } - - public void setTrainingData(double[] B, double[] ypred, double[] rms, - double[] xscores, double[] yscores, - double[] xload, double[] yload) { - this.train = new PLSTraining(this.ncomp, B, ypred, rms, xscores, yscores, xload, yload); - } - public void setValidationData(int niter, int nlv, - double[] ypred, double[] rms, double[] rmssd, double[] r2) { - this.valid = new PLSValidation(this.ncomp, this.nobs, niter, nlv, ypred, rms, rmssd, r2); - } - - - public boolean wasValidated() { - if (this.valid != null) return(true); - else return(false); - } - - public int[] getNComp() { return this.ncomp; } - public String getMethod() { return this.method; } - - public double[][][] getB() { return this.train.B; } - public double[][][] getTrainingYPred() { return this.train.ypred; } - public double[][] getTrainingRMS() { return this.train.rms; } - public double[][] getXScores() { return this.train.xscores; } - public double[][] getYScores() { return this.train.yscores; } - public double[][] getXLoading() { return this.train.xload; } - public double[][] getYLoading() { return this.train.yload; } - - public double[][][] getValidationYPred() { return this.valid.ypred; } - public double[][] getValidationRMS() { return this.valid.rms; } - public double[][] getValidationRMSSD() { return this.valid.rmssd; } - public double[][] 
getValidationR2() { return this.valid.r2; } - public int getValidationLV() { return this.valid.nlv; } - public int getValidationIter() { return this.valid.niter; } - -} - - - diff --git a/src/main/org/openscience/cdk/qsar/model/R/PLSRegressionModelPredict.java b/src/main/org/openscience/cdk/qsar/model/R/PLSRegressionModelPredict.java deleted file mode 100644 index fbdcef1..0000000 --- a/src/main/org/openscience/cdk/qsar/model/R/PLSRegressionModelPredict.java +++ /dev/null @@ -1,83 +0,0 @@ -/* $RCSfile$ - * $Author$ - * $Date$ - * $Revision$ - * - * Copyright (C) 2004-2008 Rajarshi Guha - * - * Contact: cdk-devel@lists.sourceforge.net - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - */ - -package org.openscience.cdk.qsar.model.R; - -/** - * A class that wraps the return value from the R function, predict.mvr. - * - * This is an internal class used by R to return the result of - * the call to predict.mvr. - * As a result it should not be instantiated by the user. The actual modeling - * class, PLSRegressionModel, provides acess to the various - * fields of this object. 
- * - * - * @author Rajarshi Guha - * @cdk.require r-project - * @cdk.module qsar - * @cdk.githash - * @deprecated - */ -public class PLSRegressionModelPredict { - double[][] preds = null; - - private double[][] VectorToMatrix(double[] v, int nrow, int ncol) { - double[][] m = new double[nrow][ncol]; - for (int i = 0; i < ncol; i++) { - for (int j = 0; j < nrow; j++) { - m[j][i] = v[j + i*nrow]; - } - } - return(m); - } - - /** - * Constructor to contain the results of a PLS prediction. - * - * This class should not be instantiated directly and is really - * only meant to be instantiated from an R session - * - * @param ncol The number of predicted variables - * @param preds A 1-dimensional array of predicted values - */ - public PLSRegressionModelPredict(int ncol, double[] preds) { - this.preds = VectorToMatrix(preds, preds.length/ncol, ncol); - } - - /** - * Get the predicted values. - * - * This method returns the predicted values obtained by using new data - * with a previously built PLS regression model - * - * @return A 2-dimensional array of predictions, columns correspond to the - * predicted variables - */ - public double[][] getPredictions() { - return(this.preds); - } -} - - diff --git a/src/main/org/openscience/cdk/qsar/model/R/RModel.java b/src/main/org/openscience/cdk/qsar/model/R/RModel.java deleted file mode 100644 index b057296..0000000 --- a/src/main/org/openscience/cdk/qsar/model/R/RModel.java +++ /dev/null @@ -1,345 +0,0 @@ -/* $RCSfile$ - * $Author$ - * $Date$ - * $Revision$ - * - * Copyright (C) 2004-2008 Rajarshi Guha - * - * Contact: cdk-devel@lists.sourceforge.net - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - */ - -package org.openscience.cdk.qsar.model.R; - -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileWriter; -import java.io.InputStreamReader; -import java.io.StringWriter; - -import org.omegahat.R.Java.REvaluator; -import org.omegahat.R.Java.ROmegahatInterpreter; -import org.openscience.cdk.qsar.model.IModel; -import org.openscience.cdk.qsar.model.QSARModelException; -import org.openscience.cdk.tools.ILoggingTool; -import org.openscience.cdk.tools.LoggingToolFactory; - -/** Base class for modeling classes that use R as the backend. - * - * This cannot be directly instantiated as its sole function is - * to initialize the SJava system and source R matcher/converter - * functions into the loaded R session. The class variable revaluator - * can be accessed from subclasses to make calls to the R session. - * - * Any class that builds models using R should be a subclass of this. - * - * An important feature to note when using the R backend is that the SJava - * initialization must be done only once in a Java thread. As a result - * when any model class based on RModel is instantiated the constructor for the - * super class (i.e., Rmodel) makes sure that SJava is not already initialized. - *

- * By default the intialization uses a temporary file which is sourced in the - * R session. In some cases, such as web applications, temporary files might be - * problematic. In this case the R backend can be initialized via strings. To - * do this the application should specify -DinitRFromString=true on the command - * line. Note that this approach will be slightly slower compared to initializsation - * via a temporary file. - *

- * NOTE: For the R backend to work, ensure that R is correctly installed - * and that SJava is also installed, using the -c option. Finally, ensure - * that the R_HOME environment variable points to the R installation. - * - * - * @author Rajarshi Guha - * @cdk.require r-project - * @cdk.module qsar - * @cdk.githash - * @deprecated - */ -public abstract class RModel implements IModel { - - private String modelName = null; - - /** - * The object that performs the calls to the R engine. - */ - public static REvaluator revaluator = null; - /** - * This object represents an instance of the R interpreter. - * - * Due to the design of R, only one interpreter can be instantiated in a given - * thread. That is, the underlying R engine is not thread safe. As a result - * care must be taken to have only one instance of the interpreter. - */ - public static ROmegahatInterpreter interp = null; - - /** - * A boolean that indicates whether the R/Java subsystem has been initialized or not. - */ - private static boolean doneInit = false; - private static ILoggingTool logger = - LoggingToolFactory.createLoggingTool(RModel.class); - - private void loadRFunctions(REvaluator evaluator) { - String scriptLocator = "org/openscience/cdk/qsar/model/data/cdkSJava.R"; - try { - File scriptFile = File.createTempFile("XXXXX",".R"); - scriptFile.deleteOnExit(); - - InputStreamReader reader = new InputStreamReader( - this.getClass().getClassLoader().getResourceAsStream(scriptLocator)); - BufferedReader inFile = new BufferedReader(reader); - - FileWriter outFile = new FileWriter(scriptFile); - BufferedWriter outBuffer = new BufferedWriter(outFile); - String inputLine; - while ( (inputLine = inFile.readLine()) != null) { - outBuffer.write(inputLine,0,inputLine.length()); - outBuffer.newLine(); - } - outBuffer.close(); - inFile.close(); - outFile.close(); - - evaluator.voidEval("source(\""+scriptFile.getAbsolutePath()+"\")"); - - } catch (Exception exception) { - logger.error("Could not load 
CDK-SJava R script: ", scriptLocator); - logger.debug(exception); - } - } - - private void loadRFunctionsAsStrings(REvaluator evaluator) { - String[] scripts = { - "init_1.R", - "lm_2.R", - "cnn_3.R", "cnn_4.R", - "pls_5.R", - "register_999.R" - }; - String scriptPrefix = "org/openscience/cdk/qsar/model/data/"; - for (int i = 0; i < scripts.length; i++) { - - String scriptLocator = scriptPrefix + scripts[i]; - try { - InputStreamReader reader = new InputStreamReader( - this.getClass().getClassLoader().getResourceAsStream(scriptLocator)); - BufferedReader inFile = new BufferedReader(reader); - - StringWriter sw = new StringWriter(); - String inputLine; - while ( (inputLine = inFile.readLine()) != null) { - sw.write(inputLine); - sw.write("\n"); - } - sw.close(); - - evaluator.voidEval("eval(parse(text=\""+sw.toString()+"\"))"); - - } catch (Exception exception) { - logger.error("Could not load CDK-SJava R scripts: ", scriptLocator); - logger.debug(exception); - } - - } - } - - - /** - * Initializes SJava and R with the specified command line arguments (see R documentation). 
- * - * This constructor will initialize the R session via a temporary file - * - * @param args A String[] containing the command line parameters as elements - */ - public RModel(String[] args) { - String initRFromString = System.getProperty("initRFromString"); - boolean useDisk = true; - if (initRFromString != null && initRFromString.equals("true")) { - useDisk = false; - } - - if (!doneInit) { - RModel.interp = new ROmegahatInterpreter(ROmegahatInterpreter.fixArgs(args), false); - RModel.revaluator = new REvaluator(); - - if (useDisk) { - loadRFunctions(RModel.revaluator); - logger.info("Initializing from disk"); - } else { - loadRFunctionsAsStrings(RModel.revaluator); - logger.info("Initializing from strings"); - } - - doneInit = true; - logger.info("SJava initialized"); - } else { - logger.info("SJava already initialized"); - } - } - - /** - * Initializes SJava with the --vanilla, -q, --slave flags. - * - * This constructor will initialize the R session via a temporary file - */ - public RModel() { - String[] args = {"--vanilla","-q", "--slave"}; - String initRFromString = System.getProperty("initRFromString"); - boolean useDisk = true; - if (initRFromString != null && initRFromString.equals("true")) { - useDisk = false; - } - - if (!doneInit) { - RModel.interp = new ROmegahatInterpreter(ROmegahatInterpreter.fixArgs(args), false); - RModel.revaluator = new REvaluator(); - - if (useDisk) { - loadRFunctions(RModel.revaluator); - logger.info("Initializing from disk"); - } else { - loadRFunctionsAsStrings(RModel.revaluator); - logger.info("Initializing from strings"); - } - - doneInit = true; - logger.info("SJava initialized"); - } else { - logger.info("SJava already initialized"); - } - } - - - /** - * Saves a R model to disk. - * - * This function can be used to save models built in a session, and then loaded - * again in a different session. - * - * @param modelname The name of the model as returned by \code{getModelName}. 
- * @param filename The file to which the model should be saved - * @throws QSARModelException if the R session cannot save the model - * @see #loadModel - */ - public static void saveModel(String modelname, String filename) throws QSARModelException { - if (filename.equals("") || filename == null) { - filename = modelname+".rda"; - } - //Boolean result = null; - try { - revaluator.call("saveModel", - new Object[] { (Object)modelname, (Object)filename }); - } catch (Exception e) { - System.out.println("Caught the exception"); - throw new QSARModelException("Error saving model"); - } - } - - - /** - * Get the name of the model. - * - * This function returns the name of the variable that the actual - * model is stored in within the R session. In general this is - * not used for the end user. In the future this might be changed - * to a private method. - * - * @return A String containing the name of the R variable - * @see #setModelName - */ - public String getModelName() { - return(this.modelName); - } - - /** - * Set the name of the model. - * - * Ordinarily the user does not need to call this function as each model - * is assigned a unique ID at instantiation. However, if a user saves a model - * to disk and then later loads it, the loaded - * model may overwrite a model in that session. In this situation, this method - * can be used to assign a name to the model. - * - * @param newName The name of the model - * @see #getModelName - * @see #saveModel - * @see #loadModel - * - */ - public void setModelName(String newName) { - if (this.modelName != null && this.modelName.equals(newName)) return; - String oldName = this.modelName; - if (oldName != null) { - revaluator.voidEval("if ('"+oldName+"' %in% ls()) {"+newName+"<-"+oldName+";rm("+oldName+")}"); - } - this.modelName = newName; - } - - abstract public void build() throws QSARModelException; - abstract public void predict() throws QSARModelException; - - /** - * Specifies the parameters value. 
- * - * @param key A String representing the name of the parameter (corresponding to the - * name described in the R manpages) - * @param obj The value of the parameter - * @throws QSARModelException if the parameters are of the wrong type for the given modeling function - * - */ - abstract public void setParameters(String key, Object obj) throws QSARModelException; - - /** - * Abstract method to handle loading R models. - * - * This method can be used to load a previously saved R model object. Since - * the user can save any arbitrary R object, checks must be made that the - * object being returned is an instance of one of the current modeling classes. - *

- * This is best achieved by forcing each modeling class to write its own loader. - * - * @param fileName The file containing the R object to load - * @throws QSARModelException if the R session could not load the object or if the loaded model - * does not correspond to the class that it was loaded from - * @see #saveModel - */ - abstract public void loadModel(String fileName) throws QSARModelException; - /** - * Abstract method to handle loading R models that were previously serialized. - * - * This method can be used to load a previously serialized R model object (usinging - * serialize()). Since - * the user can save any arbitrary R object, checks must be made that the - * object being returned is an instance of one of the current modeling classes. - * This is best achieved by forcing each modeling class to write its own loader. - *

- * In addition - * objects saved using serialize() do not have a name. As a result a name for the object must - * be specified when using this method. - * - * @param serializedModel A String containing the ASCII sreialized R object - * @param modelName The name of the model. (Within the R session, the model will be assigned to - * a variable of this name) - * @throws QSARModelException if the R session could not load the object or if the loaded model - * does not correspond to the class that it was loaded from - * @see #saveModel - */ - abstract public void loadModel(String serializedModel, String modelName) throws QSARModelException; -} - - diff --git a/src/main/org/openscience/cdk/qsar/model/R2/CNNRegressionModel.java b/src/main/org/openscience/cdk/qsar/model/R2/CNNRegressionModel.java deleted file mode 100644 index 9837085..0000000 --- a/src/main/org/openscience/cdk/qsar/model/R2/CNNRegressionModel.java +++ /dev/null @@ -1,673 +0,0 @@ -/* $RCSfile$ - * $Author$ - * $Date$ - * $Revision$ - * - * Copyright (C) 2004-2008 Rajarshi Guha - * - * Contact: cdk-devel@lists.sourceforge.net - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
- */ - -package org.openscience.cdk.qsar.model.R2; - -import java.io.File; -import java.util.HashMap; - -import org.openscience.cdk.qsar.model.QSARModelException; -import org.openscience.cdk.tools.ILoggingTool; -import org.openscience.cdk.tools.LoggingToolFactory; -import org.rosuda.JRI.RBool; -import org.rosuda.JRI.REXP; -import org.rosuda.JRI.RList; - -/** - * A modeling class that provides a computational neural network regression model. - *

- * When instantiated this class ensures that the R/Java interface has been - * initialized. The response and independent variables can be specified at construction - * time or via the setParameters method. - * The actual fitting procedure is carried out by build after which - * the model may be used to make predictions, via predict. An example of the use - * of this class is shown below: - *

- * double[][] x;
- * double[] y;
- * Double[] wts;
- * Double[][] newx;
- * ...
- * try {
- *     CNNRegressionModel cnnrm = new CNNRegressionModel(x,y,3);
- *     cnnrm.setParameters("Wts",wts);
- *     cnnrm.build();
- * 

- * double fitValue = cnnrm.getFitValue(); - *

- * cnnrm.setParameters("newdata", newx); - * cnnrm.setParameters("type", "raw"); - * cnnrm.predict(); - *

- * double[][] preds = cnnrm.getPredictPredicted(); - * } catch (QSARModelException qme) { - * System.out.println(qme.toString()); - * } - *

- * The above code snippet builds a 3-3-1 CNN model. - * Multiple output neurons are easily - * specified by supplying a matrix for y (i.e., double[][]) with the output variables - * in the columns. - *

- * Nearly all the arguments to - * nnet() are - * supported via the setParameters method. The table below lists the names of the arguments, - * the expected type of the argument and the default setting for the arguments supported by this wrapper class. - *

- * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - *
NameJava TypeDefaultNotes
xDouble[][]NoneThis must be set by the caller via the constructors or via setParameters
yDouble[][]NoneThis must be set by the caller via the constructors or via setParameters
weightsDouble[]rep(1,nobs)The default case weights is a vector of 1's equal in length to the number of observations, nobs
sizeIntegerNoneThis must be set by the caller via the constructors or via setParameters
subsetInteger[]1:nobsThis is supposed to be an index vector specifying which observations are to be used in building the model. The default indicates that all should be used
WtsDouble[]runif(1,nwt)The initial weight vector is set to a random vector of length equal to the number of weights if not set by the user
maskBoolean[]rep(TRUE,nwt)All weights are to be optimized unless otherwise specified by the user
linoutBooleanTRUESince this class performs regression this need not be changed
entropyBooleanFALSE
softmaxBooleanFALSE
censoredBooleanFALSE
skipBooleanFALSE
rangDouble0.7
decayDouble0.0
maxitInteger100
HessBooleanFALSE
traceBooleanTRUE
MaxNWtsInteger1000
abstolDouble1.0e-4
reltolDouble1.0e-8
- *
- *

- * The values returned correspond to the various - * values returned by the nnet and - * predict.nnet functions - * in R - *

- * See {@link org.openscience.cdk.qsar.model.R.RModel} for details regarding the R and Java environment. - * - * @author Rajarshi Guha - * @cdk.require r-project - * @cdk.module qsar - * @cdk.githash - * @cdk.keyword neural network - * @cdk.keyword R - */ - -public class CNNRegressionModel extends RModel { - public static int globalID = 0; - private int noutput = 0; - private int nvar = 0; - - private double[][] modelPredict = null; - - private static ILoggingTool logger = - LoggingToolFactory.createLoggingTool(CNNRegressionModel.class); - - private void setDefaults() { - // lets set the default values of the arguments that are specified - // to have default values in ?nnet - - // these params are vectors that depend on user defined stuff - // so as a default we set them to FALSE so R can check if these - // were not set - this.params.put("subset", Boolean.FALSE); - this.params.put("mask", Boolean.FALSE); - this.params.put("Wts", Boolean.FALSE); - this.params.put("weights", Boolean.FALSE); - - this.params.put("linout", Boolean.TRUE); // we want only regression - this.params.put("entropy", Boolean.FALSE); - this.params.put("softmax", Boolean.FALSE); - this.params.put("censored", Boolean.FALSE); - this.params.put("skip", Boolean.FALSE); - this.params.put("rang", new Double(0.7)); - this.params.put("decay", new Double(0.0)); - this.params.put("maxit", Integer.valueOf(100)); - this.params.put("Hess", Boolean.FALSE); - this.params.put("trace", Boolean.FALSE); // no need to see output - this.params.put("MaxNWts", Integer.valueOf(1000)); - this.params.put("abstol", new Double(1.0e-4)); - this.params.put("reltol", new Double(1.0e-8)); - } - - /** - * Constructs a CNNRegressionModel object. - *

- * This constructor allows the user to simply set up an instance of a CNN - * regression modeling class. This constructor simply sets the name for this - * instance. It is expected all the relevent parameters for modeling will be - * set at a later point. - *

- * Other parameters that are required to be set should be done via - * calls to setParameters. A number of parameters are set to the - * defaults as specified in the manpage for - * nnet. - */ - public CNNRegressionModel() throws QSARModelException { - super(); - params = new HashMap(); - int currentID = CNNRegressionModel.globalID; - CNNRegressionModel.globalID++; - setModelName("cdkCNNModel" + currentID); - setDefaults(); - - - } - - - /** - * Constructs a CNNRegressionModel object. - *

- * This constructor allows the user to specify the dependent and - * independent variables along with the number of hidden layer neurons. - * This constructor is suitable for cases when there is a single output - * neuron. If the number of rows of the design matrix is not equal to - * the number of observations in y an exception will be thrown. - *

- * Other parameters that are required to be set should be done via - * calls to setParameters. A number of parameters are set to the - * defaults as specified in the manpage for - * nnet. - * - * @param x An array of independent variables. Observations should be in - * the rows and variables in the columns. - * @param y An array (single column) of observed values - * @param size The number of hidden layer neurons - * @throws QSARModelException if the number of observations in x and y do not match - */ - public CNNRegressionModel(double[][] x, double[] y, int size) throws QSARModelException { - super(); - params = new HashMap(); - int currentID = CNNRegressionModel.globalID; - CNNRegressionModel.globalID++; - setModelName("cdkCNNModel" + currentID); - - int nrow = y.length; - int ncol = x[0].length; - - if (nrow != x.length) { - throw new QSARModelException("The number of values for the dependent variable does not match the number of rows of the design matrix"); - } - - nvar = ncol; - noutput = 1; - - Double[][] xx = new Double[nrow][ncol]; - Double[][] yy = new Double[nrow][1]; - - for (int i = 0; i < nrow; i++) { - yy[i][0] = new Double(y[i]); - for (int j = 0; j < ncol; j++) { - xx[i][j] = new Double(x[i][j]); - } - } - params.put("x", xx); - params.put("y", yy); - params.put("size", Integer.valueOf(size)); - setDefaults(); - } - - /** - * Constructs a CNNRegressionModel object. - *

- * This constructor allows the user to specify the dependent and - * independent variables along with the number of hidden layer neurons. - * This constructor is suitable for cases when there are multiple output - * neuron. If the number of rows of the design matrix is not equal to - * the number of observations in y an exception will be thrown. - *

- * Other parameters that are required to be set should be done via - * calls to setParameters. A number of parameters are set to the - * defaults as specified in the manpage for - * nnet. - * - * @param x An array of independent variables. Observations should be in - * the rows and variables in the columns. - * @param y An array (multiple columns) of observed values - * @param size The number of hidden layer neurons - * @throws QSARModelException if the number of observations in x and y do not match - */ - public CNNRegressionModel(double[][] x, double[][] y, int size) throws QSARModelException { - super(); - params = new HashMap(); - int currentID = CNNRegressionModel.globalID; - CNNRegressionModel.globalID++; - setModelName("cdkCNNModel" + currentID); - - int nrow = y.length; - int ncol = x[0].length; - - if (nrow != x.length) { - throw new QSARModelException("The number of values for the dependent variable does not match the number of rows of the design matrix"); - } - - nvar = ncol; - noutput = y[0].length; - - Double[][] xx = new Double[nrow][ncol]; - Double[][] yy = new Double[nrow][noutput]; - - for (int i = 0; i < nrow; i++) { - for (int j = 0; j < ncol; j++) { - xx[i][j] = new Double(x[i][j]); - } - } - for (int i = 0; i < nrow; i++) { - for (int j = 0; j < noutput; j++) { - yy[i][j] = new Double(y[i][j]); - } - } - params.put("x", xx); - params.put("y", yy); - params.put("size", Integer.valueOf(size)); - setDefaults(); - } - - - /** - * Sets parameters required for building a CNN model or using one for prediction. - *

- * This function allows the caller to set the various parameters available - * for the - * nnet - * and - * predict.nnet - * R routines. See the R help pages for the details of the available - * parameters. - * - * @param key A String containing the name of the parameter as described in the - * R help pages - * @param obj An Object containing the value of the parameter - * @throws QSARModelException if the type of the supplied value does not match the - * expected type - */ - public void setParameters(String key, Object obj) throws QSARModelException { - // since we know the possible values of key we should check the coresponding - // objects and throw errors if required. Note that this checking can't really check - // for values (such as number of variables in the X matrix to build the model and the - // X matrix to make new predictions) - these should be checked in functions that will - // use these parameters. The main checking done here is for the class of obj and - // some cases where the value of obj is not dependent on what is set before it - - if (key.equals("y")) { - if (!(obj instanceof Double[][])) { - throw new QSARModelException("The class of the 'y' object must be Double[][]"); - } else { - noutput = ((Double[][]) obj)[0].length; - } - } - if (key.equals("x")) { - if (!(obj instanceof Double[][])) { - throw new QSARModelException("The class of the 'x' object must be Double[][]"); - } else { - nvar = ((Double[][]) obj)[0].length; - } - } - if (key.equals("weights")) { - if (!(obj instanceof Double[])) { - throw new QSARModelException("The class of the 'weights' object must be Double[]"); - } - } - if (key.equals("size")) { - if (!(obj instanceof Integer)) { - throw new QSARModelException("The class of the 'size' object must be Integer"); - } - } - if (key.equals("subset")) { - if (!(obj instanceof Integer[])) { - throw new QSARModelException("The class of the 'size' object must be Integer[]"); - } - } - if (key.equals("Wts")) { - if (!(obj instanceof 
Double[])) { - throw new QSARModelException("The class of the 'Wts' object must be Double[]"); - } - } - if (key.equals("mask")) { - if (!(obj instanceof Boolean[])) { - throw new QSARModelException("The class of the 'mask' object must be Boolean[]"); - } - } - if (key.equals("linout") || - key.equals("entropy") || - key.equals("softmax") || - key.equals("censored") || - key.equals("skip") || - key.equals("Hess") || - key.equals("trace")) { - if (!(obj instanceof Boolean)) { - throw new QSARModelException("The class of the 'trace|skip|Hess|linout|entropy|softmax|censored' object must be Boolean"); - } - } - if (key.equals("rang") || - key.equals("decay") || - key.equals("abstol") || - key.equals("reltol")) { - if (!(obj instanceof Double)) { - throw new QSARModelException("The class of the 'reltol|abstol|decay|rang' object must be Double"); - } - } - if (key.equals("maxit") || - key.equals("MaxNWts")) { - if (!(obj instanceof Integer)) { - throw new QSARModelException("The class of the 'maxit|MaxNWts' object must be Integer"); - } - } - - if (key.equals("newdata")) { - if (!(obj instanceof Double[][])) { - throw new QSARModelException("The class of the 'newdata' object must be Double[][]"); - } - } - params.put(key, obj); - } - - /** - * Fits a CNN regression model. - *

- * This method calls the R function to fit a CNN regression model - * to the specified dependent and independent variables. If an error - * occurs in the R session, an exception is thrown. - *

- * Note that, this method should be called prior to calling the various get - * methods to obtain information regarding the fit. - */ - public void build() throws QSARModelException { - Double[][] x; - Double[][] y; - x = (Double[][]) this.params.get("x"); - y = (Double[][]) this.params.get("y"); - if (x.length != y.length) - throw new QSARModelException("Number of observations does not match number of rows in the design matrix"); - if (nvar == 0) nvar = x[0].length; - - // lets build the model - String paramVarName = loadParametersIntoRSession(); - String cmd = "buildCNN(\"" + getModelName() + "\", " + paramVarName + ")"; - REXP ret = rengine.eval(cmd); - if (ret == null) { - CNNRegressionModel.logger.debug("Error in buildCNN"); - throw new QSARModelException("Error in buildCNN"); - } - - // remove the parameter list - rengine.eval("rm(" + paramVarName + ")"); - - // save the model object on the Java side - modelObject = ret.asList(); - } - - /** - * Uses a fitted model to predict the response for new observations. - *

- * This function uses a previously fitted model to obtain predicted values - * for a new set of observations. If the model has not been fitted prior to this - * call an exception will be thrown. Use setParameters - * to set the values of the independent variable for the new observations and the - * interval type. - * - * @throws org.openscience.cdk.qsar.model.QSARModelException - * if the model has not been built prior to a call - * to this method. Also if the number of independent variables specified for prediction - * is not the same as specified during model building - */ - public void predict() throws QSARModelException { - - if (modelObject == null) - throw new QSARModelException("Before calling predict() you must fit the model using build()"); - - Double[][] newx = (Double[][]) params.get("newdata"); - if (newx[0].length != nvar) { - throw new QSARModelException("Number of independent variables used for prediction must match those used for fitting"); - } - - String pn = loadParametersIntoRSession(); - REXP ret = rengine.eval("predicCNN(\"" + getModelName() + "\", " + pn + ")"); - if (ret == null) throw new QSARModelException("Error occured in prediction"); - - // remove the parameter list - rengine.eval("rm(" + pn + ")"); - - modelPredict = ret.asDoubleMatrix(); - } - - /** - * Get the matrix of predicted values obtained from predict.nnet. - * - * @return The result of the prediction. - */ - public double[][] getPredictions() { - return modelPredict; - } - - /** - * Returns an RList object summarizing the nnet regression model. - *

- * The return object can be queried via the RList methods to extract the - * required components. - * - * @return A summary for the nnet regression model - * @throws org.openscience.cdk.qsar.model.QSARModelException - * if the model has not been built prior to a call - * to this method - */ - public RList summary() throws QSARModelException { - if (modelObject == null) - throw new QSARModelException("Before calling summary() you must fit the model using build()"); - - REXP ret = rengine.eval("summary(" + getModelName() + ")"); - if (ret == null) { - logger.debug("Error in summary()"); - throw new QSARModelException("Error in summary()"); - } - return ret.asList(); - } - - - /** - * Loads a 'nnet' object from disk in to the current session. - * - * @param fileName The disk file containing the model - * @throws org.openscience.cdk.qsar.model.QSARModelException - * if the model being loaded is not a 'nnet' model - * object or the file does not exist - */ - public void loadModel(String fileName) throws QSARModelException { - File f = new File(fileName); - if (!f.exists()) throw new QSARModelException(fileName + " does not exist"); - - rengine.assign("tmpFileName", fileName); - REXP ret = rengine.eval("loadModel(tmpFileName)"); - if (ret == null) throw new QSARModelException("Model could not be loaded"); - - String name = ret.asList().at("name").asString(); - if (!isOfClass(name, "nnet")) { - removeObject(name); - throw new QSARModelException("Loaded object was not of class \'nnet\'"); - } - - modelObject = ret.asList().at("model").asList(); - setModelName(name); - nvar = (int) getN()[0]; - noutput = (int) getN()[2]; - } - - /** - * Loads a 'nnet' object from a serialized string into the current session. 
- * - * @param serializedModel A String containing the serialized version of the model - * @param modelName A String indicating the name of the model in the R session - * @throws org.openscience.cdk.qsar.model.QSARModelException - * if the model being loaded is not a 'nnet' model - * object - */ - public void loadModel(String serializedModel, String modelName) throws QSARModelException { - rengine.assign("tmpSerializedModel", serializedModel); - rengine.assign("tmpModelName", modelName); - REXP ret = rengine.eval("unserializeModel(tmpSerializedModel, tmpModelName)"); - - if (ret == null) throw new QSARModelException("Model could not be unserialized"); - - String name = ret.asList().at("name").asString(); - if (!isOfClass(name, "nnet")) { - removeObject(name); - throw new QSARModelException("Loaded object was not of class \'nnet\'"); - } - - modelObject = ret.asList().at("model").asList(); - setModelName(name); - nvar = (int) getN()[0]; - noutput = (int) getN()[2]; - } - -// Autogenerated code: assumes that 'modelObject' is -// a RList object - - - /** - * Gets the censored field of an 'nnet' object. - * - * @return The value of the censored field - */ - public RBool getCensored() { - return modelObject.at("censored").asBool(); - } - - /** - * Gets the conn field of an 'nnet' object. - * - * @return The value of the conn field - */ - public double[] getConn() { - return modelObject.at("conn").asDoubleArray(); - } - - /** - * Gets the decay field of an 'nnet' object. - * - * @return The value of the decay field - */ - public double getDecay() { - return modelObject.at("decay").asDouble(); - } - - /** - * Gets the entropy field of an 'nnet' object. - * - * @return The value of the entropy field - */ - public RBool getEntropy() { - return modelObject.at("entropy").asBool(); - } - - /** - * Gets the fitted.values field of an 'nnet' object. 
- * - * @return The value of the fitted.values field - */ - public double[][] getFittedValues() { - return modelObject.at("fitted.values").asDoubleMatrix(); - } - - /** - * Gets the n field of an 'nnet' object. - * - * @return The value of the n field - */ - public double[] getN() { - return modelObject.at("n").asDoubleArray(); - } - - /** - * Gets the nconn field of an 'nnet' object. - * - * @return The value of the nconn field - */ - public double[] getNconn() { - return modelObject.at("nconn").asDoubleArray(); - } - - /** - * Gets the nsunits field of an 'nnet' object. - * - * @return The value of the nsunits field - */ - public double getNsunits() { - return modelObject.at("nsunits").asDouble(); - } - - /** - * Gets the nunits field of an 'nnet' object. - * - * @return The value of the nunits field - */ - public double getNunits() { - return modelObject.at("nunits").asDouble(); - } - - /** - * Gets the residuals field of an 'nnet' object. - * - * @return The value of the residuals field - */ - public double[][] getResiduals() { - return modelObject.at("residuals").asDoubleMatrix(); - } - - /** - * Gets the softmax field of an 'nnet' object. - * - * @return The value of the softmax field - */ - public RBool getSoftmax() { - return modelObject.at("softmax").asBool(); - } - - /** - * Gets the value field of an 'nnet' object. - * - * @return The value of the value field - */ - public double getValue() { - return modelObject.at("value").asDouble(); - } - - /** - * Gets the wts field of an 'nnet' object. 
- * - * @return The value of the wts field - */ - public double[] getWts() { - return modelObject.at("wts").asDoubleArray(); - } - - -} diff --git a/src/main/org/openscience/cdk/qsar/model/R2/LinearRegressionModel.java b/src/main/org/openscience/cdk/qsar/model/R2/LinearRegressionModel.java deleted file mode 100644 index eb98d52..0000000 --- a/src/main/org/openscience/cdk/qsar/model/R2/LinearRegressionModel.java +++ /dev/null @@ -1,570 +0,0 @@ -/* $RCSfile$ - * $Author$ - * $Date$ - * $Revision$ - * - * Copyright (C) 2004-2008 Rajarshi Guha - * - * Contact: cdk-devel@lists.sourceforge.net - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - */ -package org.openscience.cdk.qsar.model.R2; - -import java.io.File; -import java.util.HashMap; - -import org.openscience.cdk.qsar.model.QSARModelException; -import org.openscience.cdk.tools.ILoggingTool; -import org.openscience.cdk.tools.LoggingToolFactory; -import org.rosuda.JRI.REXP; -import org.rosuda.JRI.RList; - -/** - * A modeling class that provides a linear least squares regression model. - *

- * When instantiated this class ensures that the R/Java interface has been - * initialized. The response and independent variables can be specified at construction - * time or via the setParameters method. The actual fitting procedure is carried out by build after which - * the model may be used to make predictions. - *

- * Currently, the design of the class is quite sparse as it does not allow subsetting, - * variable names, setting of contrasts and so on. - * It is also assumed that the values of all the variables are defined (i.e., not such that - * they are NA - * in an R session). - * The use of - * this class is shown in the following code snippet - *

- * double[][] x;
- * double[] y;
- * try {
- *     LinearRegressionModel lrm = new LinearRegressionModel(x,y);
- *     lrm.build();
- *     lrm.setParameters("newdata", newx);
- *     lrm.setParameters("interval", "confidence");
- *     lrm.predict();
- * } catch (QSARModelException qme) {
- *     System.out.println(qme.toString());
- * }
- * double[] fitted = lrm.getFittedValues()
- * double[] predicted = lrm.getModelPredict().asList.at("fit").asDoubleArray();
- * 
- * Note that when making predictions, the new X matrix and interval type can be set by calls - * to setParameters(). In general, the arguments for lm() and predict.lm() can be set via - * calls to setParameters(). The following table lists the parameters that can be set and their - * expected types. More detailed informationis available in the R documentation. - *
- * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - *
NameJava TypeNotes
xDouble[][]
yDouble[]Length should be equal to the rows of x
weightsDouble[]Length should be equal to rows of x
newdataDouble[][]Number of columns should be the same as in x
intervalStringCan be 'confidence' or 'predicton'
- *
- * In general the getFit* methods provide access to results from the fit - * and getPredict* methods provide access to results from the prediction (i.e., - * prediction using the model on new data). The values returned correspond to the various - * values returned by the lm - * and predict.lm - * functions in R. - *

- * See {@link RModel} for details regarding the R and rJava environment. - * - * @author Rajarshi Guha - * @cdk.require r-project - * @cdk.module qsar - * @cdk.githash - * @cdk.keyword linear regression - * @cdk.keyword R - */ - -public class LinearRegressionModel extends org.openscience.cdk.qsar.model.R2.RModel { - - private static int globalID = 0; - private int nvar = 0; - - private RList modelPredict = null; - - private static ILoggingTool logger = - LoggingToolFactory.createLoggingTool(LinearRegressionModel.class); - - /** - * Constructs a LinearRegressionModel object. - *

- * The constructor simply instantiates the model ID. Dependent and independent variables - * should be set via setParameters(). - *

- * An important feature of the current implementation is that all the - * independent variables are used during the fit. Furthermore no subsetting is possible. - * As a result when setting these via setParameters() the caller should specify only - * the variables and observations that will be used for the fit. - */ - public LinearRegressionModel() throws QSARModelException { - super(); - params = new HashMap(); - int currentID = LinearRegressionModel.globalID; - org.openscience.cdk.qsar.model.R2.LinearRegressionModel.globalID++; - this.setModelName("cdkLMModel" + currentID); - } - - /** - * Constructs a LinearRegressionModel object. - *

- * The constructor allows the user to specify the - * dependent and independent variables. The length of the dependent variable - * array should equal the number of rows of the independent variable matrix. If this - * is not the case an exception will be thrown. - *

- * An important feature of the current implementation is that all the - * independent variables are used during the fit. Furthermore no subsetting is possible. - * As a result when creating an instance of this object the caller should specify only - * the variables and observations that will be used for the fit. - * - * @param xx An array of independent variables. The observations should be in the rows - * and the variables should be in the columns - * @param yy an array containing the dependent variable - * @throws org.openscience.cdk.qsar.model.QSARModelException - * if the number of observations in x and y do not match - */ - public LinearRegressionModel(double[][] xx, double[] yy) throws QSARModelException { - super(); - - params = new HashMap(); - int currentID = LinearRegressionModel.globalID; - LinearRegressionModel.globalID++; - this.setModelName("cdkLMModel" + currentID); - - int nrow = yy.length; - this.nvar = xx[0].length; - - if (nrow != xx.length) { - throw new QSARModelException("The number of values for the dependent variable does not match the number of rows of the design matrix"); - } - - Double[][] x = new Double[nrow][this.nvar]; - Double[] y = new Double[nrow]; - Double[] weights = new Double[nrow]; - - for (int i = 0; i < nrow; i++) { - y[i] = new Double(yy[i]); - weights[i] = new Double(1.0); - } - for (int i = 0; i < nrow; i++) { - for (int j = 0; j < this.nvar; j++) - x[i][j] = new Double(xx[i][j]); - } - - params.put("x", x); - params.put("y", y); - params.put("weights", weights); - } - - - /** - * Constructs a LinearRegressionModel object. - *

- * The constructor allows the user to specify the - * dependent and independent variables as well as weightings for - * the observations. - *

- * The length of the dependent variable - * array should equal the number of rows of the independent variable matrix. If this - * is not the case an exception will be thrown. - *

- * An important feature of the current implementation is that all the - * independent variables are used during the fit. Furthermore no subsetting is possible. - * As a result when creating an instance of this object the caller should specify only - * the variables and observations that will be used for the fit. - * - * @param xx An array of independent variables. The observations should be in the rows - * and the variables should be in the columns - * @param yy an array containing the dependent variable - * @param weights Specifies the weights for each observation. Unit weights are equivilant - * to OLS - * @throws org.openscience.cdk.qsar.model.QSARModelException - * if the number of observations in x and y do not match - */ - public LinearRegressionModel(double[][] xx, double[] yy, double[] weights) throws QSARModelException { - super(); - - params = new HashMap(); - - int currentID = LinearRegressionModel.globalID; - org.openscience.cdk.qsar.model.R2.LinearRegressionModel.globalID++; - this.setModelName("cdkLMModel" + currentID); - - int nrow = yy.length; - this.nvar = xx[0].length; - - if (nrow != xx.length) { - throw new QSARModelException("The number of values for the dependent variable does not match the number of rows of the design matrix"); - } - if (nrow != weights.length) { - throw new QSARModelException("The length of the weight vector does not match the number of rows of the design matrix"); - } - - Double[][] x = new Double[nrow][this.nvar]; - Double[] y = new Double[nrow]; - Double[] wts = new Double[nrow]; - - for (int i = 0; i < nrow; i++) { - y[i] = new Double(yy[i]); - wts[i] = new Double(weights[i]); - } - for (int i = 0; i < nrow; i++) { - for (int j = 0; j < this.nvar; j++) - x[i][j] = new Double(xx[i][j]); - } - params.put("x", x); - params.put("y", y); - params.put("weights", wts); - } - - /** - * Fits a linear regression model. - *

- * This method calls the R function to fit a linear regression model - * to the specified dependent and independent variables. If an error - * occurs in the R session, an exception is thrown. - *

- * Note that, this method should be called prior to calling the various get - * methods to obtain information regarding the fit. - */ - public void build() throws QSARModelException { - // lets do some checks in case stuff was set via setParameters() - Double[][] x; - Double[] y, weights; - x = (Double[][]) this.params.get("x"); - y = (Double[]) this.params.get("y"); - weights = (Double[]) this.params.get("weights"); - if (this.nvar == 0) this.nvar = x[0].length; - else { - if (y.length != x.length) { - throw new QSARModelException("Number of observations does no match number of rows in the design matrix"); - } - if (weights.length != y.length) { - throw new QSARModelException("The weight vector must have the same length as the number of observations"); - } - } - - // lets build the model - String paramVarName = loadParametersIntoRSession(); - String cmd = "buildLM(\"" + getModelName() + "\", " + paramVarName + ")"; - REXP ret = rengine.eval(cmd); - if (ret == null) { - logger.debug("Error in buildLM"); - throw new QSARModelException("Error in buildLM"); - } - - // remove the parameter list - rengine.eval("rm(" + paramVarName + ")"); - - // save the model object on the Java side - modelObject = ret.asList(); - } - - - /** - * Sets parameters required for building a linear model or using one for prediction. - *

- * This function allows the caller to set the various parameters available - * for the lm() and predict.lm() R routines. See the R help pages for the details of the available - * parameters. - * - * @param key A String containing the name of the parameter as described in the - * R help pages - * @param obj An Object containing the value of the parameter - * @throws org.openscience.cdk.qsar.model.QSARModelException - * if the type of the supplied value does not match the - * expected type - */ - public void setParameters(String key, Object obj) throws QSARModelException { - // since we know the possible values of key we should check the coresponding - // objects and throw errors if required. Note that this checking can't really check - // for values (such as number of variables in the X matrix to build the model and the - // X matrix to make new predictions) - these should be checked in functions that will - // use these parameters. The main checking done here is for the class of obj and - // some cases where the value of obj is not dependent on what is set before it - - if (key.equals("y")) { - if (!(obj instanceof Double[])) { - throw new QSARModelException("The class of the 'y' object must be Double[]"); - } - } - if (key.equals("x")) { - if (!(obj instanceof Double[][])) { - throw new QSARModelException("The class of the 'x' object must be Double[][]"); - } - } - if (key.equals("weights")) { - if (!(obj instanceof Double[])) { - throw new QSARModelException("The class of the 'weights' object must be Double[]"); - } - } - if (key.equals("interval")) { - if (!(obj instanceof String)) { - throw new QSARModelException("The class of the 'interval' object must be String"); - } - if (!(obj.equals("confidence") || obj.equals("prediction"))) { - throw new QSARModelException("The type of interval must be: prediction or confidence"); - } - } - if (key.equals("newdata")) { - if (!(obj instanceof Double[][])) { - throw new QSARModelException("The class of the 'newdata' 
object must be Double[][]"); - } - } - this.params.put(key, obj); - } - - - /** - * Uses a fitted model to predict the response for new observations. - *

- * This function uses a previously fitted model to obtain predicted values - * for a new set of observations. If the model has not been fitted prior to this - * call an exception will be thrown. Use setParameters - * to set the values of the independent variable for the new observations and the - * interval type. - * - * @throws org.openscience.cdk.qsar.model.QSARModelException - * if the model has not been built prior to a call - * to this method. Also if the number of independent variables specified for prediction - * is not the same as specified during model building - */ - public void predict() throws QSARModelException { - - if (modelObject == null) - throw new QSARModelException("Before calling predict() you must fit the model using build()"); - - Double[][] newx = (Double[][]) params.get("newdata"); - if (newx[0].length != nvar) { - throw new QSARModelException("Number of independent variables used for prediction must match those used for fitting"); - } - - String pn = loadParametersIntoRSession(); - REXP ret = rengine.eval("predictLM(\"" + getModelName() + "\", " + pn + ")"); - if (ret == null) throw new QSARModelException("Error occured in prediction"); - - // remove the parameter list - rengine.eval("rm(" + pn + ")"); - - modelPredict = ret.asList(); - } - - /** - * Get the R object obtained from predict.lm(). - * - * @return The result of the prediction. Contains a number of fields corresponding to - * predicted values, SE and other items depending on the parameters that we set. - * Note that the call to predict.lm() is performde with se.fit = TRUE - */ - public RList getModelPredict() { - return modelPredict; - } - - /** - * Returns an RList object summarizing the linear regression model. - *

- * The return object can be queried via the RList methods to extract the - * required components. - * - * @return A summary for the linear regression model - * @throws org.openscience.cdk.qsar.model.QSARModelException - * if the model has not been built prior to a call - * to this method - */ - public RList summary() throws QSARModelException { - if (modelObject == null) - throw new QSARModelException("Before calling summary() you must fit the model using build()"); - - REXP ret = rengine.eval("summary(" + getModelName() + ")"); - if (ret == null) { - logger.debug("Error in summary()"); - throw new QSARModelException("Error in summary()"); - } - return ret.asList(); - } - - - /** - * Loads an LinearRegressionModel object from disk in to the current session. - * - * @param fileName The disk file containing the model - * @throws org.openscience.cdk.qsar.model.QSARModelException - * if the model being loaded is not a linear regression model - * object or the file does not exist - */ - public void loadModel(String fileName) throws QSARModelException { - File f = new File(fileName); - if (!f.exists()) throw new QSARModelException(fileName + " does not exist"); - - rengine.assign("tmpFileName", fileName); - REXP ret = rengine.eval("loadModel(tmpFileName)"); - if (ret == null) throw new QSARModelException("Model could not be loaded"); - - String name = ret.asList().at("name").asString(); - if (!isOfClass(name, "lm")) { - removeObject(name); - throw new QSARModelException("Loaded object was not of class \'lm\'"); - } - - modelObject = ret.asList().at("model").asList(); - setModelName(name); - nvar = getCoefficients().length - 1; // since the intercept is also returned - } - - /** - * Loads an LinearRegressionModel object from a serialized string into the current session. 
- * - * @param serializedModel A String containing the serialized version of the model - * @param modelName A String indicating the name of the model in the R session - * @throws org.openscience.cdk.qsar.model.QSARModelException - * if the model being loaded is not a linear regression model - * object - */ - public void loadModel(String serializedModel, String modelName) throws QSARModelException { - rengine.assign("tmpSerializedModel", serializedModel); - rengine.assign("tmpModelName", modelName); - REXP ret = rengine.eval("unserializeModel(tmpSerializedModel, tmpModelName)"); - - if (ret == null) throw new QSARModelException("Model could not be unserialized"); - - String name = ret.asList().at("name").asString(); - if (!isOfClass(name, "lm")) { - removeObject(name); - throw new QSARModelException("Loaded object was not of class \'lm\'"); - } - - modelObject = ret.asList().at("model").asList(); - setModelName(name); - nvar = getCoefficients().length - 1; // as the intercept is also returned - } - -// Autogenerated code: assumes that 'modelObject' is -// a RList object - - - /** - * Gets the assign field of an 'lm' object. - * - * @return The value of the assign field - */ - public int[] getAssign() { - return modelObject.at("assign").asIntArray(); - } - - /** - * Gets the coefficients field of an 'lm' object. - * - * @return The value of the coefficients field - */ - public double[] getCoefficients() { - return modelObject.at("coefficients").asDoubleArray(); - } - - /** - * Gets the df.residual field of an 'lm' object. - * - * @return The value of the df.residual field - */ - public int getDfResidual() { - return modelObject.at("df.residual").asInt(); - } - - /** - * Gets the effects field of an 'lm' object. - * - * @return The value of the effects field - */ - public double[] getEffects() { - return modelObject.at("effects").asDoubleArray(); - } - - /** - * Gets the fitted.values field of an 'lm' object. 
- * - * @return The value of the fitted.values field - */ - public double[] getFittedValues() { - return modelObject.at("fitted.values").asDoubleArray(); - } - - /** - * Gets the model field of an 'lm' object. - * - * @return The value of the model field - */ - public RList getModel() { - return modelObject.at("model").asList(); - } - - /** - * Gets the qr field of an 'lm' object. - * - * @return The value of the qr field - */ - public RList getQr() { - return modelObject.at("qr").asList(); - } - - /** - * Gets the rank field of an 'lm' object. - * - * @return The value of the rank field - */ - public int getRank() { - return modelObject.at("rank").asInt(); - } - - /** - * Gets the residuals field of an 'lm' object. - * - * @return The value of the residuals field - */ - public double[] getResiduals() { - return modelObject.at("residuals").asDoubleArray(); - } - - /** - * Gets the xlevels field of an 'lm' object. - * - * @return The value of the xlevels field - */ - public RList getXlevels() { - return modelObject.at("xlevels").asList(); - } - - -} diff --git a/src/main/org/openscience/cdk/qsar/model/R2/RModel.java b/src/main/org/openscience/cdk/qsar/model/R2/RModel.java deleted file mode 100644 index b68dd2e..0000000 --- a/src/main/org/openscience/cdk/qsar/model/R2/RModel.java +++ /dev/null @@ -1,594 +0,0 @@ -/* $RCSfile$ - * $Author$ - * $Date$ - * $Revision$ - * - * Copyright (C) 2004-2008 Rajarshi Guha - * - * Contact: cdk-devel@lists.sourceforge.net - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - */ -package org.openscience.cdk.qsar.model.R2; - -import java.awt.FileDialog; -import java.awt.Frame; -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileWriter; -import java.io.InputStreamReader; -import java.io.StringWriter; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Random; -import java.util.Set; - -import org.openscience.cdk.qsar.model.IModel; -import org.openscience.cdk.qsar.model.QSARModelException; -import org.openscience.cdk.tools.ILoggingTool; -import org.openscience.cdk.tools.LoggingToolFactory; -import org.rosuda.JRI.REXP; -import org.rosuda.JRI.RList; -import org.rosuda.JRI.RMainLoopCallbacks; -import org.rosuda.JRI.Rengine; - -/** - * Base class for the R-CDK interface. - *

- * This class provides the basis for all classes that wish to interface with - * R functions from a CDK program. - *

- * Since the R engine is multi-threaded only one instance of the R session can exist - * for a given Java process. This implies that initialization must be perfored exactly once - * within a Java process. This class ensure that this occurs. - *

- * In addition, this class loads some helper functions into the R session. The loading - * can be via a temporary file (the default) or via a String, which may be useful in - * webservice scenarios. - *

- * Requirement The class (and implementing subclasses) is dependent on the - * JRI library. This provides an interface to R - * for Java code. Though the rJava for R - * includes JRI, the code here is only dependent on JRI and does not attempt to - * go from R to Java. Hence rJava is not a requirement. To compile this code, the CDK - * includes the JRI jar file. However to run the code, the JRI native library (libjri.so - * on Linux) must be located in the users LD_LIBRARY_PATH. Also the versions of the JRI Java - * API and native library should match and this is checked for. - *

- * Currently the CDK uses JRI 0.3 (available from here) - *

- *

- * Implementation Notes - *

    - *
  • If the user requires other initializations the only way to do so at - * this point is to edit helper.R or perform the initialization by hand - *
  • An implementing class must call super() - *
  • Though this class provides a field to store the R model object as a - * RList the actual R variable will remain in the R session. This is useful - * for saving the model as a .Rda file at one point. Also by storing the model on the R - * side we do not not need to make repeated queries on the model via eval(). - *
  • Subclasses of this class are generally Java front-ends to a specific R model type - * (such as linear regression, CNN etc.). Thus each subclass should provide getter methods - * for the various components of such an object. Since this is tedious to do by hand, - * you can use the stubs.R script that comes with the CDK distribution to - * generate source code for the getter methods for the individual components of an R model - * object. Note, that the script currently ignores objects of classes 'call' - * and 'formula'. - *
- *

- * NOTE: For the R backend to work, ensure that R is correctly installed. - * Other requirements are - *

    - *
  • LD_LIBRARY_PATH should include the directory that contains libjri.so as well - * as the dierctory that contains libR.so - *
  • R_HOME should be set to the appropriate location - *
- * - * @author Rajarshi Guha - * @cdk.require r-project - * @cdk.require JRI.jar - * @cdk.module qsar - * @cdk.githash - * @cdk.keyword R - * @cdk.keyword JRI - */ -public abstract class RModel implements IModel { - private String modelName = null; - protected RList modelObject = null; - protected HashMap params = null; - - /** - * The object that performs the calls to the R engine. - */ - protected static Rengine rengine = null; - - /** - * A boolean that indicates whether the R/Java subsystem has been initialized or not. - */ - private static boolean doneInit = false; - private static ILoggingTool logger = - LoggingToolFactory.createLoggingTool(RModel.class); - - private void checkEnvironmentVariables() throws QSARModelException { - String rhome = System.getenv("R_HOME"); - String ldlibrarypath = System.getenv("LD_LIBRARY_PATH"); - if (rhome == null || rhome.length() == 0 || - ldlibrarypath == null || ldlibrarypath.length() == 0) { - throw new QSARModelException( - "Cannot find R: R_HOME and LD_LIBRARY_PATH are not set." 
- ); - } - } - - private void initRengine(String[] args, boolean useDisk) throws QSARModelException { - if (!doneInit) { - rengine = new Rengine(args, false, new TextConsole()); - if (!rengine.waitForR()) { - throw new QSARModelException("Could not load rJava"); - } else { - logger.debug("Started R"); - } - doneInit = true; - if (useDisk) { - loadRFunctions(rengine); - logger.info("Initializing from disk"); - } else { - loadRFunctionsAsStrings(rengine); - logger.info("Initializing from strings"); - } - logger.info("rJava initialized"); - } else { - logger.info("rjava already intialized"); - } - } - - private void loadRFunctions(Rengine engine) { - // File.separator is used to be system independent - // Fix me: After creating a jar file it don't work on a windwos OS - // but within eclipse it won't work on while working with '/' on windows OS - // No idea how to solve this - - // String scriptLocator = "org" + File.separator + "openscience" + - // File.separator + "cdk" + File.separator + "qsar" + File.separator + - // "model" + File.separator + "data" + File.separator + "helper.R"; - String scriptLocator = "org/openscience/cdk/qsar/model/data/helper.R"; - try { - File scriptFile = File.createTempFile("XXXXX", ".R"); - scriptFile.deleteOnExit(); - - InputStreamReader reader = new InputStreamReader( - this.getClass().getClassLoader().getResourceAsStream(scriptLocator)); - BufferedReader inFile = new BufferedReader(reader); - - FileWriter outFile = new FileWriter(scriptFile); - BufferedWriter outBuffer = new BufferedWriter(outFile); - String inputLine; - while ((inputLine = inFile.readLine()) != null) { - outBuffer.write(inputLine, 0, inputLine.length()); - outBuffer.newLine(); - } - outBuffer.close(); - inFile.close(); - outFile.close(); - // Necessary for windows user, R needs a '/' in the path of a file even on windows - String path = scriptFile.getAbsolutePath(); - path = path.replaceAll("\\\\", "/"); - engine.eval("source(\"" + path + "\")"); - - } catch 
(Exception exception) { - logger.error("Could not load helper R script for JRI: ", scriptLocator); - logger.debug(exception); - } - } - - private void loadRFunctionsAsStrings(Rengine evaluator) { - String[] scripts = { - "helper.R", - }; - String scriptPrefix = "org/openscience/cdk/qsar/model/data/"; - for (int i = 0; i < scripts.length; i++) { - - String scriptLocator = scriptPrefix + scripts[i]; - try { - InputStreamReader reader = new InputStreamReader( - this.getClass().getClassLoader().getResourceAsStream(scriptLocator)); - BufferedReader inFile = new BufferedReader(reader); - - StringWriter sw = new StringWriter(); - String inputLine; - while ((inputLine = inFile.readLine()) != null) { - sw.write(inputLine); - sw.write("\n"); - } - sw.close(); - - evaluator.eval("eval(parse(text=\"" + sw.toString() + "\"))"); - - } catch (Exception exception) { - logger.error("Could not load CDK-rJava R scripts: ", scriptLocator); - logger.debug(exception); - } - - } - } - - /** - * Initializes R with the --vanilla, --quiet, --slave flags. - *

- * This constructor will initialize the R session via a temporary file or - * from a String depending on whether the symbol initRFromString - * is specified on the command line - */ - public RModel() throws QSARModelException { - checkEnvironmentVariables(); - // check that the JRI jar and .so match - if (!Rengine.versionCheck()) { - logger.debug("API version of the JRI library does not match that of the native binary"); - throw new QSARModelException("API version of the JRI library does not match that of the native binary"); - } - - params = new HashMap(); - String[] args = {"--vanilla", "--quiet", "--slave"}; - - String initRFromString = System.getProperty("initRFromString"); - boolean useDisk = true; - if (initRFromString != null && initRFromString.equals("true")) { - useDisk = false; - } - initRengine(args, useDisk); - } - - - /** - * Saves a R model to disk. - *

- * This function can be used to save models built in a session, and then loaded - * again in a different session. - * - * @param modelName The name of the model as returned by \code{getModelName}. - * @param fileName The file to which the model should be saved - * @throws QSARModelException if the R session cannot save the model - * @see #loadModel - */ - public void saveModel(String modelName, String fileName) throws QSARModelException { - if (fileName == null || fileName.equals("")) { - fileName = modelName + ".rda"; - } - rengine.assign("tmpModelName", modelName); - rengine.assign("tmpFileName", fileName); - REXP result = rengine.eval("saveModel(tmpModelName, tmpFileName)"); - if (result == null) { - logger.debug("Error in 'saveModel(tmpModelName, tmpFileName)'"); - throw new QSARModelException("Error saving model"); - } - } - - - /** - * Get the name of the model. - *

- * This function returns the name of the variable that the actual - * model is stored in within the R session. In general this is - * not used for the end user. In the future this might be changed - * to a private method. - * - * @return A String containing the name of the R variable - * @see #setModelName - */ - public String getModelName() { - return (this.modelName); - } - - /** - * Set the name of the model. - *

- * Ordinarily the user does not need to call this function as each model - * is assigned a unique ID at instantiation. However, if a user saves a model - * to disk and then later loads it, the loaded - * model may overwrite a model in that session. In this situation, this method - * can be used to assign a name to the model. - * - * @param newName The name of the model - * @see #getModelName - * @see #saveModel - * @see #loadModel - */ - public void setModelName(String newName) { - if (this.modelName != null && this.modelName.equals(newName)) return; - String oldName = this.modelName; - if (oldName != null) { - rengine.eval("if ('" + oldName + "' %in% ls()) {" + newName + "<-" + oldName + ";rm(" + oldName + ")}"); - } - this.modelName = newName; - } - - /** - * Get the instance of the Rengine. - *

- * In case the R engine has not been initialized, it is initialized before - * returning the object. - * - * @return The Rengine object - */ - public static Rengine getRengine() { - return rengine; - } - - /** - * Get the actual model object. - * - * @return An RList object representation of the model. - */ - public RList getModelObject() { - return modelObject; - } - - /** - * Get a unique String value. - *

- * This method can be used to get unique variable names for use in an R session. The - * String is generated from a combination of the prefix, the system time and a random - * portion. - * - * @param prefix Any value. If empty or null, "var" is used. - * @return A unique String value - */ - public String getUniqueVariableName(String prefix) { - if (prefix == null || prefix.equals("")) prefix = "var"; - Random rnd = new Random(); - long uid = ((System.currentTimeMillis() >>> 16) << 16) + rnd.nextLong(); - return prefix + String.valueOf(Math.abs(uid)).trim(); - } - - /** - * Loads the parameters for a model into a list object in the R session. - *

- * The method assigns the list to a (relatively) unique variable name and returns - * the variable name to the caller so that the list can be accessed later on. - * - * @return - * @throws QSARModelException if there are any problems within the R session. - */ - protected String loadParametersIntoRSession() throws QSARModelException { - REXP result; - Set keys = params.keySet(); - String paramVariableName = getUniqueVariableName("param"); - - for (Iterator iterator = keys.iterator(); iterator.hasNext();) { - String name = (String) iterator.next(); - Object value = params.get(name); - - if (value instanceof Integer) { - logger.debug("Assigning a Integer"); - Integer tmp1 = (Integer) value; - int[] tmp2 = new int[]{tmp1.intValue()}; - rengine.assign(name, tmp2); - } else if (value instanceof String) { - logger.debug("Assigning a String"); - rengine.assign(name, (String) value); - } else if (value instanceof Boolean) { - logger.debug("Assigning a Boolean"); - Boolean tmp1 = (Boolean) value; - if (tmp1.booleanValue()) result = rengine.eval(name + "<- TRUE"); - else result = rengine.eval(name + "<- FALSE"); - if (result == null) throw new QSARModelException("Error assigning a boolean"); - } else if (value instanceof Double) { - logger.debug("Assigning a Double"); - Double tmp1 = (Double) value; - double[] tmp2 = new double[]{tmp1.doubleValue()}; - rengine.assign(name, tmp2); - } else if (value instanceof Integer[]) { - logger.debug("Assigning a Integer[]"); - Integer[] tmp1 = (Integer[]) value; - int[] tmp2 = new int[tmp1.length]; - for (int i = 0; i < tmp1.length; i++) tmp2[i] = tmp1[i].intValue(); - rengine.assign(name, tmp2); - } else if (value instanceof Double[]) { - logger.debug("Assigning a Double[]"); - Double[] tmp1 = (Double[]) value; - double[] tmp2 = new double[tmp1.length]; - for (int i = 0; i < tmp1.length; i++) tmp2[i] = tmp1[i].doubleValue(); - rengine.assign(name, tmp2); - } else if (value instanceof Integer[][]) { - logger.debug("Assigning a 
Integer[][]"); - Integer[][] tmp1 = (Integer[][]) value; - int nrow = tmp1.length; - int ncol = tmp1[0].length; - int[] tmp2 = new int[nrow * ncol]; - for (int i = 0; i < ncol; i++) { - for (int j = 0; j < nrow; j++) { - tmp2[i * nrow + j] = (tmp1[j][i]).intValue(); - } - } - rengine.assign(name, tmp2); - result = rengine.eval(name + "<- matrix(" + name + ", nrow=" + nrow + ")"); - if (result == null) throw new QSARModelException("Error assigning a int[][]"); - } else if (value instanceof Double[][]) { - logger.debug("Assigning a Double[][]"); - Double[][] tmp1 = (Double[][]) value; - int nrow = tmp1.length; - int ncol = tmp1[0].length; - double[] tmp2 = new double[nrow * ncol]; - for (int i = 0; i < ncol; i++) { - for (int j = 0; j < nrow; j++) { - tmp2[i * nrow + j] = (tmp1[j][i]).doubleValue(); - } - } - rengine.assign(name, tmp2); - result = rengine.eval(name + "<- matrix(" + name + ", nrow=" + nrow + ")"); - if (result == null) throw new QSARModelException("Error assigning a double[][]"); - } - } - - // make the list command - String cmd = paramVariableName + " <- list("; - for (Iterator iterator = keys.iterator(); iterator.hasNext();) { - String name = (String) iterator.next(); - cmd = cmd + name + " = " + name + ", "; - } - cmd = cmd + ")"; - - // now eval the command - result = rengine.eval(cmd); - if (result == null) throw new QSARModelException("Error making the parameter list"); - - // now lets remove all the variables we had assigned - for (Iterator iterator = keys.iterator(); iterator.hasNext();) { - String name = (String) iterator.next(); - rengine.eval("rm(" + name + ")"); - } - - return paramVariableName; - } - - /** - * Checks whether the class of a named object is of the specified class. - *

- * - * @param objectName The name of the R variable holding the object to check - * @param objectClass The class to check for - * @return true if the object is of the specified class, false if the object is not - * of the specified class or the R command to obtain the class failed - */ - public boolean isOfClass(String objectName, String objectClass) { - REXP klass = rengine.eval("class(" + objectName + ")"); - if (klass == null) { - return false; - } - return klass.asString().equals(objectClass); - } - - /** - * Removes an object from the R session. - * - * @param objectName The name of the R variable to remove - * @throws QSARModelException if the 'rm' command failed - */ - public void removeObject(String objectName) throws QSARModelException { - REXP ret = rengine.eval("rm(\"" + objectName + "\")"); - if (ret == null) throw new QSARModelException("Error removing \'" + objectName + "\'"); - } - - /** - * Abstract method to handle loading R models. - *

- * This method can be used to load a previously saved R model object. Since - * the user can save any arbitrary R object, checks must be made that the - * object being returned is an instance of one of the current modeling classes. - *

- * This is best achieved by forcing each modeling class to write its own loader. - * - * @param fileName The file containing the R object to load - * @throws org.openscience.cdk.qsar.model.QSARModelException - * if the R session could not load the object or if the loaded model - * does not correspond to the class that it was loaded from - * @see #saveModel - */ - abstract public void loadModel(String fileName) throws QSARModelException; - - /** - * Abstract method to handle loading R models that were previously serialized. - *

- * This method can be used to load a previously serialized R model object (usinging - * serialize()). Since - * the user can save any arbitrary R object, checks must be made that the - * object being returned is an instance of one of the current modeling classes. - * This is best achieved by forcing each modeling class to write its own loader. - *

- * In addition - * objects saved using serialize() do not have a name. As a result a name for the object must - * be specified when using this method. - * - * @param serializedModel A String containing the ASCII sreialized R object - * @param modelName The name of the model. (Within the R session, the model will be assigned to - * a variable of this name) - * @throws QSARModelException if the R session could not load the object or if the loaded model - * does not correspond to the class that it was loaded from - * @see #saveModel - */ - abstract public void loadModel(String serializedModel, String modelName) throws QSARModelException; - - /** - * Specifies the parameters value. - * - * @param key A String representing the name of the parameter (corresponding to the - * name described in the R manpages) - * @param obj The value of the parameter - * @throws QSARModelException if the parameters are of the wrong type for the given modeling function - */ - abstract public void setParameters(String key, Object obj) throws QSARModelException; - - abstract public void build() throws QSARModelException; - - abstract public void predict() throws QSARModelException; - - protected void finalize() { - rengine.eval("rm(\"" + getModelName() + "\",pos=1)"); - } - - ; - - - class TextConsole implements RMainLoopCallbacks { - public void rWriteConsole(Rengine re, String text) { - System.out.print(text); - } - - public void rBusy(Rengine re, int which) { - System.out.println("rBusy(" + which + ")"); - } - - public String rReadConsole(Rengine re, String prompt, int addToHistory) { - System.out.print(prompt); - try { - BufferedReader br = new BufferedReader(new InputStreamReader(System.in)); - String s = br.readLine(); - return (s == null || s.length() == 0) ? 
s : s + "\n"; - } catch (Exception e) { - System.out.println("jriReadConsole exception: " + e.getMessage()); - } - return null; - } - - public void rShowMessage(Rengine re, String message) { - System.out.println("rShowMessage \"" + message + "\""); - } - - public String rChooseFile(Rengine re, int newFile) { - FileDialog fd = new FileDialog(new Frame(), (newFile == 0) ? "Select a file" : "Select a new file", (newFile == 0) ? FileDialog.LOAD : FileDialog.SAVE); - fd.pack(); - fd.setVisible(true); - String res = null; - if (fd.getDirectory() != null) res = fd.getDirectory(); - if (fd.getFile() != null) res = (res == null) ? fd.getFile() : (res + fd.getFile()); - return res; - } - - public void rFlushConsole(Rengine re) { - } - - public void rLoadHistory(Rengine re, String filename) { - } - - public void rSaveHistory(Rengine re, String filename) { - } - - public void rWriteConsole(Rengine arg0, String message, int arg2) { - System.out.println("rShowMessage \"" + message + "\""); - } - } - -} diff --git a/src/main/org/openscience/cdk/qsar/model/data/cdkSJava.R b/src/main/org/openscience/cdk/qsar/model/data/cdkSJava.R deleted file mode 100644 index 0de3ec9..0000000 --- a/src/main/org/openscience/cdk/qsar/model/data/cdkSJava.R +++ /dev/null @@ -1,451 +0,0 @@ -# -# Copyright (C) 2004-2007 The Chemistry Development Kit (CDK) project -# -# Contact: cdk-devel@lists.sourceforge.net -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public License -# as published by the Free Software Foundation; either version 2.1 -# of the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. 
-# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - - -# Basically the idea is to be able to pass an arbitrary Java object -# to an R session. For this to work, the object should be converted to -# a valid R object within the R session. -# -# How does R know how to convert a Java object it recieves? This is done -# by a matcher function. This looks at the class name of the object and if -# it matches the class name in the matcher function, the converter is called -# -# The converter then accesses any methods for the Java object or uses the methods -# provided by SJava to extract information from the Java object to create an R -# object. -# -# After implementing matcher and converter functions they should be registered -# with SJava using setJavaFunctionConverter() -# -# So the flow when calling an R function *from* a Java program and passing -# an arbitrary Java object is: -# -# 1. The R function recieves the Java object -# 2. Runs it through the matcher functions SJava knows about -# 3. If a matcher function returns TRUE the corresponding converter function -# is called. The return value if an R object (vector, data.frame, list etc) -# 4. The function then works with the object as usual -# 5. If no matcher was found in (2) then the R function will see the object -# as an AnonymousOmegahatReference -# -# If the R function that was called from the Java session returns the recieved -# object then Java will see it as a R object. So if the converter for a Java -# vector turns it into a numeric() and returns it Java will get the object back -# as a double[] which can be printed by ROmegahatInterpreter.show() -# -# -# -# Passing an arbitrary R object back to Java is done similarly. 
In this case -# the converter function will call some Java function that creaates a -# AnonymousOmegahatReference (or named) from the R object (possibly by -# calling methods of the class). The matcher function uses the inherits function -# in R to determine whether the R object is of the proper class. So in this case -# the flow is : -# -# 1. Java calls a R function which does some calculation and returns an R object -# 2. SJava looks for a matcher that matches the R class of the return value -# and calls the corresponding converter function with the R object -# 3. The converter will generally return a Java object containing the information -# from the R object. -# -# For primitives such as vectors, this process is not required. But if we want -# to return say a lm or nnet object we would create a Java class that contains -# setter and getter methods. The R converter would create a new instance of this -# wrapping class and set the fields with the values from the R object and return this -# Java object which will then be passed back to the Java calling program - - -require(SJava) -if (!isJavaInitialized()) { - .JavaInit() -} -library(nnet) -#library(pls.pcr) - -saveModel <- function(modelname, filename) { - resp <- try( do.call('save',list(modelname,file=filename)), silent=TRUE ) -} - -loadModel <- function(filename) { - modelname <- load(filename, .GlobalEnv) - get(modelname) -} -loadModel.getName <- function(filename) { - modelname <- load(filename) - modelname -} -unserializeModel <- function(modelstr, modelname) { - zzz <- paste(paste(modelstr, sep='', collapse='\n'), '\n', sep='', collapse='') - assign(modelname, unserialize(zzz), pos=1) - get(modelname) -} - -summaryModel <- function(modelname) { - summary(get(modelname)) -} - -hashmap.to.list <- function(params) { - keys <- unlist(params$keySet()$toArray()) - paramlist <- list() - cnt <- 1 - for (key in keys) { - paramlist[[cnt]] <- params$get(key) - cnt <- cnt+1 - } - names(paramlist) <- keys - paramlist 
-} - -############################################# -# Linear regression fit/predict converters -############################################# -lmFitConverter <- -function(obj,...) -{ - .JNew('org.openscience.cdk.qsar.model.R.LinearRegressionModelFit', - obj$coefficients, obj$residuals, - obj$fitted, obj$rank, obj$df.residual) -} -lmPredictConverter <- function(preds,...) { - .JNew('org.openscience.cdk.qsar.model.R.LinearRegressionModelPredict', - preds$fit[,1], preds$se.fit, preds$fit[,2], preds$fit[,3], - preds$df, preds$residual.scale) -} -lmSummaryConverter <- function(sumry,...) { - .JNew('org.openscience.cdk.qsar.model.R.LinearRegressionModelSummary', - sumry$residuals, sumry$coeff, - sumry$sigma, sumry$r.squared, sumry$adj.r.squared, - sumry$df[2], sumry$fstatistic, - attr(sumry$coeff, 'dimnames')[[1]], - attr(sumry$coeff, 'dimnames')[[2]]) -} - -############################################# -# CNN regression fit/predict converters -############################################# -cnnSummaryConverter <- -function(obj,...) -{ - .JNew('org.openscience.cdk.qsar.model.R.CNNRegressionModelSummary', - obj$n, obj$entropy, obj$softmax, obj$censored, obj$value, obj$residuals) -} -cnnFitConverter <- -function(obj,...) -{ - noutput <- ncol(obj$fitted) - nobs <- nrow(obj$fitted) - if ('Hessian' %in% names(obj)) { - .JNew('org.openscience.cdk.qsar.model.R.CNNRegressionModelFit', - noutput,nobs, obj$wts, obj$fitted, obj$residuals, obj$value, obj$Hessian) - } else { - .JNew('org.openscience.cdk.qsar.model.R.CNNRegressionModelFit', - noutput, nobs,obj$wts, obj$fitted, obj$residuals, obj$value) - } -} -cnnClassFitConverter <- -function(obj,...) 
-{ - noutput <- ncol(obj$fitted) - nobs <- nrow(obj$fitted) - if ('Hessian' %in% names(obj)) { - .JNew('org.openscience.cdk.qsar.model.R.CNNClassificationModelFit', - noutput,nobs, obj$wts, obj$fitted, obj$residuals, obj$value, obj$Hessian) - } else { - .JNew('org.openscience.cdk.qsar.model.R.CNNClassificationModelFit', - noutput, nobs,obj$wts, obj$fitted, obj$residuals, obj$value) - } -} -cnnPredictConverter <- -function(obj,...) { - # The obj we get is actually a 'matrix' but we set its class - # to cnnregprediction so that SJava would send it specifically - # to us. So we should convert obj back to class 'matrix' so - # that SJava can send it correctly to the Java side - class(obj) <- 'matrix' - .JNew('org.openscience.cdk.qsar.model.R.CNNRegressionModelPredict', - ncol(obj), obj) -} -cnnClassPredictConverter <- -function(obj,...) { - # The obj we get is actually a 'matrix' but we set its class - # to cnnclsprediction so that SJava would send it specifically - # to us. So we should convert obj back to class 'matrix' so - # that SJava can send it correctly to the Java side - if (class(obj[1]) == 'numeric') { - class(obj) <- 'matrix' - .JNew('org.openscience.cdk.qsar.model.R.CNNClassificationModelPredict', - ncol(obj), obj) - } else if (class(obj[1]) == 'character') { - class(obj) <- 'character' - .JNew('org.openscience.cdk.qsar.model.R.CNNClassificationModelPredict', obj) - } -} - - -############################################# -# PLS fit/predict converter -############################################# -plsFitConverter <- -function(obj,...) 
{ - tmp <- .JNew('org.openscience.cdk.qsar.model.R.PLSRegressionModelFit', - obj$nobj, obj$nvar, obj$npred, obj$ncomp, obj$method) - tmp$setTrainingData( - obj$training$B, obj$training$Ypred, obj$training$RMS, - obj$training$Xscores, obj$training$Xload, - obj$training$Yscores, obj$training$Yload) - tmp$PLSRegressionModelSetTrain() - if ('validat' %in% names(obj)) { - # Add validation fields - tmp$setValidationData( - obj$valid$niter, obj$valid$nLV, - obj$valid$Ypred, obj$valid$RMS, obj$valid$RMS.sd, obj$valid$R2) - } - tmp -} -plsPredictConverter <- -function(obj,...) { - class(obj) <- 'matrix' - .JNew('org.openscience.cdk.qsar.model.R.PLSRegressionModelPredict',ncol(obj),obj) -} - -############################################# -# Register the fit/predict converter funcs -############################################# -setJavaFunctionConverter(lmFitConverter, function(x,...){inherits(x,'lm')}, - description='lm fit object to Java', - fromJava=F) -setJavaFunctionConverter(lmPredictConverter, function(x,...){inherits(x,'lmregprediction')}, - description='lm predict object to Java', - fromJava=F) -setJavaFunctionConverter(lmSummaryConverter, function(x,...){inherits(x,'summary.lm')}, - description='lm summary object to Java', - fromJava=F) -setJavaFunctionConverter(cnnClassFitConverter, function(x,...){inherits(x,'nnet.formula')}, - description='cnn (nnet) classification fit object to Java', - fromJava=F) -setJavaFunctionConverter(cnnSummaryConverter, function(x,...){inherits(x,'summary.nnet')}, - description='cnn (nnet) summary object to Java', - fromJava=F) -setJavaFunctionConverter(cnnFitConverter, function(x,...){inherits(x,'nnet')}, - description='cnn (nnet) fit object to Java', - fromJava=F) -setJavaFunctionConverter(cnnClassPredictConverter, function(x,...){inherits(x,'cnnclsprediction')}, - description='cnn (nnet) classification predict object to Java', - fromJava=F) -setJavaFunctionConverter(cnnPredictConverter, function(x,...){inherits(x,'cnnregprediction')}, 
- description='cnn (nnet) predict object to Java', - fromJava=F) -setJavaFunctionConverter(plsFitConverter, function(x,...){inherits(x,'mvr')}, - description='pls/pcr fit object to Java', - fromJava=F) -setJavaFunctionConverter(plsPredictConverter, function(x,...){inherits(x,'plsregressionprediction')}, - description='pls/pcr predict object to Java', - fromJava=F) - -buildLM <- function(modelname, params) { - # params is a java.util.HashMap containing the parameters - # we need to extract them and add them to this environment - paramlist <- hashmap.to.list(params) - attach(paramlist) - - # x will come in as a double[][] - x <- matrix(unlist(x), nrow=length(x), byrow=TRUE) - - # assumes y ~ all columns of x - d <- data.frame(y=y,x) - assign(modelname, lm(y~., d, weights=weights), pos=1) - detach(paramlist) - get(modelname) -} - -predictLM <- function( modelname, params) { - # params is a java.util.HashMap containing the parameters - # we need to extract them and add them to this environment - paramlist <- hashmap.to.list(params) - attach(paramlist) - - newx <- data.frame( matrix(unlist(newdata), nrow=length(newdata), byrow=TRUE) ) - names(newx) <- names(get(modelname)$coef)[-1] - if (interval == '' || !(interval %in% c('confidence','prediction')) ) { - interval = 'confidence' - } - preds <- predict( get(modelname), newx, se.fit = TRUE, interval=interval); - class(preds) <- 'lmregprediction' - - detach(paramlist) - preds -} - -buildCNN <- function(modelname, params) { - paramlist <- hashmap.to.list(params) - attach(paramlist) - - x <- matrix(unlist(x), nrow=length(x), byrow=TRUE) - y <- matrix(unlist(y), nrow=length(y), byrow=TRUE) - if (nrow(x) != nrow(y)) { - stop('The number of observations in x & y dont match') - } - - ninput <- ncol(x) - nhidden <- size - noutput <- ncol(y) - nwt <- (ninput*nhidden) + (nhidden*noutput) + nhidden + noutput - - if (class(weights) == 'logical' && !weights) weights <- rep(1, nrow(y)) - if (class(subset) == 'logical' && !subset) 
subset <- 1:nrow(y) - if (class(Wts) == 'logical' && !Wts) { Wts <- runif(nwt) } - if (class(mask) == 'logical' && !mask) { mask <- rep(TRUE, nwt) } - - assign(modelname, - nnet(x,y,weights=weights,size=size,Wts=Wts,mask=mask,linout=linout, - entropy=entropy,softmax=softmax,censored=censored,skip=skip,rang=rang, - decay=decay,maxit=maxit,Hess=Hess,trace=trace,MaxNWts=MaxNWts, - abstol=abstol,reltol=reltol), pos=1) - - detach(paramlist) - get(modelname) -} - - -buildCNNClass <- function(modelname, params) { - paramlist <- hashmap.to.list(params) - attach(paramlist) - - x <- matrix(unlist(x), nrow=length(x), byrow=TRUE) - y <- factor(unlist(y)) # y will come in as a single vector - if (nrow(x) != length(y)) { stop('The number of observations in x & y dont match') } - - ninput <- ncol(x) - nhidden <- size - if (length(levels(y)) == 2) noutput <- 1 - else noutput = length(levels(y)) - - nwt <- (ninput*nhidden) + (nhidden*noutput) + nhidden + noutput - if (class(weights) == 'logical' && !weights) weights <- rep(1, length(y)) - if (class(subset) == 'logical' && !subset) subset <- 1:length(y) - if (class(Wts) == 'logical' && !Wts) { Wts <- runif(nwt) } - if (class(mask) == 'logical' && !mask) { mask <- rep(TRUE, nwt) } - - - assign(modelname, - nnet(y~., data=data.frame(y=y,x=x),weights=weights,size=size,Wts=Wts,mask=mask,linout=linout, - softmax=softmax,censored=censored,skip=skip,rang=rang, - decay=decay,maxit=maxit,Hess=Hess,trace=trace,MaxNWts=MaxNWts, - abstol=abstol,reltol=reltol), pos=1) - - detach(paramlist) - get(modelname) -} - -predictCNN <- function(modelname, params) { - # Since buildCNN should have been called before this - # we dont bother loading the nnet library - paramlist <- hashmap.to.list(params) - attach(paramlist) - - newx <- data.frame( matrix(unlist(newdata), nrow=length(newdata), byrow=TRUE) ) - names(newx) <- get(modelname)$coefnames - if (type == '' || !(type %in% c('raw','class')) ) { - type = 'raw' - } - - - preds <- predict( get(modelname), 
newdata=newx, type=type); - class(preds) <- 'cnnregprediction' - - detach(paramlist) - preds -} -predictCNNClass <- function(modelname, params) { - # Since buildCNNClass should have been called before this - # we dont bother loading the nnet library - paramlist <- hashmap.to.list(params) - attach(paramlist) - - newx <- data.frame( matrix(unlist(newdata), nrow=length(newdata), byrow=TRUE) ) - names(newx) <- get(modelname)$coefnames - if (type == '' || !(type %in% c('raw','class')) ) { - type = 'raw' - } - - preds <- predict( get(modelname), newdata=newx, type=type); - class(preds) <- 'cnnclsprediction' - detach(paramlist) - preds -} - -buildPLS <- function(modelname, params) { - library(pls.pcr) - paramlist <- hasmap.to.list(params) - attach(paramlist) - - x <- matrix(unlist(x), nrow=length(x), byrow=TRUE) - y <- matrix(unlist(y), nrow=length(y), byrow=TRUE) - if (nrow(x) != nrow(y)) { stop('The number of observations in x & y dont match') } - - if (!ncomp) { - ncomp <- 1:ncol(x) - } else { - ncomp <- unlist(ncomp) - } - - if (!(method %in% c('PCR','SIMPLS','kernelPLS'))) { - stop('Invalid methopd specification') - } - if (!(validation %in% c('none','CV'))) { - stop('Invalid validation sepcification') - } - - if (niter == 0 && validation == 'CV') { - niter = nrow(y) - } - - - # We should do this since when both grpsize and niter are specified niter - # is used. 
So if grpsize comes in as 0 (which will be the default setting) - # we specify only niter and if not zero we use grpsize and ignore niter - if (grpsize != 0) { - assign(modelname, - pls(x=x,y=y,ncomp=ncomp,method=method,validation=validation,grpsize=grpsize), - pos=1) - } else { - assign(modelname, - pls(x=x,y=y,ncomp=ncomp,method=method,validation=validation,niter=niter), - pos=1) - } - detach(paramlist) - get(modelname) -} -predictPLS <- function(modelname, params) { - paramlist <- hashmap.to.list(params) - attach(paramlist) - - newX <- matrix(unlist(newX), nrow=length(x), byrow=TRUE) - model <- get(modelname) - if (ncol(newX) != model$nvar) { - stop('The number of independent variables in the new data does not match that specified during building') - } - if (nlv == FALSE) { - preds <- predict(model, newX) - } else { - preds <- predict(model, newX, nlv) - } - class(preds) <- 'plsregressionprediction' - preds -} diff --git a/src/main/org/openscience/cdk/qsar/model/data/cnn_3.R b/src/main/org/openscience/cdk/qsar/model/data/cnn_3.R deleted file mode 100644 index 732c923..0000000 --- a/src/main/org/openscience/cdk/qsar/model/data/cnn_3.R +++ /dev/null @@ -1,83 +0,0 @@ -############################################# -# CNN regression fit/predict converters -############################################# -cnnSummaryConverter <- -function(obj,...) -{ - .JNew('org.openscience.cdk.qsar.model.R.CNNRegressionModelSummary', - obj$n, obj$entropy, obj$softmax, obj$censored, obj$value, obj$residuals) -} - -cnnFitConverter <- -function(obj,...) -{ - noutput <- ncol(obj$fitted) - nobs <- nrow(obj$fitted) - if ('Hessian' %in% names(obj)) { - .JNew('org.openscience.cdk.qsar.model.R.CNNRegressionModelFit', - noutput,nobs, obj$wts, obj$fitted, obj$residuals, obj$value, obj$Hessian) - } else { - .JNew('org.openscience.cdk.qsar.model.R.CNNRegressionModelFit', - noutput, nobs,obj$wts, obj$fitted, obj$residuals, obj$value) - } -} -cnnPredictConverter <- -function(obj,...) 
{ - # The obj we get is actually a 'matrix' but we set its class - # to cnnregprediction so that SJava would send it specifically - # to us. So we should convert obj back to class 'matrix' so - # that SJava can send it correctly to the Java side - class(obj) <- 'matrix' - .JNew('org.openscience.cdk.qsar.model.R.CNNRegressionModelPredict', - ncol(obj), obj) -} -buildCNN <- function(modelname, params) { - library(nnet) - paramlist <- hashmap.to.list(params) - attach(paramlist) - - x <- matrix(unlist(x), nrow=length(x), byrow=TRUE) - y <- matrix(unlist(y), nrow=length(y), byrow=TRUE) - if (nrow(x) != nrow(y)) { - stop('The number of observations in x & y dont match') - } - - ninput <- ncol(x) - nhidden <- size - noutput <- ncol(y) - nwt <- (ninput*nhidden) + (nhidden*noutput) + nhidden + noutput - - if (class(weights) == 'logical' && !weights) weights <- rep(1, nrow(y)) - if (class(subset) == 'logical' && !subset) subset <- 1:nrow(y) - if (class(Wts) == 'logical' && !Wts) { Wts <- runif(nwt) } - if (class(mask) == 'logical' && !mask) { mask <- rep(TRUE, nwt) } - - assign(modelname, - nnet(x,y,weights=weights,size=size,Wts=Wts,mask=mask,linout=linout, - entropy=entropy,softmax=softmax,censored=censored,skip=skip,rang=rang, - decay=decay,maxit=maxit,Hess=Hess,trace=trace,MaxNWts=MaxNWts, - abstol=abstol,reltol=reltol), pos=1) - - detach(paramlist) - get(modelname) -} -predictCNN <- function(modelname, params) { - # Since buildCNN should have been called before this - # we dont bother loading the nnet library - paramlist <- hashmap.to.list(params) - attach(paramlist) - - newx <- data.frame( matrix(unlist(newdata), nrow=length(newdata), byrow=TRUE) ) - names(newx) <- get(modelname)$coefnames - if (type == '' || !(type %in% c('raw','class')) ) { - type = 'raw' - } - - - preds <- predict( get(modelname), newdata=newx, type=type); - class(preds) <- 'cnnregprediction' - - detach(paramlist) - preds -} - diff --git a/src/main/org/openscience/cdk/qsar/model/data/cnn_4.R 
b/src/main/org/openscience/cdk/qsar/model/data/cnn_4.R deleted file mode 100644 index ea3050b..0000000 --- a/src/main/org/openscience/cdk/qsar/model/data/cnn_4.R +++ /dev/null @@ -1,80 +0,0 @@ -############################################# -# CNN classification fit/predict converters -############################################# -cnnClassFitConverter <- -function(obj,...) -{ - noutput <- ncol(obj$fitted) - nobs <- nrow(obj$fitted) - if ('Hessian' %in% names(obj)) { - .JNew('org.openscience.cdk.qsar.model.R.CNNClassificationModelFit', - noutput,nobs, obj$wts, obj$fitted, obj$residuals, obj$value, obj$Hessian) - } else { - .JNew('org.openscience.cdk.qsar.model.R.CNNClassificationModelFit', - noutput, nobs,obj$wts, obj$fitted, obj$residuals, obj$value) - } -} -cnnClassPredictConverter <- -function(obj,...) { - # The obj we get is actually a 'matrix' but we set its class - # to cnnclsprediction so that SJava would send it specifically - # to us. So we should convert obj back to class 'matrix' so - # that SJava can send it correctly to the Java side - if (class(obj[1]) == 'numeric') { - class(obj) <- 'matrix' - .JNew('org.openscience.cdk.qsar.model.R.CNNClassificationModelPredict', - ncol(obj), obj) - } else if (class(obj[1]) == 'character') { - class(obj) <- 'character' - .JNew('org.openscience.cdk.qsar.model.R.CNNClassificationModelPredict', obj) - } -} -buildCNNClass <- function(modelname, params) { - library(nnet) - paramlist <- hashmap.to.list(params) - attach(paramlist) - - x <- matrix(unlist(x), nrow=length(x), byrow=TRUE) - y <- factor(unlist(y)) # y will come in as a single vector - if (nrow(x) != length(y)) { stop('The number of observations in x & y dont match') } - - ninput <- ncol(x) - nhidden <- size - if (length(levels(y)) == 2) noutput <- 1 - else noutput = length(levels(y)) - - nwt <- (ninput*nhidden) + (nhidden*noutput) + nhidden + noutput - if (class(weights) == 'logical' && !weights) weights <- rep(1, length(y)) - if (class(subset) == 'logical' && 
!subset) subset <- 1:length(y) - if (class(Wts) == 'logical' && !Wts) { Wts <- runif(nwt) } - if (class(mask) == 'logical' && !mask) { mask <- rep(TRUE, nwt) } - - - assign(modelname, - nnet(y~., data=data.frame(y=y,x=x),weights=weights,size=size,Wts=Wts,mask=mask,linout=linout, - softmax=softmax,censored=censored,skip=skip,rang=rang, - decay=decay,maxit=maxit,Hess=Hess,trace=trace,MaxNWts=MaxNWts, - abstol=abstol,reltol=reltol), pos=1) - - detach(paramlist) - get(modelname) -} - -predictCNNClass <- function(modelname, params) { - # Since buildCNNClass should have been called before this - # we dont bother loading the nnet library - paramlist <- hashmap.to.list(params) - attach(paramlist) - - newx <- data.frame( y=1, x=matrix(unlist(newdata), nrow=length(newdata), byrow=TRUE) ) - names(newx) <- get(modelname)$coefnames - if (type == '' || !(type %in% c('raw','class')) ) { - type = 'raw' - } - - preds <- predict( get(modelname), newdata=newx, type=type); - class(preds) <- 'cnnclsprediction' - detach(paramlist) - preds -} - diff --git a/src/main/org/openscience/cdk/qsar/model/data/helper.R b/src/main/org/openscience/cdk/qsar/model/data/helper.R deleted file mode 100644 index db8c20a..0000000 --- a/src/main/org/openscience/cdk/qsar/model/data/helper.R +++ /dev/null @@ -1,141 +0,0 @@ -# -# Copyright (C) 2004-2007 The Chemistry Development Kit (CDK) project -# -# Contact: cdk-devel@lists.sourceforge.net -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public License -# as published by the Free Software Foundation; either version 2.1 -# of the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. 
-# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - - -# load some common packages that will always be installed - -library(MASS) -library(nnet) - -# some helper functions -saveModel <- function(modelname, filename) { - resp <- try( do.call('save',list(modelname,file=filename)), silent=TRUE ) -} - -loadModel <- function(filename) { - modelname <- load(filename, .GlobalEnv) - list(model=get(modelname) , name=modelname) -} - -unserializeModel <- function(modelstr, modelname) { - zzz <- paste(paste(modelstr, sep='', collapse='\n'), '\n', sep='', collapse='') - assign(modelname, unserialize(zzz), pos=1) - list(model=get(modelname) , name=modelname) -} - -buildLM <- function(modelname, paramlist) { - attach(paramlist) - - # assumes y ~ all columns of x - d <- data.frame(y=y,x) - assign(modelname, lm(y~., d, weights=weights), pos=1) - detach(paramlist) - get(modelname) -} - -predictLM <- function( modelname, paramlist) { - attach(paramlist) - - newx <- data.frame( newdata ) - names(newx) <- names(get(modelname)$coef)[-1] - if (interval == '' || !(interval %in% c('confidence','prediction')) ) { - interval = 'confidence' - } - preds <- predict( get(modelname), newx, se.fit = TRUE, interval=interval); - detach(paramlist) - preds -} - -buildCNN <- function(modelname, paramlist) { - attach(paramlist) - if (nrow(x) != nrow(y)) { - stop('The number of observations in x & y dont match') - } - - ninput <- ncol(x) - nhidden <- size - noutput <- ncol(y) - nwt <- (ninput*nhidden) + (nhidden*noutput) + nhidden + noutput - - if (class(weights) == 'logical' && !weights) weights <- rep(1, nrow(y)) - if (class(subset) == 'logical' && !subset) subset <- 1:nrow(y) - if (class(Wts) == 'logical' && !Wts) { Wts <- runif(nwt) } - if (class(mask) == 'logical' && !mask) { mask <- rep(TRUE, nwt) } - - assign(modelname, - 
nnet(x,y,weights=weights,size=size,Wts=Wts,mask=mask,linout=linout, - entropy=entropy,softmax=softmax,censored=censored,skip=skip,rang=rang, - decay=decay,maxit=maxit,Hess=Hess,trace=trace,MaxNWts=MaxNWts, - abstol=abstol,reltol=reltol), pos=1) - - detach(paramlist) - get(modelname) -} - - -buildCNNClass <- function(modelname, paramlist) { - - attach(paramlist) - y <- factor(unlist(y)) # y will come in as a single vector - if (nrow(x) != length(y)) { stop('The number of observations in x & y dont match') } - - ninput <- ncol(x) - nhidden <- size - if (length(levels(y)) == 2) noutput <- 1 - else noutput = length(levels(y)) - - nwt <- (ninput*nhidden) + (nhidden*noutput) + nhidden + noutput - if (class(weights) == 'logical' && !weights) weights <- rep(1, length(y)) - if (class(subset) == 'logical' && !subset) subset <- 1:length(y) - if (class(Wts) == 'logical' && !Wts) { Wts <- runif(nwt) } - if (class(mask) == 'logical' && !mask) { mask <- rep(TRUE, nwt) } - - - assign(modelname, - nnet(y~., data=data.frame(y=y,x=x),weights=weights,size=size,Wts=Wts,mask=mask,linout=linout, - softmax=softmax,censored=censored,skip=skip,rang=rang, - decay=decay,maxit=maxit,Hess=Hess,trace=trace,MaxNWts=MaxNWts, - abstol=abstol,reltol=reltol), pos=1) - - detach(paramlist) - get(modelname) -} - -predictCNN <- function(modelname, paramlist) { - attach(paramlist) - newx <- data.frame( newdata ) - names(newx) <- get(modelname)$coefnames - if (type == '' || !(type %in% c('raw','class')) ) { - type = 'raw' - } - preds <- predict( get(modelname), newdata=newx, type=type); - detach(paramlist) - preds -} -predictCNNClass <- function(modelname, paramlist) { - attach(paramlist) - newx <- data.frame( newdata ) - names(newx) <- get(modelname)$coefnames - if (type == '' || !(type %in% c('raw','class')) ) { - type = 'raw' - } - preds <- predict( get(modelname), newdata=newx, type=type); - detach(paramlist) - preds -} diff --git a/src/main/org/openscience/cdk/qsar/model/data/init_1.R 
b/src/main/org/openscience/cdk/qsar/model/data/init_1.R deleted file mode 100644 index c78aa5f..0000000 --- a/src/main/org/openscience/cdk/qsar/model/data/init_1.R +++ /dev/null @@ -1,42 +0,0 @@ -require(SJava) -if (!isJavaInitialized()) { - .JavaInit() -} -library(nnet) -library(pls.pcr) -options(show.error.messages=FALSE) - -saveModel <- function(modelname, filename) { - resp <- try( do.call('save',list(modelname,file=filename)), silent=TRUE ) -} - -loadModel <- function(filename) { - modelname <- load(filename, .GlobalEnv) - get(modelname) -} -loadModel.getName <- function(filename) { - modelname <- load(filename) - modelname -} -unserializeModel <- function(modelstr, modelname) { - zzz <- paste(paste(modelstr, sep='', collapse='\n'), '\n', sep='', collapse='') - assign(modelname, unserialize(zzz), pos=1) - get(modelname) -} -summaryModel <- function(modelname) { - summary(get(modelname)) -} - - -hashmap.to.list <- function(params) { - keys <- unlist(params$keySet()$toArray()) - paramlist <- list() - cnt <- 1 - for (key in keys) { - paramlist[[cnt]] <- params$get(key) - cnt <- cnt+1 - } - names(paramlist) <- keys - paramlist -} - diff --git a/src/main/org/openscience/cdk/qsar/model/data/lm_2.R b/src/main/org/openscience/cdk/qsar/model/data/lm_2.R deleted file mode 100644 index b53bb5c..0000000 --- a/src/main/org/openscience/cdk/qsar/model/data/lm_2.R +++ /dev/null @@ -1,59 +0,0 @@ -############################################# -# Linear regression fit/predict converters -############################################# -lmFitConverter <- -function(obj,...) -{ - .JNew('org.openscience.cdk.qsar.model.R.LinearRegressionModelFit', - obj$coefficients, obj$residuals, - obj$fitted, obj$rank, obj$df.residual) -} -lmPredictConverter <- function(preds,...) { - .JNew('org.openscience.cdk.qsar.model.R.LinearRegressionModelPredict', - preds$fit[,1], preds$se.fit, preds$fit[,2], preds$fit[,3], - preds$df, preds$residual.scale) -} -lmSummaryConverter <- function(sumry,...) 
{ - .JNew('org.openscience.cdk.qsar.model.R.LinearRegressionModelSummary', - sumry$residuals, sumry$coeff, - sumry$sigma, sumry$r.squared, sumry$adj.r.squared, - sumry$df[2], sumry$fstatistic, - attr(sumry$coeff, 'dimnames')[[1]], - attr(sumry$coeff, 'dimnames')[[2]]) -} - -buildLM <- function(modelname, params) { - # params is a java.util.HashMap containing the parameters - # we need to extract them and add them to this environment - paramlist <- hashmap.to.list(params) - attach(paramlist) - - # x will come in as a double[][] - x <- matrix(unlist(x), nrow=length(x), byrow=TRUE) - - # assumes y ~ all columns of x - d <- data.frame(y=y,x) - assign(modelname, lm(y~., d, weights=weights), pos=1) - detach(paramlist) - get(modelname) -} - -predictLM <- function( modelname, params) { - # params is a java.util.HashMap containing the parameters - # we need to extract them and add them to this environment - paramlist <- hashmap.to.list(params) - attach(paramlist) - - newx <- data.frame( matrix(unlist(newdata), nrow=length(newdata), byrow=TRUE) ) - names(newx) <- names(get(modelname)$coef)[-1] - if (interval == '' || !(interval %in% c('confidence','prediction')) ) { - interval = 'confidence' - } - preds <- predict( get(modelname), newx, se.fit = TRUE, interval=interval); - class(preds) <- 'lmregprediction' - - detach(paramlist) - preds -} - - diff --git a/src/main/org/openscience/cdk/qsar/model/data/pls_5.R b/src/main/org/openscience/cdk/qsar/model/data/pls_5.R deleted file mode 100644 index 9789c4c..0000000 --- a/src/main/org/openscience/cdk/qsar/model/data/pls_5.R +++ /dev/null @@ -1,84 +0,0 @@ -############################################# -# PLS fit/predict converter -############################################# -plsFitConverter <- -function(obj,...) 
{ - tmp <- .JNew('org.openscience.cdk.qsar.model.R.PLSRegressionModelFit', - obj$nobj, obj$nvar, obj$npred, obj$ncomp, obj$method) - tmp$setTrainingData( - obj$training$B, obj$training$Ypred, obj$training$RMS, - obj$training$Xscores, obj$training$Xload, - obj$training$Yscores, obj$training$Yload) - tmp$PLSRegressionModelSetTrain() - if ('validat' %in% names(obj)) { - # Add validation fields - tmp$setValidationData( - obj$valid$niter, obj$valid$nLV, - obj$valid$Ypred, obj$valid$RMS, obj$valid$RMS.sd, obj$valid$R2) - } - tmp -} -plsPredictConverter <- -function(obj,...) { - class(obj) <- 'matrix' - .JNew('org.openscience.cdk.qsar.model.R.PLSRegressionModelPredict',ncol(obj),obj) -} -buildPLS <- function(modelname, params) { - library(pls.pcr) - paramlist <- hasmap.to.list(params) - attach(paramlist) - - x <- matrix(unlist(x), nrow=length(x), byrow=TRUE) - y <- matrix(unlist(y), nrow=length(y), byrow=TRUE) - if (nrow(x) != nrow(y)) { stop('The number of observations in x & y dont match') } - - if (!ncomp) { - ncomp <- 1:ncol(x) - } else { - ncomp <- unlist(ncomp) - } - - if (!(method %in% c('PCR','SIMPLS','kernelPLS'))) { - stop('Invalid methopd specification') - } - if (!(validation %in% c('none','CV'))) { - stop('Invalid validation sepcification') - } - - if (niter == 0 && validation == 'CV') { - niter = nrow(y) - } - - - # We should do this since when both grpsize and niter are specified niter - # is used. 
So if grpsize comes in as 0 (which will be the default setting) - # we specify only niter and if not zero we use grpsize and ignore niter - if (grpsize != 0) { - assign(modelname, - pls(x=x,y=y,ncomp=ncomp,method=method,validation=validation,grpsize=grpsize), - pos=1) - } else { - assign(modelname, - pls(x=x,y=y,ncomp=ncomp,method=method,validation=validation,niter=niter), - pos=1) - } - detach(paramlist) - get(modelname) -} -predictPLS <- function(modelname, params) { - paramlist <- hashmap.to.list(params) - attach(paramlist) - - newX <- matrix(unlist(newX), nrow=length(x), byrow=TRUE) - model <- get(modelname) - if (ncol(newX) != model$nvar) { - stop('The number of independent variables in the new data does not match that specified during building') - } - if (nlv == FALSE) { - preds <- predict(model, newX) - } else { - preds <- predict(model, newX, nlv) - } - class(preds) <- 'plsregressionprediction' - preds -} diff --git a/src/main/org/openscience/cdk/qsar/model/data/register_999.R b/src/main/org/openscience/cdk/qsar/model/data/register_999.R deleted file mode 100644 index 810adc8..0000000 --- a/src/main/org/openscience/cdk/qsar/model/data/register_999.R +++ /dev/null @@ -1,31 +0,0 @@ -setJavaFunctionConverter(lmFitConverter, function(x,...){inherits(x,'lm')}, - description='lm fit object to Java', - fromJava=F) -setJavaFunctionConverter(lmPredictConverter, function(x,...){inherits(x,'lmregprediction')}, - description='lm predict object to Java', - fromJava=F) -setJavaFunctionConverter(lmSummaryConverter, function(x,...){inherits(x,'summary.lm')}, - description='lm summary object to Java', - fromJava=F) -setJavaFunctionConverter(cnnClassFitConverter, function(x,...){inherits(x,'nnet.formula')}, - description='cnn (nnet) classification fit object to Java', - fromJava=F) -setJavaFunctionConverter(cnnSummaryConverter, function(x,...){inherits(x,'summary.nnet')}, - description='cnn (nnet) summary object to Java', - fromJava=F) 
-setJavaFunctionConverter(cnnFitConverter, function(x,...){inherits(x,'nnet')}, - description='cnn (nnet) fit object to Java', - fromJava=F) -setJavaFunctionConverter(cnnClassPredictConverter, function(x,...){inherits(x,'cnnclsprediction')}, - description='cnn (nnet) classification predict object to Java', - fromJava=F) -setJavaFunctionConverter(cnnPredictConverter, function(x,...){inherits(x,'cnnregprediction')}, - description='cnn (nnet) predict object to Java', - fromJava=F) -setJavaFunctionConverter(plsFitConverter, function(x,...){inherits(x,'mvr')}, - description='pls/pcr fit object to Java', - fromJava=F) -setJavaFunctionConverter(plsPredictConverter, function(x,...){inherits(x,'plsregressionprediction')}, - description='pls/pcr predict object to Java', - fromJava=F) - diff --git a/src/main/org/openscience/chemojava/libio/openbabel/OpenBabelConvert.java b/src/main/org/openscience/chemojava/libio/openbabel/OpenBabelConvert.java deleted file mode 100644 index acdbfa5..0000000 --- a/src/main/org/openscience/chemojava/libio/openbabel/OpenBabelConvert.java +++ /dev/null @@ -1,186 +0,0 @@ -/* $RCSfile$ - * $Author: egonw $ - * $Date: 2008-03-22 14:36:05 +0100 (Sat, 22 Mar 2008) $ - * $Revision: 10494 $ - * - * Copyright (C) 2004-2007 Miguel Rojas - * - * Contact: cdk-devel@lists.sourceforge.net - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - */ -package org.openscience.chemojava.libio.openbabel; - -import java.io.BufferedReader; -import java.io.File; -import java.io.InputStreamReader; - -import org.openscience.cdk.tools.LoggingTool; - -/** - * File writer thats convert input files with OpenBabel. - * It has the option to obtain the file as ChemFile. - * First, it's necessary that you install correct Openbabel. - * It was tested with OpenBabel-1.100.2. More information in - * http://openbabel.sourceforge.net/babel.shtml. - * - *

Currently supported types: - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - *
alc(Alchemy file)prep(Amber PREP file)bs(Ball & Stick file)
caccrt(Cacao Cartesian file)cacint(Cacao Internal file)cache(CAChe MolStruct file)
ccc(CCC file)c3d1(Chem3D Cartesian 1 file)c3d2(Chem3D Cartesian 2 file)
ct(ChemDraw Connection Table file)cht(Chemtool file)cml(Chemical Markup Language file)
crk2d(CRK2D: Chemical Resource Kit 2D file)crk3d(CRK3D: Chemical Resource Kit 3D file)cssr(CSD CSSR file)
box(Dock 3.5 Box file)dmol(DMol3 Coordinates file)feat(Feature file)
fh(Fenske-Hall Z-Matrix file)gam(GAMESS Output file)gamin(GAMESS Input file)
gamout(GAMESS Output file)gcart(Gaussian Cartesian file)gau(Gaussian Input file)
gpr(Ghemical Project file)mm1gp(Ghemical MM file)qm1gp(Ghemical QM file)
gr96a(GROMOS96(A)file)gr96n(GROMOS96(nm)file)hin(HyperChem HIN file)
jout(Jaguar Output file)bin(OpenEye Binary file)mmd(MacroModel file)
mmod(MacroModel file)out(MacroModel file)dat(MacroModel file)
car(MSI Biosym/Insight II CAR file)sdf(MDL Isis SDF file)sd(MDL Isis SDF file)
mdl(MDL Molfile file)mol(MDL Molfile file)mopcrt(MOPAC Cartesian file
mopout(MOPAC Output file)mmads(MMADS file)mpqc(MPQC file)
bgf(MSI BGF file)nwo(NWChem Output filepdb(PDB file)
ent(PDB file)pqs(PQS file)qcout(Q-Chem Output file)
res(ShelX file)ins(ShelX file)smi(SMILES file)
fix(SMILES Fix file)report(Report file)pov(POV-Ray Output file)
mol2(Sybyl Mol2 file)unixyz(UniChem XYZ file)vmol(ViewMol file)
xed(XED file)xyz(XYZ file)zin(ZINDO Input file)
- * - * @author Miguel Rojas - * @cdk.module libio-openbabel - * @cdk.svnrev $Revision: 10494 $ - */ -public class OpenBabelConvert { - - /* PATH to babel */ - private String pathToBabel = null; - - private final static LoggingTool logger = new LoggingTool(OpenBabelConvert.class); - - /** - * Constructor of the ConvertOpenBabel - * - * @param path String which set the path of the progam OpenBabel. It will necessary - * for windows systems. - */ - public OpenBabelConvert() throws Exception { - this(null); - } - - /** - * Constructor of the ConvertOpenBabel - * - * @param path String which set the path of the progam OpenBabel. It will necessary - * for windows systems. - */ - public OpenBabelConvert(String path) throws Exception { - pathToBabel = getPath(path); - } - - /** - * Call the babel program. - */ - public void convert(File inputFile, String inputType, - File outputFile, String outputType, - String addOptions) { - try { - String[] args = new String[6]; - args[0] = pathToBabel; - args[1] = "-i" + inputType; - args[2] = inputFile.getCanonicalPath(); - args[3] = "-o" + outputType; - args[4] = outputFile.getCanonicalPath(); - args[5] = addOptions == null ? "" : addOptions; - - Process p = Runtime.getRuntime().exec(args); - BufferedReader r = new BufferedReader( - new InputStreamReader(p.getInputStream())); - String x; - while ((x = r.readLine()) != null) { - System.out.println(x); - } - r.close(); - p.waitFor(); - } catch (Exception e) { - System.err.println(e); - System.err.println("There is some problem with babel. Check: "); - System.err.println("PATH: " + pathToBabel); - } - } - - /** - * Searches the babel executable from a set up reasonable picks. 
- * - * @param suggestedPath - * @return - */ - private static String getPath(String suggestedPath) throws Exception { - if (suggestedPath != null) { - File suggestion = new File(suggestedPath); - if (suggestion.exists()) { - return suggestedPath; - } - } - String[] possibilities = { - "C:/Programme/openbabel-2.0.0awins/babel.exe", // likely?? - "/usr/bin/babel", // most POSIX systems - "/usr/local/bin/babel" // private installation - }; - File path = null; - for (int i=0; i - * 2009 Egon Willighagen - * - * Contact: cdk-devel@lists.sourceforge.net - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - */ -package org.openscience.chemojava.libio.weka; - -import java.io.BufferedReader; -import java.io.FileReader; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.Reader; -import java.io.StringReader; -import java.util.Vector; - -import weka.classifiers.Classifier; -import weka.core.Instance; -import weka.core.Instances; - - -/** - *

Weka class is a library which use the program WEKA: a Machine Learning Project.

- * To inizalizate weka class is typically done like:
- *  Classifier lr = new LinearRegression(); 
- *  weka.setDataset("/some/where/dataTraining.arff", lr);
- *  String testARFF = "/some/where/dataTest.arff";
- *  double[] result = weka.getPrediction(testARFF);
- *  
- * You have also the possibility to introduce directly values, done like: - *
- *  Classifier lr = new LinearRegression();
- *   String[] attrib = {"X1","X2","X3","Y" };
- *   int[] typAttrib = {Weka.NUMERIC,Weka.NUMERIC,Weka.NUMERIC,Weka.NUMERIC, };
- *  weka.setDataset(attrib, typAttrib, y, x, lr);
- *  double[] resultY = weka.getPrediction(testX);
- *  
- * - * @author Miguel Rojas - * @cdk.created 2006-05-23 - * @cdk.license GPL - * @cdk.module qsarweka - * @cdk.svnrev $Revision: 9162 $ - * @cdk.keyword weka, machine learning - * @cdk.depends weka.jar - */ -public class Weka { - - public static final int NUMERIC = 0; - public static final int NOMINAL = 1; - public static final int REGULAR = 2; - public static final int DATA = 3; - public static final int STRING = 4; - - /** type of classifier*/ - private Classifier classifier = null; - /** Class for handling an ordered set of weighted instances*/ - private Instances instances; - /**String with the attribut class*/ - private String[] classAttrib = null; - - /** - * Constructor of the Weka - */ - public Weka() { - } - - /** - * Set the file format arff to analize which contains the dataset and the type of classifier. - * - * - * @param pathTable Path of the dataset file format arff to train - * @param classifier Type of Classifier - * @return The Instances value - * @throws Exception - */ - public Instances setDataset(InputStream table, Object object) throws Exception{ - if (object instanceof Classifier) this.classifier = (Classifier) object; - // OK, a bit dirty, but we cannot be sure we can rewind, so we cache - // the content here locally as String - String tableContent = streamAsString(table); - BufferedReader insr = new BufferedReader( - new StringReader(tableContent) - ); - this.classAttrib = extractClass(insr); - return createInstance(new StringReader(tableContent)); - } - - private String streamAsString(InputStream input) { - StringBuffer out = new StringBuffer(); - try { - byte[] b = new byte[4096]; - for (int n; (n = input.read(b)) != -1;) { - out.append(new String(b, 0, n)); - } - } catch (IOException e) { - e.printStackTrace(); - } - return out.toString(); - } - - private Instances createInstance(Reader insr) throws Exception{ - instances = new Instances(insr); - instances.setClassIndex(instances.numAttributes() - 1); - if (classifier != null) 
classifier.buildClassifier(instances); - return instances; - } - /** - * Extract the class name attribute manually from the file - * - * @param input The BufferedReader - * @return Array with the class attributes - */ - private String[] extractClass(BufferedReader input) { - Vector attribV = new Vector(); - String[] classAttrib = null; - String line; - try { - while ((line = input.readLine()) != null) { - if(line.startsWith("@attribute class {")){ - int strlen = line.length(); - String line_ = null; - out: - for (int i = 0; i < strlen; i++){ - switch(line.charAt(i)){ - case '{': - line_ = line.substring(i); - break out; - } - } - StringBuffer edited = new StringBuffer(); - strlen = line_.length(); - edited = new StringBuffer(); - for (int i = 0; i < strlen; i++){ - switch(line_.charAt(i)){ - case '"': - break; - case ',': - attribV.add(edited.toString()); - edited = new StringBuffer(); - break; - case '{': - break; - case '}': - attribV.add(edited.toString()); - break; - default: - edited.append(line_.charAt(i)); - } - } - - } - } - if(attribV.size() > 0){ - classAttrib = new String[attribV.size()]; - attribV.copyInto(classAttrib); - } - } catch (IOException e) { - e.printStackTrace(); - } - return classAttrib; - } - /** - * - * Set the array which contains the dataset and the type of classifier. This method - * will be used for classifier which work with numerical values. - * - * @param attrib String with the attribut names - * @param typAttrib Attribute type: NUMERICAL or NOMINAL. - * @param y An array containing the dependent variable. It is possible numeric or string. - * @param x An array of independent variables. 
The observations should be in the rows - * and the variables should be in the columns - * @param classifier Type of Classifier - * @return The Instances value - * @throws Exception - */ - public Instances setDataset(String[] attrib, int[] typAttrib, Object[]y, Object[][] x, Classifier classifier) throws Exception{ - return setDataset(attrib, typAttrib ,null,y,x,classifier); - } - /** - * - * Set the array which contains the dataset and the type of classifier.This method - * will be used for classifier which work with String values. - * - * @param attrib String with the attribut names. - * @param typAttrib Attribute type: NUMERICAL or NOMINAL. - * @param classAttrib String with the attribut class. - * @param y An array containing the dependent variable. It is possible numeric or string. - * @param x An array of independent variables. The observations should be in the rows - * and the variables should be in the columns - * @param classifier Type of classifier - * @return The Instances value - * @throws Exception - */ - public Instances setDataset(String[] attrib, int[] typAttrib, String[] classAttrib, Object[]y, Object[][] x, Object object) throws Exception{ - if (object instanceof Classifier) this.classifier = (Classifier) object; - this.classAttrib = classAttrib; - Reader reader = createAttributes(attrib,typAttrib,classAttrib,y,x); - instances = new Instances(reader); - instances.setClassIndex(instances.numAttributes() - 1); - if (object instanceof Classifier) classifier.buildClassifier(instances); - return instances; - } - /** - * Return of the predicted value - * - * @param value An array of independent variables which contians the values with whose to test - * @return Result of the prediction - * @throws Exception - */ - public Object[] getPrediction(Object[][] value) throws Exception{ - Object[] object = new Object[value.length]; - for(int j = 0 ; j < value.length ; j++){ - Instance instance = new Instance(instances.numAttributes()); - 
instance.setDataset(instances); - for(int i = 0 ; i < value[0].length ; i++){ - if(instance.attribute(i).isNumeric()) - instance.setValue(i, ((Double)value[j][i]).doubleValue()); - else if(instance.attribute(i).isString()) - instance.setValue(i, ""+value[j][i]); - } - instance.setValue(value[0].length, 0.0); - double result = classifier.classifyInstance(instance); - if(classAttrib != null){ - object[j] = classAttrib[(new Double(result)).intValue()]; - } - else - object[j] = new Double(result); - } - return object; - } - /** - * Return of the predicted value. The file is found into src. - * - * @param pathARFF path of the file format arff which contains the values with whose to test. - * @return Result of the prediction. - * @throws Exception - */ - public Object[] getPredictionCDK(String pathARFF) throws Exception{ - InputStream ins = this.getClass().getClassLoader().getResourceAsStream(pathARFF); - Reader insr = new InputStreamReader(ins); - return createObjects(new BufferedReader(insr)); - } - - /** - * Return of the predicted value. - * - * @param pathARFF path of the file format arff which contains the values with whose to test. - * @return Result of the prediction. - * @throws Exception - */ - public Object[] getPrediction(String pathARFF) throws Exception{ - BufferedReader br = new BufferedReader(new FileReader(pathARFF)); - - return createObjects(br); - } - /** - * initiate the object. 
- * - * @param br The BufferedReader - * @return An Array of objects: classAttrib and Double - * @throws Exception - */ - private Object[] createObjects(BufferedReader br) throws Exception{ - Instances test = new Instances(br); - Object[] object = new Object[test.numInstances()]; - for(int i = 0 ; i < test.numInstances(); i++){ - double result = classifier.classifyInstance(test.instance(i)); - if(classAttrib != null) - object[i] = classAttrib[(new Double(result)).intValue()]; - else - object[i] = new Double(result); - } - return object; - } - /** - * create a Reader with necessary attributes to initiate a Instances for weka. - * - * @param attrib String with the attribut class - * @param typAttrib Attribute type: NOMINAL or NUMERIC. - * @param y An array containing the independent variable. - * @param x An array of dependent variables. - * @return The Reader containing the attributes - * @throws IOException - */ - private Reader createAttributes(String[] attrib, int[] typAttrib, String[] classAttrib, Object[]y, Object[][] x) throws IOException{ - String string ="@relation table1 \n"; - for(int i = 0; i < attrib.length ; i++){ - string += ("@attribute "+attrib[i]); - if(typAttrib[i] == NUMERIC) - string += " numeric \n"; - else if(typAttrib[i] == NOMINAL) - string += " string \n"; - else if(typAttrib[i] == DATA) - string += " data \n"; - else if(typAttrib[i] == REGULAR) - string += " regular \n"; - else if(typAttrib[i] == STRING) - string += " string \n"; - } - - if(classAttrib != null){ - string += "@attribute class "; - string += "{"; - for(int i = 0; i < classAttrib.length ; i++){ - string += (classAttrib[i]); - if(i != classAttrib.length -1) - string += ","; - } - string += "}\n"; - } - - string += ("@data "); - if(x != null && y != null){ - for(int j = 0 ; j < x.length; j++){ - for(int i = 0 ; i < x[0].length ; i++){ - if (x[j][i] instanceof Double) { - if (((Double)x[j][i]).isNaN()) { - string += "?,"; - continue; - } - } - string += x[j][i]+","; - } - string 
+= y[j]+ ", \n"; - } - } - if(x != null && y == null){ - for(int j = 0 ; j < x.length; j++){ - for(int i = 0 ; i < x[0].length ; i++){ - if (x[j][i] instanceof Double) { - if (((Double)x[j][i]).isNaN()) { - string += "?,"; - continue; - } - } - string += x[j][i]+","; - } - string += "\n"; - } - } - return (Reader)new StringReader(string); - } - - /** - * get the value which belongs this position in the classification - * @param result Position in the classification - * @return Real value - */ - /*private double[] getValue(double[] result) { - Instance instance = instances.instance(0); - instance.numClasses(); - return null; - }*/ -} diff --git a/src/main/org/openscience/chemojava/qsar/model/weka/DensityBasedClustererModel.java b/src/main/org/openscience/chemojava/qsar/model/weka/DensityBasedClustererModel.java deleted file mode 100644 index 37052c5..0000000 --- a/src/main/org/openscience/chemojava/qsar/model/weka/DensityBasedClustererModel.java +++ /dev/null @@ -1,335 +0,0 @@ -/* $RCSfile$ - * $Author: mariobaseda $ - * $Date: 2007-01-03 17:57:14 +0100 (We, 03 Jan 2007) $ - * $Revision: 5602 $ - * - * Copyright (C) 2007 by Mario Baseda - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * All we ask is that proper credit is given for our work, which includes - * - but is not limited to - adding the above copyright notice to the beginning - * of your source code files, and to any copyright notice that you may distribute - * with programs based on this work. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -package org.openscience.chemojava.qsar.model.weka; - -import weka.core.Instance; -import weka.core.Instances; -import weka.clusterers.DensityBasedClusterer; -import weka.clusterers.EM; - -import org.openscience.chemojava.libio.weka.Weka; -import org.openscience.cdk.qsar.model.QSARModelException; - -import java.io.BufferedReader; -import java.io.FileReader; - -/** - * Abstract clustering model that produces an estimate of the membership in each cluster using the Weka library. - * The use of this class is shown in the following code snippet - *
- * try {
- *     DensityBasedClusterer dbc = new EM();
- *     dbc.setData(attrib, typAttrib, classAttrib, data);
- *     dbc.build();
- *     
- * } catch (Exception e) {
- *     System.out.println(e.toString());
- * }
- * 
- * - * Other option is set the data from a file format arff. - *
- * DensityBasedClusterer dbc = new EM();
- * dbc.setData("/some/where/dataTraining.arff");
- * 
- * - * @author Mario Baseda - * @cdk.require weka.jar - * @cdk.license GPL - * @cdk.module qsarweka - * @cdk.svnrev $Revision: 9162 $ - * @see Weka - * - * @cdk.keyword clusterers, DensityBasedClusterer - */ -public class DensityBasedClustererModel { - /** An instance containing the data which should be classifier as arff file.*/ - private static Instances data; - /**new instance of clusterer*/ - private DensityBasedClusterer dbc = new EM(); - private Weka weka = new Weka(); - /**An Array Object containing the independent variable*/ - private Object[][] newX = null; - /**A String specifying the path of the file, format arff, - * which contains the new independent values*/ - private String pathNewX = null; - - /** - * Constructor of the NaiveBayesModel object - */ - public DensityBasedClustererModel(){} - - /** - * Set the variable data to the arff file - * - * @param filename The path of the file, format arff - * @throws Exception if the parameters are of the wrong type for the given modeling function - * - */ - public void setData(String filename)throws Exception { - data = weka.setDataset( - this.getClass().getClassLoader().getResourceAsStream(filename), - null - ); - } - - /** - * Parses a given list of data to an arff file, and set the variable data on it. - * - * @param attrib A string array containing the attributs - * @param typAttrib Attribute type: NUMERICAL or NOMINAL. - * @param classAttrib String with a list of the attribut class. - * @param y An array containing the dependent variable (class value). - * @param x A 2D array containing the independent variable (for example: qsar results). - * @throws Exception if it is unable to parse the data - * - */ - public void setData(String[] attrib, int[] typAttrib, String[] classAttrib, Object[] y, Object[][] x) throws Exception{ - data = weka.setDataset(attrib,typAttrib,classAttrib,y,x,null); - } - - /** - * Builds the model. - * - * @throws Exception if errors occur in data types. 
See - * the corresponding method in subclasses of this class for further details. - */ - public void build() throws Exception { - try { -// build the model - dbc.buildClusterer(data); - } catch (Exception e) { - e.printStackTrace(); - } - return; - } - - /**Returns the prior probability of each cluster - * @return A double[] containing the prior probability of each cluster - * @throws Exception - */ - public double[] clusterPriors() throws Exception{ - return dbc.clusterPriors(); - } - - /** - * Specifies the new parameters as arff file. - * - * @param path A String specifying the path of the file, format arff, which contains - * the new values. - * @throws QSARModelException if the parameters are of the wrong type for the given modeling function - * - */ - public void setParameters(String path) throws QSARModelException { - this.pathNewX = path; - } - - /** - * Specifies the new parameters as 2D array object. - * - * @param newX A 2D Array Object containing the new values. - * @throws QSARModelException if the parameters are of the wrong type for the given modeling function - */ - public void setParameters(Object[][] newX) throws QSARModelException { - this.newX = newX; - } - - /** Returns the cluster probability distribution for an instance - * - * @return object An Object[][] containing the cluster probability distribution for an instance - * @throws Exception if distribution could not be computed successfully - */ - public Object[][] distributionForInstance() throws Exception{ - Object[][] object = null; - try{ - if(pathNewX != null){ - BufferedReader br = new BufferedReader(new FileReader(pathNewX)); - Instances test = new Instances(br); - object = new Object[test.numInstances()][]; - for(int i = 0 ; i < test.numInstances(); i++){ - double[] result = dbc.distributionForInstance(test.instance(i)); - object[i] = new Object[result.length]; - for (int z = 0; z < result.length; z++){ - object[i][z] = new Double(result[z]); - } - } - } - else if(newX != null){ - 
object = new Object[newX.length][]; - for(int j = 0 ; j < newX.length ; j++){ - Instance instance = new Instance(data.numAttributes()); - instance.setDataset(data); - for(int i = 0 ; i < newX[0].length ; i++){ - if(instance.attribute(i).isNumeric()) - instance.setValue(i, ((Double)newX[j][i]).doubleValue()); - else if(instance.attribute(i).isString()) - instance.setValue(i, ""+newX[j][i]); - } - instance.setValue(newX[0].length, 0.0); - double[] result = dbc.distributionForInstance(instance); - object[j] = new Object[result.length]; - for (int z = 0; z < result.length; z++){ - object[j][z] = new Double(result[z]); - } - } - } - } catch ( Exception e){ - e.printStackTrace(); - } - return object; - } - - /** Computes the density for a given instance - * - * @return object An Object[] containing the density - * @throws Exception if the density could not be computed successfully - */ - public Object[] logDensityForInstance() throws Exception{ - Object[] object = null; - try{ - if(pathNewX != null){ - BufferedReader br = new BufferedReader(new FileReader(pathNewX)); - Instances test = new Instances(br); - object = new Object[test.numInstances()]; - for(int i = 0 ; i < test.numInstances(); i++){ - double result = dbc.logDensityForInstance(test.instance(i)); - object[i] = new Double(result); - } - } - else if(newX != null){ - object = new Object[newX.length]; - for(int j = 0 ; j < newX.length ; j++){ - Instance instance = new Instance(data.numAttributes()); - instance.setDataset(data); - for(int i = 0 ; i < newX[0].length ; i++){ - if(instance.attribute(i).isNumeric()) - instance.setValue(i, ((Double)newX[j][i]).doubleValue()); - else if(instance.attribute(i).isString()) - instance.setValue(i, ""+newX[j][i]); - } - instance.setValue(newX[0].length, 0.0); - double result = dbc.logDensityForInstance(instance); - - - object[j] = new Double(result); - } - } - } catch ( Exception e){ - e.printStackTrace(); - } - return object; - } - - /** Computes the log of the conditional 
density (per cluster) for a given instance - * - * @return object An Object [][] containing the instance to compute the density for - * @throws Exception if the density could not be computed successfully - */ - public Object[][] logDensityPerClusterForInstance() throws Exception{ - Object[][] object = null; - try{ - if(pathNewX != null){ - BufferedReader br = new BufferedReader(new FileReader(pathNewX)); - Instances test = new Instances(br); - object = new Object[test.numInstances()][]; - for(int i = 0 ; i < test.numInstances(); i++){ - double[] result = dbc.logDensityPerClusterForInstance(test.instance(i)); - object[i] = new Object[result.length]; - for (int z = 0; z < result.length; z++){ - object[i][z] = new Double(result[z]); - } - } - } - else if(newX != null){ - object = new Object[newX.length][]; - for(int j = 0 ; j < newX.length ; j++){ - Instance instance = new Instance(data.numAttributes()); - instance.setDataset(data); - for(int i = 0 ; i < newX[0].length ; i++){ - if(instance.attribute(i).isNumeric()) - instance.setValue(i, ((Double)newX[j][i]).doubleValue()); - else if(instance.attribute(i).isString()) - instance.setValue(i, ""+newX[j][i]); - } - instance.setValue(newX[0].length, 0.0); - double[] result = dbc.logDensityPerClusterForInstance(instance); - object[j] = new Object[result.length]; - for (int z = 0; z < result.length; z++){ - object[j][z] = new Double(result[z]); - } - } - } - } catch ( Exception e){ - e.printStackTrace(); - } - return object; - } - - /** Returns the logs of the joint densities for a given instance - * - * @return object An Object[][] containing the array of values - * @throws Exception if values could not be computed - */ - public Object[][] logJointDensitiesForInstance() throws Exception{ - Object[][] object = null; - try{ - if(pathNewX != null){ - BufferedReader br = new BufferedReader(new FileReader(pathNewX)); - Instances test = new Instances(br); - object = new Object[test.numInstances()][]; - for(int i = 0 ; i < 
test.numInstances(); i++){ - double[] result = dbc.logJointDensitiesForInstance(test.instance(i)); - object[i] = new Object[result.length]; - for (int z = 0; z < result.length; z++){ - object[i][z] = new Double(result[z]); - } - } - } - else if(newX != null){ - object = new Object[newX.length][]; - for(int j = 0 ; j < newX.length ; j++){ - Instance instance = new Instance(data.numAttributes()); - instance.setDataset(data); - for(int i = 0 ; i < newX[0].length ; i++){ - if(instance.attribute(i).isNumeric()) - instance.setValue(i, ((Double)newX[j][i]).doubleValue()); - else if(instance.attribute(i).isString()) - instance.setValue(i, ""+newX[j][i]); - } - instance.setValue(newX[0].length, 0.0); - double[] result = dbc.logJointDensitiesForInstance(instance); - object[j] = new Object[result.length]; - for (int z = 0; z < result.length; z++){ - object[j][z] = new Double(result[z]); - } - } - } - } catch ( Exception e){ - e.printStackTrace(); - } - return object; - } -} diff --git a/src/main/org/openscience/chemojava/qsar/model/weka/EMCluster.java b/src/main/org/openscience/chemojava/qsar/model/weka/EMCluster.java deleted file mode 100644 index 5f19b88..0000000 --- a/src/main/org/openscience/chemojava/qsar/model/weka/EMCluster.java +++ /dev/null @@ -1,335 +0,0 @@ -/* $RCSfile$ - * $Author: mariobaseda $ - * $Date: 2007-01-03 17:57:14 +0100 (We, 03 Jan 2007) $ - * $Revision: 5602 $ - * - * Copyright (C) 2007 by Mario Baseda - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * All we ask is that proper credit is given for our work, which includes - * - but is not limited to - adding the above copyright notice to the beginning - * of your source code files, and to any copyright notice that you may distribute - * with programs based on this work. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -package org.openscience.chemojava.qsar.model.weka; - -import java.io.BufferedReader; -import java.io.FileReader; - -import org.openscience.chemojava.libio.weka.Weka; -import org.openscience.cdk.qsar.model.QSARModelException; - -import weka.clusterers.EM; -import weka.core.Instance; -import weka.core.Instances; - -/** - * EM assigns a probability distribution to each instance which indicates the probability of it - * belonging to each of the clusters by using Weka library. - * EM can decide how many clusters to create by cross validation, or you may specify apriori - * how many clusters to generate. - * - * The use of this class is shown in the following code snippet - *
- * try {
- *     EM em = new EM();
- *     em.setOptions(options);
- *     em.setData(attrib, typAttrib, classAttrib, data);
- *     em.build();
- *     
- * } catch (Exception e) {
- *     System.out.println(e.toString());
- * }
- * 
- * Other option is set the data from a file format arff. - *
- * EM em = new EM();
- * em.setData("/some/where/dataTraining.arff");
- * 
- *

Valid options are (according weka library):

- *

-V: Verbose

- *

-N: Specify the number of clusters to generate. If omitted, EM will use cross - * validation to select the number of clusters automatically

- *

-I: Terminate after this many iterations if EM has not converged.

- *

-S: Specify random number seed

- *

-M: Set the minimum allowable standard deviation for normal density calculation.

- * - * @author Mario Baseda - * @cdk.require weka.jar - * @cdk.license GPL - * @cdk.module qsarweka - * @cdk.svnrev $Revision: 9162 $ - * @see Weka - * - * @cdk.keyword clusterers - * @cdk.keyword EM - */ -public class EMCluster { - /** An instance containing the data which should be clustering as arff file.*/ - private static Instances data; - /**Array of strings containing the options*/ - private String[] options; - /**new instance of clusterer*/ - private EM em = new EM(); - Weka weka = new Weka(); - /**An Array Object containing the independent variable*/ - private Object[][] newX = null; - /**A String specifying the path of the file, format arff, - * which contains the new independent values*/ - private String pathNewX = null; - /** results of the density*/ - private Object[][] results = null; - - /** - * Constructor of the EMCluster object - */ - public EMCluster(){} - - /** - * Parses a given list of options. The parameters are determited from weka. - * - *

Valid options are (according weka library):

- *

-V: Verbose

- *

-N: Specify the number of clusters to generate. If omitted, EM will use cross - * validation to select the number of clusters automatically

- *

-I: Terminate after this many iterations if EM has not converged.

- *

-S: Specify random number seed

- *

-M: Set the minimum allowable standard deviation for normal density calculation.

- * - * @param options An Array of strings containing the options - * @throws Exception if the options are of the wrong type for the given modeling function - * - */ - public void setOptions(String[] options) throws Exception{ - this.options = options; - } - - /** - * Set the variable data to the arff file - * - * @param filename The path of the file, format arff - * @throws Exception if the parameters are of the wrong type for the given modeling function - * - */ - public void setData(String filename)throws Exception{ - data = weka.setDataset( - this.getClass().getClassLoader().getResourceAsStream(filename), - null - ); - } - - /** - * Parses a given list of data to an arff file, and set the variable data on it. - * - * @param attrib A string array containing the attributs - * @param typAttrib Attribute type: NUMERICAL or NOMINAL. - * @param classAttrib String with a list of the attribut classes. - * @param y An array containing the dependent variable (class value). - * @param x A 2D array containing the independent variable (for example: qsar results). - * @throws Exception if it is unable to parse the data - * - */ - public void setData(String[] attrib, int[] typAttrib, String[] classAttrib, Object[] y, Object[][] x) throws Exception{ - data = weka.setDataset(attrib,typAttrib,classAttrib,y,x,null); - } - - /** - * Builds the cluster. - * - * @throws Exception if errors occur in data types. See - * the corresponding method in subclasses of this class for further details. - */ - public void build() throws Exception { - try { - if(options != null) -// set the options - em.setOptions(options); -// build the clusterer - em.buildClusterer(data); - } catch (Exception e) { - e.printStackTrace(); - } - return; - } - - /** - * Specifies the new parameters as arff file. - * - * @param path A String specifying the path of the file, format arff, which contains - * the new values. 
- * @throws QSARModelException if the parameters are of the wrong type for the given modeling function - * - */ - public void setParameters(String path) throws QSARModelException { - this.pathNewX = path; - } - - /** - * Specifies the new parameters as 2D array object. - * - * @param newX A 2D Array Object containing the new values. - * @throws QSARModelException if the parameters are of the wrong type for the given modeling function - */ - public void setParameters(Object[][] newX) throws QSARModelException { - this.newX = newX; - } - - /** Computes the log of the conditional density (per cluster) for a given instance - * This function only returns meaningful results if the build - * method of this class has been called. - * - * @throws Exception if the density could not be computed successfully - */ - public void logDensityPerClusterForInstance() throws Exception{ - try{ - if(pathNewX != null){ - BufferedReader br = new BufferedReader(new FileReader(pathNewX)); - Instances test = new Instances(br); - results = new Object[test.numInstances()][]; - for(int i = 0 ; i < test.numInstances(); i++){ - double[] result = em.distributionForInstance(test.instance(i)); - results[i] = new Object[result.length]; - for (int z = 0; z < result.length; z++){ - results[i][z] = new Double(result[z]); - } - } - } - else if(newX != null){ - results = new Object[newX.length][]; - for(int j = 0 ; j < newX.length ; j++){ - Instance instance = new Instance(data.numAttributes()); - instance.setDataset(data); - for(int i = 0 ; i < newX[0].length ; i++){ - if(instance.attribute(i).isNumeric()) - instance.setValue(i, ((Double)newX[j][i]).doubleValue()); - else if(instance.attribute(i).isString()) - instance.setValue(i, ""+newX[j][i]); - } - instance.setValue(newX[0].length, 0.0); - double[] result = em.distributionForInstance(instance); - results[j] = new Object[result.length]; - for (int z = 0; z < result.length; z++){ - results[j][z] = new Double(result[z]); - } - } - } - } catch ( 
Exception e){ - e.printStackTrace(); - } - } - - /** - * Returns the log of the conditional density (per cluster) for a given instance. - * This function only returns meaningful results if the logDensityPerClusterForInstance - * method of this class has been called. - * @return An Object[][] containing the density - */ - public Object[][] getLogDensityPerClusterForInstance(){ - return results; - } - - /** - * Returns the number of clusters. - * - * @return the number of builded cluster as integer - * @throws Exception if number of clusters could not be returned successfully - */ - public int numberOfCluster() throws Exception{ - return em.numberOfClusters(); - } - - /** - * Returns the cluster priors. - * - * @return the prior probability for each cluster as double array - */ - public double[] clusterPriors() { - return em.getClusterPriors(); - } - - /** - * Return the normal distributions for the cluster models - * - * @return the normal distributions for the cluster models as double 3D array - */ - public double[][][] getClusterModelsNumericAtts(){ - return em.getClusterModelsNumericAtts(); - } - - /** - * Return the priors for the clusters - * - * @return the prior for the clusters as double array - */ - public double[] getClusterPriors() { - return em.getClusterPriors(); - } - - /** - * Get debug mode - * - * @return true or false - */ - public boolean getDebug() { - return em.getDebug(); - } - - /** - * Get the maximum number of iterations - * - * @return the number of iterations as integer - */ - public int getMaxIterations() { - return em.getMaxIterations(); - } - - /** - * Get the minimum allowable standard deviation. 
- * - * @return the minumum allowable standard deviation as double - */ - public double getMinStdDev() { - return em.getMinStdDev(); - } - - /** - * Get the number of clusters - * - * @return the number of clusters as integer - */ - public int getNumClusters() { - return em.getNumClusters(); - } - - /** - * Gets the current settings of EM - * - * @return an array of strings containing the options - - */ - public String[] getOptions() { - return em.getOptions(); - } - - /** - * Get the random number seed - * - * @return the seed as integer - - */ - public int getSeed() { - return em.getSeed(); - } -} diff --git a/src/main/org/openscience/chemojava/qsar/model/weka/FilterSupervisedResample.java b/src/main/org/openscience/chemojava/qsar/model/weka/FilterSupervisedResample.java deleted file mode 100644 index f0921d7..0000000 --- a/src/main/org/openscience/chemojava/qsar/model/weka/FilterSupervisedResample.java +++ /dev/null @@ -1,203 +0,0 @@ -/* $RCSfile$ - * $Author: mariobaseda $ - * $Date: 2007-01-03 17:57:14 +0100 (We, 03 Jan 2007) $ - * $Revision: 5602 $ - * - * Copyright (C) 2007 by Mario Baseda - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * All we ask is that proper credit is given for our work, which includes - * - but is not limited to - adding the above copyright notice to the beginning - * of your source code files, and to any copyright notice that you may distribute - * with programs based on this work. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -package org.openscience.chemojava.qsar.model.weka; - -import org.openscience.chemojava.libio.weka.Weka; - -import weka.core.Instance; -import weka.core.Instances; -import weka.filters.supervised.instance.Resample; - -/** - * Produces a random subsample of a dataset using sampling with - * replacement by using Weka library. The original dataset must fit entirely in memory. - * The number of instances in the generated dataset may be specified. - * The dataset must have a nominal class attribute. - * If not, use the unsupervised version. The filter can be made to - * maintain the class distribution in the subsample, or to bias the - * class distribution toward a uniform distribution. When used in batch - * mode, subsequent batches are not resampled - * - * The use of this class is shown in the following code snippet - *
- * try {
- *     FilterSupervisedResample filter = new FilterSupervisedResample();
- *     filter.setOptions(options);
- *     filter.setData(attrib, typAttrib, classAttrib, data);
- *     filter.build();
- *     
- * } catch (Exception e) {
- *     System.out.println(e.toString());
- * }
- * 
- * Other option is set the data from a file format arff. - *
- * FilterSupervisedResample filter = new FilterSupervisedResample();
- * filter.setData("/some/where/dataTraining.arff");
- * 
- *

Valid options are (according weka library):

- *

-S num: Specify the random number seed (default 1).

- *

-B num: Specify a bias towards uniform class distribution. - * 0 = distribution in input data, 1 = uniform class distribution (default 0).

- *

-Z percent: Specify the size of the output dataset, as a percentage of the input dataset (default 100).

- * - * - * - * @author Mario Baseda - * @cdk.require weka.jar - * @cdk.license GPL - * @cdk.module qsarweka - * @cdk.svnrev $Revision: 9162 $ - * @see Weka - * - * @cdk.keyword Filter - * @cdk.keyword SupervisedFilter, Resample - */ -public class FilterSupervisedResample { - /** An instance containing the data as arff file.*/ - private static Instances data; - /**Array of strings containing the options*/ - private String[] options; - private Resample filter = new Resample(); - Weka weka = new Weka(); - - /** - * Constructor of the FilterSupervisedResample object - */ - public FilterSupervisedResample(){} - - /** - * Parses a given list of options. The parameters are determited from weka. - * - *

Valid options are (according weka library):

- *

-S num: Specify the random number seed (default 1).

- *

-B num: Specify a bias towards uniform class distribution. - * 0 = distribution in input data, 1 = uniform class distribution (default 0).

- *

-Z percent: Specify the size of the output dataset, as a percentage of the input dataset (default 100).

- * - * @param options An Array of strings containing the options - * @throws Exception if the options are of the wrong type for the given modeling function - * - */ - public void setOptions(String[] options) throws Exception{ - this.options = options; - } - /** - * Set the variable data to the arff file - * - * @param filename The path of the file, format arff - * @throws Exception if the parameters are of the wrong type - * - */ - public void setData(String filename)throws Exception{ - data = weka.setDataset( - this.getClass().getClassLoader().getResourceAsStream(filename), - null - ); - } - - /** - * Parses a given list of data to an arff file, and set the variable data on it. - * - * @param attrib A string array containing the attributs - * @param typAttrib Attribute type: NUMERICAL or NOMINAL. - * @param classAttrib String with a list of the attribut classes. - * @param y An array containing the dependent variable (class value). - * @param x A 2D array containing the independent variable (for example: qsar results). - * @throws Exception if it is unable to parse the data - * - */ - public void setData(String[] attrib, int[] typAttrib, String[] classAttrib, Object[] y, Object[][] x) throws Exception{ - data = weka.setDataset(attrib,typAttrib,classAttrib,y,x,null); - } - - /** - * The execute method for the supervised resample filter - * - * @throws Exception if errors occur in data types. See - * the corresponding method in subclasses of this class for further details. 
- */ - public void build() throws Exception { - try { - if(options != null) -// set the options - filter.setOptions(options); -// setInputFormat - filter.setInputFormat(data); - for (int i = 0; i < data.numInstances(); i++) { - filter.input(data.instance(i)); - } -// Signify that this batch of input to the filter is finished - filter.batchFinished(); - Instances newData = filter.getOutputFormat(); - Instance processed; - while ((processed = filter.output()) != null) { - newData.add(processed); - } - } catch (Exception exception){ - exception.printStackTrace(); - } - } - - /** - * Returns the bias towards a uniform class. A value of 0 leaves - * the class distribution as-is, a value of 1 ensures the class - * distributions are uniform in the output data. - * - * @return the current bias as double - */ - public double getBiasToUniformClass(){ - return filter.getBiasToUniformClass(); - } - - /** - * Returns the current settings of the filter. - * - * @return an array of strings containing the options - */ - public String[] getOptions(){ - return filter.getOptions(); - } - - /** - * Returns the random number seed. - * - * @return the random number seed as integer - */ - public int getRandomSeed(){ - return filter.getRandomSeed(); - } - - /** - * Returns the subsample size as a percentage of the original set. 
- * - * @return the subsample size as double - */ - public double getSampleSizePercent(){ - return filter.getSampleSizePercent(); - } - -} diff --git a/src/main/org/openscience/chemojava/qsar/model/weka/FilterUnSupervisedRandomize.java b/src/main/org/openscience/chemojava/qsar/model/weka/FilterUnSupervisedRandomize.java deleted file mode 100644 index 8a71eb5..0000000 --- a/src/main/org/openscience/chemojava/qsar/model/weka/FilterUnSupervisedRandomize.java +++ /dev/null @@ -1,168 +0,0 @@ -/* $RCSfile$ - * $Author: mariobaseda $ - * $Date: 2007-01-03 17:57:14 +0100 (We, 03 Jan 2007) $ - * $Revision: 5602 $ - * - * Copyright (C) 2007 by Mario Baseda - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * All we ask is that proper credit is given for our work, which includes - * - but is not limited to - adding the above copyright notice to the beginning - * of your source code files, and to any copyright notice that you may distribute - * with programs based on this work. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -package org.openscience.chemojava.qsar.model.weka; - -import org.openscience.chemojava.libio.weka.Weka; - -import weka.core.Instance; -import weka.core.Instances; -import weka.filters.unsupervised.instance.Randomize; - -/** - * This filter randomly shuffles the order of instances passed through it. 
- * The random number generator is reset with the seed value whenever setInputFormat() is called. - * - * The use of this class is shown in the following code snippet - *
- * try {
- *     FilterUnSupervisedRandomize filter = new FilterUnSupervisedRandomize();
- *     filter.setOptions(options);
- *     filter.setData(attrib, typAttrib, classAttrib, data);
- *     filter.build();
- *     
- * } catch (Exception e) {
- *     System.out.println(e.toString());
- * }
- * 
- * Other option is set the data from a file format arff. - *
- * FilterUnSupervisedRandomize filter = new FilterUnSupervisedRandomize();
- * filter.setData("/some/where/dataTraining.arff");
- * 
- *

Valid options are (according weka library):

- *

-S num: Specify the random number seed (default 42).

- * - * @author Mario Baseda - * @cdk.require weka.jar - * @cdk.license GPL - * @cdk.module qsarweka - * @cdk.svnrev $Revision: 9162 $ - * @see Weka - * - * @cdk.keyword Filter - * @cdk.keyword UnSupervisedFilter, Randomize - */ -public class FilterUnSupervisedRandomize { - /** An instance containing the data as arff file.*/ - private static Instances data; - /**Array of strings containing the options*/ - private String[] options; - private Randomize filter = new Randomize(); - Weka weka = new Weka(); - - /** - * Constructor of the FilterUnSupervisedRandomize object - */ - public FilterUnSupervisedRandomize(){} - - /** - * Parses a given list of options. The parameters are determited from weka. - * - *

Valid options are (according weka library):

- *

-S num: Specify the random number seed (default 42).

- * - * @param options An Array of strings containing the options - * @throws Exception if the options are of the wrong type for the given modeling function - * - */ - public void setOptions(String[] options) throws Exception{ - this.options = options; - } - /** - * Set the variable data to the arff file - * - * @param filename The path of the file, format arff - * @throws Exception if the parameters are of the wrong type for the given modeling function - * - */ - public void setData(String filename)throws Exception{ - data = weka.setDataset( - this.getClass().getClassLoader().getResourceAsStream(filename), - null - ); - } - - /** - * Parses a given list of data to an arff file, and set the variable data on it. - * - * @param attrib A string array containing the attributs - * @param typAttrib Attribute type: NUMERICAL or NOMINAL. - * @param classAttrib String with a list of the attribut classes. - * @param y An array containing the dependent variable (class value). - * @param x A 2D array containing the independent variable (for example: qsar results). - * @throws Exception if it is unable to parse the data - * - */ - public void setData(String[] attrib, int[] typAttrib, String[] classAttrib, Object[] y, Object[][] x) throws Exception{ - data = weka.setDataset(attrib,typAttrib,classAttrib,y,x,null); - } - - /** - * The execute method for the unsupervised randomize filter - * - * @throws Exception if errors occur in data types. See - * the corresponding method in subclasses of this class for further details. 
- */ - public void build() throws Exception { - try { - if(options != null) -// set the options - filter.setOptions(options); -// setInputFormat - filter.setInputFormat(data); - for (int i = 0; i < data.numInstances(); i++) { - filter.input(data.instance(i)); - } -// Signify that this batch of input to the filter is finished - filter.batchFinished(); - Instances newData = filter.getOutputFormat(); - Instance processed; - while ((processed = filter.output()) != null) { - newData.add(processed); - } - } catch (Exception exception){ - exception.printStackTrace(); - } - } - - /** - * Returns the current settings of the filter. - * - * @return an array of strings containing the options - */ - public String[] getOptions(){ - return filter.getOptions(); - } - - /** - * Returns the random number seed. - * - * @return the random number seed as integer - */ - public int getRandomSeed(){ - return filter.getRandomSeed(); - } -} diff --git a/src/main/org/openscience/chemojava/qsar/model/weka/FilterUnSupervisedResample.java b/src/main/org/openscience/chemojava/qsar/model/weka/FilterUnSupervisedResample.java deleted file mode 100644 index fdcfc67..0000000 --- a/src/main/org/openscience/chemojava/qsar/model/weka/FilterUnSupervisedResample.java +++ /dev/null @@ -1,185 +0,0 @@ -/* $RCSfile$ - * $Author: mariobaseda $ - * $Date: 2007-01-03 17:57:14 +0100 (We, 03 Jan 2007) $ - * $Revision: 5602 $ - * - * Copyright (C) 2007 by Mario Baseda - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * All we ask is that proper credit is given for our work, which includes - * - but is not limited to - adding the above copyright notice to the beginning - * of your source code files, and to any copyright notice that you may distribute - * with programs based on this work. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -package org.openscience.chemojava.qsar.model.weka; - -import org.openscience.chemojava.libio.weka.Weka; - -import weka.core.Instance; -import weka.core.Instances; -import weka.filters.unsupervised.instance.Resample; - -/** - * Produces a random subsample of a dataset using sampling with replacement. - * The original dataset must fit entirely in memory. The number of instances - * in the generated dataset may be specified. When used in batch mode, subsequent - * batches are not resampled - * - * The use of this class is shown in the following code snippet - *
- * try {
- *     FilterUnSupervisedResample filter = new FilterUnSupervisedResample();
- *     filter.setOptions(options);
- *     filter.setData(attrib, typAttrib, classAttrib, data);
- *     filter.build();
- *     
- * } catch (Exception e) {
- *     System.out.println(e.toString());
- * }
- * 
- * Other option is set the data from a file format arff. - *
- * FilterUnSupervisedResample filter = new FilterUnSupervisedResample();
- * filter.setData("/some/where/dataTraining.arff");
- * 
- *

Valid options are (according weka library):

- *

-S num: Specify the random number seed (default 1).

- *

-Z percent: Specify the size of the output dataset, as a percentage of the input dataset (default 100).

- * - * - * - * @author Mario Baseda - * @cdk.require weka.jar - * @cdk.license GPL - * @cdk.module qsarweka - * @cdk.svnrev $Revision: 9162 $ - * @see Weka - * - * @cdk.keyword Filter - * @cdk.keyword UnSupervisedFilter, Resample - */ -public class FilterUnSupervisedResample { - /** An instance containing the data as arff file.*/ - private static Instances data; - /**Array of strings containing the options*/ - private String[] options; - private Resample filter = new Resample(); - Weka weka = new Weka(); - - /** - * Constructor of the FilterUnSupervisedResample object - */ - public FilterUnSupervisedResample(){} - - /** - * Parses a given list of options. The parameters are determited from weka. - * - *

Valid options are (according weka library):

- *

-S num: Specify the random number seed (default 1).

- *

-B num: Specify a bias towards uniform class distribution. - * 0 = distribution in input data, 1 = uniform class distribution (default 0).

- *

-Z percent: Specify the size of the output dataset, as a percentage of the input dataset (default 100).

- * - * @param options An Array of strings containing the options - * @throws Exception if the options are of the wrong type for the given modeling function - * - */ - public void setOptions(String[] options) throws Exception{ - this.options = options; - } - /** - * Set the variable data to the arff file - * - * @param filename The path of the file, format arff - * @throws Exception if the parameters are of the wrong type for the given modeling function - * - */ - public void setData(String filename)throws Exception{ - data = weka.setDataset( - this.getClass().getClassLoader().getResourceAsStream(filename), - null - ); - } - - /** - * Parses a given list of data to an arff file, and set the variable data on it. - * - * @param attrib A string array containing the attributs - * @param typAttrib Attribute type: NUMERICAL or NOMINAL. - * @param classAttrib String with a list of the attribut classes. - * @param y An array containing the dependent variable (class value). - * @param x A 2D array containing the independent variable (for example: qsar results). - * @throws Exception if it is unable to parse the data - * - */ - public void setData(String[] attrib, int[] typAttrib, String[] classAttrib, Object[] y, Object[][] x) throws Exception{ - data = weka.setDataset(attrib,typAttrib,classAttrib,y,x,null); - } - - /** - * The execute method for the unsupervised resample filter - * - * @throws Exception if errors occur in data types. See - * the corresponding method in subclasses of this class for further details. 
- */ - public void build() throws Exception { - try { - if(options != null) -// set the options - filter.setOptions(options); -// setInputFormat - filter.setInputFormat(data); - for (int i = 0; i < data.numInstances(); i++) { - filter.input(data.instance(i)); - } -// Signify that this batch of input to the filter is finished - filter.batchFinished(); - Instances newData = filter.getOutputFormat(); - Instance processed; - while ((processed = filter.output()) != null) { - newData.add(processed); - } - } catch (Exception exception){ - exception.printStackTrace(); - } - } - - /** - * Returns the current settings of the filter. - * - * @return an array of strings containing the options - */ - public String[] getOptions(){ - return filter.getOptions(); - } - - /** - * Returns the random number seed. - * - * @return the random number seed as integer - */ - public int getRandomSeed(){ - return filter.getRandomSeed(); - } - - /** - * Returns the subsample size as a percentage of the original set. - * - * @return the subsample size as double - */ - public double getSampleSizePercent(){ - return filter.getSampleSizePercent(); - } -} diff --git a/src/main/org/openscience/chemojava/qsar/model/weka/IWekaModel.java b/src/main/org/openscience/chemojava/qsar/model/weka/IWekaModel.java deleted file mode 100644 index d5a7224..0000000 --- a/src/main/org/openscience/chemojava/qsar/model/weka/IWekaModel.java +++ /dev/null @@ -1,93 +0,0 @@ -/* $Revision: 6228 $ $Author: egonw $ $Date: 2006-05-11 18:34:42 +0200 (Thu, 11 May 2006) $ - * - * Copyright (C) 2006-2007 Miguel Rojas - * - * Contact: cdk-devel@lists.sourceforge.net - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - */ -package org.openscience.chemojava.qsar.model.weka; - -import org.openscience.cdk.qsar.model.IModel; -import org.openscience.cdk.qsar.model.QSARModelException; - -/** Base class for modeling classes that use weka methods as the backend. - * - * This cannot be directly instantiated as its sole function is - * to initialize the weka algorithms. - * Any class that builds models using weka algorithms should be a subclass of this. - * - * @author Miguel Rojas - * @cdk.module qsarweka - * @cdk.svnrev $Revision: 9162 $ - */ -public interface IWekaModel extends IModel { - - /** - * Parses a given list of options. The parameters are determited from weka. And are specific for each - * algorithm. - * - * @param options An Array of strings containing the options - * @throws QSARModelException if the options are of the wrong type for the given modeling function - * - */ - abstract public void setOptions(String[] options) throws QSARModelException; - - /** - * Get the current settings of the classifier. The parameters are determited from weka. And are specific for each - * algorithm. - * - * @return An Array of strings containing the options - * @throws QSARModelException if the options are of the wrong type for the given modeling function - * - */ - abstract public String[] getOptions() throws QSARModelException; - - - /** - * Specifies the parameters to predict. In this case will be the dependent varibles. 
- * It's found into cdk.src - * - * @param path A String specifying the path of the file, format arff, which contians - * the dependent values with whose to predict. It's found into cdk.src - * @throws QSARModelException if the parameters are of the wrong type for the given modeling function - * - */ - abstract public void setParametersCDK(String path) throws QSARModelException; - - - /** - * Specifies the parameters to predict. In this case will be the independent varibles. - * - * @param x A Array Object containing the independent variable. - * @throws QSARModelException if the parameters are of the wrong type for the given modeling function - */ - abstract public void setParameters(Object[][] x) throws QSARModelException; - - - /** - * Returns the predicted values for the prediction set. - * - * This function only returns meaningful results if the predict - * method of this class has been called. - * - * @return A Object[] containing the predicted values - */ - abstract public Object[] getPredictPredicted(); - - -} - - diff --git a/src/main/org/openscience/chemojava/qsar/model/weka/J48WModel.java b/src/main/org/openscience/chemojava/qsar/model/weka/J48WModel.java deleted file mode 100644 index dca9fc5..0000000 --- a/src/main/org/openscience/chemojava/qsar/model/weka/J48WModel.java +++ /dev/null @@ -1,296 +0,0 @@ -/* $Revision: 6228 $ $Author: egonw $ $Date: 2006-05-11 18:34:42 +0200 (Thu, 11 May 2006) $ - * - * Copyright (C) 2006-2007 Miguel Rojas - * - * Contact: cdk-devel@lists.sourceforge.net - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - */ -package org.openscience.chemojava.qsar.model.weka; - -import org.openscience.chemojava.libio.weka.Weka; -import org.openscience.cdk.qsar.model.QSARModelException; - -import weka.classifiers.trees.J48; -/** - * A modeling class that provides the Quinlan's model C4.5 known as J48 - * using Weka library. - * The use of this class is shown in the following code snippet - *
- * try {
- *     J48WModel j48 = new J48WModel(typAttrib,classAttrib,x,y); or J48WModel j48 = new J48WModel(typAttrib,classAttrib,x,y,attrib);
- *     j48.setOptions(options);
- *     j48.build();
- *     j48.setParameters(newX);
- *     j48.predict();
- * 	   String[] predictedvalues = (String[])j48.getPredictPredicted();
- * } catch (QSARModelException qme) {
- *     System.out.println(qme.toString());
- * }
- * 
- * Other option is set the data from a file format arff. - *
- * J48WModel j48 = new J48WModel("/some/where/dataTraining.arff");
- * 
- * Note that when making predictions, the new X matrix must be set by calls - * to setParameters(). The following table lists the parameters that can be set and their - * expected types. - *
- * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - *
NameJava TypeNotes
xDouble[][]
yDouble[]Length should be equal to the rows of x
newXDouble[][]Number of columns should be the same as in x
typAttribString[]Length should be equal to the rows of x
classAttribString[]Length should be equal to number of different classes
- *
- *

Valid options are (according weka library):

- *

-U: Use unpruned tree.

- *

-C confidence: Set confidence threshold for pruning. (Defalult:0.25)

- *

-M number: Set minimum number of instances per leaf.(Default 2)

- *

-R: Use reduced error pruning. No subte raising is performed.

- *

-N number: Set number of folds for reduced error pruning. One fold is used - * as the pruning set.(Deafult:3)

- *

-B: Use binary splits for nominal attributes

- *

-S: Don't perform subtree raising

- *

-L: Do not clean up alfter the tree has been built

- *

-A: If set, Laplace smoothing is used for predicted probabilities

- *

-Q:The seed for reduced-error pruning

- * - * @author Miguel Rojas - * @cdk.require weka.jar - * @cdk.license GPL - * @cdk.module qsarweka - * @cdk.svnrev $Revision: 9162 $ - * @see Weka - * - * @cdk.keyword decision and regression trees - * @cdk.keyword J48 - */ -public class J48WModel implements IWekaModel{ - /**Dependent variable */ - private Object[] y; - /**Independent variable */ - private Object[][] x; - private Weka weka = null; - /**Array of strings containing the options*/ - private String[] options; - /**A String specifying the path of the file, format arff, - * which contians the variables and attributes with whose to test.*/ - private String pathTest = null; - /** results of the prediction*/ - private String[] results = null; - /**A Array Object containing the independent variable*/ - private Object[][] newX = null; - /**A String specifying the path of the file, format arff, - * which contians the independent values with whose to predict.*/ - private String pathNewX = null; - /**Attribute type: NUMERICAL or NOMINAL*/ - private int[] typAttrib; - /**String with the attribut class*/ - private String[] classAttrib; - private boolean cdkResource; - /** String with the attributs*/ - private String[] attrib; - /** Boolean if the attributs was set*/ - private boolean setAttrib = false; - - /** - * Constructor of the J48WModel object from varibles - * - * @param typAttrib Attribute type: NUMERICAL or NOMINAL. - * @param classAttrib String with the attribut class. - * @param y An array containing the dependent variable. - * @param x An double array containing the independent variable. - */ - public J48WModel(int[] typAttrib, String[] classAttrib, Object[] y, Object[][] x){ - this.typAttrib = typAttrib; - this.classAttrib = classAttrib; - this.y = y; - this.x = x; - } - - /** - * Constructor of the J48WModel object from varibles - * - * @param typAttrib Attribute type: NUMERICAL or NOMINAL. - * @param classAttrib String with a list of the attribut class. 
- * @param y An array containing the dependent variable (class value). - * @param x A 2D array containing the independent variable (for example: qsar results). - * @param attrib A string array containing the attributs - */ - public J48WModel(int[] typAttrib, String[] classAttrib, Object[] y, Object[][] x, String[] attrib){ - this.typAttrib = typAttrib; - this.classAttrib = classAttrib; - this.y = y; - this.x = x; - this.attrib = attrib; - setAttrib = true; - } - /** - * Constructor of the J48WModel object from file. Default the file is found into cdk.src - * - * @param True, if the file is found into cdk.src resource - * @param pathTest Path of the dataset file format arff to train - */ - public J48WModel(boolean cdkResource, String pathTest){ - this.pathTest = pathTest; - this.cdkResource = cdkResource; - } - - /** - * Parses a given list of options. The parameters are determited from weka. - * - *

Valid options are (according weka library):

- *

-U: Use unpruned tree.

- *

-C confidence: Set confidence threshold for pruning. (Defalult:0.25)

- *

-M number: Set minimum number of instances per leaf.(Default 2)

- *

-R: Use reduced error pruning. No subte raising is performed.

- *

-N number: Set number of folds for reduced error pruning. One fold is used - * as the pruning set.(Deafult:3)

- *

-B: Use binary splits for nominal attributes

- *

-S: Don't perform subtree raising

- *

-L: Do not clean up alfter the tree has been built

- *

-A: If set, Laplace smoothing is used for predicted probabilities

- *

-Q:The seed for reduced-error pruning

- * - * @param options An Array of strings containing the options - * @throws QSARModelException if the options are of the wrong type for the given modeling function - * - */ - public void setOptions(String[] options) throws QSARModelException { - this.options = options; - } - /** - * Get the current settings of the classifier. The parameters are determited from weka. And are specific for each - * algorithm. - * - * @return An Array of strings containing the options - * @throws QSARModelException if the options are of the wrong type for the given modeling function - * - */ - public String[] getOptions() throws QSARModelException { - return options; - } - /** - * Builds (trains) the model. - * - * @throws QSARModelException if errors occur in data types, calls to the R session. See - * the corresponding method in subclasses of this class for further details. - */ - public void build() throws QSARModelException { - weka = new Weka(); - try { - J48 j48 = new J48(); - if(options != null) - j48.setOptions(options); - - if(pathTest != null){ - weka.setDataset( - this.getClass().getClassLoader().getResourceAsStream(pathTest), - j48 - ); - }else{ - if (!(setAttrib)){ - this.attrib = new String[x[0].length]; - for(int i = 0 ; i < x[0].length; i++){ - attrib[i] = "X"+i; - } - } - weka.setDataset(attrib,typAttrib,classAttrib,y,x,j48); - } - } catch (Exception e) { - e.printStackTrace(); - } - } - /** - * Specifies the parameters to predict. In this case will be the dependent varibles. - * It's found into cdk.src - * - * @param path A String specifying the path of the file, format arff, which contians - * the dependent values with whose to predict. It's found into cdk.src - * @throws QSARModelException if the parameters are of the wrong type for the given modeling function - * - */ - public void setParametersCDK(String path) throws QSARModelException { - this.pathNewX = path; - } - /** - * Specifies the parameters to predict. In this case will be the independent varibles. 
- * - * @param newX A 2D array Object containing the independent variable. - * @throws QSARModelException if the parameters are of the wrong type for the given modeling function - */ - public void setParameters(Object[][] newX) throws QSARModelException { - this.newX = newX; - } - - /** - * Makes predictions using a previously built model. - * - * @throws QSARModelException if errors occur in data types, calls to the R session. See - * the corresponding method in subclasses of this class for further details. - */ - public void predict() throws QSARModelException { - try{ - if(pathNewX != null){ - Object[] object = weka.getPredictionCDK(pathNewX); - results = new String[object.length]; - for(int i = 0 ; i < object.length; i++){ - results[i] = (String)object[i]; - } - } - else if(newX != null){ - Object[] object = weka.getPrediction(newX); - results = new String[object.length]; - for(int i = 0 ; i < results.length; i++){ - results[i] = (String)object[i]; - } - } - - } catch ( Exception e){ - e.printStackTrace(); - } - } - /** - * Returns the predicted values for the prediction set. - * - * This function only returns meaningful results if the predict - * method of this class has been called. 
- * - * @return A String[] containing the predicted values - */ - public Object[] getPredictPredicted() { - return results; - } - -} diff --git a/src/main/org/openscience/chemojava/qsar/model/weka/KmeansCluster.java b/src/main/org/openscience/chemojava/qsar/model/weka/KmeansCluster.java deleted file mode 100644 index 550a5b7..0000000 --- a/src/main/org/openscience/chemojava/qsar/model/weka/KmeansCluster.java +++ /dev/null @@ -1,301 +0,0 @@ -/* $RCSfile$ - * $Author: mariobaseda $ - * $Date: 2007-01-03 17:57:14 +0100 (We, 03 Jan 2007) $ - * $Revision: 5602 $ - * - * Copyright (C) 2007 by Mario Baseda - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * All we ask is that proper credit is given for our work, which includes - * - but is not limited to - adding the above copyright notice to the beginning - * of your source code files, and to any copyright notice that you may distribute - * with programs based on this work. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -package org.openscience.chemojava.qsar.model.weka; - -import java.io.BufferedReader; -import java.io.FileReader; - -import org.openscience.chemojava.libio.weka.Weka; -import org.openscience.cdk.qsar.model.QSARModelException; - -import weka.clusterers.Clusterer; -import weka.clusterers.SimpleKMeans; -import weka.core.Instance; -import weka.core.Instances; - -/** - * A clustering class that provides the k means Cluster - * using Weka library. - * The use of this class is shown in the following code snippet - *
- * try {
- *     KmeansCluster kMeans = new KmeansCluster();
- *     kMeans.setOptions(options);
- *     kMeans.setData(attrib, typAttrib, classAttrib, data);
- *     kMeans.build();
- *     
- * } catch (Exception e) {
- *     System.out.println(e.toString());
- * }
- * 
- * Other option is set the data from a file format arff. - *
- * KmeansCluster kMeans = new KmeansCluster();
- * kMeans.setData("/some/where/dataTraining.arff");
- * 
- *

Valid options are (according weka library):

- *

-N: Specify the number of clusters to generate.

- *

-S: Specify random number seed.

- * - * @author Mario Baseda - * @cdk.require weka.jar - * @cdk.license GPL - * @cdk.module qsarweka - * @cdk.svnrev $Revision: 9162 $ - * @see Weka - * - * @cdk.keyword clusterers - * @cdk.keyword SimpleKMeans - */ -public class KmeansCluster{ // implements IWekaModel{ - /** An instance containing the data which should be clustering as arff file.*/ - private static Instances data; - /**Array of strings containing the options*/ - private String[] options; - /**new instance of clusterer*/ - private SimpleKMeans kMeans = new SimpleKMeans(); - Weka weka = new Weka(); - /**An Array Object containing the independent variable*/ - private Object[][] newX = null; - /**A String specifying the path of the file, format arff, - * which contains the new independent values*/ - private String pathNewX = null; - /** results of the classifying*/ - private Object[] results = null; - - /** - * Constructor of the KmeansCluster object - */ - public KmeansCluster(){} - - /** - * Parses a given list of options. The parameters are determited from weka. - * - *

Valid options are (according weka library):

- *

-N: Specify the number of clusters to generate.

- *

-S: Specify random number seed.

- * - * @param options An Array of strings containing the options - * @throws Exception if the options are of the wrong type for the given modeling function - * - */ - public void setOptions(String[] options) throws Exception{ - this.options = options; - } - /** - * Set the variable data to the arff file - * - * @param filename The path of the file, format arff - * @throws Exception if the parameters are of the wrong type for the given modeling function - * - */ - public void setData(String filename)throws Exception{ - data = weka.setDataset( - this.getClass().getClassLoader().getResourceAsStream(filename), - null - ); - } - - /** - * Parses a given list of data to an arff file, and set the variable data on it. - * - * @param attrib A string array containing the attributs - * @param typAttrib Attribute type: NUMERICAL or NOMINAL. - * @param classAttrib String with a list of the attribut classes. - * @param y An array containing the dependent variable (class value). - * @param x A 2D array containing the independent variable (for example: qsar results). - * @throws Exception if it is unable to parse the data - * - */ - public void setData(String[] attrib, int[] typAttrib, String[] classAttrib, Object[] y, Object[][] x) throws Exception{ - data = weka.setDataset(attrib,typAttrib,classAttrib,y,x,null); - } - - /** - * Specifies the new parameters as arff file. - * - * @param path A String specifying the path of the file, format arff, which contains - * the new values. - * @throws QSARModelException if the parameters are of the wrong type for the given modeling function - * - */ - public void setParameters(String path) throws QSARModelException { - this.pathNewX = path; - } - - /** - * Specifies the new parameters as 2D array object. - * - * @param newX A 2D array Object containing the new values. 
- * @throws QSARModelException if the parameters are of the wrong type for the given modeling function - */ - public void setParameters(Object[][] newX) throws QSARModelException { - this.newX = newX; - } - - /** - * Builds the cluster. - * - * @throws Exception if errors occur in data types. See - * the corresponding method in subclasses of this class for further details. - */ - public void build() throws Exception { - try { - if(options != null) -// set the options - kMeans.setOptions(options); -// build the clusterer - kMeans.buildClusterer(data); - } catch (Exception e) { - e.printStackTrace(); - } - return; - } - - /** Classifies a given instance. - * This function only returns meaningful results if the build - * method of this class has been called. - * - * @throws Exception if instance could not be classified successfully - */ - public void clusterInstance() throws Exception{ - try{ - if(pathNewX != null){ - BufferedReader br = new BufferedReader(new FileReader(pathNewX)); - Instances test = new Instances(br); - results = new Object[test.numInstances()]; - for(int i = 0 ; i < test.numInstances(); i++){ - results[i] = new Double(kMeans.clusterInstance(test.instance(i))); - } - } - else if(newX != null){ - results = new Object[newX.length]; - for(int j = 0 ; j < newX.length ; j++){ - Instance instance = new Instance(data.numAttributes()); - instance.setDataset(data); - for(int i = 0 ; i < newX[0].length ; i++){ - if(instance.attribute(i).isNumeric()) - instance.setValue(i, ((Double)newX[j][i]).doubleValue()); - else if(instance.attribute(i).isString()) - instance.setValue(i, ""+newX[j][i]); - } - instance.setValue(newX[0].length, 0.0); - results[j] = new Double(kMeans.clusterInstance(instance)); - } - } - } catch ( Exception e){ - e.printStackTrace(); - } - } - - /** - * Returns the number of the assigned cluster if the class is enumerated, otherwise the predicted value. 
- * This function only returns meaningful results if the clusterInstance - * method of this class has been called. - * @return An Object[] containing the assigned cluster as Double values - * if the class is enumerated, otherwise the predicted values. - */ - public Object[] getClusterInstance(){ - return results; - } - - /** - * Returns the number of clusters. - * - * @return the number of builded cluster as integer - * @throws Exception - */ - public int numberOfCluster() throws Exception{ - return kMeans.numberOfClusters(); - } - - /** - * Returns the cluster sum of squared errors. - * - * @return the sum of squared errors of the cluster as double - */ - public double getSquaredError(){ - return kMeans.getSquaredError(); - } - - /** - * Returns the cluster centroid values. - * - * @return the cluster centroid values as 2D double array - * @throws Exception - */ - public double[][] getClusterCentroids() throws Exception{ - double[][] results = new double[kMeans.numberOfClusters()][]; - for (int x = 0; x < kMeans.numberOfClusters(); x++){ - results[x] = new double[kMeans.getClusterCentroids().instance(x).numValues()]; - results[x] = kMeans.getClusterCentroids().instance(x).toDoubleArray(); - } - return results; - } - - /** - * Returns the cluster standard deviations. - * - * @return the cluster standard deviations as 2D double array - * @throws Exception - */ - public double[][] getClusterStandardDevs() throws Exception{ - double[][] results = new double[kMeans.numberOfClusters()][]; - for (int x = 0; x < kMeans.numberOfClusters(); x++){ - results[x] = new double[kMeans.getClusterStandardDevs().instance(x).numValues()]; - results[x] = kMeans.getClusterStandardDevs().instance(x).toDoubleArray(); - } - return results; - } - - /** - * Returns the cluster sizes. - * - * @return the cluster sizes as integer array - */ - public int[] getClusterSizes() { - return kMeans.getClusterSizes(); - } - - /** - * Returns the cluster. 
- * - * @return the builded Clusterer - */ - public Clusterer getClusterer(){ - return kMeans; - } - - /** - * Gets the current settings of the model - * - * @return an array of strings containing the options - - */ - public String[] getOptions() { - return kMeans.getOptions(); - } -} diff --git a/src/main/org/openscience/chemojava/qsar/model/weka/LinearRegressionWModel.java b/src/main/org/openscience/chemojava/qsar/model/weka/LinearRegressionWModel.java deleted file mode 100644 index 5c7f16a..0000000 --- a/src/main/org/openscience/chemojava/qsar/model/weka/LinearRegressionWModel.java +++ /dev/null @@ -1,268 +0,0 @@ -/* $Revision: 6228 $ $Author: egonw $ $Date: 2006-05-11 18:34:42 +0200 (Thu, 11 May 2006) $ - * - * Copyright (C) 2006-2007 Miguel Rojas - * - * Contact: cdk-devel@lists.sourceforge.net - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - */ -package org.openscience.chemojava.qsar.model.weka; - -import org.openscience.chemojava.libio.weka.Weka; -import org.openscience.cdk.qsar.model.QSARModelException; - -import weka.classifiers.functions.LinearRegression; -/** - * A modeling class that provides a linear least squares regression model using Weka library. - * The use of this class is shown in the following code snippet - *
- * try {
- *     LinearRegressionWModel lrm = new LinearRegressionWModel(x,y); or LinearRegressionWModel lrm = new LinearRegressionWModel(typAttrib,classAttrib,x,y,attrib);
- *     lrm.setOptions(options);
- *     lrm.build();
- *     lrm.setParameters(newX);
- *     lrm.predict();
- *     Double[] predictedvalues = (Double[])lrm.getPredictPredicted();
- * 
- * } catch (QSARModelException qme) {
- *     System.out.println(qme.toString());
- * }
- * 
- * Other option is set the data from a file format arff. - *
- * LinearRegressionWModel lrm = new LinearRegressionWModel("/some/where/dataTraining.arff");
- * 
- * Note that when making predictions, the new X matrix must be set by calls - * to setParameters(). The following table lists the parameters that can be set and their - * expected types. - *
- * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - *
NameJava TypeNotes
xDouble[][]
yDouble[]Length should be equal to the rows of x
newXDouble[][]Number of columns should be the same as in x
- *
- *

Valid options are (according weka library):

- *

-D: Produce debugging output.

- *

-S num:
Set the attriute selection method to use. 1=None, 2=Greedy(default 0=M5' method) - *

-C: Do no try to eleminate colinear attributes

- *

-R num: The ridge parameter(default 1.0e-8)

- * - * @author Miguel Rojas - * @cdk.require weka.jar - * @cdk.license GPL - * @cdk.module qsarweka - * @cdk.svnrev $Revision: 9162 $ - * @see Weka - * - * @cdk.keyword regression, linear - */ -public class LinearRegressionWModel implements IWekaModel{ - /**Dependent variable */ - private Object[] y; - /**Independent variable */ - private Object[][] x; - private Weka weka = null; - /**Array of strings containing the options*/ - private String[] options; - /**A String specifying the path of the file, format arff, - * which contians the variables and attributes with whose to test.*/ - private String pathTest = null; - /** results of the prediction*/ - private Double[] results; - /**A Array Object containing the independent variable*/ - private Object[][] newX = null; - /**A String specifying the path of the file, format arff, - * which contians the independent values with whose to predict.*/ - private String pathNewX = null; - /**Attribute type: NUMERICAL or NOMINAL*/ - private int[] typAttrib; - /** String with the attributs*/ - private String[]attrib; - /** Boolean if the attributs was set*/ - private boolean setAttrib = false; - - /** - * Constructor of the LinearRegressionWModel object from varibles - * @param y An array containing the dependent variable. - * @param x An double array containing the independent variable. - */ - public LinearRegressionWModel(Object[] y, Object[][] x){ - this.y = y; - this.x = x; - } - - /** - * Constructor of the LinearRegressionWModel object from varibles - * - * @param y An array containing the dependent variable (class value). - * @param x A 2D array containing the independent variable (for example: qsar results). 
- * @param typAttrib An integer array containing the attribute type - * @param attrib A string array containing the attributs - */ - public LinearRegressionWModel(int[] typAttrib, Object[] y, Object[][] x, String[] attrib){ - this.y = y; - this.x = x; - this.typAttrib = typAttrib; - this.attrib = attrib; - setAttrib = true; - } - /** - * Constructor of the LinearRegressionWModel object from file - * @param pathTest Path of the dataset file format arff to train - */ - public LinearRegressionWModel(String pathTest){ - this.pathTest = pathTest; - } - - /** - * Parses a given list of options. The parameters are determited from weka. - * - *

Valid options are (according weka library):

- *

-D: Produce debugging output.

- *

-S num:
Set the attriute selection method to use. 1=None, 2=Greedy(default 0=M5' method) - *

-C: Do no try to eleminate colinear attributes

- *

-R num: The ridge parameter(default 1.0e-8)

- * - * @param options An Array of strings containing the options - * @throws QSARModelException if the options are of the wrong type for the given modeling function - * - */ - public void setOptions(String[] options) throws QSARModelException { - this.options = options; - } - /** - * Get the current settings of the classifier. The parameters are determited from weka. And are specific for each - * algorithm. - * - * @return An Array of strings containing the options - * @throws QSARModelException if the options are of the wrong type for the given modeling function - * - */ - public String[] getOptions() throws QSARModelException { - return options; - } - /** - * Builds (trains) the model. - * - * @throws QSARModelException if errors occur in data types, calls to the R session. See - * the corresponding method in subclasses of this class for further details. - */ - public void build() throws QSARModelException { - weka = new Weka(); - try { - LinearRegression lr = new LinearRegression(); - if(options != null) - lr.setOptions(options); - - if(pathTest != null){ - weka.setDataset( - this.getClass().getClassLoader().getResourceAsStream(pathTest), - lr - ); - }else{ - if (!(setAttrib)){ - this.attrib = new String[x[0].length+1]; - this.typAttrib = new int[x[0].length+1]; - for(int i = 0 ; i < x[0].length; i++){ - attrib[i] = "X"+i; - typAttrib[i] = Weka.NUMERIC; - } - attrib[x[0].length] = "Y"; - typAttrib[x[0].length] = Weka.NUMERIC; - } - weka.setDataset(attrib,typAttrib,y,x,lr); - } - } catch (Exception e) { - e.printStackTrace(); - } - } - /** - * Specifies the parameters to predict. In this case will be the dependent varibles. - * It's found into cdk.src - * - * @param path A String specifying the path of the file, format arff, which contians - * the dependent values with whose to predict. 
It's found into cdk.src - * @throws QSARModelException if the parameters are of the wrong type for the given modeling function - * - */ - public void setParametersCDK(String path) throws QSARModelException { - this.pathNewX = path; - } - /** - * Specifies the parameters to predict. In this case will be the independent varibles. - * - * @param newX A 2D array Object containing the independent variable. - * @throws QSARModelException if the parameters are of the wrong type for the given modeling function - */ - public void setParameters(Object[][] newX) throws QSARModelException { - this.newX = newX; - } - - /** - * Makes predictions using a previously built model. - * - * @throws QSARModelException if errors occur in data types, calls to the R session. See - * the corresponding method in subclasses of this class for further details. - */ - public void predict() throws QSARModelException { - try{ - if(pathNewX != null){ - Object[] object = weka.getPredictionCDK(pathNewX); - results = new Double[object.length]; - for(int i = 0 ; i < object.length; i++){ - results[i] = (Double)object[i]; - } - } - else if(newX != null){ - Object[] object = weka.getPrediction(newX); - results = new Double[object.length]; - for(int i = 0 ; i < object.length; i++){ - results[i] = (Double)object[i]; - } - } - - } catch ( Exception e){ - e.printStackTrace(); - } - } - /** - * Returns the predicted values for the prediction set. - * - * This function only returns meaningful results if the predict - * method of this class has been called. 
- * - * @return A Double[] containing the predicted values - */ - public Object[] getPredictPredicted() { - return results; - } - - -} diff --git a/src/main/org/openscience/chemojava/qsar/model/weka/MultilayerPerceptronModel.java b/src/main/org/openscience/chemojava/qsar/model/weka/MultilayerPerceptronModel.java deleted file mode 100644 index ac9adb7..0000000 --- a/src/main/org/openscience/chemojava/qsar/model/weka/MultilayerPerceptronModel.java +++ /dev/null @@ -1,368 +0,0 @@ -/* $RCSfile$ - * $Author: mariobaseda $ - * $Date: 2007-01-03 17:57:14 +0100 (We, 03 Jan 2007) $ - * $Revision: 5602 $ - * - * Copyright (C) 2007 by Mario Baseda - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * All we ask is that proper credit is given for our work, which includes - * - but is not limited to - adding the above copyright notice to the beginning - * of your source code files, and to any copyright notice that you may distribute - * with programs based on this work. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -package org.openscience.chemojava.qsar.model.weka; - -import weka.core.Instance; -import weka.core.Instances; -import weka.classifiers.functions.MultilayerPerceptron; - -import org.openscience.chemojava.libio.weka.Weka; -import org.openscience.cdk.qsar.model.QSARModelException; - -import java.io.BufferedReader; -import java.io.FileReader; - -/** - * A modelling class that uses backpropagation to classify instances using Weka library. - * The use of this class is shown in the following code snippet - *
- * try {
- *     MultilayerPerceptron mp = new MultilayerPerceptron();
- *     mp.setOptions(options);
- *     mp.setData(attrib, typAttrib, classAttrib, data);
- *     mp.build();
- *     
- * } catch (Exception e) {
- *     System.out.println(e.toString());
- * }
- * 
- * Other option is set the data from a file format arff. - *
- * MultilayerPerceptron mp = new MultilayerPerceptron();
- * mp.setData("/some/where/dataTraining.arff");
- * 
- *

Valid options are (according weka library):

- *

-L num: Set the learning rate. (default 0.3)

- *

-M num: Set the momentum (default 0.2)

- *

-N num: Set the number of epochs to train through. (default 500)

- *

-V num: Set the percentage size of the validation set from the training to use - * (default 0 (no validation set is used, instead num of epochs is used)

- *

-S num: Set the seed for the random number generator. (default 0)

- *

-E num: Set the threshold for the number of consequetive errors allowed - * during validation testing. (default 20)

- *

-G: Bring up a GUI for the neural net.

- *

-A: Do not automatically create the connections in the net. (can only be used if -G is specified)

- *

-B: Do Not automatically preprocess the instances with a nominal to binary filter

- *

-H str: Set the number of nodes to be used on each layer. Each number represents - * its own layer and the num of nodes on that layer. Each number should be comma seperated. - * There are also the wildcards 'a', 'i', 'o', 't' (default 4)

- *

-C: Do not automatically Normalize the class if it's numeric.

- *

-I: Do not automatically Normalize the attributes

- *

-R: Do not allow the network to be automatically reset

- *

-D: Cause the learning rate to decay as training is done

- * - * @author Mario Baseda - * @cdk.require weka.jar - * @cdk.license GPL - * @cdk.module qsarweka - * @cdk.svnrev $Revision: 9162 $ - * @see Weka - * - * @cdk.keyword classifiers, funktions, MultilayerPerceptron - */ -public class MultilayerPerceptronModel { - /** An instance containing the data which should be classifier as arff file.*/ - private static Instances data; - /**Array of strings containing the options*/ - private String[] options; - /**new instance of classifier*/ - private MultilayerPerceptron mp = new MultilayerPerceptron(); - private Weka weka = new Weka(); - /**An Array Object containing the independent variable*/ - private Object[][] newX = null; - /**A String specifying the path of the file, format arff, - * which contains the new independent values*/ - private String pathNewX = null; - /** An array object which contains the results of the prediction*/ - private Object[][] object = null; - - - /** - * Constructor of the MultilayerPerceptron object - */ - public MultilayerPerceptronModel(){} - - /** - * Set the variable data to the arff file - * - * @param filename The path of the file, format arff - * @throws Exception if the parameters are of the wrong type for the given modeling function - * - */ - public void setData(String filename)throws Exception{ - data = weka.setDataset( - this.getClass().getClassLoader().getResourceAsStream(filename), - null - ); - } - - /** - * Parses a given list of data to an arff file, and set the variable data on it. - * - * @param typAttrib Attribute type: NUMERICAL or NOMINAL. - * @param classAttrib String with a list of the attribut classes. - * @param y An array containing the dependent variable (class value). - * @param x A 2D array containing the independent variable (for example: qsar results). 
- * @param attrib A string array containing the attributs - * @throws Exception if it is unable to parse the data - * - */ - public void setData(String[] attrib, int[] typAttrib, String[] classAttrib, Object[] y, Object[][] x) throws Exception{ - data = weka.setDataset(attrib,typAttrib,classAttrib,y,x,null); - } - - /** - * Parses a given list of options. The parameters are determited from weka. - * - *

Valid options are (according weka library):

- *

-L num: Set the learning rate. (default 0.3)

- *

-M num: Set the momentum (default 0.2)

- *

-N num: Set the number of epochs to train through. (default 500)

- *

-V num: Set the percentage size of the validation set from the training to use - * (default 0 (no validation set is used, instead num of epochs is used)

- *

-S num: Set the seed for the random number generator. (default 0)

- *

-E num: Set the threshold for the number of consequetive errors allowed - * during validation testing. (default 20)

- *

-G: Bring up a GUI for the neural net.

- *

-A: Do not automatically create the connections in the net. (can only be used if -G is specified)

- *

-B: Do Not automatically preprocess the instances with a nominal to binary filter

- *

-H str: Set the number of nodes to be used on each layer. Each number represents - * its own layer and the num of nodes on that layer. Each number should be comma seperated. - * There are also the wildcards 'a', 'i', 'o', 't' (default 4)

- *

-C: Do not automatically Normalize the class if it's numeric.

- *

-I: Do not automatically Normalize the attributes

- *

-R: Do not allow the network to be automatically reset

- *

-D: Cause the learning rate to decay as training is done

- * - * @param options An Array of strings containing the options - * @throws QSARModelException if the options are of the wrong type for the given modeling function - * - */ - public void setOptions(String[] options) throws QSARModelException{ - this.options = options; - } - - /** - * Builds the model. - * - * @throws Exception if errors occur in data types. See - * the corresponding method in subclasses of this class for further details. - */ - public void build() throws Exception { - try { - if(options != null) -// set the options - mp.setOptions(options); -// build the model - mp.buildClassifier(data); - } catch (Exception e) { - e.printStackTrace(); - } - return; - } - - /** - * Specifies the new parameters as arff file. - * - * @param path A String specifying the path of the file, format arff, which contians - * the new values. - * @throws QSARModelException if the parameters are of the wrong type for the given modeling functionn - * - */ - public void setParameters(String path) throws QSARModelException { - this.pathNewX = path; - } - - /** - * Specifies the new parameters as 2D array object. - * - * @param newX A 2D array Object containing the new values. 
- * @throws QSARModelException if the parameters are of the wrong type for the given modeling function - */ - public void setParameters(Object[][] newX) throws QSARModelException { - this.newX = newX; - } - - - /** Predict the class of an instance once a classification model has been built with the buildClassifier call - * - * @throws Exception - */ - public void predict() throws Exception{ - try{ - if(pathNewX != null){ - BufferedReader br = new BufferedReader(new FileReader(pathNewX)); - Instances test = new Instances(br); - this.object = new Object[test.numInstances()][]; - for(int i = 0 ; i < test.numInstances(); i++){ - double[] result = mp.distributionForInstance(test.instance(i)); - this.object[i] = new Object[result.length]; - for (int z = 0; z < result.length; z++){ - this.object[i][z] = new Double(result[z]); - } - } - } - else if(newX != null){ - this.object = new Object[newX.length][]; - for(int j = 0 ; j < newX.length ; j++){ - Instance instance = new Instance(data.numAttributes()); - instance.setDataset(data); - for(int i = 0 ; i < newX[0].length ; i++){ - if(instance.attribute(i).isNumeric()) - instance.setValue(i, ((Double)newX[j][i]).doubleValue()); - else if(instance.attribute(i).isString()) - instance.setValue(i, ""+newX[j][i]); - } - instance.setValue(newX[0].length, 0.0); - double[] result = mp.distributionForInstance(instance); - this.object[j] = new Object[result.length]; - for (int z = 0; z < result.length; z++){ - this.object[j][z] = new Double(result[z]); - } - } - } - } catch ( Exception e){ - e.printStackTrace(); - } - } - - /** - * Returns the probabilities of each class type. - * - * This function only returns meaningful results if the predict - * method of this class has been called. 
- * - * @return An Object[][] containing the probabilities of each class type - */ - public Object[][] getPredictPredicted() { - return this.object; - } - - /**Get the hidden layers - * @return A string containig the hidden layers - */ - public String getHiddenLayers(){ - return mp.getHiddenLayers(); - } - - /**Get the value of the learning rate - * @return A double containig the value of the learning rate - */ - public double getLearningRate(){ - return mp.getLearningRate(); - } - - /**Get the value of the momentum - * @return A double containig the value of the momentum - */ - public double getMomentum(){ - return mp.getMomentum(); - } - - /**Get the value of auto build - * @return true or false - */ - public boolean getAutoBuild(){ - return mp.getAutoBuild(); - } - - /**Get the value of decay - * @return true or false - */ - public boolean getDecay(){ - return mp.getDecay(); - } - - /**Get the value of GUI - * @return true or false - */ - public boolean getGUI(){ - return mp.getGUI(); - } - - /**Get the value of NominalToBinaryFilter - * @return true or false - */ - public boolean getNominalToBinaryFilter(){ - return mp.getNominalToBinaryFilter(); - } - - /**Get the value of NormalizeAttributes - * @return true or false - */ - public boolean getNormalizeAttributes(){ - return mp.getNormalizeAttributes(); - } - - /**Get the value of NormalizeNumericClass - * @return true or false - */ - public boolean getNormalizeNumericClass(){ - return mp.getNormalizeNumericClass(); - } - - /**Gets the current settings of NeuralNet - * @return A String[] containig the settings of NeuralNet - */ - public String[] getOptions(){ - return mp.getOptions(); - } - - /**Get the value of the random seed - * @return A long containig the value of the random seed - */ - public long getRandomSeed(){ - return mp.getRandomSeed(); - } - - /**Get the number of epochs to train through - * @return An double containig the number of epochs to train through - */ - public double getTrainingTime(){ - 
return mp.getTrainingTime(); - } - - /**Get the percentage size of the validation set - * @return An double containig the percentage size of the validation seth - */ - public double getValidationSetSize(){ - return mp.getValidationSetSize(); - } - - /**Get the threshold used for validation testing - * @return An double containig the threshold used for validation testing - */ - public double getValidationThreshold(){ - return mp.getValidationThreshold(); - } - - /**Get the flag for reseting the network - * @return true or false - */ - public boolean getReset(){ - return mp.getReset(); - } -} diff --git a/src/main/org/openscience/chemojava/qsar/model/weka/NaiveBayesModel.java b/src/main/org/openscience/chemojava/qsar/model/weka/NaiveBayesModel.java deleted file mode 100644 index f92268e..0000000 --- a/src/main/org/openscience/chemojava/qsar/model/weka/NaiveBayesModel.java +++ /dev/null @@ -1,292 +0,0 @@ -/* $RCSfile$ - * $Author: mariobaseda $ - * $Date: 2007-01-03 17:57:14 +0100 (We, 03 Jan 2007) $ - * $Revision: 5602 $ - * - * Copyright (C) 2007 by Mario Baseda - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * All we ask is that proper credit is given for our work, which includes - * - but is not limited to - adding the above copyright notice to the beginning - * of your source code files, and to any copyright notice that you may distribute - * with programs based on this work. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -package org.openscience.chemojava.qsar.model.weka; - -import weka.core.Instance; -import weka.core.Instances; -import weka.classifiers.bayes.NaiveBayes; - -import org.openscience.chemojava.libio.weka.Weka; -import org.openscience.cdk.qsar.model.QSARModelException; - -import java.io.BufferedReader; -import java.io.FileReader; - -/** - * A modelling class for a Naive Bayes classifier using estimator classes from the Weka library. - * Numeric estimator precision values are chosen based on analysis of the training data. - * For this reason, the classifier is not an UpdateableClassifier. - * The use of this class is shown in the following code snippet - *
- * try {
- *     NaiveBayes nb = new NaivesBayes();
- *     nb.setOptions(options);
- *     nb.setData(attrib, typAttrib, classAttrib, data);
- *     nb.build();
- *     
- * } catch (Exception e) {
- *     System.out.println(e.toString());
- * }
- * 
- * - * !!!Other option is set the data from a file format arff.!!! - * !!!!!!!!!!!!!THIS OPTION SUPPLIES AN UNSUSPECTED BUG BY CALCULATING THE PROBABILITIES!!!!!!!!!!!!!! - *
- * !!!MultilayerPerceptron mp = new MultilayerPerceptron();!!!
- * !!!mp.setData("/some/where/dataTraining.arff");!!!
- * !!!!!!!!!!!!!THIS OPTION SUPPLIES AN UNSUSPECTED BUG BY CALCULATING THE PROBABILITIES!!!!!!!!!!!!!!
- * 
- *

Valid options are (according weka library):

- *

-K: Use kernel estimation for modelling numeric attributes rather than a single normal distribution.

- *

-D: Use supervised discretization to process numeric attributes

- * - * @author Mario Baseda - * @cdk.require weka.jar - * @cdk.license GPL - * @cdk.module qsarweka - * @cdk.svnrev $Revision: 9162 $ - * @see Weka - * - * @cdk.keyword classifiers, bayes, NaiveBayes - */ -public class NaiveBayesModel { - /** An instance containing the data which should be classifier as arff file.*/ - private static Instances data; - /**Array of strings containing the options*/ - private String[] options; - /**new instance of classifier*/ - private NaiveBayes nb = new NaiveBayes(); - private Weka weka = new Weka(); - /**An Array Object containing the independent variable*/ - private Object[][] newX = null; - /**A String specifying the path of the file, format arff, - * which contains the new independent values*/ - private String pathNewX = null; - /** An array object which contains the results of the probabilities*/ - private Object[][] object = null; - - - /** - * Constructor of the NaiveBayesModel object - */ - public NaiveBayesModel(){} - - /** - * Set the variable data to the arff file - * - * @param filename The path of the file, format arff - * @throws Exception if the parameters are of the wrong type for the given modeling function - * - */ - public void setData(String filename)throws Exception{ - data = weka.setDataset( - this.getClass().getClassLoader().getResourceAsStream(filename), - null - ); - } - - /** - * Parses a given list of data to an arff file, and set the variable data on it. - * - * @param typAttrib Attribute type: NUMERICAL or NOMINAL. - * @param classAttrib String with a list of the attribut classes. - * @param y An array containing the dependent variable (class value). - * @param x A 2D array containing the independent variable (for example: qsar results). 
- * @param attrib A string array containing the attributs - * @throws Exception if it is unable to parse the data - * - */ - public void setData(String[] attrib, int[] typAttrib, String[] classAttrib, Object[] y, Object[][] x) throws Exception{ - data = weka.setDataset(attrib,typAttrib,classAttrib,y,x,null); - } - - /** - * Parses a given list of options. The parameters are determited from weka. - * - *

Valid options are (according weka library):

- *

-K: Use kernel estimation for modelling numeric attributes rather than a single normal distribution.

- *

-D: Use supervised discretization to process numeric attributes

- * - * @param options An Array of strings containing the options - * @throws QSARModelException if the options are of the wrong type for the given modeling function - * - */ - public void setOptions(String[] options) throws QSARModelException{ - this.options = options; - } - - /** - * Builds the model. - * - * @throws Exception if errors occur in data types. See - * the corresponding method in subclasses of this class for further details. - */ - public void build() throws Exception { - try { - if(options != null) -// set the options - nb.setOptions(options); -// build the model - nb.buildClassifier(data); - } catch (Exception e) { - e.printStackTrace(); - } - return; - } - - /** - * Specifies the new parameters as arff file. - * - * @param path A String specifying the path of the file, format arff, which contains - * the new values. - * @throws QSARModelException if the parameters are of the wrong type for the given modeling function - * - */ - public void setParameters(String path) throws QSARModelException { - this.pathNewX = path; - } - - /** - * Specifies the new parameters as 2D array object. - * - * @param newX A 2D array Object containing the new values. 
- * @throws QSARModelException if the parameters are of the wrong type for the given modeling function - */ - public void setParameters(Object[][] newX) throws QSARModelException { - this.newX = newX; - } - - - /** Calculates the class membership probabilities for the given instance once a classification - * model has been built with the buildClassifier call - * - * @throws Exception - */ - public void probabilities() throws Exception{ - try{ - if(pathNewX != null){ -// This supplies an unsuspected bug (Array index out of bounce error) - BufferedReader br = new BufferedReader(new FileReader(pathNewX)); - Instances test = new Instances(br); - this.object = new Object[test.numInstances()][]; - for(int i = 0 ; i < test.numInstances(); i++){ - // Here occures the bug - double[] result = nb.distributionForInstance(test.instance(i)); - this.object[i] = new Object[result.length]; - for (int z = 0; z < result.length; z++){ - this.object[i][z] = new Double(result[z]); - } - } - } - else if(newX != null){ - this.object = new Object[newX.length][]; - for(int j = 0 ; j < newX.length ; j++){ - Instance instance = new Instance(data.numAttributes()); - instance.setDataset(data); - for(int i = 0 ; i < newX[0].length ; i++){ - if(instance.attribute(i).isNumeric()) - instance.setValue(i, ((Double)newX[j][i]).doubleValue()); - else if(instance.attribute(i).isString()) - instance.setValue(i, ""+newX[j][i]); - } - instance.setValue(newX[0].length, 0.0); - double[] result = nb.distributionForInstance(instance); - this.object[j] = new Object[result.length]; - for (int z = 0; z < result.length; z++){ - this.object[j][z] = new Double(result[z]); - } - } - } - } catch ( Exception e){ - e.printStackTrace(); - } - } - - /** - * Returns the probabilities of each class type. - * - * This function only returns meaningful results if the probabilities - * method of this class has been called. 
- * - * @return An Object[][] containing the probabilities of each class type - */ - public Object[][] getProbabilities() { - return this.object; - } - - /** Updates the classifier with the given instance once a classification model has been built with the buildClassifier call - * - * @throws Exception - */ - public void updateClassifier() throws Exception{ - try{ - if(pathNewX != null){ - BufferedReader br = new BufferedReader(new FileReader(pathNewX)); - Instances test = new Instances(br); - for(int i = 0 ; i < test.numInstances(); i++){ - nb.updateClassifier(test.instance(i)); - } - } - else if(newX != null){ - for(int j = 0 ; j < newX.length ; j++){ - Instance instance = new Instance(data.numAttributes()); - instance.setDataset(data); - for(int i = 0 ; i < newX[0].length ; i++){ - if(instance.attribute(i).isNumeric()) - instance.setValue(i, ((Double)newX[j][i]).doubleValue()); - else if(instance.attribute(i).isString()) - instance.setValue(i, ""+newX[j][i]); - } - instance.setValue(newX[0].length, 0.0); - nb.updateClassifier(instance); - } - } - } catch ( Exception e){ - e.printStackTrace(); - } - } - - /**Gets the current settings of the classifier - * @return A String[] containig the settings of NeuralNet - */ - public String[] getOptions(){ - return nb.getOptions(); - } - - /**Gets if kernel estimator is being used - * @return true or false - */ - public boolean getUseKernelEstimator(){ - return nb.getUseKernelEstimator(); - } - - /**Get whether supervised discretization is to be used - * @return true or false - */ - public boolean getUseSupervisedDiscretization(){ - return nb.getUseSupervisedDiscretization(); - } -} diff --git a/src/main/org/openscience/chemojava/qsar/model/weka/SMOModel.java b/src/main/org/openscience/chemojava/qsar/model/weka/SMOModel.java deleted file mode 100644 index a3a2b6b..0000000 --- a/src/main/org/openscience/chemojava/qsar/model/weka/SMOModel.java +++ /dev/null @@ -1,395 +0,0 @@ -/* $RCSfile$ - * $Author: mariobaseda $ - * $Date: 
2007-01-03 17:57:14 +0100 (We, 03 Jan 2007) $ - * $Revision: 5602 $ - * - * Copyright (C) 2007 by Mario Baseda - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * All we ask is that proper credit is given for our work, which includes - * - but is not limited to - adding the above copyright notice to the beginning - * of your source code files, and to any copyright notice that you may distribute - * with programs based on this work. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -package org.openscience.chemojava.qsar.model.weka; - -import weka.core.Instance; -import weka.core.Instances; -import weka.classifiers.functions.SMO; -import org.openscience.chemojava.libio.weka.Weka; -import org.openscience.cdk.qsar.model.QSARModelException; - -import java.io.BufferedReader; -import java.io.FileReader; - -/** - * A modelling class that provides the John C. Platt's sequential minimal optimization algorithm (SMO) - * for training a support vector classifier using Weka library. - * The use of this class is shown in the following code snippet - *
- * try {
- *     SMO smo = new SMO();
- *     smo.setOptions(options);
- *     smo.setData(attrib, typAttrib, classAttrib, data);
- *     smo.build();
- *     
- * } catch (Exception e) {
- *     System.out.println(e.toString());
- * }
- * 
- * Other option is set the data from a file format arff. - *
- * SMO smo = new SMO();
- * smo.setData("/some/where/dataTraining.arff");
- * 
- *

Valid options are (according weka library):

- *

-C num: The complexity constant C. (default 1)

- *

-E num: The exponent for the polynomial kernel. (default 1)

- *

-G num: Gamma for the RBF kernel. (default 0.01)

- *

-N <0|1|2>: Whether to 0=normalize/1=standardize/2=neither. (default 0=normalize)

- *

-F: Feature-space normalization (only for non-linear polynomial kernels).

- *

-O: Use lower-order terms (only for non-linear polynomial kernels).

- *

-R: Use the RBF kernel. (default poly)

- *

-A num: Sets the size of the kernel cache. Should be a prime number. (default 250007, use 0 for full cache)

- *

-L num: Sets the tolerance parameter. (default 1.0e-3)

- *

-P num: Sets the epsilon for round-off error. (default 1.0e-12)

- *

-M: Fit logistic models to SVM outputs.

- *

-V num: Number of folds for cross-validation used to generate data for logistic models. (default -1, use training data)

- *

-W num: Random number seed for cross-validation. (default 1)

- - - * - * @author Mario Baseda - * @cdk.require weka.jar - * @cdk.license GPL - * @cdk.module qsarweka - * @cdk.svnrev $Revision: 9162 $ - * @see Weka - * - * @cdk.keyword classifiers, funktions, SMO - */ -public class SMOModel { - /** An instance containing the data which should be classifier as arff file.*/ - private static Instances data; - /**Array of strings containing the options*/ - private String[] options; - /**new instance of classifier*/ - private SMO smo = new SMO(); - private Weka weka = new Weka(); - /**An Array Object containing the independent variable*/ - private Object[][] newX = null; - /**A String specifying the path of the file, format arff, - * which contains the new independent values*/ - private String pathNewX = null; - /** An array object which contains the probabilitiesof the new values*/ - private Object[][] object = null; - - - /** - * Constructor of the SMO object - */ - public SMOModel(){} - - /** - * Parses a given list of options. The parameters are determited from weka. - * - *

Valid options are (according weka library):

- *

Valid options are (according weka library):

- *

-C num: The complexity constant C. (default 1)

- *

-E num: The exponent for the polynomial kernel. (default 1)

- *

-G num: Gamma for the RBF kernel. (default 0.01)

- *

-N <0|1|2>: Whether to 0=normalize/1=standardize/2=neither. (default 0=normalize)

- *

-F: Feature-space normalization (only for non-linear polynomial kernels).

- *

-O: Use lower-order terms (only for non-linear polynomial kernels).

- *

-R: Use the RBF kernel. (default poly)

- *

-A num: Sets the size of the kernel cache. Should be a prime number. (default 250007, use 0 for full cache)

- *

-L num: Sets the tolerance parameter. (default 1.0e-3)

- *

-P num: Sets the epsilon for round-off error. (default 1.0e-12)

- *

-M: Fit logistic models to SVM outputs.

- *

-V num: Number of folds for cross-validation used to generate data for logistic models. (default -1, use training data)

- *

-W num: Random number seed for cross-validation. (default 1)

- * - * @param options An Array of strings containing the options - * @throws QSARModelException if the options are of the wrong type for the given modeling function - * - */ - public void setOptions(String[] options) throws QSARModelException{ - this.options = options; - } - - /** - * Set the variable data to the arff file - * - * @param filename The path of the file, format arff - * @throws Exception if the parameters are of the wrong type for the given modeling function - * - */ - public void setData(String filename)throws Exception{ - data = weka.setDataset( - this.getClass().getClassLoader().getResourceAsStream(filename), - null - ); - } - - /** - * Parses a given list of data to an arff file, and set the variable data on it. - * - * @param typAttrib Attribute type: NUMERICAL or NOMINAL. - * @param classAttrib String with a list of the attribut classes. - * @param y An array containing the dependent variable (class value). - * @param x A 2D array containing the independent variable (for example: qsar results). - * @param attrib A string array containing the attributs - * @throws Exception if it is unable to parse the data - * - */ - public void setData(String[] attrib, int[] typAttrib, String[] classAttrib, Object[] y, Object[][] x) throws Exception{ - data = weka.setDataset(attrib,typAttrib,classAttrib,y,x,null); - } - - /** - * Builds the model. - * - * @throws Exception if errors occur in data types. See - * the corresponding method in subclasses of this class for further details. - */ - public void build() throws Exception { - try { - if(options != null) -// set the options - smo.setOptions(options); -// build the model - smo.buildClassifier(data); - } catch (Exception e) { - e.printStackTrace(); - } - return; - } - - /** - * Specifies the new parameters as arff file. - * - * @param path A String specifying the path of the file, format arff, which contians - * the new values. 
- * @throws QSARModelException if the parameters are of the wrong type for the given modeling function - * - */ - public void setParameters(String path) throws QSARModelException { - this.pathNewX = path; - } - - /** - * Specifies the new parameters as 2D array object. - * - * @param newX A 2D array Object containing the new values. - * @throws QSARModelException if the parameters are of the wrong type for the given modeling function - */ - public void setParameters(Object[][] newX) throws QSARModelException { - this.newX = newX; - } - - - /** Estimates the probabilities for the given instance, which was built from the new parameters before. - * - * @throws Exception - */ - public void probabilities() throws Exception{ - try{ - if(pathNewX != null){ - BufferedReader br = new BufferedReader(new FileReader(pathNewX)); - Instances test = new Instances(br); - this.object = new Object[test.numInstances()][]; - for(int i = 0 ; i < test.numInstances(); i++){ - double[] result = smo.distributionForInstance(test.instance(i)); - this.object[i] = new Object[result.length]; - for (int z = 0; z < result.length; z++){ - this.object[i][z] = new Double(result[z]); - } - } - } - else if(newX != null){ - this.object = new Object[newX.length][]; - for(int j = 0 ; j < newX.length ; j++){ - Instance instance = new Instance(data.numAttributes()); - instance.setDataset(data); - for(int i = 0 ; i < newX[0].length ; i++){ - if(instance.attribute(i).isNumeric()) - instance.setValue(i, ((Double)newX[j][i]).doubleValue()); - else if(instance.attribute(i).isString()) - instance.setValue(i, ""+newX[j][i]); - } - instance.setValue(newX[0].length, 0.0); - double[] result = smo.distributionForInstance(instance); - this.object[j] = new Object[result.length]; - for (int z = 0; z < result.length; z++){ - this.object[j][z] = new Double(result[z]); - } - } - } - } catch ( Exception e){ - e.printStackTrace(); - } - } - - /** - * Returns the probabilities of the new parameters. 
- * - * This function only returns meaningful results if the probabilities - * method of this class has been called. - * - * @return An Object[][] containing the probabilities values as Double - */ - public Object[][] getProbabilities() { - return this.object; - } - - /** Returns the attribute names - * @return A String[][][] containing the attribute names - */ - public String[][][] attributeNames(){ - return smo.attributeNames(); - } - - /** Returns the bias of each binary SMO - * @return A double[][] containing the bias of each binary SMO - */ - public double[][] bias(){ - return smo.bias(); - } - - /**Returns the class attribute names - * @return A String[] containing the class attribute names - */ - public String[] classAttributeNames(){ - return smo.classAttributeNames(); - } - - /**Get the value of buildLogisticModels - * @return true or false - */ - public boolean getBuildLogisticModels(){ - return smo.getBuildLogisticModels(); - } - - /**Get the value of C - * @return A double containig the value of C - */ - public double getC(){ - return smo.getC(); - } - - /**Get the size of the kernel cache - * @return An integer containing the size of the kernel cache - */ - public int getCacheSize(){ - return smo.getCacheSize(); - } - - /**Get the value of epsilon - * @return A double containing the value of epsilon - */ - public double getEpsilon(){ - return smo.getEpsilon(); - } - - /**Get the value of exponent - * @return A double containing the value of exponent - */ - public double getExponent(){ - return smo.getExponent(); - } - - /**Check whether feature spaces is being normalized. 
- * @return true or false - */ - public boolean getFeatureSpaceNormalization() throws Exception{ - return smo.getFeatureSpaceNormalization(); - } - - /**Get the value of gamma - * @return A double containing the value of gamma - */ - public double getGamma(){ - return smo.getGamma(); - } - - /**Check whether lower-order terms are being used - * @return true or false - */ - public boolean getLowerOrderTerms(){ - return smo.getLowerOrderTerms(); - } - - /**Get the value of numFolds. - * @return An integer containing the value of numFolds - */ - public int getNumFolds(){ - return smo.getNumFolds(); - } - - /**Get the value of randomSeed - * @return An integer containing the value of randomSeed - */ - public int getRandomSeed(){ - return smo.getRandomSeed(); - } - - /**Get the value of tolerance parameter - * @return A double containig the value of tolerance parameter - */ - public double getToleranceParameter(){ - return smo.getToleranceParameter(); - } - - /**Check if the RBF kernel is to be used - * @return true or false - */ - public boolean getUserRBF(){ - return smo.getUseRBF(); - } - - /**Return the number of class attribute values - * @return An integer containing the class attribute values - */ - public int numClassAttributeValues(){ - return smo.numClassAttributeValues(); - } - - /**Returns the indices in sparse format - * @return An int[][][] containing the indices in sparse format - */ - public int[][][] sparseIndices(){ - return smo.sparseIndices(); - } - - /**Returns the weights in sparse format. - * @return A double[][][] containing the weights in sparse format - */ - public double[][][] sparseWeights(){ - return smo.sparseWeights(); - } - - /** - * Get the current settings of the classifier. The parameters are determited from weka. And are specific for each - * algorithm. 
- * - * @return An Array of strings containing the options - * @throws QSARModelException if the options are of the wrong type for the given modeling function - * - */ - public String[] getOptions() throws QSARModelException{ - return this.options; - } -} - diff --git a/src/main/org/openscience/chemojava/qsar/model/weka/SMOregModel.java b/src/main/org/openscience/chemojava/qsar/model/weka/SMOregModel.java deleted file mode 100644 index c067ff7..0000000 --- a/src/main/org/openscience/chemojava/qsar/model/weka/SMOregModel.java +++ /dev/null @@ -1,289 +0,0 @@ -/* $RCSfile$ - * $Author: mariobaseda $ - * $Date: 2007-01-03 17:57:14 +0100 (We, 03 Jan 2007) $ - * $Revision: 5602 $ - * - * Copyright (C) 2007 by Mario Baseda - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * All we ask is that proper credit is given for our work, which includes - * - but is not limited to - adding the above copyright notice to the beginning - * of your source code files, and to any copyright notice that you may distribute - * with programs based on this work. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -package org.openscience.chemojava.qsar.model.weka; - -import weka.core.Instance; -import weka.core.Instances; -import weka.classifiers.functions.SMOreg; -import org.openscience.chemojava.libio.weka.Weka; -import org.openscience.cdk.qsar.model.QSARModelException; - -import java.io.BufferedReader; -import java.io.FileReader; - -/** - * A modelling class that provides the Alex J.Smola and Bernhard Scholkopf - * sequential minimal optimization algorithm for training a support vector regression - * using Weka library (polynomial or RBF kernels). This implementation globally replaces all missing - * values and transforms nominal attributes into binary ones. It also normalizes all - * attributes by default. - * The use of this class is shown in the following code snippet - *
- * try {
- *     SMOreg smoreg = new SMOreg();
- *     smo.setData(attrib, typAttrib, classAttrib, data); or smo.setData(x, y);
- *     smo.build();
- *     
- * } catch (Exception e) {
- *     System.out.println(e.toString());
- * }
- * 
- * Other option is set the data from a file format arff. - *
- * SMO smo = new SMO();
- * smo.setData("/some/where/dataTraining.arff");
- * 
- * - * @author Mario Baseda - * @cdk.require weka.jar - * @cdk.license GPL - * @cdk.module qsarweka - * @cdk.svnrev $Revision: 9162 $ - * @see Weka - * - * @cdk.keyword classifiers, funktions, SMOreg - */ -public class SMOregModel { - /** An instance containing the data which should be classifier as arff file.*/ - private static Instances data; - /**new instance of classifier*/ - private SMOreg smoreg = new SMOreg(); - private Weka weka = new Weka(); - /**An Array Object containing the independent variable*/ - private Object[][] newX = null; - /**A String specifying the path of the file, format arff, - * which contains the new independent values*/ - private String pathNewX = null; - /** An array object which contains the classifacation of the new values*/ - private Object[] object = null; - - /** - * Constructor of the SMOregModel object - */ - public SMOregModel(){} - - /** - * Set the variable data to the arff file - * - * @param filename The path of the file, format arff - * @throws Exception if the parameters are of the wrong type for the given modeling function - * - */ - public void setData(String filename)throws Exception{ - data = weka.setDataset( - this.getClass().getClassLoader().getResourceAsStream(filename), - null - ); - } - - /** - * Parses a given list of data to an arff file, and set the variable data on it. - * @param y An array containing the dependent variable. - * @param x An double array containing the independent variable. - */ - public void setData(Object[] y, Object[][] x) throws Exception{ - String[] attrib = new String[x[0].length+1]; - int[] typAttrib = new int[x[0].length+1]; - for(int i = 0 ; i < x[0].length; i++){ - attrib[i] = "X"+i; - typAttrib[i] = Weka.NUMERIC; - } - attrib[x[0].length] = "Y"; - typAttrib[x[0].length] = Weka.NUMERIC; - data = weka.setDataset(attrib,typAttrib,y,x,null); - } - - /** - * Parses a given list of data to an arff file, and set the variable data on it. 
- * - * @param typAttrib Attribute type: NUMERICAL or NOMINAL. - * @param classAttrib String with a list of the attribut classes. - * @param y An array containing the dependent variable (class value). - * @param x A 2D array containing the independent variable (for example: qsar results). - * @param attrib A string array containing the attributs - * @throws Exception if it is unable to parse the data - * - */ - public void setData(int[] typAttrib, Object[] y, Object[][] x, String[] attrib) throws Exception{ - data = weka.setDataset(attrib,typAttrib,y,x,null); - } - - /** - * Builds the model. - * - * @throws Exception if errors occur in data types. See - * the corresponding method in subclasses of this class for further details. - */ - public void build() throws Exception { - try { -// build the model - smoreg.buildClassifier(data); - } catch (Exception e) { - e.printStackTrace(); - }; - return; - } - - /** - * Specifies the new parameters as arff file. - * - * @param path A String specifying the path of the file, format arff, which contains - * the new values. - * @throws QSARModelException if the parameters are of the wrong type for the given modeling function - * - */ - public void setParameters(String path) throws QSARModelException { - this.pathNewX = path; - } - - /** - * Specifies the new parameters as 2D array object. - * - * @param newX A 2D array Object containing the new values. - * @throws QSARModelException if the parameters are of the wrong type for the given modeling function - */ - public void setParameters(Object[][] newX) throws QSARModelException { - this.newX = newX; - } - - /** Classifies a given instance, which was built from the new parameters before. 
- * - * @throws Exception - */ - public void classifyInstance() throws Exception{ - try{ - if(pathNewX != null){ - BufferedReader br = new BufferedReader(new FileReader(pathNewX)); - Instances test = new Instances(br); - this.object = new Object[test.numInstances()]; - for(int i = 0 ; i < test.numInstances(); i++){ - double result = smoreg.classifyInstance(test.instance(i)); - this.object[i] = new Double(result); - } - } - else if(newX != null){ - this.object = new Object[newX.length]; - for(int j = 0 ; j < newX.length ; j++){ - Instance instance = new Instance(data.numAttributes()); - instance.setDataset(data); - for(int i = 0 ; i < newX[0].length ; i++){ - if(instance.attribute(i).isNumeric()) - instance.setValue(i, ((Double)newX[j][i]).doubleValue()); - else if(instance.attribute(i).isString()) - instance.setValue(i, ""+newX[j][i]); - } - instance.setValue(newX[0].length, 0.0); - double result = smoreg.classifyInstance(instance); - this.object[j] = new Double(result); - } - } - } catch ( Exception e){ - e.printStackTrace(); - } - } - - /** - * Returns the classification of the new parameters. - * - * This function only returns meaningful results if the classifyInstance - * method of this class has been called. 
- * - * @return An Object[] containing the classification values as Double - */ - public Object[] getClassification() { - return this.object; - } - - /**Get the value of C - * @return A double containig the value of C - */ - public double getC(){ - return smoreg.getC(); - } - - /**Get the size of the kernel cache - * @return An integer containing the size of the kernel cache - */ - public int getCacheSize(){ - return smoreg.getCacheSize(); - } - - /**Get the value of epsilon - * @return A double containing the value of epsilon - */ - public double getEpsilon(){ - return smoreg.getEpsilon(); - } - - /**Get the value of exponent - * @return A double containing the value of exponent - */ - public double getExponent(){ - return smoreg.getExponent(); - } - - /**Check whether feature spaces is being normalized. - * @return true or false - */ - public boolean getFeatureSpaceNormalization() throws Exception{ - return smoreg.getFeatureSpaceNormalization(); - } - - /**Get the value of gamma - * @return A double containing the value of gamma - */ - public double getGamma(){ - return smoreg.getGamma(); - } - - /**Check whether lower-order terms are being used - * @return true or false - */ - public boolean getLowerOrderTerms(){ - return smoreg.getLowerOrderTerms(); - } - - /**Get the value of tolerance parameter - * @return A double containig the value of tolerance parameter - */ - public double getToleranceParameter(){ - return smoreg.getToleranceParameter(); - } - - /**Check if the RBF kernel is to be used - * @return true or false - */ - public boolean getUserRBF(){ - return smoreg.getUseRBF(); - } - - /**Get the value of eps - * @return A double containing the value of eps - */ - public double getEps(){ - return smoreg.getEps(); - } -} diff --git a/src/main/org/openscience/chemojava/qsar/model/weka/SimpleLinearRegressionModel.java b/src/main/org/openscience/chemojava/qsar/model/weka/SimpleLinearRegressionModel.java deleted file mode 100644 index d2f2a2e..0000000 --- 
a/src/main/org/openscience/chemojava/qsar/model/weka/SimpleLinearRegressionModel.java +++ /dev/null @@ -1,237 +0,0 @@ -/* $RCSfile$ - * $Author$ - * $Date$ - * $Revision$ - * - * Copyright (C) 2007 by Mario Baseda - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * All we ask is that proper credit is given for our work, which includes - * - but is not limited to - adding the above copyright notice to the beginning - * of your source code files, and to any copyright notice that you may distribute - * with programs based on this work. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -package org.openscience.chemojava.qsar.model.weka; - -import org.openscience.chemojava.libio.weka.Weka; -import org.openscience.cdk.qsar.model.QSARModelException; - -import weka.classifiers.functions.SimpleLinearRegression; - -/** - * A modeling class that provides a simple linear least squares regression model using Weka library. - * The use of this class is shown in the following code snippet - *
- * try {
- *     SimpleLinearRegressionModel slrm = new SimpleLinearRegressionModel(x,y); or SimpleLinearRegressionModel slrm = new SimpleLinearRegressionModel(typAttrib,classAttrib,x,y,attrib);
- *     slrm.build();
- *     slrm.setParameters(newX);
- *     slrm.predict();
- *     Double[] predictedvalues = (Double[])slrm.getPredictPredicted();
- * 
- * } catch (QSARModelException qme) {
- *     System.out.println(qme.toString());
- * }
- * 
- * Other option is set the data from a file format arff. - *
- * SimpleLinearRegressionModel slrm = new SimpleLinearRegressionModel("/some/where/dataTraining.arff");
- * 
- * Note that when making predictions, the new X matrix must be set by calls - * to setParameters(). The following table lists the parameters that can be set and their - * expected types. - *
- * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - *
NameJava TypeNotes
xDouble[][]
yDouble[]Length should be equal to the rows of x
newXDouble[][]Number of columns should be the same as in x
- *
- * - * @author Mario Baseda - * @cdk.require weka.jar - * @cdk.license GPL - * @cdk.module qsarweka - * @cdk.svnrev $Revision: 9162 $ - * @see Weka - * - * @cdk.keyword regression, simple linear - */ -public class SimpleLinearRegressionModel { - /**Dependent variable */ - private Object[] y; - /**Independent variable */ - private Object[][] x; - private Weka weka = null; - /**A String specifying the path of the file, format arff, - * which contians the variables and attributes with whose to test.*/ - private String pathTest = null; - /** results of the prediction*/ - private Double[] results; - /**A Array Object containing the independent variable*/ - private Object[][] newX = null; - /**A String specifying the path of the file, format arff, - * which contians the independent values with whose to predict.*/ - private String pathNewX = null; - /**Attribute type: NUMERICAL or NOMINAL*/ - private int[] typAttrib; - /** String with the attributs*/ - private String[]attrib; - /** Boolean if the attributs was set*/ - private boolean setAttrib = false; - - /** - * Constructor of the SimpleLinearRegressionModel object from varibles - * @param y An array containing the dependent variable. - * @param x An double array containing the independent variable. - */ - public SimpleLinearRegressionModel(Object[] y, Object[][] x){ - this.y = y; - this.x = x; - } - - /** - * Constructor of the SimpleLinearRegressionModel object from varibles - * @param y An array containing the dependent variable (class value). - * @param x A 2D array containing the independent variable (for example: qsar results). 
- * @param typAttrib An integer array containing the attribute type - * @param attrib A string array containing the attributs - */ - public SimpleLinearRegressionModel(int[] typAttrib, Object[] y, Object[][] x, String[] attrib){ - this.y = y; - this.x = x; - this.typAttrib = typAttrib; - this.attrib = attrib; - setAttrib = true; - } - - /** - * Constructor of the SimpleLinearRegressionModel object from file - * @param pathTest Path of the dataset file format arff to train - */ - public SimpleLinearRegressionModel(String pathTest){ - this.pathTest = pathTest; - } - - /** - * Builds (trains) the model. - * - * @throws QSARModelException if errors occur in data types, calls to the R session. See - * the corresponding method in subclasses of this class for further details. - */ - public void build() throws QSARModelException { - weka = new Weka(); - try { - SimpleLinearRegression slr = new SimpleLinearRegression(); - - if(pathTest != null){ - weka.setDataset( - this.getClass().getClassLoader().getResourceAsStream(pathTest), - slr - ); - }else{ - if (!(setAttrib)){ - this.attrib = new String[x[0].length+1]; - this.typAttrib = new int[x[0].length+1]; - for(int i = 0 ; i < x[0].length; i++){ - attrib[i] = "X"+i; - typAttrib[i] = Weka.NUMERIC; - } - attrib[x[0].length] = "Y"; - typAttrib[x[0].length] = Weka.NUMERIC; - } - weka.setDataset(attrib,typAttrib,y,x,slr); - } - } catch (Exception e) { - e.printStackTrace(); - } - } - - /** - * Specifies the parameters to predict. In this case will be the dependent varibles. - * - * @param path A String specifying the path of the file, format arff, which contians - * the dependent values with whose to predict. - * @throws QSARModelException if the parameters are of the wrong type for the given modeling function - * - */ - public void setParameters(String path) throws QSARModelException { - this.pathNewX = path; - } - - /** - * Specifies the parameters to predict. In this case will be the independent varibles. 
- * - * @param newX A 2D array Object containing the independent variable. - * @throws QSARModelException if the parameters are of the wrong type for the given modeling function - */ - public void setParameters(Object[][] newX) throws QSARModelException { - this.newX = newX; - } - - /** - * Makes predictions using a previously built model. - * - * @throws QSARModelException if errors occur in data types, calls to the R session. See - * the corresponding method in subclasses of this class for further details. - */ - public void predict() throws QSARModelException { - try{ - if(pathNewX != null){ - Object[] object = weka.getPrediction(pathNewX); - results = new Double[object.length]; - for(int i = 0 ; i < object.length; i++){ - results[i] = (Double)object[i]; - } - } - else if(newX != null){ - Object[] object = weka.getPrediction(newX); - results = new Double[object.length]; - for(int i = 0 ; i < object.length; i++){ - results[i] = (Double)object[i]; - } - } - - } catch ( Exception e){ - e.printStackTrace(); - } - } - - /** - * Returns the predicted values for the prediction set. - * - * This function only returns meaningful results if the predict - * method of this class has been called. 
- * - * @return A Double[] containing the predicted values - */ - public Object[] getPredictPredicted() { - return results; - } -} diff --git a/src/test/org/openscience/chemojava/libio/openbabel/OpenBabelConvertTest.java b/src/test/org/openscience/chemojava/libio/openbabel/OpenBabelConvertTest.java deleted file mode 100644 index a7a8543..0000000 --- a/src/test/org/openscience/chemojava/libio/openbabel/OpenBabelConvertTest.java +++ /dev/null @@ -1,91 +0,0 @@ -/* $RCSfile$ - * $Author: egonw $ - * $Date: 2008-03-22 16:05:21 +0100 (Sat, 22 Mar 2008) $ - * $Revision: 10503 $ - * - * Copyright (C) 2004-2007 Miguel Rojas - * - * Contact: cdk-devel@slists.sourceforge.net - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * All we ask is that proper credit is given for our work, which includes - * - but is not limited to - adding the above copyright notice to the beginning - * of your source code files, and to any copyright notice that you may distribute - * with programs based on this work. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
- * */ -package org.openscience.chemojava.libio.openbabel; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileOutputStream; -import java.io.FileReader; -import java.io.InputStream; - -import org.junit.Assert; -import org.junit.Test; -import org.openscience.cdk.CDKTestCase; -import org.openscience.cdk.tools.LoggingTool; -import org.openscience.chemojava.libio.openbabel.OpenBabelConvert; - -/** - * TestCase for the convertor using OpenBabel. - * - * @cdk.module nocompile - * - * @author Miguel Rojas - */ -public class OpenBabelConvertTest extends CDKTestCase { - - private static LoggingTool logger = new LoggingTool(OpenBabelConvert.class); - - @Test public void test5_Hexen_3_one() throws Exception { - String filenameInput = "data/mdl/540545.mol"; - InputStream ins = this.getClass().getClassLoader().getResourceAsStream(filenameInput); - File fileOutput = File.createTempFile("540545.", ".mol"); - FileOutputStream outs = new FileOutputStream(fileOutput); - try { - byte[] buf = new byte[1024]; - int i = 0; - while ((i = ins.read(buf)) != -1) { - outs.write(buf, 0, i); - } - } catch (Exception e) { - throw e; - } finally { - if (ins != null) ins.close(); - if (outs != null) outs.close(); - } - - logger.info("Testing: " + fileOutput.getAbsolutePath()); - System.out.println("testing: " + fileOutput.getAbsolutePath()); - - OpenBabelConvert convertOB = new OpenBabelConvert(); - - File tmpFile = File.createTempFile("540545.", ".cml"); - System.out.println("testing: " + tmpFile.getAbsolutePath()); - convertOB.convert(fileOutput, "mol", tmpFile, "cml", "-h"); - - BufferedReader reader = new BufferedReader(new FileReader(tmpFile)); - String line = reader.readLine(); - int lineCount = 0; - while (line != null) { - System.out.println("Line: " + line); - lineCount++; - line = reader.readLine(); - } - Assert.assertTrue(lineCount > 0); - } - -} diff --git a/src/test/org/openscience/chemojava/libio/weka/WekaTest.java 
b/src/test/org/openscience/chemojava/libio/weka/WekaTest.java deleted file mode 100644 index ffc50e1..0000000 --- a/src/test/org/openscience/chemojava/libio/weka/WekaTest.java +++ /dev/null @@ -1,143 +0,0 @@ -/* $RCSfile$ - * $Author: egonw $ - * $Date: 2006-05-01 10:49:56 +0200 (Mo, 01 Mai 2006) $ - * $Revision: 6096 $ - * - * Copyright (C) 2004-2007 Miguel Rojas - * - * Contact: cdk-devel@slists.sourceforge.net - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * All we ask is that proper credit is given for our work, which includes - * - but is not limited to - adding the above copyright notice to the beginning - * of your source code files, and to any copyright notice that you may distribute - * with programs based on this work. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * */ -package org.openscience.chemojava.libio.weka; - -import java.io.InputStream; - -import org.junit.Assert; -import org.junit.Test; -import org.openscience.cdk.CDKTestCase; - -import weka.classifiers.functions.LinearRegression; -import weka.classifiers.trees.J48; - -/** - * TestCase for Weka class. - * - * @cdk.module test-qsarweka - */ -public class WekaTest extends CDKTestCase { - - /** - * A unit test for JUnit. Test and prediction using file arff format, - * algorithm = Linear Regression. 
- */ - @Test public void test1() throws Exception { - LinearRegression lr = new LinearRegression(); - String[] options = new String[4]; - options[0] = "-U"; - options[1] = "0"; - options[2] = "-R"; - options[3] = "0.00000008"; - lr.setOptions(options); - Weka weka = new Weka(); - InputStream stream = this.getClass().getClassLoader(). - getResourceAsStream("data/arff/Table1.arff"); - weka.setDataset(stream, lr); - Object[] result = weka.getPredictionCDK("data/arff/Table2.arff"); - Assert.assertNotNull(result); - } - - /** - * A unit test for JUnit. Test using file arrf and prediction using Array, - * algorithm = Linear Regression. - */ - @Test public void test2() throws Exception { - LinearRegression lr = new LinearRegression(); - String[] options = new String[4]; - options[0] = "-U"; - options[1] = "0"; - options[2] = "-R"; - options[3] = "0.00000008"; - lr.setOptions(options); - Weka weka = new Weka(); - InputStream stream = this.getClass().getClassLoader(). - getResourceAsStream("data/arff/Table1.arff"); - weka.setDataset(stream, lr); - Object[][] testX = {{new Double(2),new Double(2)}, - {new Double(5),new Double(5)} - }; - Object[] result = weka.getPrediction(testX); - Assert.assertNotNull(result); - } - - /** - * A unit test for JUnit. Test and prediction using Array, algorithm = - * Linear Regression. 
- */ - @Test public void test3() throws Exception { - LinearRegression lr = new LinearRegression(); - String[] attrib = {"X2","X1", "Y" }; - int[] typAttrib = {Weka.NUMERIC,Weka.NUMERIC,Weka.NUMERIC}; - double[][] x = {{ 1,1},{3,3},{4,4},{6,6} }; - Double[][] xD = new Double[x.length][x[0].length]; - for(int i = 0 ; i< xD.length; i++) - for(int j = 0 ; j < xD[i].length ; j++) - xD[i][j] = new Double(x[i][j]); - - double[] y = { 0,2,3,5}; - Double[] yD = new Double[y.length]; - for(int i = 0 ; i< yD.length; i++) - yD[i] = new Double(y[i]); - - Weka weka = new Weka(); - weka.setDataset(attrib, typAttrib, yD, xD, lr); - Double[][] testX = {{new Double(2),new Double(2)}, - {new Double(5),new Double(5)} - }; - Object[] result = weka.getPrediction(testX); - Assert.assertNotNull(result); - } - - /** - * A unit test for JUnit. Test prediction using Array, algorithm = J48 - */ - @Test public void test4() throws Exception { - String[] options = new String[1]; - options[0] = "-U"; - J48 j48 = new J48(); - j48.setOptions(options); - - String[] attrib = {"aX","bX","cX"}; - int[] typAttrib = {Weka.NUMERIC,Weka.NUMERIC,Weka.NUMERIC}; - String[] classAttrib = {"A_","B_","C_"}; - double[][] x = {{10,10 ,10 },{10 , -10 , -10},{-10 , -10 , -10}, - {11,11 ,11 },{11 , -11 , -11},{-11 , -11 , -11}}; - Double[][] xD = new Double[x.length][x[0].length]; - for(int i = 0 ; i< xD.length; i++) - for(int j = 0 ; j < xD[i].length ; j++) - xD[i][j] = new Double(x[i][j]); - String[] y = { "A_","B_" ,"C_","A_","B_" ,"C_"}; - Weka weka = new Weka(); - weka.setDataset(attrib, typAttrib, classAttrib, y, xD, j48); - Double[][] testX = {{new Double(11),new Double(-11),new Double(-11)}, - {new Double(-10),new Double(-10),new Double(-10)}}; - Object[] resultY = weka.getPrediction(testX); - Assert.assertNotNull(resultY); - } -} diff --git a/src/test/org/openscience/chemojava/modulesuites/MqsarwekaTests.java b/src/test/org/openscience/chemojava/modulesuites/MqsarwekaTests.java deleted file mode 100644 
index 440cf86..0000000 --- a/src/test/org/openscience/chemojava/modulesuites/MqsarwekaTests.java +++ /dev/null @@ -1,21 +0,0 @@ -package org.openscience.chemojava.modulesuites; - -import org.junit.runner.RunWith; -import org.junit.runners.Suite; -import org.junit.runners.Suite.SuiteClasses; -import org.openscience.chemojava.libio.weka.WekaTest; - -/** - * TestSuite that runs all the tests for the CDK reaction module. - * - * @cdk.module test-qsarweka - * @cdk.depends weka.jar - * @cdk.depends junit.jar - */ -@RunWith(Suite.class) -@SuiteClasses({ - WekaTest.class -}) -public class MqsarwekaTests { - -} diff --git a/src/test/org/openscience/chemojava/qsar/model/weka/DensityBasedClustererModelTest.java b/src/test/org/openscience/chemojava/qsar/model/weka/DensityBasedClustererModelTest.java deleted file mode 100644 index d3af2ab..0000000 --- a/src/test/org/openscience/chemojava/qsar/model/weka/DensityBasedClustererModelTest.java +++ /dev/null @@ -1,85 +0,0 @@ -/* $RCSfile$ - * $Author: mariobaseda $ - * $Date: 2007-01-03 17:57:14 +0100 (We, 03 Jan 2007) $ - * $Revision: 5602 $ - * - * Copyright (C) 2007 by Mario Baseda - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * All we ask is that proper credit is given for our work, which includes - * - but is not limited to - adding the above copyright notice to the beginning - * of your source code files, and to any copyright notice that you may distribute - * with programs based on this work. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -package org.openscience.chemojava.qsar.model.weka; - -import org.junit.Assert; -import org.junit.Test; -import org.openscience.cdk.CDKTestCase; -import org.openscience.chemojava.libio.weka.Weka; - -/** - * TestSuite that runs a test for the DensityBasedClustererModel - * - * @author Mario Baseda - * @cdk.module test-qsar - */ -public class DensityBasedClustererModelTest extends CDKTestCase{ - - /** - * @throws Exception - */ - @Test public void testDensityBasedClustererModel() throws Exception{ - DensityBasedClustererModel test = new DensityBasedClustererModel(); - int[] typAttrib = {Weka.NUMERIC, Weka.NUMERIC, Weka.NUMERIC}; - String[] classAttrib = {"A_", "B_", "C_"}; - double[][] x = {{10, 10, 10}, {10, -10, -10}, {-10, -10, -10}, - {11, 11, 11}, {11, -11, -11}, {-11, -11, -11}}; - Double[][] xD = new Double[x.length][x[0].length]; - for (int i = 0; i < xD.length; i++) - for (int j = 0; j < xD[i].length; j++) - xD[i][j] = new Double(x[i][j]); - String[] y = {"A_", "B_", "C_", "A_", "B_", "C_"}; - String[] attrib = {"X1", "X2", "X3"}; - test.setData(attrib, typAttrib, classAttrib, y, xD); - //test.setData(attrib, typAttrib, null, null, xD); - test.build(); - Double[][] newx = { - {new Double(99), new Double(89), new Double(79)}, - {new Double(19), new Double(29), new Double(39)}, - }; - test.setParameters(newx); - Assert.assertNotNull (test.distributionForInstance()); - Assert.assertNotNull (test.logDensityForInstance()); - Assert.assertNotNull (test.logDensityPerClusterForInstance()); - Assert.assertNotNull (test.logJointDensitiesForInstance()); - Assert.assertNotNull (test.clusterPriors()); - } - - /** - * @throws Exception - */ -// public void testDensityBasedClustererModel2() throws Exception{ -// DensityBasedClustererModel test = new 
DensityBasedClustererModel(); -// test.setData("X:\\cdk\\src\\data\\arff\\Table3.arff"); -// test.build(); -// test.setParameters("X:\\cdk\\src\\data\\arff\\Table4.arff"); -// assertNotNull (test.distributionForInstance()); -// assertNotNull (test.logDensityForInstance()); -// assertNotNull (test.logDensityPerClusterForInstance()); -// assertNotNull (test.logJointDensitiesForInstance()); -// assertNotNull (test.clusterPriors()); -// } -} diff --git a/src/test/org/openscience/chemojava/qsar/model/weka/EMClusterTest.java b/src/test/org/openscience/chemojava/qsar/model/weka/EMClusterTest.java deleted file mode 100644 index 30566f3..0000000 --- a/src/test/org/openscience/chemojava/qsar/model/weka/EMClusterTest.java +++ /dev/null @@ -1,98 +0,0 @@ -/* $RCSfile$ - * $Author: mariobaseda $ - * $Date: 2007-01-03 17:57:14 +0100 (We, 03 Jan 2007) $ - * $Revision: 5602 $ - * - * Copyright (C) 2007 by Mario Baseda - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * All we ask is that proper credit is given for our work, which includes - * - but is not limited to - adding the above copyright notice to the beginning - * of your source code files, and to any copyright notice that you may distribute - * with programs based on this work. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -package org.openscience.chemojava.qsar.model.weka; - -import org.junit.Assert; -import org.junit.Test; -import org.openscience.cdk.CDKTestCase; -import org.openscience.chemojava.libio.weka.Weka; - -/** - * TestSuite that runs a test for the EMCluster - * - * @author Mario Baseda - * @cdk.module test-qsar - */ -public class EMClusterTest extends CDKTestCase{ - - /** - * @throws Exception - */ - @Test public void testEMCluster() throws Exception{ -// String[] options = {"-N", "3"}; - EMCluster test = new EMCluster(); -// test.setOptions(options); - int[] typAttrib = {Weka.NUMERIC, Weka.NUMERIC, Weka.NUMERIC}; - String[] classAttrib = {"A_", "B_", "C_"}; - double[][] x = {{10, 10, 10}, {10, -10, -10}, {-10, -10, -10}, - {11, 11, 11}, {11, -11, -11}, {-11, -11, -11}}; - Double[][] xD = new Double[x.length][x[0].length]; - for (int i = 0; i < xD.length; i++) - for (int j = 0; j < xD[i].length; j++) - xD[i][j] = new Double(x[i][j]); - String[] y = {"A_", "B_", "C_", "A_", "B_", "C_"}; - String[] attrib = {"X1", "X2", "X3"}; - test.setData(attrib, typAttrib, classAttrib, y, xD); - test.build(); - Double[][] newx = { - {new Double(99), new Double(89), new Double(79)}, - {new Double(19), new Double(29), new Double(39)}, - }; - test.setParameters(newx); - test.logDensityPerClusterForInstance(); - Assert.assertNotNull (test.clusterPriors()); - Assert.assertNotNull (test.getClusterModelsNumericAtts()); - Assert.assertNotNull (test.getClusterPriors()); - Assert.assertNotNull (test.getLogDensityPerClusterForInstance()); - Assert.assertNotNull (test.getClusterModelsNumericAtts()); - Assert.assertEquals(100, test.getSeed()); - Assert.assertEquals(-1, test.getNumClusters()); - Assert.assertEquals(1.0E-6, test.getMinStdDev(), 0.001); - 
Assert.assertEquals(100, test.getMaxIterations()); - Assert.assertEquals(1, test.numberOfCluster()); - } - - /** - * @throws Exception - */ -// public void testEMCluster2() throws Exception{ -// EMCluster test = new EMCluster(); -// test.setData("X:\\cdk\\src\\data\\arff\\Table3.arff"); -// test.build(); -// test.setParameters("X:\\cdk\\src\\data\\arff\\Table4.arff"); -// test.logDensityPerClusterForInstance(); -// assertNotNull (test.clusterPriors()); -// assertNotNull (test.getClusterModelsNumericAtts()); -// assertNotNull (test.getClusterPriors()); -// assertNotNull (test.getLogDensityPerClusterForInstance()); -// assertNotNull (test.getClusterModelsNumericAtts()); -// assertEquals(100, test.getSeed()); -// assertEquals(-1, test.getNumClusters()); -// assertEquals(1.0E-6, test.getMinStdDev(), 0.001); -// assertEquals(100, test.getMaxIterations()); -// assertEquals(1, test.numberOfCluster()); -// } -} diff --git a/src/test/org/openscience/chemojava/qsar/model/weka/FilterSupervisedResampleTest.java b/src/test/org/openscience/chemojava/qsar/model/weka/FilterSupervisedResampleTest.java deleted file mode 100644 index ecba9ec..0000000 --- a/src/test/org/openscience/chemojava/qsar/model/weka/FilterSupervisedResampleTest.java +++ /dev/null @@ -1,78 +0,0 @@ -/* $RCSfile$ - * $Author: mariobaseda $ - * $Date: 2007-01-03 17:57:14 +0100 (We, 03 Jan 2007) $ - * $Revision: 5602 $ - * - * Copyright (C) 2007 by Mario Baseda - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * All we ask is that proper credit is given for our work, which includes - * - but is not limited to - adding the above copyright notice to the beginning - * of your source code files, and to any copyright notice that you may distribute - * with programs based on this work. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -package org.openscience.chemojava.qsar.model.weka; - -import org.junit.Assert; -import org.junit.Test; -import org.openscience.cdk.CDKTestCase; -import org.openscience.chemojava.libio.weka.Weka; - -/** - * TestSuite that runs a test for the FilterSupervisedResample - * - * @author Mario Baseda - * @cdk.module test-qsar - */ -public class FilterSupervisedResampleTest extends CDKTestCase{ - - /** - * @throws Exception - */ - @Test public void testFilterSupervisedResample() throws Exception{ - //String[] options = {"-S", "1"}; - FilterSupervisedResample test = new FilterSupervisedResample(); - //test.setOptions(options); - int[] typAttrib = {Weka.NUMERIC, Weka.NUMERIC, Weka.NUMERIC}; - String[] classAttrib = {"A_", "B_", "C_"}; - double[][] x = {{10, 10, 10}, {10, -10, -10}, {-10, -10, -10}, - {11, 11, 11}, {11, -11, -11}, {-11, -11, -11}}; - Double[][] xD = new Double[x.length][x[0].length]; - for (int i = 0; i < xD.length; i++) - for (int j = 0; j < xD[i].length; j++) - xD[i][j] = new Double(x[i][j]); - String[] y = {"A_", "B_", "C_", "A_", "B_", "C_"}; - String[] attrib = {"X1", "X2", "X3"}; - test.setData(attrib, typAttrib, classAttrib, y, xD); - test.build(); - Assert.assertEquals(0.0, test.getBiasToUniformClass(), 0.001); - Assert.assertEquals(1, test.getRandomSeed()); - Assert.assertEquals(100.0, test.getSampleSizePercent(), 0.001); - } - - /** - * @throws Exception - */ -// public void testFilterSupervisedResample2() throws Exception{ -// //String[] options = {"-S", "1"}; -// 
FilterSupervisedResample test = new FilterSupervisedResample(); -// //test.setOptions(options); -// test.setData("X:\\cdk\\src\\data\\arff\\Table3.arff"); -// test.build(); -// assertEquals(0.0, test.getBiasToUniformClass(), 0.001); -// assertEquals(1, test.getRandomSeed()); -// assertEquals(100.0, test.getSampleSizePercent(), 0.001); -// } -} diff --git a/src/test/org/openscience/chemojava/qsar/model/weka/FilterUnSupervisedRandomizeTest.java b/src/test/org/openscience/chemojava/qsar/model/weka/FilterUnSupervisedRandomizeTest.java deleted file mode 100644 index 7cf9463..0000000 --- a/src/test/org/openscience/chemojava/qsar/model/weka/FilterUnSupervisedRandomizeTest.java +++ /dev/null @@ -1,74 +0,0 @@ -/* $RCSfile$ - * $Author: mariobaseda $ - * $Date: 2007-01-03 17:57:14 +0100 (We, 03 Jan 2007) $ - * $Revision: 5602 $ - * - * Copyright (C) 2007 by Mario Baseda - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * All we ask is that proper credit is given for our work, which includes - * - but is not limited to - adding the above copyright notice to the beginning - * of your source code files, and to any copyright notice that you may distribute - * with programs based on this work. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -package org.openscience.chemojava.qsar.model.weka; - -import org.junit.Assert; -import org.junit.Test; -import org.openscience.cdk.CDKTestCase; -import org.openscience.chemojava.libio.weka.Weka; - -/** - * TestSuite that runs a test for the FilterUnSupervisedRandomize - * - * @author Mario Baseda - * @cdk.module test-qsar - */ -public class FilterUnSupervisedRandomizeTest extends CDKTestCase{ - - /** - * @throws Exception - */ - @Test public void testFilterUnSupervisedRandomize() throws Exception{ - //String[] options = {"-S", "1"}; - FilterUnSupervisedRandomize test = new FilterUnSupervisedRandomize(); - //test.setOptions(options); - int[] typAttrib = {Weka.NUMERIC, Weka.NUMERIC, Weka.NUMERIC}; - String[] classAttrib = {"A_", "B_", "C_"}; - double[][] x = {{10, 10, 10}, {10, -10, -10}, {-10, -10, -10}, - {11, 11, 11}, {11, -11, -11}, {-11, -11, -11}}; - Double[][] xD = new Double[x.length][x[0].length]; - for (int i = 0; i < xD.length; i++) - for (int j = 0; j < xD[i].length; j++) - xD[i][j] = new Double(x[i][j]); - String[] y = {"A_", "B_", "C_", "A_", "B_", "C_"}; - String[] attrib = {"X1", "X2", "X3"}; - test.setData(attrib, typAttrib, classAttrib, y, xD); - test.build(); - Assert.assertEquals(42, test.getRandomSeed()); - } - - /** - * @throws Exception - */ -// public void testFilterUnSupervisedRandomize2() throws Exception{ -// //String[] options = {"-S", "1"}; -// FilterUnSupervisedRandomize test = new FilterUnSupervisedRandomize(); -// //test.setOptions(options); -// test.setData("X:\\cdk\\src\\data\\arff\\Table3.arff"); -// test.build(); -// assertEquals(42, test.getRandomSeed()); -// } -} diff --git a/src/test/org/openscience/chemojava/qsar/model/weka/FilterUnSupervisedResampleTest.java 
b/src/test/org/openscience/chemojava/qsar/model/weka/FilterUnSupervisedResampleTest.java deleted file mode 100644 index 6f0182a..0000000 --- a/src/test/org/openscience/chemojava/qsar/model/weka/FilterUnSupervisedResampleTest.java +++ /dev/null @@ -1,77 +0,0 @@ -/* $RCSfile$ - * $Author: mariobaseda $ - * $Date: 2007-01-03 17:57:14 +0100 (We, 03 Jan 2007) $ - * $Revision: 5602 $ - * - * Copyright (C) 2007 by Mario Baseda - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * All we ask is that proper credit is given for our work, which includes - * - but is not limited to - adding the above copyright notice to the beginning - * of your source code files, and to any copyright notice that you may distribute - * with programs based on this work. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -package org.openscience.chemojava.qsar.model.weka; - -import org.junit.Assert; -import org.junit.Test; -import org.openscience.cdk.CDKTestCase; -import org.openscience.chemojava.libio.weka.Weka; - -/** - * TestSuite that runs a test for the FilterUnSupervisedResample - * - * @author Mario Baseda - * @cdk.module test-qsar - */ -public class FilterUnSupervisedResampleTest extends CDKTestCase{ - - - /** - * @throws Exception - */ - @Test public void testFilterUnSupervisedResample() throws Exception{ - //String[] options = {"-S", "1"}; - FilterUnSupervisedResample test = new FilterUnSupervisedResample(); - //test.setOptions(options); - int[] typAttrib = {Weka.NUMERIC, Weka.NUMERIC, Weka.NUMERIC}; - String[] classAttrib = {"A_", "B_", "C_"}; - double[][] x = {{10, 10, 10}, {10, -10, -10}, {-10, -10, -10}, - {11, 11, 11}, {11, -11, -11}, {-11, -11, -11}}; - Double[][] xD = new Double[x.length][x[0].length]; - for (int i = 0; i < xD.length; i++) - for (int j = 0; j < xD[i].length; j++) - xD[i][j] = new Double(x[i][j]); - String[] y = {"A_", "B_", "C_", "A_", "B_", "C_"}; - String[] attrib = {"X1", "X2", "X3"}; - test.setData(attrib, typAttrib, classAttrib, y, xD); - test.build(); - Assert.assertEquals(1, test.getRandomSeed()); - Assert.assertEquals(100.0, test.getSampleSizePercent(), 0.001); - } - - /** - * @throws Exception - */ -// public void testFilterUnSupervisedResample2() throws Exception{ -// //String[] options = {"-S", "1"}; -// FilterUnSupervisedResample test = new FilterUnSupervisedResample(); -// //test.setOptions(options); -// test.setData("X:\\cdk\\src\\data\\arff\\Table3.arff"); -// test.build(); -// assertEquals(1, test.getRandomSeed()); -// assertEquals(100.0, test.getSampleSizePercent(), 0.001); -// } -} diff --git 
a/src/test/org/openscience/chemojava/qsar/model/weka/J48WModelTest.java b/src/test/org/openscience/chemojava/qsar/model/weka/J48WModelTest.java deleted file mode 100644 index f1a7ab3..0000000 --- a/src/test/org/openscience/chemojava/qsar/model/weka/J48WModelTest.java +++ /dev/null @@ -1,96 +0,0 @@ -/* $RCSfile$ - * $Author: miguelrojasch $ - * $Date: 2006-05-11 14:25:07 +0200 (Do, 11 Mai 2006) $ - * $Revision: 6221 $ - * - * Copyright (C) 2004-2007 Miguel Rojas - * - * Contact: cdk-devel@lists.sourceforge.net - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
- */ - -package org.openscience.chemojava.qsar.model.weka; - -import org.junit.Assert; -import org.junit.Test; -import org.openscience.cdk.CDKTestCase; -import org.openscience.cdk.exception.CDKException; -import org.openscience.cdk.qsar.model.QSARModelException; -import org.openscience.chemojava.libio.weka.Weka; - -/** - * TestSuite that runs a test for the J48WModel - * - * @author Miguel Rojas - * @cdk.module test-qsar - */ -public class J48WModelTest extends CDKTestCase { - - /** - * @throws CDKException - * @throws Exception - * @throws QSARModelException - */ - @Test public void testJ48WModel1() throws Exception { - int[] typAttrib = {Weka.NUMERIC, Weka.NUMERIC, Weka.NUMERIC}; - String[] classAttrib = {"A_", "B_", "C_"}; - double[][] x = {{10, 10, 10}, {10, -10, -10}, {-10, -10, -10}, - {11, 11, 11}, {11, -11, -11}, {-11, -11, -11}}; - Double[][] xD = new Double[x.length][x[0].length]; - for (int i = 0; i < xD.length; i++) - for (int j = 0; j < xD[i].length; j++) - xD[i][j] = new Double(x[i][j]); - String[] y = {"A_", "B_", "C_", "A_", "B_", "C_"}; - - J48WModel j48 = new J48WModel(typAttrib, classAttrib, y, xD); - String[] options = new String[1]; - options[0] = "-U"; - j48.setOptions(options); - j48.build(); - - /* Test predictions */ - Double[][] testX = {{new Double(11), new Double(-11), new Double(-11)}, - {new Double(-10), new Double(-10), new Double(-10)}}; - - j48.setParameters(testX); - j48.predict(); - - String[] preds = (String[]) j48.getPredictPredicted(); - Assert.assertEquals("B_", preds[0]); - Assert.assertEquals("C_", preds[1]); - } - - /** - * @throws CDKException - * @throws Exception - * @throws QSARModelException - */ - @Test public void testJ48WModel2() throws CDKException, java.lang.Exception, QSARModelException { - J48WModel j48 = new J48WModel(true, "data/arff/Table3.arff"); - String[] options = new String[1]; - options[0] = "-U"; - j48.setOptions(options); - j48.build(); - Double[][] testX = {{new Double(11), new Double(-11), new 
Double(-11)}, - {new Double(-10), new Double(-10), new Double(-10)}}; - j48.setParameters(testX); - j48.predict(); - String[] preds = (String[]) j48.getPredictPredicted(); - Assert.assertEquals("B_", preds[0]); - Assert.assertEquals("C_", preds[1]); - } -} - diff --git a/src/test/org/openscience/chemojava/qsar/model/weka/KmeansClusterTest.java b/src/test/org/openscience/chemojava/qsar/model/weka/KmeansClusterTest.java deleted file mode 100644 index 5c97eb3..0000000 --- a/src/test/org/openscience/chemojava/qsar/model/weka/KmeansClusterTest.java +++ /dev/null @@ -1,106 +0,0 @@ -/* $RCSfile$ - * $Author: mariobaseda $ - * $Date: 2007-01-03 17:57:14 +0100 (We, 03 Jan 2007) $ - * $Revision: 5602 $ - * - * Copyright (C) 2007 by Mario Baseda - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * All we ask is that proper credit is given for our work, which includes - * - but is not limited to - adding the above copyright notice to the beginning - * of your source code files, and to any copyright notice that you may distribute - * with programs based on this work. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -package org.openscience.chemojava.qsar.model.weka; - -import org.junit.Assert; -import org.junit.Test; -import org.openscience.cdk.CDKTestCase; -import org.openscience.chemojava.libio.weka.Weka; - -/** - * TestSuite that runs a test for the KmeansCluster - * - * @author Mario Baseda - * @cdk.module test-qsar - */ -public class KmeansClusterTest extends CDKTestCase { - - /** - * @throws Exception - */ - @Test public void testKmeansCluster_N() throws Exception{ - String[] options = {"-N", "3"}; - KmeansCluster test = new KmeansCluster(); - test.setOptions(options); - int[] typAttrib = {Weka.NUMERIC, Weka.NUMERIC, Weka.NUMERIC}; - String[] classAttrib = {"A_", "B_", "C_"}; - double[][] x = {{10, 10, 10}, {10, -10, -10}, {-10, -10, -10}, - {11, 11, 11}, {11, -11, -11}, {-11, -11, -11}}; - Double[][] xD = new Double[x.length][x[0].length]; - for (int i = 0; i < xD.length; i++) - for (int j = 0; j < xD[i].length; j++) - xD[i][j] = new Double(x[i][j]); - String[] y = {"A_", "B_", "C_", "A_", "B_", "C_"}; - String[] attrib = {"X1", "X2", "X3"}; - test.setData(attrib, typAttrib, classAttrib, y, xD); - test.build(); - Double[][] newx = { - {new Double(99), new Double(89), new Double(79)}, - {new Double(19), new Double(29), new Double(39)}, - }; - test.setParameters(newx); - test.clusterInstance();; - Assert.assertNotNull (test.getClusterInstance()); - Assert.assertNotNull (test.getClusterCentroids()); - Assert.assertEquals(3.0, Math.rint(test.getSquaredError()), 0.01); - Assert.assertNotNull (test.getClusterStandardDevs()); - Assert.assertNotNull (test.getClusterSizes()); - Assert.assertEquals(3, test.numberOfCluster()); - } - - /** - * @throws Exception - */ - @Test public void testKmeansCluster_S() throws Exception{ - String[] options = {"-S", "4"}; 
- KmeansCluster test = new KmeansCluster(); - test.setOptions(options); - int[] typAttrib = {Weka.NUMERIC, Weka.NUMERIC, Weka.NUMERIC}; - String[] classAttrib = {"A_", "B_", "C_"}; - double[][] x = {{10, 10, 10}, {10, -10, -10}, {-10, -10, -10}, - {11, 11, 11}, {11, -11, -11}, {-11, -11, -11}}; - Double[][] xD = new Double[x.length][x[0].length]; - for (int i = 0; i < xD.length; i++) - for (int j = 0; j < xD[i].length; j++) - xD[i][j] = new Double(x[i][j]); - String[] y = {"A_", "B_", "C_", "A_", "B_", "C_"}; - String[] attrib = {"X1", "X2", "X3"}; - test.setData(attrib, typAttrib, classAttrib, y, xD); - test.build(); - Double[][] newx = { - {new Double(99), new Double(89), new Double(79)}, - {new Double(19), new Double(29), new Double(39)}, - }; - test.setParameters(newx); - test.clusterInstance(); - Assert.assertNotNull (test.getClusterInstance()); - Assert.assertEquals(2, test.numberOfCluster()); - Assert.assertNotNull (test.getClusterCentroids()); - Assert.assertEquals(4.0, Math.rint(test.getSquaredError()), 0.01); - Assert.assertNotNull (test.getClusterStandardDevs()); - Assert.assertNotNull (test.getClusterSizes()); - } -} diff --git a/src/test/org/openscience/chemojava/qsar/model/weka/LinearRegressionWModelTest.java b/src/test/org/openscience/chemojava/qsar/model/weka/LinearRegressionWModelTest.java deleted file mode 100644 index 0dc11d4..0000000 --- a/src/test/org/openscience/chemojava/qsar/model/weka/LinearRegressionWModelTest.java +++ /dev/null @@ -1,278 +0,0 @@ -/* $RCSfile$ - * $Author: miguelrojasch $ - * $Date: 2006-05-11 14:25:07 +0200 (Do, 11 Mai 2006) $ - * $Revision: 6221 $ - * - * Copyright (C) 2004-2007 Miguel Rojas - * - * Contact: cdk-devel@lists.sourceforge.net - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - */ -package org.openscience.chemojava.qsar.model.weka; - -import org.junit.Assert; -import org.junit.Test; -import org.openscience.cdk.CDKTestCase; -import org.openscience.cdk.exception.CDKException; -import org.openscience.cdk.qsar.model.QSARModelException; - -/** - * TestSuite that runs a test for the LinearRegressionWModel - * - * @author Miguel Rojas - * @cdk.module test-qsar - */ -public class LinearRegressionWModelTest extends CDKTestCase { - /** - * @throws CDKException - * @throws Exception - * @throws QSARModelException - */ - @Test public void testLinearRegressionWModel1() throws Exception { - - double[][] x = {{1, 1}, {3, 3}, {4, 4}, {6, 6}}; - Double[][] xD = new Double[x.length][x[0].length]; - for (int i = 0; i < xD.length; i++) - for (int j = 0; j < xD[i].length; j++) - xD[i][j] = new Double(x[i][j]); - double[] y = {0, 2, 3, 5}; - - Double[] yD = new Double[y.length]; - for (int i = 0; i < yD.length; i++) - yD[i] = new Double(y[i]); - - LinearRegressionWModel lrm = new LinearRegressionWModel(yD, xD); - String[] options = new String[4]; - options[0] = "-U"; - options[1] = "0"; - options[2] = "-R"; - options[3] = "0.0001"; - lrm.setOptions(options); - lrm.build(); - - /* Test predictions */ - Double[][] newx = { - {new Double(2), new Double(2)}, - {new Double(5), new Double(5)}, - }; - - lrm.setParameters(newx); - lrm.predict(); - - Double[] preds = (Double[]) lrm.getPredictPredicted(); - Assert.assertEquals(1.0, (preds[0]).doubleValue(), 0.001); - Assert.assertEquals(4.0, 
(preds[1]).doubleValue(), 0.001); - } - - /** - * @throws CDKException - * @throws Exception - * @throws QSARModelException - */ - public void testLinearRegressionWModel2() throws CDKException, java.lang.Exception, QSARModelException { - LinearRegressionWModel lrm = new LinearRegressionWModel("data/arff/Table1.arff"); - String[] options = new String[4]; - options[0] = "-U"; - options[1] = "0"; - options[2] = "-R"; - options[3] = "0.00000008"; - lrm.setOptions(options); - lrm.build(); - lrm.setParametersCDK("data/arff/Table2.arff"); - lrm.predict(); - Double[] result = (Double[]) lrm.getPredictPredicted(); - Assert.assertNotNull(result); - Assert.assertEquals(1.0, (result[0]).doubleValue(), 0.001); - Assert.assertEquals(4.0, (result[1]).doubleValue(), 0.001); - } -// /** -// * -// * @throws CDKException -// * @throws java.lang.Exception -// * @throws QSARModelException -// */ -// public void testLinearRegressionWModel3() throws CDKException, java.lang.Exception, QSARModelException { -// -// double[][] x = {{ 5.33029143313, 8.13257437501, 2.66720308462 }, -// { 3.29906147519, 5.06835102093, 6.47319431067 }, -// { 5.69553153292, 5.88043843898, 9.73312992111 }, -// { 5.29194559083, 6.78243188133, 3.2602449344 }, -// { 6.18105762768, 3.36588488672, 3.94539328809 }, -// { 1.32223357975, 8.78797039033, 7.77485740688 }, -// { 0.391740629966, 5.08060997023, 8.28722389016 }, -// { 4.27475126706, 8.52015977633, 7.21468545649 }, -// { 7.14131409262, 8.67086866827, 7.64228671009 }, -// { 8.55502719447, 5.25013245421, 5.73240025988 }, -// { 5.31791067667, 7.99313789208, 1.64835209014 }, -// { 9.03149835466, 1.94042287241, 9.28020345543 }, -// { 0.925468187342, 4.97155215507, 7.69457858258 }, -// { 9.16182426614, 4.74534182996, 6.58111071706 }, -// { 1.15220637861, 1.78078924823, 2.24407287943 }, -// { 9.24209878847, 7.87658524713, 2.38732162601 }, -// { 8.50715035908, 9.16453417058, 0.618727514944 }, -// { 5.84019865932, 5.20208546615, 6.61838858253 }, -// { 3.76256505014, 
0.329738943471, 0.874419640166 }, -// { 9.96004184517, 9.14019090437, 4.90929645109 }, -// { 4.44743194213, 3.95642974577, 7.62629150218 }, -// { 1.24177865105, 1.48660423923, 1.20830798956 }, -// { 8.35590316383, 1.14743031542, 6.29868134513 }, -// { 6.12876561357, 4.63929392357, 5.87722199543 }, -// { 8.11829752127, 0.13950274139, 2.54723293455 }, -// { 4.40852772122, 5.07291389291, 0.100128243526 }, -// { 2.58403059855, 1.78831569742, 5.19817475725 }, -// { 8.04282601008, 3.76076347262, 1.43904088129 }, -// { 3.43713025153, 4.35105074191, 0.0189145485124 }, -// { 5.0236445539, 1.06317719489, 5.10306592945 }, -// { 9.77434875025, 9.0666617274, 6.99448050277 }, -// { 4.06797047248, 7.62659701718, 9.83152424086 }, -// { 6.48920287132, 0.156594507329, 5.46872113685 }, -// { 6.42883928789, 2.01940454563, 6.46523071259 }, -// { 1.16293901493, 5.15391581673, 3.56182526491 }, -// { 7.38000931385, 0.453325117578, 6.61031329357 }, -// { 9.32963370626, 2.12590745134, 0.405388324151 }, -// { 0.737255223472, 7.39059871721, 2.86079226118 }, -// { 6.85301380605, 2.1615949728, 8.87574040247 }, -// { 3.74156226774, 4.24620341057, 4.35371571862 }, -// { 2.18208535888, 3.53972126321, 3.59052000965 }, -// { 4.72006492073, 3.3574566235, 9.62444364758 }, -// { 2.84331278854, 1.74554945195, 4.51285607572 }, -// { 3.86999763691, 9.49323614413, 5.08797427552 }, -// { 7.43099014174, 6.2755590307, 4.57542355747 }, -// { 6.01320531795, 8.25706473123, 7.40439342966 }, -// { 6.46384266575, 3.51112862363, 9.47435948698 }, -// { 2.29011620065, 0.401145254435, 7.28671287627 }, -// { 5.7219136188, 4.43209346253, 0.0622901932013 }, -// { 4.30214056802, 1.68925570283, 7.89926376252 }, -// { 0.64305256706, 8.22063584536, 4.33019352991 }, -// { 6.44843380824, 9.10336359279, 6.8777037869 }, -// { 2.45354486215, 5.34166315571, 8.04822795875 }, -// { 9.13675572384, 8.19635101591, 6.85475060116 }, -// { 8.0492824201, 7.55216736195, 3.73472402973 }, -// { 4.40590062277, 5.27106603309, 2.59962025805 }, 
-// { 0.313960278741, 0.11866096726, 4.07985095305 }, -// { 0.462136466507, 0.415202739102, 1.03258083165 }, -// { 6.74723654049, 7.7080622951, 7.22322407979 }, -// { 1.97571555403, 3.18544339131, 5.56211977273 }, -// { 3.14021838165, 0.81551917817, 3.95156287418 }, -// { 4.05709817216, 2.98004731237, 5.8975379443 }, -// { 4.25420450429, 7.78663760941, 5.98061090504 }, -// { 6.2650372416, 7.96507652177, 6.43631309268 }, -// { 0.248308143147, 5.07557198176, 7.06413762375 }, -// { 2.83741089895, 0.652445391344, 3.32535947415 }, -// { 5.98115064142, 9.88913498552, 9.3923706794 }, -// { 3.45667026676, 1.37451287268, 2.35331272082 }, -// { 7.83964781871, 2.22111016571, 9.10723793073 }, -// { 0.509210152705, 7.97088780188, 6.17963669424 }, -// { 5.50910552235, 6.92372624674, 8.43151367671 }, -// { 9.94686419266, 5.16899669191, 1.77353096261 }, -// { 1.46501561342, 4.39317416608, 4.66752677391 }, -// { 7.34126711314, 1.50352255841, 7.42777093653 }, -// { 6.80122177161, 2.48753341584, 4.30535748793 }, -// { 3.43057685209, 9.11458889251, 8.1389601215 }, -// { 7.82076320157, 4.99727977399, 8.31875065375 }, -// { 8.62799832715, 5.67304190345, 1.40517550057 }, -// { 2.20910090066, 5.45236965227, 0.190013284925 }, -// { 8.27876352499, 3.23706166886, 6.23912802837 }, -// { 8.69440791615, 0.729194277167, 3.45645694332 }, -// { 8.30552885891, 2.53977734839, 0.498635632483 }, -// { 6.35009207052, 5.87727519703, 4.92604761655 }, -// { 2.21876644613, 3.85669457256, 9.44139826683 }, -// { 5.49181700898, 1.69048597254, 2.29475976286 }, -// { 3.79777411904, 0.437885574937, 8.10175192316 }, -// { 8.11720195104, 8.84115458961, 6.25490466144 }, -// { 4.58878775312, 5.51332276174, 3.85400216514 }, -// { 6.01729101329, 9.69817519935, 7.63607038602 }, -// { 4.14247512757, 9.633551519, 0.543555309265 }, -// { 1.69925453337, 4.77655288911, 0.950497583032 }, -// { 3.84897216241, 3.27769006984, 9.17922626403 }, -// { 2.79348258306, 4.38230737375, 7.26219595942 }, -// { 4.88988551153, 
2.95206506434, 3.65797143803 }, -// { 1.91134803528, 0.829719567085, 1.73891604909 }, -// { 5.5514711696, 8.80684284298, 2.66911304157 }, -// { 2.95100011358, 0.832983961872, 4.19266815334 }, -// { 4.19942346415, 5.92478285192, 8.33053966924 }, -// { 3.11127058351, 3.25340097022, 7.07258377268 }, -// { 7.61105416732, 8.46642439572, 5.61730141222 }}; -// Double[][] xD = new Double[x.length][x[0].length]; -// for(int i = 0 ; i< xD.length; i++) -// for(int j = 0 ; j < xD[i].length ; j++) -// xD[i][j] = new Double(x[i][j]); -// -// double[] y = { 0.548279405588, 0.749557798438, 0.704786225556, 0.064272559019, 0.959196778261, 0.443650457811, 0.139588310157, 0.697614953528, 0.894633307417, 0.288986449536, 0.968020911596, 0.00941763156173, 0.803870693657, 0.457124742168, 0.728543899161, 0.88083354383, 0.624089352674, 0.470379461181, 0.86877991158, 0.622721685808, 0.0250057478044, 0.2376603194, 0.112920370051, 0.608780223601, 0.62741359624, 0.39753977229, 0.396823887458, 0.0259021311271, 0.433022176171, 0.94665816668, 0.788805032857, 0.831096752197, 0.981239642073, 0.72411413954, 0.585272152663, 0.694317542691, 0.890624533901, 0.244048473797, 0.422902339036, 0.597269134374, 0.911340032927, 0.00186723050398, 0.439586593554, 0.714613974993, 0.815341829936, 0.726336948414, 0.742772100572, 0.597295528478, 0.305955366581, 0.155579392014, 0.000873693540479, 0.339225424495, 0.433434106377, 0.109738110471, 0.0193980726758, 0.258795872246, 0.322462583569, 0.326807898424, 0.079866937163, 0.741776416238, 0.597174006951, 0.289816194377, 0.691182117374, 0.113315930392, 0.302120795811, 0.616653275971, 0.833480904688, 0.881803762099, 0.734675438389, 0.269429129873, 0.977225860294, 0.327410536298, 0.319292292397, 0.876227987007, 0.832930007711, 0.941552570764, 0.0433177729231, 0.333665283905, 0.889264621262, 0.367930824862, 0.143633644589, 0.0106269520474, 0.623817520313, 0.237853599409, 0.301794094647, 0.912166461213, 0.663976930266, 0.918081800984, 0.909573924607, 0.976541368479, 
0.340915467396, 0.617160565805, 0.0315242385532, 0.869413665191, 0.695610662213, 0.144537534715, 0.619567870639, 0.159550199731, 0.536333432502, 0.837898880743 }; -// logger.debug("yl: "+y.length); -// Double[] yD = new Double[y.length]; -// for(int i = 0 ; i< yD.length; i++) -// yD[i] = new Double(y[i]); -// -// LinearRegressionWModel lrm = new LinearRegressionWModel(yD,xD); -// String[] options = new String[4]; -// options[0] = "-U"; -// options[1] = "0"; -// options[2] = "-R"; -// options[3] = "0.0001"; -// lrm.setOptions(options); -// lrm.build(); -// -// /* Test predictions */ -// Double[][] newx = { -// { new Double(9.81536768251), new Double(3.82849269659), new Double(7.22212024421) }, -// { new Double(0.197449829806), new Double(0.324130354642), new Double(2.8329420321) }, -//// { new Double(0.548460836141), new Double(7.28037586863), new Double(8.13728493983) }, -//// { new Double(1.76049278788), new Double(6.41731766803), new Double(5.53986167864) }, -//// { new Double(3.4541825491), new Double(9.78038580407), new Double(3.58954097059) } -// }; -// -// lrm.setParameters(newx); -// lrm.predict(); -// -// double[] preds = lrm.getPredictPredicted(); -// for(int i = 0; i < preds.length; i++) -// logger.debug("result< "+i+"="+preds[i]); -// assertTrue(preds != null); -// assertEquals(0.5235362, preds[0], 0.001);/*result extracted from test LinearRegressionTest*/ -// assertEquals(0.5030381, preds[1], 0.0000001); -// assertEquals(0.5184706, preds[2], 0.0000001); -// assertEquals(0.5232108, preds[3], 0.0000001); -// assertEquals(0.5436967, preds[4], 0.0000001); -// -// assertEquals(96, lrm.getPredictDF(), 0.1); -// -// } -// -// /** -// * -// * @throws CDKException -// * @throws java.lang.Exception -// * @throws QSARModelException -// */ -// public void testLinearRegressionWModel_4() throws CDKException, java.lang.Exception, QSARModelException { -// LinearRegressionWModel lrm = new LinearRegressionWModel("data/arff/LinearRegressionWeka_Test.arff"); -// String[] 
options = new String[4]; -// options[0] = "-U"; -// options[1] = "0"; -// options[2] = "-R"; -// options[3] = "0.00000008"; -// lrm.setOptions(options); -// lrm.build(); -// lrm.setParameters("data/arff/LinearRegressionWeka_Prediction.arff"); -// lrm.predict(); -// double[] result = lrm.getPredictPredicted(); -// for(int i = 0; i < result.length; i++) -// logger.debug("result< "+i+"="+result[i]); -// assertNotNull(result); -// } -} - diff --git a/src/test/org/openscience/chemojava/qsar/model/weka/MultilayerPerceptronModelTest.java b/src/test/org/openscience/chemojava/qsar/model/weka/MultilayerPerceptronModelTest.java deleted file mode 100644 index 289069c..0000000 --- a/src/test/org/openscience/chemojava/qsar/model/weka/MultilayerPerceptronModelTest.java +++ /dev/null @@ -1,101 +0,0 @@ -/* $RCSfile$ - * $Author$ - * $Date$ - * $Revision$ - * - * Copyright (C) 2007 by Mario Baseda - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * All we ask is that proper credit is given for our work, which includes - * - but is not limited to - adding the above copyright notice to the beginning - * of your source code files, and to any copyright notice that you may distribute - * with programs based on this work. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -package org.openscience.chemojava.qsar.model.weka; - -import org.junit.Assert; -import org.junit.Test; -import org.openscience.cdk.CDKTestCase; -import org.openscience.cdk.exception.CDKException; -import org.openscience.cdk.qsar.model.QSARModelException; -import org.openscience.chemojava.libio.weka.Weka; - -/** - * TestSuite that runs a test for the MultilayerPerceptronModel - * - * @author Mario Baseda - * @cdk.module test-qsar - */ -public class MultilayerPerceptronModelTest extends CDKTestCase{ - - /** - * @throws CDKException - * @throws Exception - * @throws QSARModelException - */ - @Test public void testMultilayerPerceptronModel() throws CDKException, java.lang.Exception, QSARModelException{ - MultilayerPerceptronModel test = new MultilayerPerceptronModel(); -// test.setOptions(new String[] {"-G"}); - int[] typAttrib = {Weka.NUMERIC, Weka.NUMERIC, Weka.NUMERIC}; - String[] classAttrib = {"A_", "B_", "C_"}; - double[][] x = {{10, 10, 10}, {10, -10, -10}, {-10, -10, -10}, - {11, 11, 11}, {11, -11, -11}, {-11, -11, -11}}; - Double[][] xD = new Double[x.length][x[0].length]; - for (int i = 0; i < xD.length; i++) - for (int j = 0; j < xD[i].length; j++) - xD[i][j] = new Double(x[i][j]); - String[] y = {"A_", "B_", "C_", "A_", "B_", "C_"}; - String[] attrib = {"X1", "X2", "X3"}; - test.setData(attrib, typAttrib, classAttrib, y, xD); - test.build(); - Double[][] newx = { - {new Double(99), new Double(89), new Double(79)}, - {new Double(19), new Double(29), new Double(39)}, - }; - test.setParameters(newx); - test.predict(); - Object[][] result = test.getPredictPredicted(); - Assert.assertNotNull(result); - Assert.assertEquals ("a", test.getHiddenLayers()); - Assert.assertEquals (0.3, test.getLearningRate(), 0.01); - Assert.assertEquals(0.2, 
test.getMomentum(), 0.01); - Assert.assertEquals (0, test.getRandomSeed()); - Assert.assertEquals (500.0, test.getTrainingTime(), 0.01); - Assert.assertEquals (0.0, test.getValidationSetSize(), 0.001); - Assert.assertEquals (20.0, test.getValidationThreshold(), 0.01); - } - - /** - * @throws CDKException - * @throws Exception - * @throws QSARModelException - */ -// public void testMultilayerPerceptronModel2() throws CDKException, java.lang.Exception, QSARModelException { -// MultilayerPerceptronModel test = new MultilayerPerceptronModel(); -// test.setData("X:\\cdk\\src\\data\\arff\\Table1.arff"); -// test.build(); -// test.setParameters("X:\\cdk\\src\\data\\arff\\Table2.arff"); -// test.predict(); -// Object[][] result = test.getPredictPredicted(); -// assertNotNull(result); -// assertEquals ("a", test.getHiddenLayers()); -// assertEquals (0.3, test.getLearningRate(), 0.01); -// assertEquals(0.2, test.getMomentum(), 0.01); -// assertEquals (0, test.getRandomSeed()); -// assertEquals (500.0, test.getTrainingTime(), 0.01); -// assertEquals (0.0, test.getValidationSetSize(), 0.001); -// assertEquals (20.0, test.getValidationThreshold(), 0.01); -// } -} diff --git a/src/test/org/openscience/chemojava/qsar/model/weka/NaiveBayesModelTest.java b/src/test/org/openscience/chemojava/qsar/model/weka/NaiveBayesModelTest.java deleted file mode 100644 index e1e28ce..0000000 --- a/src/test/org/openscience/chemojava/qsar/model/weka/NaiveBayesModelTest.java +++ /dev/null @@ -1,89 +0,0 @@ -/* $RCSfile$ - * $Author$ - * $Date$ - * $Revision$ - * - * Copyright (C) 2007 by Mario Baseda - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. 
- * All we ask is that proper credit is given for our work, which includes - * - but is not limited to - adding the above copyright notice to the beginning - * of your source code files, and to any copyright notice that you may distribute - * with programs based on this work. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -package org.openscience.chemojava.qsar.model.weka; - -import org.junit.Assert; -import org.junit.Test; -import org.openscience.cdk.CDKTestCase; -import org.openscience.cdk.exception.CDKException; -import org.openscience.cdk.qsar.model.QSARModelException; -import org.openscience.chemojava.libio.weka.Weka; - -/** - * TestSuite that runs a test for the NaiveBayesModel - * - * @author Mario Baseda - * @cdk.module test-qsar - */ -public class NaiveBayesModelTest extends CDKTestCase{ - - /** - * @throws CDKException - * @throws Exception - * @throws QSARModelException - */ - @Test public void testNaiveBayesModel() throws CDKException, java.lang.Exception, QSARModelException{ - NaiveBayesModel test = new NaiveBayesModel(); -// test.setOptions(new String[] {"-G"}); - int[] typAttrib = {Weka.NUMERIC, Weka.NUMERIC, Weka.NUMERIC}; - String[] classAttrib = {"A_", "B_", "C_"}; - double[][] x = {{10, 10, 10}, {10, -10, -10}, {-10, -10, -10}, - {11, 11, 11}, {11, -11, -11}, {-11, -11, -11}}; - Double[][] xD = new Double[x.length][x[0].length]; - for (int i = 0; i < xD.length; i++) - for (int j = 0; j < xD[i].length; j++) - xD[i][j] = new Double(x[i][j]); - String[] y = {"A_", "B_", "C_", "A_", "B_", "C_"}; - String[] attrib = 
{"X1", "X2", "X3"}; - test.setData(attrib, typAttrib, classAttrib, y, xD); - test.build(); - Double[][] newx = { - {new Double(99), new Double(89), new Double(79)}, - {new Double(19), new Double(29), new Double(39)}, - }; - test.setParameters(newx); - test.probabilities(); - Object[][] result = test.getProbabilities(); - test.updateClassifier(); - Assert.assertNotNull(result); - } - - /** - * @throws CDKException - * @throws Exception - * @throws QSARModelException - */ -// public void testNaiveBayesModel2() throws CDKException, java.lang.Exception, QSARModelException { -// NaiveBayesModel test = new NaiveBayesModel(); -// test.setData("X:\\cdk\\src\\data\\arff\\Table3.arff"); -// test.build(); -// test.setParameters("X:\\cdk\\src\\data\\arff\\Table3.arff"); -// test.probabilities(); -// Object[][] result = test.getProbabilities(); -// test.updateClassifier(); -// assertNotNull(result); -// } -} diff --git a/src/test/org/openscience/chemojava/qsar/model/weka/QSARWekaModelTests.java b/src/test/org/openscience/chemojava/qsar/model/weka/QSARWekaModelTests.java deleted file mode 100644 index d61effe..0000000 --- a/src/test/org/openscience/chemojava/qsar/model/weka/QSARWekaModelTests.java +++ /dev/null @@ -1,54 +0,0 @@ -/* $RCSfile$ - * $Author: miguelrojasch $ - * $Date: 2006-05-11 14:25:07 +0200 (Do, 11 Mai 2006) $ - * $Revision: 6221 $ - * - * Copyright (C) 2004-2007 Miguel Rojas - * - * Contact: cdk-devel@lists.sourceforge.net - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - */ -package org.openscience.chemojava.qsar.model.weka; - -import org.junit.runner.RunWith; -import org.junit.runners.Suite; -import org.junit.runners.Suite.SuiteClasses; - -/** - * TestSuite that runs all the tests for the CDK libio-weka module. - * - * @cdk.module test-qsar - * @cdk.depends junit.jar - */ -@RunWith(Suite.class) -@SuiteClasses({ - J48WModelTest.class, - LinearRegressionWModelTest.class, - DensityBasedClustererModelTest.class, - EMClusterTest.class, - FilterSupervisedResampleTest.class, - FilterUnSupervisedResampleTest.class, - FilterSupervisedResampleTest.class, - KmeansClusterTest.class, - MultilayerPerceptronModelTest.class, - NaiveBayesModelTest.class, - SimpleLinearRegressionModelTest.class, - SMOModelTest.class, - SMOregModelTest.class -}) -public class QSARWekaModelTests { - -} diff --git a/src/test/org/openscience/chemojava/qsar/model/weka/SMOModelTest.java b/src/test/org/openscience/chemojava/qsar/model/weka/SMOModelTest.java deleted file mode 100644 index 5f3181f..0000000 --- a/src/test/org/openscience/chemojava/qsar/model/weka/SMOModelTest.java +++ /dev/null @@ -1,123 +0,0 @@ -/* $RCSfile$ - * $Author$ - * $Date$ - * $Revision$ - * - * Copyright (C) 2007 by Mario Baseda - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * All we ask is that proper credit is given for our work, which includes - * - but is not limited to - adding the above copyright notice to the beginning - * of your source code files, and to any copyright notice that you may distribute - * with programs based on this work. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -package org.openscience.chemojava.qsar.model.weka; - -import org.junit.Assert; -import org.junit.Test; -import org.openscience.cdk.exception.CDKException; -import org.openscience.cdk.qsar.model.QSARModelException; -import org.openscience.cdk.CDKTestCase; -import org.openscience.chemojava.libio.weka.Weka; -import org.openscience.chemojava.qsar.model.weka.SMOModel; - -/** - * TestSuite that runs a test for the SMOModel - * - * @author Mario Baseda - * @cdk.module test-qsar - */ -public class SMOModelTest extends CDKTestCase{ - - /** - * @throws CDKException - * @throws Exception - * @throws QSARModelException - */ - @Test public void testSMOModel() throws Exception{ - SMOModel test = new SMOModel(); - int[] typAttrib = {Weka.NUMERIC, Weka.NUMERIC, Weka.NUMERIC}; - String[] classAttrib = {"A_", "B_", "C_"}; - double[][] x = {{10, 10, 10}, {10, -10, -10}, {-10, -10, -10}, - {11, 11, 11}, {11, -11, -11}, {-11, -11, -11}}; - Double[][] xD = new Double[x.length][x[0].length]; - for (int i = 0; i < xD.length; i++) - for (int j = 0; j < xD[i].length; j++) - xD[i][j] = new Double(x[i][j]); - String[] y = {"A_", "B_", "C_", "A_", "B_", "C_"}; - String[] attrib = {"X1", "X2", "X3"}; - test.setData(attrib, typAttrib, classAttrib, y, xD); - test.build(); - Double[][] newx = { - {new Double(99), new Double(89), new Double(79)}, - {new Double(19), new Double(29), new Double(39)}, - }; - test.setParameters(newx); - test.probabilities(); - Object[][] result = test.getProbabilities(); - 
Assert.assertNotNull (test.attributeNames()); - Assert.assertNotNull (test.bias()); - Assert.assertNotNull(test.classAttributeNames()); - Assert.assertNotNull (test.sparseIndices()); - Assert.assertNotNull (test.sparseWeights()); - Assert.assertNotNull(result); - Assert.assertEquals (false, test.getBuildLogisticModels()); - Assert.assertEquals (1.0, test.getC(), 0.001); - Assert.assertEquals (250007, test.getCacheSize()); - Assert.assertEquals (1.0E-12, test.getEpsilon(), 0.001); - Assert.assertEquals (1.0, test.getExponent(), 0.01); - Assert.assertEquals (false, test.getFeatureSpaceNormalization()); - Assert.assertEquals (0.01, test.getGamma(), 0.001); - Assert.assertEquals (false, test.getLowerOrderTerms()); - Assert.assertEquals (-1, test.getNumFolds()); - Assert.assertEquals (1, test.getRandomSeed()); - Assert.assertEquals (0.0010, test.getToleranceParameter(), 0.001); - Assert.assertEquals (false, test.getUserRBF()); - Assert.assertEquals (3, test.numClassAttributeValues()); - } - - /** - * @throws CDKException - * @throws Exception - * @throws QSARModelException - */ -// public void testSMOModel2() throws CDKException, java.lang.Exception, QSARModelException { -// SMOModel test = new SMOModel(); -// test.setData("X:\\cdk\\src\\data\\arff\\Table3.arff"); -// test.build(); -// test.setParameters("X:\\cdk\\src\\data\\arff\\Table4.arff"); -// test.probabilities(); -// Object[][] result = test.getProbabilities(); -// assertNotNull (test.attributeNames()); -// assertNotNull (test.bias()); -// assertNotNull(test.classAttributeNames()); -// assertNotNull (test.sparseIndices()); -// assertNotNull (test.sparseWeights()); -// assertNotNull(result); -// assertEquals (false, test.getBuildLogisticModels()); -// assertEquals (1.0, test.getC(), 0.001); -// assertEquals (250007, test.getCacheSize()); -// assertEquals (1.0E-12, test.getEpsilon(), 0.001); -// assertEquals (1.0, test.getExponent(), 0.01); -// assertEquals (false, test.getFeatureSpaceNormalization()); -// 
assertEquals (0.01, test.getGamma(), 0.001); -// assertEquals (false, test.getLowerOrderTerms()); -// assertEquals (-1, test.getNumFolds()); -// assertEquals (1, test.getRandomSeed()); -// assertEquals (0.0010, test.getToleranceParameter(), 0.001); -// assertEquals (false, test.getUserRBF()); -// assertEquals (3, test.numClassAttributeValues()); -// } -} diff --git a/src/test/org/openscience/chemojava/qsar/model/weka/SMOregModelTest.java b/src/test/org/openscience/chemojava/qsar/model/weka/SMOregModelTest.java deleted file mode 100644 index 8e44ced..0000000 --- a/src/test/org/openscience/chemojava/qsar/model/weka/SMOregModelTest.java +++ /dev/null @@ -1,102 +0,0 @@ -/* $RCSfile$ - * $Author$ - * $Date$ - * $Revision$ - * - * Copyright (C) 2007 by Mario Baseda - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * All we ask is that proper credit is given for our work, which includes - * - but is not limited to - adding the above copyright notice to the beginning - * of your source code files, and to any copyright notice that you may distribute - * with programs based on this work. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -package org.openscience.chemojava.qsar.model.weka; - -import org.junit.Assert; -import org.junit.Test; -import org.openscience.cdk.CDKTestCase; -import org.openscience.cdk.exception.CDKException; -import org.openscience.cdk.qsar.model.QSARModelException; - -/** - * TestSuite that runs a test for the SMOregModel - * - * @author Mario Baseda - * @cdk.module test-qsar - */ -public class SMOregModelTest extends CDKTestCase{ - - /** - * @throws CDKException - * @throws Exception - * @throws QSARModelException - */ - @Test public void testSMOregModel() throws Exception{ - SMOregModel test = new SMOregModel(); - double[][] x = {{10, 10, 10}, {10, -10, -10}, {-10, -10, -10}, - {11, 11, 11}, {11, -11, -11}, {-11, -11, -11}}; - Double[][] xD = new Double[x.length][x[0].length]; - for (int i = 0; i < xD.length; i++) - for (int j = 0; j < xD[i].length; j++) - xD[i][j] = new Double(x[i][j]); - Object[] y = {new Double(100), new Double(200), new Double(300), new Double(100), new Double(200), new Double(300)}; - test.setData(y, xD); - test.build(); - Double[][] newx = { - {new Double(99), new Double(89), new Double(79)}, - {new Double(19), new Double(29), new Double(39)}, - }; - test.setParameters(newx); - test.classifyInstance(); - Object[] result = test.getClassification(); - Assert.assertNotNull (result); - Assert.assertEquals (1.0, test.getC(), 0.001); - Assert.assertEquals (250007, test.getCacheSize()); - Assert.assertEquals (0.0010, test.getEpsilon(), 0.001); - Assert.assertEquals (1.0, test.getExponent(), 0.01); - Assert.assertEquals (false, test.getFeatureSpaceNormalization()); - Assert.assertEquals (0.01, test.getGamma(), 0.001); - Assert.assertEquals (false, test.getLowerOrderTerms()); - Assert.assertEquals (0.0010, test.getToleranceParameter(), 
0.001); - Assert.assertEquals (false, test.getUserRBF()); - Assert.assertEquals (1.0E-12, test.getEps(), 0.001); - } - - /** - * @throws CDKException - * @throws Exception - * @throws QSARModelException - */ -// public void testSMOregModel2() throws CDKException, java.lang.Exception, QSARModelException { -// SMOregModel test = new SMOregModel(); -// test.setData("X:\\cdk\\src\\data\\arff\\Table1.arff"); -// test.build(); -// test.setParameters("X:\\cdk\\src\\data\\arff\\Table2.arff"); -// test.classifyInstance(); -// Object[] result = test.getClassification(); -// assertNotNull (result); -// assertEquals (1.0, test.getC(), 0.001); -// assertEquals (250007, test.getCacheSize()); -// assertEquals (0.0010, test.getEpsilon(), 0.001);; -// assertEquals (1.0, test.getExponent(), 0.01); -// assertEquals (false, test.getFeatureSpaceNormalization()); -// assertEquals (0.01, test.getGamma(), 0.001); -// assertEquals (false, test.getLowerOrderTerms()); -// assertEquals (0.0010, test.getToleranceParameter(), 0.001); -// assertEquals (false, test.getUserRBF()); -// assertEquals (1.0E-121, test.getEps(), 0.001); -// } -} diff --git a/src/test/org/openscience/chemojava/qsar/model/weka/SimpleLinearRegressionModelTest.java b/src/test/org/openscience/chemojava/qsar/model/weka/SimpleLinearRegressionModelTest.java deleted file mode 100644 index b4dfdab..0000000 --- a/src/test/org/openscience/chemojava/qsar/model/weka/SimpleLinearRegressionModelTest.java +++ /dev/null @@ -1,93 +0,0 @@ -/* $RCSfile$ - * $Author$ - * $Date$ - * $Revision$ - * - * Copyright (C) 2007 by Mario Baseda - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. 
- * All we ask is that proper credit is given for our work, which includes - * - but is not limited to - adding the above copyright notice to the beginning - * of your source code files, and to any copyright notice that you may distribute - * with programs based on this work. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -package org.openscience.chemojava.qsar.model.weka; - -import org.junit.Assert; -import org.junit.Test; -import org.openscience.cdk.CDKTestCase; -import org.openscience.cdk.exception.CDKException; -import org.openscience.cdk.qsar.model.QSARModelException; - -/** - * TestSuite that runs a test for the SimpleLinearRegressionModel - * - * @author Mario Baseda - * @cdk.module test-qsar - */ -public class SimpleLinearRegressionModelTest extends CDKTestCase{ - - /** - * @throws CDKException - * @throws Exception - * @throws QSARModelException - */ - @Test public void testSimpleLinearRegressionModel1() throws Exception { - - double[][] x = {{1, 1}, {3, 3}, {4, 4}, {6, 6}}; - Double[][] xD = new Double[x.length][x[0].length]; - for (int i = 0; i < xD.length; i++) - for (int j = 0; j < xD[i].length; j++) - xD[i][j] = new Double(x[i][j]); - double[] y = {0, 2, 3, 5}; - - Double[] yD = new Double[y.length]; - for (int i = 0; i < yD.length; i++) - yD[i] = new Double(y[i]); - - SimpleLinearRegressionModel slrm = new SimpleLinearRegressionModel(yD, xD); - slrm.build(); - - /* Test predictions */ - Double[][] newx = { - {new Double(2), new Double(2)}, - {new Double(5), new Double(5)}, - }; - - slrm.setParameters(newx); - 
slrm.predict(); - - Double[] preds = (Double[]) slrm.getPredictPredicted(); - Assert.assertEquals(1.0, (preds[0]).doubleValue(), 0.001); - Assert.assertEquals(4.0, (preds[1]).doubleValue(), 0.001); - } - - /** - * @throws CDKException - * @throws Exception - * @throws QSARModelException - */ -// public void testSimpleLinearRegressionWModel2() throws CDKException, java.lang.Exception, QSARModelException { -// SimpleLinearRegressionModel slrm = new SimpleLinearRegressionModel("X:\\cdk\\src\\data\\arff\\Table1.arff"); -// String[] options = new String[4]; -// slrm.build(); -// slrm.setParameters("X:\\cdk\\src\\data\\arff\\Table2.arff"); -// slrm.predict(); -// Double[] result = (Double[]) slrm.getPredictPredicted(); -// assertNotNull(result); -// assertEquals(1.0, (result[0]).doubleValue(), 0.001); -// assertEquals(4.0, (result[1]).doubleValue(), 0.001); -// } -}