- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-package net.sf.cdk.tools.doclets;
-
-import java.util.Map;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import com.sun.javadoc.SourcePosition;
-import com.sun.javadoc.Tag;
-import com.sun.tools.doclets.Taglet;
-
-/**
- * Taglet that expands @cdk.svnrev tag into a link to the SVN
- * source tree. The syntax must be as follows:
- *
- * @cdk.svnrev $Revision: 7973 $
- *
- *
- * The actual version number is automatically updated by the
- * SVN repository.
- */
-public class CDKSVNTaglet implements Taglet {
-
- private static final String NAME = "cdk.svnrev";
- private final static Pattern svnrevPattern = Pattern.compile("\\$Revision:\\s*(\\d*)\\s*\\$");
- private final static Pattern pathPattern = Pattern.compile(".*/(src/.*\\.java)");
-
- public String getName() {
- return NAME;
- }
-
- public boolean inField() {
- return false;
- }
-
- public boolean inConstructor() {
- return false;
- }
-
- public boolean inMethod() {
- return false;
- }
-
- public boolean inOverview() {
- return false;
- }
-
- public boolean inPackage() {
- return false;
- }
-
- public boolean inType() {
- return true;
- }
-
- public boolean isInlineTag() {
- return false;
- }
-
- public static void register(Map tagletMap) {
- CDKSVNTaglet tag = new CDKSVNTaglet();
- Taglet t = (Taglet) tagletMap.get(tag.getName());
- if (t != null) {
- tagletMap.remove(tag.getName());
- }
- tagletMap.put(tag.getName(), tag);
- }
-
- public String toString(Tag tag) {
- return "Source code: "
- + expand(tag) + "\n";
- }
-
- public String toString(Tag[] tags) {
- if (tags.length == 0) {
- return null;
- } else {
- return toString(tags[0]);
- }
- }
-
- private String expand(Tag tag) {
- // get the version number
- String text = tag.text();
- Matcher matcher = svnrevPattern.matcher(text);
- String revision = "HEAD";
- if (matcher.matches()) {
- revision = matcher.group(1);
- } else {
- System.out.println("Malformed @cdk.svnrev content: " + text);
- return "";
- }
- // create the URL
- SourcePosition file = tag.position();
- String path = correctSlashes(file.file().getAbsolutePath());
- matcher = pathPattern.matcher(path);
- if (matcher.matches()) {
- String url = "http://cdk.svn.sourceforge.net/viewvc/cdk/trunk/cdk/" +
- matcher.group(1) + "?revision=" +
- revision + "&view=markup";
- return "revision " + revision + "";
- } else {
- System.out.println("Could not resolve class name from: " + path);
- }
- return "";
- }
-
- private String correctSlashes(String absolutePath) {
- StringBuffer buffer = new StringBuffer();
- for (int i=0; i
- *
- * Contact: cdk-devel@lists.sourceforge.net
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-package org.openscience.cdk.qsar.model;
-
-/**
- * Classes that implement this interface will build statistical models.
- *
- * Currently the design of the modeling system is that classes based on
- * a given backend should be based of an abtract class that implements
- * this interface. See RModel as an example.
- *
- * @author Rajarshi Guha
- * @cdk.module qsar
- * @cdk.githash
- */
-public interface IModel {
-
- /**
- * Builds (trains) the model.
- *
- * @throws QSARModelException if errors occur in data types, calls to the R session. See
- * the corresponding method in subclasses of this class for further details.
- */
- public void build() throws QSARModelException;
- /**
- * Makes predictions using a previously built model.
- *
- * @throws QSARModelException if errors occur in data types, calls to the R session. See
- * the corresponding method in subclasses of this class for further details.
- */
- public void predict() throws QSARModelException;
-}
-
diff --git a/src/main/org/openscience/cdk/qsar/model/QSARModelException.java b/src/main/org/openscience/cdk/qsar/model/QSARModelException.java
deleted file mode 100644
index 011a692..0000000
--- a/src/main/org/openscience/cdk/qsar/model/QSARModelException.java
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (C) 2004-2007 Rajarshi Guha
- *
- * Contact: cdk-devel@lists.sourceforge.net
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-package org.openscience.cdk.qsar.model;
-
-import org.openscience.cdk.exception.CDKException;
-
-/**
- * Exception that is thrown by model routines when a problem has occured.
- *
- * @author Rajarshi Guha
- * @cdk.module qsar
- * @cdk.githash
- */
-public class QSARModelException extends CDKException {
-
- private static final long serialVersionUID = 4931287199065879144L;
-
- /**
- * Constructs a new QSARModelException with the given message.
- *
- * @param message for the constructed exception
- */
- public QSARModelException(String message) {
- super( message );
- }
-}
-
diff --git a/src/main/org/openscience/cdk/qsar/model/R/CNNClassificationModel.java b/src/main/org/openscience/cdk/qsar/model/R/CNNClassificationModel.java
deleted file mode 100644
index feb88c3..0000000
--- a/src/main/org/openscience/cdk/qsar/model/R/CNNClassificationModel.java
+++ /dev/null
@@ -1,577 +0,0 @@
-/* $RCSfile$
- * $Author$
- * $Date$
- * $Revision$
- *
- * Copyright (C) 2004-2008 Rajarshi Guha
- *
- * Contact: cdk-devel@lists.sourceforge.net
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-package org.openscience.cdk.qsar.model.R;
-
-import org.openscience.cdk.qsar.model.QSARModelException;
-
-import java.util.HashMap;
-
-/**
- * A modeling class that provides a computational neural network classification model.
- *
- * When instantiated this class ensures that the R/Java interface has been
- * initialized. The response and independent variables can be specified at construction
- * time or via the setParameters method.
- * The actual fitting procedure is carried out by build after which
- * the model may be used to make predictions, via predict. An example of the use
- * of this class is shown below:
- *
- * double[][] x;
- * String[] y;
- * Double[] wts;
- * Double[][] newx;
- * ...
- * try {
- * CNNClassificationModel cnnrm = new CNNClassificationModel(x,y,3);
- * cnnrm.setParameters("Wts",wts);
- * cnnrm.build();
- *
- * double fitValue = cnnrm.getFitValue();
- *
- * cnnrm.setParameters("newdata", newx);
- * cnnrm.setParameters("type", "raw");
- * cnnrm.predict();
- *
- * double[][] preds = cnnrm.getPredictPredicted();
- * } catch (QSARModelException qme) {
- * System.out.println(qme.toString());
- * }
- *
- * The above code snippet builds a 3-3-1 CNN model.
- * Multiple output neurons are easily
- * specified by supplying a matrix for y (i.e., double[][]) with the output variables
- * in the columns.
- *
- * Nearly all the arguments to
- * nnet() are
- * supported via the setParameters method. The table below lists the names of the arguments,
- * the expected type of the argument and the default setting for the arguments supported by this wrapper class.
- *
- *
- *
- *
- * | Name | Java Type | Default | Notes |
- *
- *
- *
- * | x | Double[][] | None | This must be set by the caller via the constructors or via setParameters |
- * | y | String[][] | None | This must be set by the caller via the constructors or via setParameters |
- * | weights | Double[] | rep(1,nobs) | The default case weights is a vector of 1's equal in length to the number of observations, nobs |
- * | size | Integer | None | This must be set by the caller via the constructors or via setParameters |
- * | subset | Integer[] | 1:nobs | This is supposed to be an index vector specifying which observations are to be used in building the model. The default indicates that all should be used |
- * | Wts | Double[] | runif(1,nwt) | The initial weight vector is set to a random vector of length equal to the number of weights if not set by the user |
- * | mask | Boolean[] | rep(TRUE,nwt) | All weights are to be optimized unless otherwise specified by the user |
- * | linout | Boolean | FALSE | Since this class performs classification this need not be changed |
- * | entropy | Boolean | TRUE | |
- * | softmax | Boolean | FALSE | |
- * | censored | Boolean | FALSE | |
- * | skip | Boolean | FALSE | |
- * | rang | Double | 0.7 | |
- * | decay | Double | 0.0 | |
- * | maxit | Integer | 100 | |
- * | Hess | Boolean | FALSE | |
- * | trace | Boolean | TRUE | |
- * | MaxNWts | Integer | 1000 | |
- * | abstol | Double | 1.0e-4 | |
- * | reltol | Double | 1.0e-8 | |
- *
- *
- *
- *
- * In general the getFit* methods provide access to results from the fit
- * and getPredict* methods provide access to results from the prediction (i.e.,
- * prediction using the model on new data). The values returned correspond to the various
- * values returned by the nnet and
- * predict.nnet functions
- * in R
- *
- * See {@link RModel} for details regarding the R and SJava environment.
- *
- * @author Rajarshi Guha
- * @cdk.require r-project
- * @cdk.module qsar
- * @cdk.githash
- *
- * @cdk.keyword neural network
- * @cdk.keyword classification
- * @deprecated
- */
-public class CNNClassificationModel extends RModel {
-
- static int globalID = 0;
- private int currentID;
- private CNNClassificationModelFit modelfit = null;
- private CNNClassificationModelPredict modelpredict = null;
-
- private HashMap params = null;
- private int noutput = 0;
- private int nvar = 0;
-
- private void setDefaults() {
- // lets set the default values of the arguments that are specified
- // to have default values in ?nnet
-
- // these params are vectors that depend on user defined stuff
- // so as a default we set them to FALSE so R can check if these
- // were not set
- this.params.put("subset", false);
- this.params.put("mask", false );
- this.params.put("Wts", false);
- this.params.put("weights", false);
-
- this.params.put("linout", false); // we want only classification
- this.params.put("entropy", true);
- this.params.put("softmax",false);
- this.params.put("censored", false);
- this.params.put("skip", false);
- this.params.put("rang", Double.valueOf(0.7));
- this.params.put("decay", Double.valueOf(0.0));
- this.params.put("maxit", Integer.valueOf(100));
- this.params.put("Hess", false);
- this.params.put("trace", false); // no need to see output
- this.params.put("MaxNWts", Integer.valueOf(1000));
- this.params.put("abstol", Double.valueOf(1.0e-4));
- this.params.put("reltol", Double.valueOf(1.0e-8));
- }
-
- /**
- * Constructs a CNNClassificationModel object.
- *
- * This constructor allows the user to simply set up the modeling class. It is
- * expected that parameters such as training data, architecture will be set at a
- * later point. The result of this constructor is to simply create a name for the
- * current instance of the modeling object.
- *
- * Other parameters that are required to be set should be done via
- * calls to setParameters. A number of parameters are set to the
- * defaults as specified in the manpage for
- * nnet.
- *
- */
- public CNNClassificationModel() {
- super();
- this.params = new HashMap();
- this.currentID = CNNClassificationModel.globalID;
- CNNClassificationModel.globalID++;
- this.setModelName("cdkCNNCModel"+this.currentID);
- this.setDefaults();
- }
-
- /**
- * Constructs a CNNClassificationModel object.
- *
- * This constructor allows the user to specify the dependent and
- * independent variables along with the number of hidden layer neurons.
- * This constructor is suitable for cases when there is a single output
- * neuron. If the number of rows of the design matrix is not equal to
- * the number of observations in y an exception will be thrown.
- *
- * Other parameters that are required to be set should be done via
- * calls to setParameters. A number of parameters are set to the
- * defaults as specified in the manpage for
- * nnet.
- *
- * @param x An array of independent variables. Observations should be in
- * the rows and variables in the columns.
- * @param y An array (single column) of observed class assignments
- * @param size The number of hidden layer neurons
- * @throws QSARModelException if the number of observations in x and y are not the same
- */
- public CNNClassificationModel(double[][] x, String[] y, int size) throws QSARModelException {
- super();
- this.params = new HashMap();
- this.currentID = CNNClassificationModel.globalID;
- CNNClassificationModel.globalID++;
- this.setModelName("cdkCNNCModel"+this.currentID);
-
- int nrow = y.length;
- int ncol = x[0].length;
-
- if (nrow != x.length) {
- throw new QSARModelException("The number of values for the dependent variable does not match the number of rows of the design matrix");
- }
-
- this.nvar = ncol;
- this.noutput = 1;
-
- Double[][] xx = new Double[nrow][ncol];
- String[][] yy = new String[nrow][1];
-
- for (int i = 0; i < nrow; i++) {
- yy[i][0] = new String(y[i]);
- for (int j = 0; j < ncol; j++) {
- xx[i][j] = new Double(x[i][j]);
- }
- }
- this.params.put("x", xx);
- this.params.put("y", yy);
- this.params.put("size", Integer.valueOf(size));
- this.setDefaults();
- }
-
- /**
- * Constructs a CNNClassificationModel object.
- *
- * This constructor allows the user to specify the dependent and
- * independent variables along with the number of hidden layer neurons.
- * This constructor is suitable for cases when there are multiple output
- * neuron. If the number of rows of the design matrix is not equal to
- * the number of observations in y an exception will be thrown.
- *
- * Other parameters that are required to be set should be done via
- * calls to setParameters. A number of parameters are set to the
- * defaults as specified in the manpage for
- * nnet.
- *
- * @param x An array of independent variables. Observations should be in
- * the rows and variables in the columns.
- * @param y An array (multiple columns) of observed values
- * @param size The number of hidden layer neurons
- * @throws QSARModelException if the number of observations in x and y are not the same
- */
- public CNNClassificationModel(double[][] x, String[][] y, int size) throws QSARModelException{
- super();
- this.params = new HashMap();
- this.currentID = CNNClassificationModel.globalID;
- CNNClassificationModel.globalID++;
- this.setModelName("cdkCNNCModel"+this.currentID);
-
- int nrow = y.length;
- int ncol = x[0].length;
-
- if (nrow != x.length) {
- throw new QSARModelException("The number of values for the dependent variable does not match the number of rows of the design matrix");
- }
-
- this.nvar = ncol;
- this.noutput = y[0].length;
-
- Double[][] xx = new Double[nrow][ncol];
- String[][] yy = new String[nrow][this.noutput];
-
- for (int i = 0; i < nrow; i++) {
- for (int j = 0; j < ncol; j++) {
- xx[i][j] = new Double(x[i][j]);
- }
- }
- for (int i = 0; i < nrow; i++) {
- for (int j = 0; j < this.noutput; j++) {
- yy[i][j] = new String(y[i][j]);
- }
- }
- this.params.put("x", xx);
- this.params.put("y", yy);
- this.params.put("size", Integer.valueOf(size));
- this.setDefaults();
- }
-
-
- /**
- * Sets parameters required for building a linear model or using one for prediction.
- *
- * This function allows the caller to set the various parameters available
- * for the
- * nnet
- * and
- * predict.nnet
- * R routines. See the R help pages for the details of the available
- * parameters.
- *
- * @param key A String containing the name of the parameter as described in the
- * R help pages
- * @param obj An Object containing the value of the parameter
- * @throws QSARModelException if the type of the supplied value does not match the expected type
- */
- public void setParameters(String key, Object obj) throws QSARModelException {
- // since we know the possible values of key we should check the coresponding
- // objects and throw errors if required. Note that this checking can't really check
- // for values (such as number of variables in the X matrix to build the model and the
- // X matrix to make new predictions) - these should be checked in functions that will
- // use these parameters. The main checking done here is for the class of obj and
- // some cases where the value of obj is not dependent on what is set before it
-
- if (key.equals("y")) {
- if (!(obj instanceof String[][])) {
- throw new QSARModelException("The class of the 'y' object must be String[][]");
- } else {
- noutput = ((String[][])obj)[0].length;
- }
- }
- if (key.equals("x")) {
- if (!(obj instanceof Double[][])) {
- throw new QSARModelException("The class of the 'x' object must be Double[][]");
- } else {
- nvar = ((Double[][])obj)[0].length;
- }
- }
- if (key.equals("weights")) {
- if (!(obj instanceof Double[])) {
- throw new QSARModelException("The class of the 'weights' object must be Double[]");
- }
- }
- if (key.equals("size")) {
- if (!(obj instanceof Integer)) {
- throw new QSARModelException("The class of the 'size' object must be Integer");
- }
- }
- if (key.equals("subset")) {
- if (!(obj instanceof Integer[])) {
- throw new QSARModelException("The class of the 'size' object must be Integer[]");
- }
- }
- if (key.equals("Wts")) {
- if (!(obj instanceof Double[])) {
- throw new QSARModelException("The class of the 'Wts' object must be Double[]");
- }
- }
- if (key.equals("mask")) {
- if (!(obj instanceof Boolean[])) {
- throw new QSARModelException("The class of the 'mask' object must be Boolean[]");
- }
- }
- if (key.equals("linout") ||
- key.equals("entropy") ||
- key.equals("softmax") ||
- key.equals("censored") ||
- key.equals("skip") ||
- key.equals("Hess") ||
- key.equals("trace")) {
- if (!(obj instanceof Boolean)) {
- throw new QSARModelException("The class of the 'trace|skip|Hess|linout|entropy|softmax|censored' object must be Boolean");
- }
- }
- if (key.equals("rang") ||
- key.equals("decay") ||
- key.equals("abstol") ||
- key.equals("reltol")) {
- if (!(obj instanceof Double)) {
- throw new QSARModelException("The class of the 'reltol|abstol|decay|rang' object must be Double");
- }
- }
- if (key.equals("maxit") ||
- key.equals("MaxNWts")) {
- if (!(obj instanceof Integer)) {
- throw new QSARModelException("The class of the 'maxit|MaxNWts' object must be Integer");
- }
- }
-
- if (key.equals("newdata")) {
- if ( !(obj instanceof Double[][])) {
- throw new QSARModelException("The class of the 'newdata' object must be Double[][]");
- }
- }
- this.params.put(key,obj);
- }
-
- /**
- * Fits a CNN classification model.
- *
- * This method calls the R function to fit a CNN classification model
- * to the specified dependent and independent variables. If an error
- * occurs in the R session, an exception is thrown.
- *
- * Note that, this method should be called prior to calling the various get
- * methods to obtain information regarding the fit.
- */
- public void build() throws QSARModelException {
- try {
- this.modelfit = (CNNClassificationModelFit)revaluator.call("buildCNNClass",
- new Object[]{ getModelName(), this.params });
- } catch (Exception re) {
- throw new QSARModelException(re.toString());
- }
- }
-
- /**
- * Uses a fitted model to predict the response for new observations.
- *
- * This function uses a previously fitted model to obtain predicted values
- * for a new set of observations. If the model has not been fitted prior to this
- * call an exception will be thrown. Use setParameters
- * to set the values of the independent variable for the new observations. You can also
- * set the type argument (see here).
- * However, since this class performs CNN classification, the default setting (type='raw') is sufficient.
- *x
- */
- public void predict() throws QSARModelException {
- if (this.modelfit == null)
- throw new QSARModelException("Before calling predict() you must fit the model using build()");
-
- Double[][] newx = (Double[][])this.params.get("newdata");
- if (newx[0].length != this.nvar) {
- throw new QSARModelException("Number of independent variables used for prediction must match those used for fitting");
- }
-
- try {
- this.modelpredict = (CNNClassificationModelPredict)revaluator.call("predictCNNClass",
- new Object[]{ getModelName(), this.params });
- } catch (Exception re) {
- throw new QSARModelException(re.toString());
- }
- }
-
- /**
- * Loads a CNNRegresionModel object from disk in to the current session.
- *
- *
- * @param fileName The disk file containing the model
- * @throws QSARModelException if the model that was loaded was not a CNNClassification
- * model
- */
- public void loadModel(String fileName) throws QSARModelException {
- // should probably check that the filename does exist
- Object model = (Object)revaluator.call("loadModel", new Object[]{ (Object)fileName });
- String modelName = (String)revaluator.call("loadModel.getName", new Object[] { (Object)fileName });
-
- if (model.getClass().getName().equals("org.openscience.cdk.qsar.model.R.CNNClassificationModelFit")) {
- this.modelfit = (CNNClassificationModelFit)model;
- this.setModelName(modelName);
- Double tmp = (Double)revaluator.eval(modelName+"$n[1]");
- nvar = (int)tmp.doubleValue();
- } else throw new QSARModelException("The loaded model was not a CNNClassificationModel");
- }
- /**
- * Loads an CNNClassificationModel object from a serialized string into the current session.
- *
- * @param serializedModel A String containing the serialized version of the model
- * @param modelName A String indicating the name of the model in the R session
- * @throws QSARModelException if the model being loaded is not a CNN classification model
- * object
- */
- public void loadModel(String serializedModel, String modelName) throws QSARModelException {
- // should probably check that the fileName does exist
- Object model = (Object)revaluator.call("unserializeModel", new Object[]{ (Object)serializedModel, (Object)modelName });
- String modelname = modelName;
-
- if (model.getClass().getName().equals("org.openscience.cdk.qsar.model.R.CNNClassificationModelFit")) {
- this.modelfit =(CNNClassificationModelFit)model;
- this.setModelName(modelname);
- Double tmp = (Double)revaluator.eval(modelname+"$n[1]");
- nvar = (int)tmp.doubleValue();
- } else throw new QSARModelException("The loaded model was not a CNNClassificationModel");
- }
-
- /**
- * Gets final value of the fitting criteria.
- *
- * This method only returns meaningful results if the build
- * method of this class has been previously called.
- *
- * @return A double indicating the value of the fitting criterion plus weight decay term.
- */
- public double getFitValue() {
- return(this.modelfit.getValue());
- }
-
- /**
- * Gets optimized weights for the model.
- *
- * This method only returns meaningful results if the build
- * method of this class has been previously called.
- *
- * @return A double[] containing the weights. The number of weights will be
- * equal to
(Ni * Nh) + (Nh * No) + Nh + No where Ni, Nh and No
- * are the number of input, hidden and output neurons.
- */
- public double[] getFitWeights() {
- return(this.modelfit.getWeights());
- }
- /**
- * Gets fitted values from the final model.
- *
- * This method only returns meaningful results if the build
- * method of this class has been previously called.
- *
- * @return A double[][] containing the fitted values for each output neuron
- * in the columns. Note that even if a single output neuron was specified during
- * model building the return value is still a 2D array (with a single column).
- */
- public double[][] getFitFitted() {
- return(this.modelfit.getFitted());
- }
- /**
- * Gets residuals for the fitted values from the final model.
- *
- * This method only returns meaningful results if the build
- * method of this class has been previously called.
- *
- * @return A double[][] containing the residuals for each output neuron
- * in the columns. Note that even if a single output neuron was specified during
- * model building the return value is still a 2D array (with a single column).
- */
- public double[][] getFitResiduals() {
- return(this.modelfit.getResiduals());
- }
- /**
- * Gets the Hessian of the measure of fit.
- *
- * If the Hess option was set to TRUE before the call to build
- * then the CNN routine will return the Hessian of the measure of fit at the best set of
- * weights found. * This method only returns meaningful results if the build
- * method of this class has been previously called.
- *
- * @return A double[][] containing the Hessian. It will be a square array
- * with dimensions equal to the Nwt x Nwt, where Nwt is the total number of weights
- * in the CNN model.
- */
- public double[][] getFitHessian() {
- return(this.modelfit.getHessian());
- }
-
- /**
- * Gets predicted values for new data using a previously built model.
- *
- * This method only returns meaningful results if the build
- * method of this class has been previously called. Since this is a classification
- * model the values represent the probability that an observation belongs to the given
- * class.
- *
- * @return A double[][] containing the predicted for each output neuron
- * in the columns. Note that even if a single output neuron was specified during
- * model building the return value is still a 2D array (with a single column).
- *
- */
- public double[][] getPredictPredictedRaw() {
- return(this.modelpredict.getPredictedRaw());
- }
-
- /**
- * Gets predicted values for new data using a previously built model.
- *
- * This method only returns meaningful results if the build
- * method of this class has been previously called. This function returns an
- * array of Strings indicating the class assignments of the observations, rather than
- * the raw probabilities.
- *
- * @return A String[] containing the class assigned to each observation.
- *
- */
-
- public String[] getPredictPredictedClass() {
- return(this.modelpredict.getPredictedClass());
- }
-
-
-}
diff --git a/src/main/org/openscience/cdk/qsar/model/R/CNNClassificationModelFit.java b/src/main/org/openscience/cdk/qsar/model/R/CNNClassificationModelFit.java
deleted file mode 100644
index de38762..0000000
--- a/src/main/org/openscience/cdk/qsar/model/R/CNNClassificationModelFit.java
+++ /dev/null
@@ -1,256 +0,0 @@
-/* $RCSfile$
- * $Author$
- * $Date$
- * $Revision$
- *
- * Copyright (C) 2004-2008 Rajarshi Guha
- *
- * Contact: cdk-devel@lists.sourceforge.net
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-package org.openscience.cdk.qsar.model.R;
-
-/**
- * A class that wraps the return value from R function, nnet().
- *
- * This is an internal class used by R to return the result of
- * the call to nnet.
- * As a result it should not be instantiated by the user. The actual modeling
- * class, CNNClassificationModel, provides acess to the various
- * fields of this object.
- *
- *
- * @author Rajarshi Guha
- * @cdk.require r-project
- * @cdk.module qsar
- * @cdk.githash
- * @deprecated
- */
-
-public class CNNClassificationModelFit {
- private int noutput; // number of output neurons (== number of classes)
- private double[] weights;
- private double[][] fitted;
- private double[][] residuals;
- private double value;
- private double[][] hessian = null;
-
- private double[][] vectorToMatrix(double[] v, int nrow, int ncol) {
- double[][] m = new double[nrow][ncol];
- for (int i = 0; i < ncol; i++) {
- for (int j = 0; j < nrow; j++) {
- m[j][i] = v[j + i*nrow];
- }
- }
- return(m);
- }
- /**
- * Constructs an object to contain a CNN classification fit.
- *
- * This class should not be instantiated directly and is only
- * required withtin R
- *
- * @param noutput The number of output neurons (ie the number of predicted variables)
- * @param nobs The number of observations
- * @param weights A 1-dimensional array containing the weights and biases
- * @param fitted A 1-dimensional array containing the fitted values
- * @param residual A 1-dimensional array containing the residuals of the fitted values
- * @param value The final value of the cost function
- * @param hessian A 1-dimensional array containing the Hessian
- */
- public CNNClassificationModelFit(
- int noutput,
- int nobs,
- double[] weights,
- double[] fitted, double[] residual,
- double value,
- double[] hessian) {
-
- // dimensions of hessian = nwt x nwt
- // dimensions of fitted, residual = nobs x noutput
- // also note that matrices come in as columnwise 1D arrays
-
- this.noutput = noutput;
- setWeights(weights);
- setResiduals(vectorToMatrix(residual, nobs,noutput));
- setFitted(vectorToMatrix(fitted, nobs,noutput));
- setValue(value);
- setHessian(vectorToMatrix(hessian,weights.length,weights.length));
- }
- /**
- * Constructs an object to contain a CNN classification fit.
- *
- * This class should not be instantiated directly and is only
- * required withtin R
- *
- * @param noutput The number of output neurons (ie the number of predicted variables)
- * @param nobs The number of observations
- * @param weights A 1-dimensional array containing the weights and biases
- * @param fitted A 1-dimensional array containing the fitted values
- * @param residual A 1-dimensional array containing the residuals of the fitted values
- * @param value The final value of the cost function
- */
- public CNNClassificationModelFit(
- int noutput,
- int nobs,
- double[] weights,
- double[] fitted, double[] residual,
- double value) {
- this.noutput = noutput;
- setWeights(weights);
- setResiduals(vectorToMatrix(residual, nobs,noutput));
- setFitted(vectorToMatrix(fitted, nobs,noutput));
- setValue(value);
- }
- /**
- * Get the final value of the cost function.
- *
- * This method should not be called outside this class
- *
- * @return The final value of the cost function
- * @see #setValue
- */
- public double getValue() {
- return(this.value);
- }
- /**
- * Set the final value of the cost function.
- *
- * This method should not be called outside this class
- *
- * @param value The value of the cost function at convergence
- * @see #getValue
- */
- public void setValue(double value) {
- this.value = value;
- }
-
-
- /**
- * Get the Hessian for the final network.
- *
- * This method should not be called outside this class
- *
- * @return A 2-dimensional array containing the hessian
- * @see #setHessian
- */
- public double[][] getHessian() { return(this.hessian); }
-
- /**
- * Set the Hessian for the final network.
- *
- * This method should not be called outside this class
- *
- * @param theHessian A 2-dimensional array containing the hessian
- * @see #getHessian
- */
- public void setHessian(double[][] theHessian) {
- if (theHessian == null) return;
- this.hessian = new double[theHessian.length][this.noutput];
- for (int i = 0; i < theHessian.length; i++) {
- for (int j = 0; j < this.noutput; j++) {
- this.hessian[i][j] = theHessian[i][j];
- }
- }
- }
-
- /**
- * Get the weights and biases of the final network.
- *
- * This method should not be called outside this class
- *
- * @return A 1-dimensional array of weights and biases
- * @see #setWeights
- */
- public double[] getWeights() { return(this.weights); }
- /**
- * Set the weights and biases of the final network.
- *
- * This method should not be called outside this class
- *
- * @param weights A 1-dimensional array of weights and biases
- * @see #getWeights
- */
- public void setWeights(double[] weights) {
- this.weights = new double[weights.length];
- for (int i = 0; i < weights.length; i++) {
- this.weights[i] = weights[i];
- }
- }
-
-
- /**
- * Get the residuals of the fit.
- *
- * This method should not be called outside this class
- *
- * @return A 2-dimensional array of residuals. The rows contain the
- * observations and the columns contain the predicted variables
- * @see #setResiduals
- */
- public double[][] getResiduals() { return(this.residuals); }
- /**
- * Set the residuals of the fit.
- *
- * This method should not be called outside this class
- *
- * @param residuals A 2-dimensional array of residuals. The rows contain the
- * observations and the columns contain the predicted variables
- * @see #getResiduals
- */
- public void setResiduals(double[][] residuals) {
- this.residuals = new double[residuals.length][this.noutput];
- for (int i = 0; i < residuals.length; i++) {
- for (int j = 0; j < this.noutput; j++) {
- this.residuals[i][j] = residuals[i][j];
- }
- }
- }
-
-
-
- /**
- * Get the fitted values.
- *
- * This method should not be called outside this class
- *
- * @return A 2-dimensional array of residuals. The rows contain the
- * observations and the columns contain the predicted variables
- * @see #setFitted
- */
- public double[][] getFitted() { return(this.fitted); }
- /**
- * Set the fitted values.
- *
- * This method should not be called outside this class
- *
- * @param fitted A 2-dimensional array of residuals. The rows contain the
- * observations and the columns contain the predicted variables
- * @see #getFitted
- */
- public void setFitted(double[][] fitted) {
- this.fitted = new double[fitted.length][this.noutput];
- for (int i = 0; i < fitted.length; i++) {
- for (int j = 0; j < this.noutput; j++) {
- this.fitted[i][j] = fitted[i][j];
- }
- }
- }
-}
-
-
-
diff --git a/src/main/org/openscience/cdk/qsar/model/R/CNNClassificationModelPredict.java b/src/main/org/openscience/cdk/qsar/model/R/CNNClassificationModelPredict.java
deleted file mode 100644
index 1ac007b..0000000
--- a/src/main/org/openscience/cdk/qsar/model/R/CNNClassificationModelPredict.java
+++ /dev/null
@@ -1,148 +0,0 @@
-/* $RCSfile$
- * $Author$
- * $Date$
- * $Revision$
- *
- * Copyright (C) 2004-2008 Rajarshi Guha
- *
- * Contact: cdk-devel@lists.sourceforge.net
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-package org.openscience.cdk.qsar.model.R;
-
-/**
- * A class that wraps the return value from the R function, predict.nnet for classification models.
- *
- * This is an internal class used by R to return the result of
- * the call to predict.nnet.
- * As a result it should not be instantiated by the user. The actual modeling
- * class, CNNClassificationModel, provides acess to the various
- * fields of this object.
- *
- *
- * @author Rajarshi Guha
- * @cdk.require r-project
- * @cdk.module qsar
- * @cdk.githash
- * @deprecated
- */
-public class CNNClassificationModelPredict {
- private int noutput;
- private double[][] predvalraw;
- private String[] predvalclass;
-
- private double[][] vectorToMatrix(double[] v, int nrow, int ncol) {
- double[][] m = new double[nrow][ncol];
- for (int i = 0; i < ncol; i++) {
- for (int j = 0; j < nrow; j++) {
- m[j][i] = v[j + i*nrow];
- }
- }
- return(m);
- }
- /**
- * Create an object to hold predictions from a previously built CNN model.
- *
- * This class should not be accessed directly
- *
- * @param noutput The number of predicted variables
- * @param values The predicted probabilities
- */
- public CNNClassificationModelPredict(int noutput, double[] values) {
- this.noutput = noutput;
- int nrow = values.length / noutput;
- setPredictedRaw(vectorToMatrix(values,nrow,noutput));
- }
- /**
- * Create an object to hold predictions from a previously built CNN model.
- *
- * This class should not be accessed directly. Required for the case of a single
- * predicted value.
- *
- * @param noutput The number of predicted variables
- * @param values The predicted probabilities
- */
- public CNNClassificationModelPredict(int noutput, double values) {
- this.noutput = noutput;
- setPredictedRaw(new double[][] { {values} });
- }
-
- /**
- * Create an object to hold predictions from a previously built CNN model.
- *
- * This class should not be accessed directly
- *
- * @param values An array of String containing the predicted class
- */
- public CNNClassificationModelPredict(String[] values) {
- this.predvalclass = new String[values.length];
- for (int i = 0; i < values.length; i++) {
- this.predvalclass[i] = values[i];
- }
- }
- /**
- * Create an object to hold predictions from a previously built CNN model.
- *
- * This class should not be accessed directly. Required for the
- * case of a single predicted value
- *
- * @param values An array of String containing the predicted class
- */
- public CNNClassificationModelPredict(String values) {
- this.predvalclass = new String[1];
- this.predvalclass[1] = values;
- }
-
- /**
- * Get the raw probabilities of the classification result.
- *
- * This class should not be accessed directly
- *
- * @return A 2-dimensional array containing the predicted probabilities. The rows
- * contain the observations and the columns contain the predicted variables
- * @see #setPredictedRaw
- */
- public double[][] getPredictedRaw() { return(this.predvalraw); }
- /**
- * Get the raw probabilities of the classification result.
- *
- * This class should not be accessed directly
- *
- * @param predicted A 2-dimensional array containing the predicted probabilities. The rows
- * contain the observations and the columns contain the predicted variables
- * @see #getPredictedRaw
- */
- public void setPredictedRaw(double[][] predicted) {
- this.predvalraw = new double[predicted.length][this.noutput];
- for (int i = 0; i < predicted.length; i++) {
- for (int j = 0; j < this.noutput; j++) {
- this.predvalraw[i][j] = predicted[i][j];
- }
- }
- }
-
- /**
- * Get the predicted classes.
- *
- * This class should not be accessed directly
- *
- * @return An array of String containing the predicted classes
- */
- public String[] getPredictedClass() { return(this.predvalclass); };
-}
-
-
diff --git a/src/main/org/openscience/cdk/qsar/model/R/CNNRegressionModel.java b/src/main/org/openscience/cdk/qsar/model/R/CNNRegressionModel.java
deleted file mode 100644
index 6c5138a..0000000
--- a/src/main/org/openscience/cdk/qsar/model/R/CNNRegressionModel.java
+++ /dev/null
@@ -1,583 +0,0 @@
-/* $RCSfile$
- * $Author$
- * $Date$
- * $Revision$
- *
- * Copyright (C) 2004-2008 Rajarshi Guha
- *
- * Contact: cdk-devel@lists.sourceforge.net
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-package org.openscience.cdk.qsar.model.R;
-
-import org.openscience.cdk.qsar.model.QSARModelException;
-
-import java.util.HashMap;
-
-/**
- * A modeling class that provides a computational neural network regression model.
- *
- * When instantiated this class ensures that the R/Java interface has been
- * initialized. The response and independent variables can be specified at construction
- * time or via the setParameters method.
- * The actual fitting procedure is carried out by build after which
- * the model may be used to make predictions, via predict. An example of the use
- * of this class is shown below:
- *
- * double[][] x;
- * double[] y;
- * Double[] wts;
- * Double[][] newx;
- * ...
- * try {
- * CNNRegressionModel cnnrm = new CNNRegressionModel(x,y,3);
- * cnnrm.setParameters("Wts",wts);
- * cnnrm.build();
- *
- * double fitValue = cnnrm.getFitValue();
- *
- * cnnrm.setParameters("newdata", newx);
- * cnnrm.setParameters("type", "raw");
- * cnnrm.predict();
- *
- * double[][] preds = cnnrm.getPredictPredicted();
- * } catch (QSARModelException qme) {
- * System.out.println(qme.toString());
- * }
- *
- * The above code snippet builds a 3-3-1 CNN model.
- * Multiple output neurons are easily
- * specified by supplying a matrix for y (i.e., double[][]) with the output variables
- * in the columns.
- *
- * Nearly all the arguments to
- * nnet() are
- * supported via the setParameters method. The table below lists the names of the arguments,
- * the expected type of the argument and the default setting for the arguments supported by this wrapper class.
- *
- *
- *
- *
- * | Name | Java Type | Default | Notes |
- *
- *
- *
- * | x | Double[][] | None | This must be set by the caller via the constructors or via setParameters |
- * | y | Double[][] | None | This must be set by the caller via the constructors or via setParameters |
- * | weights | Double[] | rep(1,nobs) | The default case weights is a vector of 1's equal in length to the number of observations, nobs |
- * | size | Integer | None | This must be set by the caller via the constructors or via setParameters |
- * | subset | Integer[] | 1:nobs | This is supposed to be an index vector specifying which observations are to be used in building the model. The default indicates that all should be used |
- * | Wts | Double[] | runif(1,nwt) | The initial weight vector is set to a random vector of length equal to the number of weights if not set by the user |
- * | mask | Boolean[] | rep(TRUE,nwt) | All weights are to be optimized unless otherwise specified by the user |
- * | linout | Boolean | TRUE | Since this class performs regression this need not be changed |
- * | entropy | Boolean | FALSE | |
- * | softmax | Boolean | FALSE | |
- * | censored | Boolean | FALSE | |
- * | skip | Boolean | FALSE | |
- * | rang | Double | 0.7 | |
- * | decay | Double | 0.0 | |
- * | maxit | Integer | 100 | |
- * | Hess | Boolean | FALSE | |
- * | trace | Boolean | TRUE | |
- * | MaxNWts | Integer | 1000 | |
- * | abstol | Double | 1.0e-4 | |
- * | reltol | Double | 1.0e-8 | |
- *
- *
- *
- *
- * In general the getFit* methods provide access to results from the fit
- * and getPredict* methods provide access to results from the prediction (i.e.,
- * prediction using the model on new data). The values returned correspond to the various
- * values returned by the nnet and
- * predict.nnet functions
- * in R
- *
- * See {@link RModel} for details regarding the R and SJava environment.
- *
- * @author Rajarshi Guha
- * @cdk.require r-project
- * @cdk.module qsar
- * @cdk.githash
- *
- * @cdk.keyword neural network
- * @cdk.keyword regression
- * @deprecated
- */
-public class CNNRegressionModel extends RModel {
-
- public static int globalID = 0;
- private int currentID;
- private CNNRegressionModelFit modelfit = null;
- private CNNRegressionModelPredict modelpredict = null;
-
- private HashMap params = null;
- private int noutput = 0;
- private int nvar = 0;
-
- private void setDefaults() {
- // lets set the default values of the arguments that are specified
- // to have default values in ?nnet
-
- // these params are vectors that depend on user defined stuff
- // so as a default we set them to FALSE so R can check if these
- // were not set
- this.params.put("subset", new Boolean(false));
- this.params.put("mask", new Boolean(false) );
- this.params.put("Wts", new Boolean(false));
- this.params.put("weights", new Boolean(false));
-
- this.params.put("linout", new Boolean(true)); // we want only regression
- this.params.put("entropy", new Boolean(false));
- this.params.put("softmax",new Boolean(false));
- this.params.put("censored", new Boolean(false));
- this.params.put("skip", new Boolean(false));
- this.params.put("rang", new Double(0.7));
- this.params.put("decay", new Double(0.0));
- this.params.put("maxit", Integer.valueOf(100));
- this.params.put("Hess", new Boolean(false));
- this.params.put("trace", new Boolean(false)); // no need to see output
- this.params.put("MaxNWts", Integer.valueOf(1000));
- this.params.put("abstol", new Double(1.0e-4));
- this.params.put("reltol", new Double(1.0e-8));
- }
-
- /**
- * Constructs a CNNRegressionModel object.
- *
- * This constructor allows the user to simply set up an instance of a CNN
- * regression modeling class. This constructor simply sets the name for this
- * instance. It is expected all the relevent parameters for modeling will be
- * set at a later point.
- *
- * Other parameters that are required to be set should be done via
- * calls to setParameters. A number of parameters are set to the
- * defaults as specified in the manpage for
- * nnet.
- *
- */
- public CNNRegressionModel() {
- super();
- this.params = new HashMap();
- this.currentID = CNNClassificationModel.globalID;
- CNNClassificationModel.globalID++;
- this.setModelName("cdkCNNModel"+this.currentID);
- this.setDefaults();
- }
-
- /**
- * Constructs a CNNRegressionModel object.
- *
- * This constructor allows the user to specify the dependent and
- * independent variables along with the number of hidden layer neurons.
- * This constructor is suitable for cases when there is a single output
- * neuron. If the number of rows of the design matrix is not equal to
- * the number of observations in y an exception will be thrown.
- *
- * Other parameters that are required to be set should be done via
- * calls to setParameters. A number of parameters are set to the
- * defaults as specified in the manpage for
- * nnet.
- *
- * @param x An array of independent variables. Observations should be in
- * the rows and variables in the columns.
- * @param y An array (single column) of observed values
- * @param size The number of hidden layer neurons
- * @throws QSARModelException if the number of observations in x and y do not match
- */
- public CNNRegressionModel(double[][] x, double[] y, int size) throws QSARModelException {
- super();
- this.params = new HashMap();
- this.currentID = CNNRegressionModel.globalID;
- CNNRegressionModel.globalID++;
- this.setModelName("cdkCNNModel"+this.currentID);
-
- int nrow = y.length;
- int ncol = x[0].length;
-
- if (nrow != x.length) {
- throw new QSARModelException("The number of values for the dependent variable does not match the number of rows of the design matrix");
- }
-
- this.nvar = ncol;
- this.noutput = 1;
-
- Double[][] xx = new Double[nrow][ncol];
- Double[][] yy = new Double[nrow][1];
-
- for (int i = 0; i < nrow; i++) {
- yy[i][0] = new Double(y[i]);
- for (int j = 0; j < ncol; j++) {
- xx[i][j] = new Double(x[i][j]);
- }
- }
- this.params.put("x", xx);
- this.params.put("y", yy);
- this.params.put("size", Integer.valueOf(size));
- this.setDefaults();
- }
-
- /**
- * Constructs a CNNRegressionModel object.
- *
- * This constructor allows the user to specify the dependent and
- * independent variables along with the number of hidden layer neurons.
- * This constructor is suitable for cases when there are multiple output
- * neuron. If the number of rows of the design matrix is not equal to
- * the number of observations in y an exception will be thrown.
- *
- * Other parameters that are required to be set should be done via
- * calls to setParameters. A number of parameters are set to the
- * defaults as specified in the manpage for
- * nnet.
- *
- * @param x An array of independent variables. Observations should be in
- * the rows and variables in the columns.
- * @param y An array (multiple columns) of observed values
- * @param size The number of hidden layer neurons
- * @throws QSARModelException if the number of observations in x and y do not match
- */
- public CNNRegressionModel(double[][] x, double[][] y, int size) throws QSARModelException{
- super();
- this.params = new HashMap();
- this.currentID = CNNRegressionModel.globalID;
- CNNRegressionModel.globalID++;
- this.setModelName("cdkCNNModel"+this.currentID);
-
- int nrow = y.length;
- int ncol = x[0].length;
-
- if (nrow != x.length) {
- throw new QSARModelException("The number of values for the dependent variable does not match the number of rows of the design matrix");
- }
-
- this.nvar = ncol;
- this.noutput = y[0].length;
-
- Double[][] xx = new Double[nrow][ncol];
- Double[][] yy = new Double[nrow][this.noutput];
-
- for (int i = 0; i < nrow; i++) {
- for (int j = 0; j < ncol; j++) {
- xx[i][j] = new Double(x[i][j]);
- }
- }
- for (int i = 0; i < nrow; i++) {
- for (int j = 0; j < this.noutput; j++) {
- yy[i][j] = new Double(y[i][j]);
- }
- }
- this.params.put("x", xx);
- this.params.put("y", yy);
- this.params.put("size", Integer.valueOf(size));
- this.setDefaults();
- }
-
-
- /**
- * Sets parameters required for building a linear model or using one for prediction.
- *
- * This function allows the caller to set the various parameters available
- * for the
- * nnet
- * and
- * predict.nnet
- * R routines. See the R help pages for the details of the available
- * parameters.
- *
- * @param key A String containing the name of the parameter as described in the
- * R help pages
- * @param obj An Object containing the value of the parameter
- * @throws QSARModelException if the type of the supplied value does not match the
- * expected type
- */
- public void setParameters(String key, Object obj) throws QSARModelException {
- // since we know the possible values of key we should check the coresponding
- // objects and throw errors if required. Note that this checking can't really check
- // for values (such as number of variables in the X matrix to build the model and the
- // X matrix to make new predictions) - these should be checked in functions that will
- // use these parameters. The main checking done here is for the class of obj and
- // some cases where the value of obj is not dependent on what is set before it
-
- if (key.equals("y")) {
- if (!(obj instanceof Double[][])) {
- throw new QSARModelException("The class of the 'y' object must be Double[][]");
- } else {
- noutput = ((Double[][])obj)[0].length;
- }
- }
- if (key.equals("x")) {
- if (!(obj instanceof Double[][])) {
- throw new QSARModelException("The class of the 'x' object must be Double[][]");
- } else {
- nvar = ((Double[][])obj)[0].length;
- }
- }
- if (key.equals("weights")) {
- if (!(obj instanceof Double[])) {
- throw new QSARModelException("The class of the 'weights' object must be Double[]");
- }
- }
- if (key.equals("size")) {
- if (!(obj instanceof Integer)) {
- throw new QSARModelException("The class of the 'size' object must be Integer");
- }
- }
- if (key.equals("subset")) {
- if (!(obj instanceof Integer[])) {
- throw new QSARModelException("The class of the 'size' object must be Integer[]");
- }
- }
- if (key.equals("Wts")) {
- if (!(obj instanceof Double[])) {
- throw new QSARModelException("The class of the 'Wts' object must be Double[]");
- }
- }
- if (key.equals("mask")) {
- if (!(obj instanceof Boolean[])) {
- throw new QSARModelException("The class of the 'mask' object must be Boolean[]");
- }
- }
- if (key.equals("linout") ||
- key.equals("entropy") ||
- key.equals("softmax") ||
- key.equals("censored") ||
- key.equals("skip") ||
- key.equals("Hess") ||
- key.equals("trace")) {
- if (!(obj instanceof Boolean)) {
- throw new QSARModelException("The class of the 'trace|skip|Hess|linout|entropy|softmax|censored' object must be Boolean");
- }
- }
- if (key.equals("rang") ||
- key.equals("decay") ||
- key.equals("abstol") ||
- key.equals("reltol")) {
- if (!(obj instanceof Double)) {
- throw new QSARModelException("The class of the 'reltol|abstol|decay|rang' object must be Double");
- }
- }
- if (key.equals("maxit") ||
- key.equals("MaxNWts")) {
- if (!(obj instanceof Integer)) {
- throw new QSARModelException("The class of the 'maxit|MaxNWts' object must be Integer");
- }
- }
-
- if (key.equals("newdata")) {
- if ( !(obj instanceof Double[][])) {
- throw new QSARModelException("The class of the 'newdata' object must be Double[][]");
- }
- }
- this.params.put(key,obj);
- }
-
- /**
- * Fits a CNN regression model.
- *
- * This method calls the R function to fit a CNN regression model
- * to the specified dependent and independent variables. If an error
- * occurs in the R session, an exception is thrown.
- *
- * Note that, this method should be called prior to calling the various get
- * methods to obtain information regarding the fit.
- */
- public void build() throws QSARModelException {
- try {
- this.modelfit = (CNNRegressionModelFit)revaluator.call("buildCNN",
- new Object[]{ getModelName(), this.params });
- } catch (Exception re) {
- throw new QSARModelException(re.toString());
- }
- }
-
- /**
- * Uses a fitted model to predict the response for new observations.
- *
- * This function uses a previously fitted model to obtain predicted values
- * for a new set of observations. If the model has not been fitted prior to this
- * call an exception will be thrown. Use setParameters
- * to set the values of the independent variable for the new observations. You can also
- * set the type argument (see here).
- * However, since this class performs CNN regression, the default setting (type='raw') is sufficient.
- */
- public void predict() throws QSARModelException {
- if (this.modelfit == null)
- throw new QSARModelException("Before calling predict() you must fit the model using build()");
-
- Double[][] newx = (Double[][])this.params.get("newdata");
- if (newx[0].length != this.nvar) {
- throw new QSARModelException("Number of independent variables used for prediction must match those used for fitting");
- }
-
- try {
- this.modelpredict = (CNNRegressionModelPredict)revaluator.call("predictCNN",
- new Object[]{ getModelName(), this.params });
- } catch (Exception re) {
- throw new QSARModelException(re.toString());
- }
- }
-
- /**
- * Returns an object summarizing the CNN regression model.
- *
- * The return object simply wraps the fields from the summary.nnet
- * return value. Various details can be extracted from the return object,
- * See {@link CNNRegressionModelSummary} for more details.
- *
- * @return A summary for the CNN regression model
- * @throws QSARModelException if the model has not been built prior to a call
- * to this method.
- */
- public CNNRegressionModelSummary summary() throws QSARModelException {
- if (this.modelfit == null)
- throw new QSARModelException("Before calling summary() you must fit the model using build()");
-
- CNNRegressionModelSummary s = null;
- try {
- s = (CNNRegressionModelSummary)revaluator.call("summaryModel",
- new Object[]{ getModelName() });
- } catch (Exception re) {
- throw new QSARModelException(re.toString());
- }
- return(s);
- }
-
- /**
- * Loads a CNNRegresionModel object from disk in to the current session.
- *
- *
- * @param fileName The disk file containing the model
- * @throws QSARModelException if the model being loaded is not a CNN regression model
- * object
- */
- public void loadModel(String fileName) throws QSARModelException {
- // should probably check that the filename does exist
- Object model = (Object)revaluator.call("loadModel", new Object[]{ (Object)fileName });
- String modelName = (String)revaluator.call("loadModel.getName", new Object[] { (Object)fileName });
-
- if (model.getClass().getName().equals("org.openscience.cdk.qsar.model.R.CNNRegressionModelFit")) {
- this.modelfit = (CNNRegressionModelFit)model;
- this.setModelName(modelName);
- Integer tmp = (Integer)revaluator.eval(modelName+"$n[1]");
- nvar = tmp.intValue();
- } else throw new QSARModelException("The loaded model was not a CNNRegressionModel");
- }
- /**
- * Loads an CNNRegressionModel object from a serialized string into the current session.
- *
- * @param serializedModel A String containing the serialized version of the model
- * @param modelName A String indicating the name of the model in the R session
- * @throws QSARModelException if the model being loaded is not a CNN regression model
- * object
- */
- public void loadModel(String serializedModel, String modelName) throws QSARModelException {
- // should probably check that the fileName does exist
- Object model = (Object)revaluator.call("unserializeModel", new Object[]{ (Object)serializedModel, (Object)modelName });
- String modelname = modelName;
-
- if (model.getClass().getName().equals("org.openscience.cdk.qsar.model.R.CNNRegressionModelFit")) {
- this.modelfit =(CNNRegressionModelFit)model;
- this.setModelName(modelname);
- Double tmp = (Double)revaluator.eval(modelName+"$n[1]");
- nvar = (int)tmp.doubleValue();
- } else throw new QSARModelException("The loaded model was not a CNNRegressionModel");
- }
-
- /**
- * Gets final value of the fitting criteria.
- *
- * This method only returns meaningful results if the build
- * method of this class has been previously called.
- *
- * @return A double indicating the value of the fitting criterion plus weight decay term.
- */
- public double getFitValue() {
- return(this.modelfit.getValue());
- }
-
- /**
- * Gets optimized weights for the model.
- *
- * This method only returns meaningful results if the build
- * method of this class has been previously called.
- *
- * @return A double[] containing the weights. The number of weights will be
- * equal to (Ni * Nh) + (Nh * No) + Nh + No where Ni, Nh and No
- * are the number of input, hidden and output neurons.
- */
- public double[] getFitWeights() {
- return(this.modelfit.getWeights());
- }
- /**
- * Gets fitted values from the final model.
- *
- * This method only returns meaningful results if the build
- * method of this class has been previously called.
- *
- * @return A double[][] containing the fitted values for each output neuron
- * in the columns. Note that even if a single output neuron was specified during
- * model building the return value is still a 2D array (with a single column).
- */
- public double[][] getFitFitted() {
- return(this.modelfit.getFitted());
- }
- /**
- * Gets residuals for the fitted values from the final model.
- *
- * This method only returns meaningful results if the build
- * method of this class has been previously called.
- *
- * @return A double[][] containing the residuals for each output neuron
- * in the columns. Note that even if a single output neuron was specified during
- * model building the return value is still a 2D array (with a single column).
- */
- public double[][] getFitResiduals() {
- return(this.modelfit.getResiduals());
- }
- /**
- * Gets the Hessian of the measure of fit.
- *
- * If the Hess option was set to TRUE before the call to build
- * then the CNN routine will return the Hessian of the measure of fit at the best set of
- * weights found. * This method only returns meaningful results if the build
- * method of this class has been previously called.
- *
- * @return A double[][] containing the Hessian. It will be a square array
- * with dimensions equal to the Nwt x Nwt, where Nwt is the total number of weights
- * in the CNN model.
- */
- public double[][] getFitHessian() {
- return(this.modelfit.getHessian());
- }
-
- /**
- * Gets predicted values for new data using a previously built model.
- *
- * This method only returns meaningful results if the build
- * method of this class has been previously called.
- *
- * @return A double[][] containing the predicted for each output neuron
- * in the columns. Note that even if a single output neuron was specified during
- * model building the return value is still a 2D array (with a single column).
- */
- public double[][] getPredictPredicted() {
- return(this.modelpredict.getPredicted());
- }
-
-
-}
diff --git a/src/main/org/openscience/cdk/qsar/model/R/CNNRegressionModelFit.java b/src/main/org/openscience/cdk/qsar/model/R/CNNRegressionModelFit.java
deleted file mode 100644
index ac9da3e..0000000
--- a/src/main/org/openscience/cdk/qsar/model/R/CNNRegressionModelFit.java
+++ /dev/null
@@ -1,251 +0,0 @@
-/* $RCSfile$
- * $Author$
- * $Date$
- * $Revision$
- *
- * Copyright (C) 2004-2008 Rajarshi Guha
- *
- * Contact: cdk-devel@lists.sourceforge.net
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-package org.openscience.cdk.qsar.model.R;
-
-/**
- * A class that wraps the return value from R function, nnet().
- *
- * This is an internal class used by R to return the result of
- * the call to nnet.
- * As a result it should not be instantiated by the user. The actual modeling
- * class, CNNRegressionModel, provides acess to the various
- * fields of this object.
- *
- *
- * @author Rajarshi Guha
- * @cdk.require r-project
- * @cdk.module qsar
- * @cdk.githash
- * @deprecated
- */
-
-public class CNNRegressionModelFit {
- private int noutput; // number of output neurons (== number of classes)
- private double[] weights;
- private double[][] fitted;
- private double[][] residuals;
- private double value;
- private double[][] hessian = null;
-
- private double[][] vectorToMatrix(double[] v, int nrow, int ncol) {
- double[][] m = new double[nrow][ncol];
- for (int i = 0; i < ncol; i++) {
- for (int j = 0; j < nrow; j++) {
- m[j][i] = v[j + i*nrow];
- }
- }
- return(m);
- }
-
- /**
- * Constructs an object to contain a CNN regression fit.
- *
- * This class should not be instantiated directly and is only
- * required withtin R
- *
- * @param noutput The number of output neurons (ie the number of predicted variables)
- * @param nobs The number of observations
- * @param weights A 1-dimensional array containing the weights and biases
- * @param fitted A 1-dimensional array containing the fitted values
- * @param residual A 1-dimensional array containing the residuals of the fitted values
- * @param value The final value of the cost function
- * @param hessian A 1-dimensional array containing the Hessian
- */
- public CNNRegressionModelFit(
- int noutput,
- int nobs,
- double[] weights,
- double[] fitted, double[] residual,
- double value,
- double[] hessian) {
-
- // dimensions of hessian = nwt x nwt
- // dimensions of fitted, residual = nobs x noutput
- // also note that matrices come in as columnwise 1D arrays
-
- this.noutput = noutput;
- setWeights(weights);
- setResiduals(vectorToMatrix(residual, nobs,noutput));
- setFitted(vectorToMatrix(fitted, nobs,noutput));
- setValue(value);
- setHessian(vectorToMatrix(hessian,weights.length,weights.length));
- }
- /**
- * Constructs an object to contain a CNN regression fit.
- *
- * This class should not be instantiated directly and is only
- * required withtin R
- *
- * @param noutput The number of output neurons (ie the number of predicted variables)
- * @param nobs The number of observations
- * @param weights A 1-dimensional array containing the weights and biases
- * @param fitted A 1-dimensional array containing the fitted values
- * @param residual A 1-dimensional array containing the residuals of the fitted values
- * @param value The final value of the cost function
- */
- public CNNRegressionModelFit(
- int noutput,
- int nobs,
- double[] weights,
- double[] fitted, double[] residual,
- double value) {
- this.noutput = noutput;
- setWeights(weights);
- setResiduals(vectorToMatrix(residual, nobs,noutput));
- setFitted(vectorToMatrix(fitted, nobs,noutput));
- setValue(value);
- }
-
- /**
- * Get the final value of the cost function.
- *
- * This method should not be called outside this class
- *
- * @return The final value of the cost function
- * @see #setValue
- */
- public double getValue() {
- return(this.value);
- }
- /**
- * Set the final value of the cost function.
- *
- * This method should not be called outside this class
- *
- * @param value The value of the cost function at convergence
- * @see #getValue
- */
- public void setValue(double value) {
- this.value = value;
- }
-
- /**
- * Get the Hessian for the final network.
- *
- * This method should not be called outside this class
- *
- * @return A 2-dimensional array containing the hessian
- * @see #setHessian
- */
- public double[][] getHessian() { return(this.hessian); }
- /**
- * Set the Hessian for the final network.
- *
- * This method should not be called outside this class
- *
- * @param theHessian A 2-dimensional array containing the hessian
- * @see #getHessian
- */
- public void setHessian(double[][] theHessian) {
- if (theHessian == null) return;
- this.hessian = new double[theHessian.length][this.noutput];
- for (int i = 0; i < theHessian.length; i++) {
- for (int j = 0; j < this.noutput; j++) {
- this.hessian[i][j] = theHessian[i][j];
- }
- }
- }
-
- /**
- * Get the weights and biases of the final network.
- *
- * This method should not be called outside this class
- *
- * @return A 1-dimensional array of weights and biases
- * @see #setWeights
- */
- public double[] getWeights() { return(this.weights); }
- /**
- * Set the weights and biases of the final network.
- *
- * This method should not be called outside this class
- *
- * @param weights A 1-dimensional array of weights and biases
- * @see #getWeights
- */
- public void setWeights(double[] weights) {
- this.weights = new double[weights.length];
- for (int i = 0; i < weights.length; i++) {
- this.weights[i] = weights[i];
- }
- }
- /**
- * Get the residuals of the fit.
- *
- * This method should not be called outside this class
- *
- * @return A 2-dimensional array of residuals. The rows contain the
- * observations and the columns contain the predicted variables
- * @see #setResiduals
- */
- public double[][] getResiduals() { return(this.residuals); }
- /**
- * Set the residuals of the fit.
- *
- * This method should not be called outside this class
- *
- * @param residuals A 2-dimensional array of residuals. The rows contain the
- * observations and the columns contain the predicted variables
- * @see #getResiduals
- */
- public void setResiduals(double[][] residuals) {
- this.residuals = new double[residuals.length][this.noutput];
- for (int i = 0; i < residuals.length; i++) {
- for (int j = 0; j < this.noutput; j++) {
- this.residuals[i][j] = residuals[i][j];
- }
- }
- }
- /**
- * Get the fitted values.
- *
- * This method should not be called outside this class
- *
- * @return A 2-dimensional array of residuals. The rows contain the
- * observations and the columns contain the predicted variables
- * @see #setFitted
- */
- public double[][] getFitted() { return(this.fitted); }
- /**
- * Set the fitted values.
- *
- * This method should not be called outside this class
- *
- * @param fitted A 2-dimensional array of residuals. The rows contain the
- * observations and the columns contain the predicted variables
- * @see #getFitted
- */
- public void setFitted(double[][] fitted) {
- this.fitted = new double[fitted.length][this.noutput];
- for (int i = 0; i < fitted.length; i++) {
- for (int j = 0; j < this.noutput; j++) {
- this.fitted[i][j] = fitted[i][j];
- }
- }
- }
-}
-
-
-
diff --git a/src/main/org/openscience/cdk/qsar/model/R/CNNRegressionModelPredict.java b/src/main/org/openscience/cdk/qsar/model/R/CNNRegressionModelPredict.java
deleted file mode 100644
index e00b958..0000000
--- a/src/main/org/openscience/cdk/qsar/model/R/CNNRegressionModelPredict.java
+++ /dev/null
@@ -1,108 +0,0 @@
-/* $RCSfile$
- * $Author$
- * $Date$
- * $Revision$
- *
- * Copyright (C) 2004-2008 Rajarshi Guha
- *
- * Contact: cdk-devel@lists.sourceforge.net
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-package org.openscience.cdk.qsar.model.R;
-
-/**
- * A class that wraps the return value from the R function, predict.cnn.
- *
- * This is an internal class used by R to return the result of
- * the call to predict.nnet.
- * As a result it should not be instantiated by the user. The actual modeling
- * class, CNNRegressionModel, provides acess to the various
- * fields of this object.
- *
- *
- * @author Rajarshi Guha
- * @cdk.require r-project
- * @cdk.module qsar
- * @cdk.githash
- * @deprecated
- */
-public class CNNRegressionModelPredict {
- private int noutput;
- private double[][] predval;
-
- private double[][] vectorToMatrix(double[] v, int nrow, int ncol) {
- double[][] m = new double[nrow][ncol];
- for (int i = 0; i < ncol; i++) {
- for (int j = 0; j < nrow; j++) {
- m[j][i] = v[j + i*nrow];
- }
- }
- return(m);
- }
-
- /**
- * Create an object to hold predictions from a previously built CNN model.
- *
- * This class should not be accessed directly
- *
- * @param noutput The number of predicted variables
- * @param values The predicted values
- */
- public CNNRegressionModelPredict(int noutput, double[] values) {
- this.noutput = noutput;
- int nrow = values.length / noutput;
- setPredicted(vectorToMatrix(values,nrow,noutput));
- }
- /**
- * Create an object to hold predictions from a previously built CNN model.
- *
- * This class should not be accessed directly. Required for the case
- * of a single predicted value.
- *
- * @param noutput The number of predicted variables
- * @param values The predicted value
- */
- public CNNRegressionModelPredict(int noutput, double values) {
- this.noutput = noutput;
- setPredicted(new double[][] { {values} });
- }
-
- /**
- * Get the predicted values.
- *
- * @return A 2-dimensional array containing the predicted values. The rows
- * contain the observations and the columns contain the predicted variables
- * @see #setPredicted
- */
- public double[][] getPredicted() { return(this.predval); }
- /**
- * Set the predicted values.
- *
- * @param predicted A 2-dimensional array containing the predicted values. The rows
- * contain the observations and the columns contain the predicted variables
- * @see #getPredicted
- */
- public void setPredicted(double[][] predicted) {
- this.predval = new double[predicted.length][this.noutput];
- for (int i = 0; i < predicted.length; i++) {
- for (int j = 0; j < this.noutput; j++) {
- this.predval[i][j] = predicted[i][j];
- }
- }
- }
-}
-
-
diff --git a/src/main/org/openscience/cdk/qsar/model/R/CNNRegressionModelSummary.java b/src/main/org/openscience/cdk/qsar/model/R/CNNRegressionModelSummary.java
deleted file mode 100644
index a29e069..0000000
--- a/src/main/org/openscience/cdk/qsar/model/R/CNNRegressionModelSummary.java
+++ /dev/null
@@ -1,172 +0,0 @@
-/* $RCSfile$
- * $Author$
- * $Date$
- * $Revision$
- *
- * Copyright (C) 2004-2008 Rajarshi Guha
- *
- * Contact: cdk-devel@lists.sourceforge.net
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-package org.openscience.cdk.qsar.model.R;
-
-
-/** A class that represents a summary of a CNN regression model.
- *
- * This class essentially wraps the result of summary.nnet. As with other
- * backend classes this class should not be instantiated directly by the
- * user, though the various fields may be accessed with the provided
- * methods.
- *
- * @author Rajarshi Guha
- * @cdk.require r-project
- * @cdk.module qsar
- * @cdk.githash
- * @deprecated
- */
-public class CNNRegressionModelSummary {
-
- double[] residuals;
- boolean entropy, softmax, censored;
- double value;
- int[] n;
-
- /**
- * Constructor for an object that wraps the return value from summary.lm.
- *
- * This should not be instantiated directly. The class is meant to be instantiated
- * from an R session
- *
- * @param n A 3 element array containing the number of neurons in the
- * input, hidden and output layer respectively
- * @param entropy A boolean indicating whether the entropy setting was used
- * @param softmax A boolean indicating whether the softmax setting was used
- * @param censored A boolean indicating whether the censored setting was used
- * @param value The final value of the convergenc criterion
- * @param residuals A 1-dimensional array of residual values
- */
- public CNNRegressionModelSummary( int[] n, boolean entropy,
- boolean softmax, boolean censored, double value,
- double[] residuals) {
-
-
- this.residuals = new double[residuals.length];
- for (int i = 0; i < residuals.length; i++)
- this.residuals[i] = residuals[i];
-
- this.n = new int[n.length];
- for (int i = 0; i < n.length; i++)
- this.n[i] = n[i];
-
- this.softmax = softmax;
- this.censored = censored;
- this.entropy = entropy;
- this.value = value;
- }
- /**
- * Constructor for an object that wraps the return value from summary.lm.
- *
- * This should not be instantiated directly. The class is meant to be instantiated
- * from an R session
- *
- * @param n A 3 element array containing the number of neurons in the
- * input, hidden and output layer respectively
- * @param entropy A boolean indicating whether the entropy setting was used
- * @param softmax A boolean indicating whether the softmax setting was used
- * @param censored A boolean indicating whether the censored setting was used
- * @param value The final value of the convergenc criterion
- * @param residuals A 1-dimensional array of residual values
- */
- public CNNRegressionModelSummary( double[] n, boolean entropy,
- boolean softmax, boolean censored, double value,
- double[] residuals) {
-
-
- this.residuals = new double[residuals.length];
- for (int i = 0; i < residuals.length; i++)
- this.residuals[i] = residuals[i];
-
- this.n = new int[n.length];
- for (int i = 0; i < n.length; i++)
- this.n[i] = (int)n[i];
-
- this.softmax = softmax;
- this.censored = censored;
- this.entropy = entropy;
- this.value = value;
- }
-
-
- /**
- * Return the residuals of the fit.
- *
- * @return A 1-dimensional array of doubles containing the
- * residuals of the fit
- */
- public double[] getResiduals() {
- return(this.residuals);
- }
-
-
- /**
- * Return the number of neurons in the CNN layers.
- *
- * This method returns a 3-element array containing the number
- * of neurons in the input, hidden and output layer
- * respectively.
- *
- * @return A 3-element int array
- */
- public int[] getNumNeurons() {
- return(this.n);
- }
-
- /**
- * Return the final value of the convergence criterion.
- *
- * @return The final value of the convergence criterion
- */
- public double getValue(){
- return(this.value);
- }
-
- /**
- * Return whether softmax was used.
- *
- * @return A boolean indicating whether softmax was used or not
- */
- public boolean getSoftmax() {
- return(this.softmax);
- }
- /**
- * Return whether entropy was used.
- *
- * @return A boolean indicating whether entropy was used or not
- */
- public boolean getEntropy() {
- return(this.entropy);
- }
- /**
- * Return whether censored was used.
- *
- * @return A boolean indicating whether censored was used or not
- */
- public boolean getCensored() {
- return(this.censored);
- }
-}
-
diff --git a/src/main/org/openscience/cdk/qsar/model/R/LinearRegressionModel.java b/src/main/org/openscience/cdk/qsar/model/R/LinearRegressionModel.java
deleted file mode 100644
index 6729563..0000000
--- a/src/main/org/openscience/cdk/qsar/model/R/LinearRegressionModel.java
+++ /dev/null
@@ -1,552 +0,0 @@
-/* $RCSfile$
- * $Author$
- * $Date$
- * $Revision$
- *
- * Copyright (C) 2004-2008 Rajarshi Guha
- *
- * Contact: cdk-devel@lists.sourceforge.net
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-package org.openscience.cdk.qsar.model.R;
-
-import org.openscience.cdk.qsar.model.QSARModelException;
-
-import java.util.HashMap;
-
-/**
- * A modeling class that provides a linear least squares regression model.
- *
- * When instantiated this class ensures that the R/Java interface has been
- * initialized. The response and independent variables can be specified at construction
- * time or via the setParameters method. The actual fitting procedure is carried out by build after which
- * the model may be used to make predictions.
- *
- * Currently, the design of the class is quite sparse as it does not allow subsetting,
- * variable names, setting of contrasts and so on.
- * It is also assumed that the values of all the variables are defined (i.e., not such that
- * they are NA
- * in an R session).
- * The use of
- * this class is shown in the following code snippet
- *
- * try {
- * LinearRegressionModel lrm = new LinearRegressionModel(x,y);
- * lrm.build();
- * lrm.setParameters("newdata", newx);
- * lrm.setParameters("interval", "confidence");
- * lrm.predict();
- * } catch (QSARModelException qme) {
- * System.out.println(qme.toString());
- * }
- * double[] fitted = lrm.getFitFitted();
- * double[] predictedvalues = lrm.getPredictPredicted();
- *
- * Note that when making predictions, the new X matrix and interval type can be set by calls
- * to setParameters(). In general, the arguments for lm() and predict.lm() can be set via
- * calls to setParameters(). The following table lists the parameters that can be set and their
- * expected types. More detailed informationis available in the R documentation.
- *
- *
- *
- *
- * | Name | Java Type | Notes |
- *
- *
- *
- *
- * | x | Double[][] | |
- *
- *
- * | y | Double[] | Length should be equal to the rows of x |
- *
- *
- * | weights | Double[] | Length should be equal to rows of x |
- *
- *
- * | newdata | Double[][] | Number of columns should be the same as in x |
- *
- *
- * | interval | String | Can be 'confidence' or 'predicton' |
- *
- *
- *
- *
- * In general the getFit* methods provide access to results from the fit
- * and getPredict* methods provide access to results from the prediction (i.e.,
- * prediction using the model on new data). The values returned correspond to the various
- * values returned by the lm
- * and predict.lm
- * functions in R.
- *
- * See {@link RModel} for details regarding the R and SJava environment.
- *
- * @author Rajarshi Guha
- * @cdk.require r-project
- * @cdk.module qsar
- * @cdk.githash
- *
- * @cdk.keyword regression, linear
- * @deprecated
- */
-public class LinearRegressionModel extends RModel {
-
- private static int globalID = 0;
- private int currentID;
- private LinearRegressionModelFit modelfit = null;
- private LinearRegressionModelPredict modelpredict = null;
-
- private HashMap params = null;
- private int nvar = 0;
-
- /**
- * Constructs a LinearRegressionModel object.
- *
- * The constructor simply instantiates the model ID. Dependent and independent variables
- * should be set via setParameters().
- *
- * An important feature of the current implementation is that all the
- * independent variables are used during the fit. Furthermore no subsetting is possible.
- * As a result when setting these via setParameters() the caller should specify only
- * the variables and observations that will be used for the fit.
- */
- public LinearRegressionModel(){
- super();
-
- this.params = new HashMap();
-
- this.currentID = LinearRegressionModel.globalID;
- LinearRegressionModel.globalID++;
- this.setModelName("cdkLMModel"+this.currentID);
- }
-
- /**
- * Constructs a LinearRegressionModel object.
- *
- * The constructor allows the user to specify the
- * dependent and independent variables. The length of the dependent variable
- * array should equal the number of rows of the independent variable matrix. If this
- * is not the case an exception will be thrown.
- *
- * An important feature of the current implementation is that all the
- * independent variables are used during the fit. Furthermore no subsetting is possible.
- * As a result when creating an instance of this object the caller should specify only
- * the variables and observations that will be used for the fit.
- *
- * @param xx An array of independent variables. The observations should be in the rows
- * and the variables should be in the columns
- * @param yy an array containing the dependent variable
- * @throws QSARModelException if the number of observations in x and y do not match
- */
- public LinearRegressionModel(double[][] xx, double[] yy) throws QSARModelException{
- super();
-
- this.params = new HashMap();
-
- this.currentID = LinearRegressionModel.globalID;
- LinearRegressionModel.globalID++;
- this.setModelName("cdkLMModel"+this.currentID);
-
- int nrow = yy.length;
- this.nvar = xx[0].length;
-
- if (nrow != xx.length) {
- throw new QSARModelException("The number of values for the dependent variable does not match the number of rows of the design matrix");
- }
-
- Double[][] x = new Double[nrow][this.nvar];
- Double[] y = new Double[nrow];
- Double[] weights = new Double[nrow];
-
- for (int i = 0; i < nrow; i++) {
- y[i] = new Double(yy[i]);
- weights[i] = new Double(1.0);
- }
- for (int i = 0; i < nrow; i++) {
- for (int j = 0; j < this.nvar; j++)
- x[i][j] = new Double(xx[i][j]);
- }
-
- params.put("x", x);
- params.put("y", y);
- params.put("weights", weights);
- }
-
-
- /**
- * Constructs a LinearRegressionModel object.
- *
- * The constructor allows the user to specify the
- * dependent and independent variables as well as weightings for
- * the observations.
- *
- * The length of the dependent variable
- * array should equal the number of rows of the independent variable matrix. If this
- * is not the case an exception will be thrown.
- *
- * An important feature of the current implementation is that all the
- * independent variables are used during the fit. Furthermore no subsetting is possible.
- * As a result when creating an instance of this object the caller should specify only
- * the variables and observations that will be used for the fit.
- *
- * @param xx An array of independent variables. The observations should be in the rows
- * and the variables should be in the columns
- * @param yy an array containing the dependent variable
- * @param weights Specifies the weights for each observation. Unit weights are equivilant
- * to OLS
- * @throws QSARModelException if the number of observations in x and y do not match
- */
- public LinearRegressionModel(double[][] xx, double[] yy, double[] weights) throws QSARModelException{
- super();
-
- this.params = new HashMap();
-
- this.currentID = LinearRegressionModel.globalID;
- LinearRegressionModel.globalID++;
- this.setModelName("cdkLMModel"+this.currentID);
-
- int nrow = yy.length;
- this.nvar = xx[0].length;
-
- if (nrow != xx.length) {
- throw new QSARModelException("The number of values for the dependent variable does not match the number of rows of the design matrix");
- }
- if (nrow != weights.length) {
- throw new QSARModelException("The length of the weight vector does not match the number of rows of the design matrix");
- }
-
- Double[][] x = new Double[nrow][this.nvar];
- Double[] y = new Double[nrow];
- Double[] wts = new Double[nrow];
-
- for (int i = 0; i < nrow; i++) {
- y[i] = new Double(yy[i]);
- wts[i] = new Double(weights[i]);
- }
- for (int i = 0; i < nrow; i++) {
- for (int j = 0; j < this.nvar; j++)
- x[i][j] = new Double(xx[i][j]);
- }
- params.put("x", x);
- params.put("y", y);
- params.put("weights", wts);
- }
-
- protected void finalize() {
- revaluator.voidEval("rm("+this.getModelName()+",pos=1)");
- }
-
-
- /**
- * Fits a linear regression model.
- *
- * This method calls the R function to fit a linear regression model
- * to the specified dependent and independent variables. If an error
- * occurs in the R session, an exception is thrown.
- *
- * Note that, this method should be called prior to calling the various get
- * methods to obtain information regarding the fit.
- */
- public void build() throws QSARModelException {
- // lets do some checks in case stuff was set via setParameters()
- Double[][] x;
- Double[] y,weights;
- x = (Double[][])this.params.get("x");
- y = (Double[])this.params.get("y");
- weights = (Double[])this.params.get("weights");
- if (this.nvar == 0) this.nvar = x[0].length;
- else {
- if (y.length != x.length) {
- throw new QSARModelException("Number of observations does no match number of rows in the design matrix");
- }
- if (weights.length != y.length) {
- throw new QSARModelException("The weight vector must have the same length as the number of observations");
- }
- }
-
- // lets build the model
- try {
- this.modelfit = (LinearRegressionModelFit)revaluator.call("buildLM",
- new Object[]{ getModelName(), this.params });
- } catch (Exception re) {
- throw new QSARModelException(re.toString());
- }
- }
-
-
- /**
- * Sets parameters required for building a linear model or using one for prediction.
- *
- * This function allows the caller to set the various parameters available
- * for the lm() and predict.lm() R routines. See the R help pages for the details of the available
- * parameters.
- *
- * @param key A String containing the name of the parameter as described in the
- * R help pages
- * @param obj An Object containing the value of the parameter
- * @throws QSARModelException if the type of the supplied value does not match the
- * expected type
- *
- */
- public void setParameters(String key, Object obj) throws QSARModelException {
- // since we know the possible values of key we should check the coresponding
- // objects and throw errors if required. Note that this checking can't really check
- // for values (such as number of variables in the X matrix to build the model and the
- // X matrix to make new predictions) - these should be checked in functions that will
- // use these parameters. The main checking done here is for the class of obj and
- // some cases where the value of obj is not dependent on what is set before it
-
- if (key.equals("y")) {
- if (!(obj instanceof Double[])) {
- throw new QSARModelException("The class of the 'y' object must be Double[]");
- }
- }
- if (key.equals("x")) {
- if (!(obj instanceof Double[][])) {
- throw new QSARModelException("The class of the 'x' object must be Double[][]");
- }
- }
- if (key.equals("weights")) {
- if (!(obj instanceof Double[])) {
- throw new QSARModelException("The class of the 'weights' object must be Double[]");
- }
- }
- if (key.equals("interval")) {
- if (!(obj instanceof String)) {
- throw new QSARModelException("The class of the 'interval' object must be String");
- }
- if (!(obj.equals("confidence") || obj.equals("prediction"))) {
- throw new QSARModelException("The type of interval must be: prediction or confidence");
- }
- }
- if (key.equals("newdata")) {
- if ( !(obj instanceof Double[][])) {
- throw new QSARModelException("The class of the 'newdata' object must be Double[][]");
- }
- }
- this.params.put(key,obj);
- }
-
-
- /**
- * Uses a fitted model to predict the response for new observations.
- *
- * This function uses a previously fitted model to obtain predicted values
- * for a new set of observations. If the model has not been fitted prior to this
- * call an exception will be thrown. Use setParameters
- * to set the values of the independent variable for the new observations and the
- * interval type.
- * @throws QSARModelException if the model has not been built prior to a call
- * to this method. Also if the number of independent variables specified for prediction
- * is not the same as specified during model building
- */
- public void predict() throws QSARModelException {
- if (this.modelfit == null)
- throw new QSARModelException("Before calling predict() you must fit the model using build()");
-
- Double[][] newx = (Double[][])this.params.get(new String("newdata"));
- if (newx[0].length != this.nvar) {
- throw new QSARModelException("Number of independent variables used for prediction must match those used for fitting");
- }
-
- try {
- this.modelpredict = (LinearRegressionModelPredict)revaluator.call("predictLM",
- new Object[]{ getModelName(), this.params });
- } catch (Exception re) {
- throw new QSARModelException(re.toString());
- }
- }
-
- /**
- * Returns an object summarizing the linear regression model.
- *
- * The return object simply wraps the fields from the summary.lm
- * return value. Various details can be extracted from the return object,
- * See {@link LinearRegressionModelSummary} for more details.
- *
- * @return A summary for the linear regression model
- * @throws QSARModelException if the model has not been built prior to a call
- * to this method
- */
- public LinearRegressionModelSummary summary() throws QSARModelException {
- if (this.modelfit == null)
- throw new QSARModelException("Before calling summary() you must fit the model using build()");
-
- LinearRegressionModelSummary s = null;
- try {
- s = (LinearRegressionModelSummary)revaluator.call("summaryModel",
- new Object[]{ getModelName() });
- } catch (Exception re) {
- throw new QSARModelException(re.toString());
- }
- return(s);
- }
-
-
- /**
- * Loads an LinearRegressionModel object from disk in to the current session.
- *
- * @param fileName The disk file containing the model
- * @throws QSARModelException if the model being loaded is not a linear regression model
- * object
- */
- public void loadModel(String fileName) throws QSARModelException {
- // should probably check that the fileName does exist
- Object model = (Object)revaluator.call("loadModel", new Object[]{ (Object)fileName });
- String modelName = (String)revaluator.call("loadModel.getName", new Object[] { (Object)fileName });
-
- if (model.getClass().getName().equals("org.openscience.cdk.qsar.model.R.LinearRegressionModelFit")) {
- this.modelfit =(LinearRegressionModelFit)model;
- this.setModelName(modelName);
- Double tmp = (Double)revaluator.eval("length("+modelName+"$coefficients)-1");
- nvar = (int)tmp.doubleValue();
- } else throw new QSARModelException("The loaded model was not a LinearRegressionModel");
- }
- /**
- * Loads an LinearRegressionModel object from a serialized string into the current session.
- *
- * @param serializedModel A String containing the serialized version of the model
- * @param modelName A String indicating the name of the model in the R session
- * @throws QSARModelException if the model being loaded is not a linear regression model
- * object
- */
- public void loadModel(String serializedModel, String modelName) throws QSARModelException {
- // should probably check that the fileName does exist
- Object model = (Object)revaluator.call("unserializeModel", new Object[]{ (Object)serializedModel, (Object)modelName });
- String modelname = modelName;
-
- if (model.getClass().getName().equals("org.openscience.cdk.qsar.model.R.LinearRegressionModelFit")) {
- this.modelfit =(LinearRegressionModelFit)model;
- this.setModelName(modelname);
- Double tmp = (Double)revaluator.eval("length("+modelName+"$coefficients)-1");
- nvar = (int)tmp.doubleValue();
- } else throw new QSARModelException("The loaded model was not a LinearRegressionModel");
- }
-
- /* interface to fit object */
-
- /**
- * Gets the rank of the fitted linear model.
- *
- * This method only returns meaningful results if the build
- * method of this class has been previously called.
- *
- * @return An integer indicating the rank
- */
- public int getFitRank() { return(this.modelfit.getRank()); }
-
- /**
- * Returns the residuals.
- *
- * The residuals are the response minus the fitted values.
- * This method only returns meaningful results if the build
- * method of this class has been previously called.
- * @return A double[] contaning the residuals for each observation
- */
- public double[] getFitResiduals() { return(this.modelfit.getResiduals()); }
-
- /**
- * Returns the estimated coefficients.
- *
- * This method only returns meaningful results if the build
- * method of this class has been previously called.
- * @return A double[] containing the coefficients
- */
- public double[] getFitCoefficients() { return(this.modelfit.getCoefficients()); }
-
- /**
- * Returns the residual degrees of freedom.
- *
- * This method only returns meaningful results if the build
- * method of this class has been previously called.
- * @return An integr indicating the residual degrees of freedom
- */
- public int getFitDFResidual() { return(this.modelfit.getdfResidual()); }
-
- /**
- * Returns the fitted mean values.
- *
- * This method only returns meaningful results if the build
- * method of this class has been previously called.
- * @return A double[] containing the fitted values
- */
- public double[] getFitFitted() { return(this.modelfit.getFitted()); }
-
-
-
-
-
- /* interface to predict object */
-
- /**
- * Returns the degrees of freedom for residual.
- *
- * @return An integer indicating degrees of freedom
- */
- public int getPredictDF() { return(this.modelpredict.getDF()); }
-
- /**
- * Returns the residual standard deviations.
- *
- * @return A double indicating residual standard deviations
- */
- public double getPredictResidualScale() { return(this.modelpredict.getResidualScale()); }
-
- /**
- * Returns the predicted values for the prediction set.
- *
- * This function only returns meaningful results if the predict
- * method of this class has been called.
- *
- * @return A double[] containing the predicted values
- */
- public double[] getPredictPredicted() { return(this.modelpredict.getPredicted()); }
-
- /**
- * Returns the lower prediction bounds.
- *
- * By default the bounds (both lower and upper) are confidence bounds. However
- * the call to predict can specify prediction bounds.
- * This function only returns meaningful results if the predict
- * method of this class has been called.
- *
- * @return A double[] containing the lower bounds for the predictions
- */
- public double[] getPredictLowerBound() { return(this.modelpredict.getLower()); }
-
- /**
- * Returns the upper prediction bounds.
- *
- * By default the bounds (both lower and upper) are confidence bounds. However
- * the call to predict can specify prediction bounds.
- * This function only returns meaningful results if the predict
- * method of this class has been called.
- *
- * @return A double[] containing the upper bounds for the predictions
- */
- public double[] getPredictUpperBound() { return(this.modelpredict.getUpper()); }
-
- /**
- * Returns the standard error of predictions.
- *
- * This function only returns meaningful results if the predict
- * method of this class has been called.
- *
- * @return A double[] containing the standard error of predictions.
- */
- public double[] getPredictSEPredictions() { return(this.modelpredict.getSEFit()); }
-}
diff --git a/src/main/org/openscience/cdk/qsar/model/R/LinearRegressionModelFit.java b/src/main/org/openscience/cdk/qsar/model/R/LinearRegressionModelFit.java
deleted file mode 100644
index 82147e8..0000000
--- a/src/main/org/openscience/cdk/qsar/model/R/LinearRegressionModelFit.java
+++ /dev/null
@@ -1,178 +0,0 @@
-/* $RCSfile$
- * $Author$
- * $Date$
- * $Revision$
- *
- * Copyright (C) 2004-2008 Rajarshi Guha
- *
- * Contact: cdk-devel@lists.sourceforge.net
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-package org.openscience.cdk.qsar.model.R;
-
-/**
- * A class that wraps the return value from R function, lm..
- *
- * This is an internal class used by R to return the result of
- * the call to lm.
- * As a result it should not be instantiated by the user. The actual modeling
- * class, LinearRegressionModel, provides acess to the various
- * fields of this object.
- *
- *
- * @author Rajarshi Guha
- * @cdk.require r-project
- * @cdk.module qsar
- * @cdk.githash
- * @deprecated
- */
-
-public class LinearRegressionModelFit {
- double[] coeff, res, fitted;
- int rank, dfResidual;
-
- /**
- * Construct the object to contain a linear regression fit.
- *
- * @param coeff A 1-dimensional array of coefficients
- * @param res A 1-dimensional array of residuals
- * @param fitted A 1-dimensional array of fitted values
- * @param rank An integer indicating the rank of the fit
- * @param degreesOfFreedom The degrees of freedom
- */
- public LinearRegressionModelFit(double[] coeff, double[] res, double[] fitted, int rank, int degreesOfFreedom) {
- setCoefficients(coeff);
- setResiduals(res);
- setFitted(fitted);
- setRank(rank);
- setdfResidual(degreesOfFreedom);
- }
-
- /**
- * Get the rank of the fit.
- *
- * @return The rank of the fit
- * @see #setRank
- */
- public int getRank() { return(this.rank); }
-
- /**
- * Set the rank of the fit.
- *
- * This method should not be called outside this class
- *
- * @param rank The rank of the fit
- * @see #getRank
- */
- public void setRank(int rank) { this.rank = rank; };
-
- /**
- * Get the residuals of the fit.
- *
- * The number of residuals equals the number of observations used
- * to build the model
- *
- * @return A 1-dimensional array containing the residuals.
- * @see #setResiduals
- */
- public double[] getResiduals() { return(this.res); }
-
- /**
- * Set the residuals of the fit.
- *
- * This method should not be called outside this class
- *
- * @param residuals A 1-dimensional array of residual values
- * @see #getResiduals
- */
- public void setResiduals(double[] residuals) {
- this.res = new double[residuals.length];
- for (int i = 0; i < residuals.length; i++) this.res[i] = residuals[i];
- }
-
- /**
- * Get the fitted coefficients.
- *
- * The number of coefficients equals the number of independent
- * variables used to build the model
- *
- * @return A 1-dimensional array containing the coefficients.
- * @see #setCoefficients
- */
- public double[] getCoefficients() { return(this.coeff); }
-
- /**
- * Set the fitted coefficients.
- *
- *
- * This method should not be called outside this class
- *
- * @param coeff A 1-dimensional array containing the coefficients.
- * @see #getCoefficients
- */
- public void setCoefficients(double[] coeff) {
- this.coeff = new double[coeff.length];
- for (int i = 0; i < coeff.length; i++) this.coeff[i] = coeff[i];
- }
-
- /**
- * Get the DOF of the residuals.
- *
- * @return An integer indicating the D.O.F
- * @see #setdfResidual
- */
- public int getdfResidual() { return(this.dfResidual); }
-
- /**
- * Set the DOF of the residuals.
- *
- * This method should not be called outside this class
- *
- * @param degreesOfFreedom The degrees of freedom
- * @see #getdfResidual
- */
- public void setdfResidual(int degreesOfFreedom) { this.dfResidual = degreesOfFreedom; }
-
-
- /**
- * Get the fitted values.
- *
- * Returns the predicted values for the observations used to
- * build the model. The number of fitted values equals the number
- * observations used to build the model.
- *
- * @return A 1-dimensional array containing the fitted values
- * @see #setFitted
- */
- public double[] getFitted() { return(this.fitted); }
-
- /**
- * Set the fitted values.
- *
- * This method should not be called outside this class
- *
- * @param fitted A 1-dimensional array of fitted values
- * @see #getFitted
- */
- public void setFitted(double[] fitted) {
- this.fitted = new double[fitted.length];
- for (int i = 0; i < fitted.length; i++) this.fitted[i] = fitted[i];
- }
-}
-
-
-
diff --git a/src/main/org/openscience/cdk/qsar/model/R/LinearRegressionModelPredict.java b/src/main/org/openscience/cdk/qsar/model/R/LinearRegressionModelPredict.java
deleted file mode 100644
index 14faab3..0000000
--- a/src/main/org/openscience/cdk/qsar/model/R/LinearRegressionModelPredict.java
+++ /dev/null
@@ -1,222 +0,0 @@
-/* $RCSfile$
- * $Author$
- * $Date$
- * $Revision$
- *
- * Copyright (C) 2004-2008 Rajarshi Guha
- *
- * Contact: cdk-devel@lists.sourceforge.net
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-package org.openscience.cdk.qsar.model.R;
-
-/**
- * A class that wraps the return value from the R function, predict.lm.
- *
- * This is an internal class used by R to return the result of
- * the call to predict.lm.
- * As a result it should not be instantiated by the user. The actual modeling
- * class, LinearRegressionModel, provides acess to the various
- * fields of this object.
- *
- *
- * @author Rajarshi Guha
- * @cdk.require r-project
- * @cdk.module qsar
- * @cdk.githash
- * @deprecated
- */
-public class LinearRegressionModelPredict {
- double[] pred, lwr, upr, sefit;
- int degreesOfFreedom;
- double residualScale;
-
- /**
- * Construct the object to contain linear regression predictions.
- *
- * @param predicted A 1-dimensional array of predicted values
- * @param standardErrors A 1-dimensional array of standard errors of prediction
- * @param lowerBounds A 1-dimensional array of lower confidence bounds
- * @param upperBounds A 1-dimensional array of upper confidence bounds
- * @param degreesOfFreedom The degrees of freedom of hte predictions
- * @param residualScale The scale of the residuals
- */
- public LinearRegressionModelPredict(double[] predicted, double[] standardErrors,
- double[] lowerBounds, double[] upperBounds,
- int degreesOfFreedom, double residualScale) {
- setPredicted(predicted);
- setSEFit(standardErrors);
- setLower(lowerBounds);
- setUpper(upperBounds);
- setDF(degreesOfFreedom);
- setResidualScale(residualScale);
- }
- /**
- * Construct the object to contain linear regression predictions.
- *
- * This is required if a single prediction was requested in which case
- * R will pass a single double value rather than an array.
- *
- * @param predicted The predicted values
- * @param standardErrors The standard errors of prediction
- * @param lowerBounds The lower confidence bounds
- * @param upperBounds The upper confidence bounds
- * @param degreesOfFreedom The degrees of freedom of hte predictions
- * @param residualScale The scale of the residuals
- */
- public LinearRegressionModelPredict(double predicted, double standardErrors,
- double lowerBounds, double upperBounds,
- int degreesOfFreedom, double residualScale) {
- setPredicted(new double[] {predicted});
- setSEFit(new double[] {standardErrors});
- setLower(new double[] {lowerBounds});
- setUpper(new double[] {upperBounds});
- setDF(degreesOfFreedom);
- setResidualScale(residualScale);
- }
-
- /**
- * Get the degrees of freedom.
- *
- * @return An integer indicating the degrees of freedom
- * @see #setDF
- */
- public int getDF() { return(this.degreesOfFreedom); }
-
- /**
- * Set the degrees of freedom.
- *
- * This method should not be called outside this class
- *
- * @param degreesOfFreedom An integer indicating the degrees of freedom
- * @see #getDF
- */
- public void setDF(int degreesOfFreedom) { this.degreesOfFreedom = degreesOfFreedom; }
-
- /**
- * Get the scale of residuals.
- *
- * @return A double indicating the residual scale
- * @see #setResidualScale
- */
- public double getResidualScale() { return(this.residualScale); }
-
- /**
- * Set the scale of the residuals.
- *
- * This method should not be called outside this class
- *
- * @param scale The scale of the residuals
- * @see #getResidualScale
- */
- public void setResidualScale(double scale) { this.residualScale = scale; }
-
- /**
- * Get predicted values.
- *
- * Get the predictions for a set of observations from the current linear
- * regression fit
- *
- * @return A 1-dimensional array containing the predicted values
- * @see #setPredicted
- */
- public double[] getPredicted() { return(this.pred); }
-
- /**
- * Set the predicted values.
- *
- * This method should not be called outside this class
- *
- * @param predicted A 1-dimensional array of predicted values
- * @see #getPredicted
- */
- public void setPredicted(double[] predicted) {
- this.pred = new double[predicted.length];
- for (int i = 0; i < predicted.length; i++) this.pred[i] = predicted[i];
- }
-
- /**
- * Get the lower confidence bounds.
- *
- * Gets the lower confidence bounds for the predicted values of
- * the observations
- *
- * @return A 1-dimensional array of lower confidence bounds
- * @see #setLower
- */
- public double[] getLower() { return(this.lwr); }
-
- /**
- * Set the lower confidence bounds.
- *
- * This method should not be called outside this class
- *
- * @param lowerBounds A 1-dimensional array of lower confidence bounds
- * @see #getLower
- */
- public void setLower(double[] lowerBounds) {
- this.lwr = new double[lowerBounds.length];
- for (int i = 0; i < lowerBounds.length; i++) this.lwr[i] = lowerBounds[i];
- }
-
- /**
- * Get the upper confidence bounds.
- *
- * Gets the upper confidence bounds for the predicted values of
- * the observations
- *
- * @return A 1-dimensional array of upper confidence bounds
- * @see #setUpper
- */
- public double[] getUpper() { return(this.upr); }
-
- /**
- * Set the upper confidence bounds.
- *
- * This method should not be called outside this class
- *
- * @param upperBounds A 1-dimensional array of upper confidence bounds
- * @see #getUpper
- */
- public void setUpper(double[] upperBounds) {
- this.upr = new double[upperBounds.length];
- for (int i = 0; i < upperBounds.length; i++) this.upr[i] = upperBounds[i];
- }
-
-
- /**
- * Get the standard errors of prediction.
- *
- * @return A 1-dimensional array of standard errors
- * @see #setSEFit
- */
- public double[] getSEFit() { return(this.sefit); }
-
- /**
- * Set the standard errors of predictions.
- *
- * @param standardErrors A 1-dimensional array of standard errors
- * @see #getSEFit
- */
- public void setSEFit(double[] standardErrors) {
- this.sefit = new double[standardErrors.length];
- for (int i = 0; i < standardErrors.length; i++) this.sefit[i] = standardErrors[i];
- }
-
-}
-
-
diff --git a/src/main/org/openscience/cdk/qsar/model/R/LinearRegressionModelSummary.java b/src/main/org/openscience/cdk/qsar/model/R/LinearRegressionModelSummary.java
deleted file mode 100644
index 317319e..0000000
--- a/src/main/org/openscience/cdk/qsar/model/R/LinearRegressionModelSummary.java
+++ /dev/null
@@ -1,192 +0,0 @@
-/* $RCSfile$
- * $Author$
- * $Date$
- * $Revision$
- *
- * Copyright (C) 2004-2008 Rajarshi Guha
- *
- * Contact: cdk-devel@lists.sourceforge.net
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-package org.openscience.cdk.qsar.model.R;
-
-
-/** A class that represents a summary of a linear regression model.
- *
- * This class essentially wraps the result of summar.lm. As with other
- * backend classes this class should not be instantiated directly by the
- * user, though the various fields may be accessed with the provided
- * methods.
- *
- * @author Rajarshi Guha
- * @cdk.require r-project
- * @cdk.module qsar
- * @cdk.githash
- * @deprecated
- */
-public class LinearRegressionModelSummary {
-
- double[] residuals;
- double[][] coeff; // rows - vars, cols - stats
- double rsq, adjrsq, sigma;
- int df;
- int numdf, dendf;
- double fstat;
-
- String[] colNames;
- String[] rowNames;
-
- private double[][] vectorToMatrix(double[] v, int nrow, int ncol) {
- double[][] m = new double[nrow][ncol];
- for (int i = 0; i < ncol; i++) {
- for (int j = 0; j < nrow; j++) {
- m[j][i] = v[j + i*nrow];
- }
- }
- return(m);
- }
-
- /**
- * Constructor for an object that wraps the return value from summary.lm.
- *
- * This should not be instantiated directly. The class is meant to be instantiated
- * from an R session
- *
- * @param residuals An array of residuals
- * @param coeff An array of coeffs and associated statistics
- * @param coeffColNames The names of the columns for the coefficient matrix
- * @param coeffRowNames The names of the rows for the coefficient matrix
- * @param sigma The residual error
- * @param df The degrees of freedom
- * @param rsq The R^2 value
- * @param adjrsq The adjusted R^2 value
- * @param fstat The value of the F-statistic
- */
- public LinearRegressionModelSummary( double[] residuals , double coeff[],
- double sigma, double rsq, double adjrsq, int df,
- double[] fstat, String[] coeffRowNames, String[] coeffColNames) {
-
-
- this.residuals = new double[residuals.length];
- for (int i = 0; i < residuals.length; i++)
- this.residuals[i] = residuals[i];
-
- this.coeff = vectorToMatrix(coeff, coeff.length/4, 4);
-
-
- this.colNames = new String[coeffColNames.length];
- this.rowNames = new String[coeffRowNames.length];
- for (int i = 0; i < coeffColNames.length; i++) this.colNames[i] = coeffColNames[i];
- for (int i = 0; i < coeffRowNames.length; i++) this.rowNames[i] = coeffRowNames[i];
-
-
- this.sigma = sigma;
- this.df = df;
- this.rsq = rsq;
- this.adjrsq = adjrsq;
- this.numdf = (int)fstat[1];
- this.dendf = (int)fstat[2];
- this.fstat = fstat[0];
-
- }
-
- /**
- * Return the residuals of the fit.
- *
- * @return A 1-dimensional array of doubles containing the
- * residuals of the fit
- */
- public double[] getResiduals() {
- return(this.residuals);
- }
-
-
- /**
- * Returns the coefficients and associated statistics.
- *
- * This method will return the coefficients as well as the standard
- * error in the coefficients, t-values and p-values corresponding to the
- * t-values. Thus the return value is a 2D array of doubles, with rows equal
- * to the number of coefficients (ie 1+num predictor variables) and 4 columns
- * containing the estimated coefficients and the above statistics, in the
- * order mentioned above.
- *
- * @return A 2-D array of doubles containing the estimated coefficients and
- * associated statistics
- */
- public double[][] getCoeff() {
- return(this.coeff);
- }
-
- /**
- * Returns the R^2 value.
- *
- * @return The R^2 value
- */
- public double getRSQ() {
- return(this.rsq);
- }
-
- /**
- * Return the adjusted R^2 value.
- *
- * This statistic is generally a better indicator than plain R^2
- *
- * @return The adjusted R^2 value
- */
- public double getAdjRSQ() {
- return(this.adjrsq);
- }
-
- /**
- * Return the residual standard error.
- *
- * This method returns the residual standard error and the associated degrees
- * of freedom, in a 2 element array of doubles.
- *
- * @return A 2 element array of doubles containing the residual error and
- * DoF
- */
- public double[] getSigma() {
- double[] ret = {this.sigma, this.df};
- return( ret );
- }
-
- /**
- * Returns the value of the F-statistic.
- *
- * @return The F-statistic
- * @see #getFStatisticDF
- */
- public double getFStatistic() {
- return(this.fstat);
- }
-
- /**
- * Returns the degrees of freedom (DoF) for which the F-statistic was calculated.
- *
- * @return A 2 element int[]. The first element is the DoF of the numerator
- * and the second element is the DoF of the denominator
- * @see #getFStatistic
- */
- public int[] getFStatisticDF() {
- int[] ret = {this.numdf, this.dendf};
- return( ret );
- }
-
-}
-
diff --git a/src/main/org/openscience/cdk/qsar/model/R/PLSRegressionModel.java b/src/main/org/openscience/cdk/qsar/model/R/PLSRegressionModel.java
deleted file mode 100644
index 407c106..0000000
--- a/src/main/org/openscience/cdk/qsar/model/R/PLSRegressionModel.java
+++ /dev/null
@@ -1,585 +0,0 @@
-/* $RCSfile$
- * $Author$
- * $Date$
- * $Revision$
- *
- * Copyright (C) 2004-2008 Rajarshi Guha
- *
- * Contact: cdk-devel@lists.sourceforge.net
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-package org.openscience.cdk.qsar.model.R;
-
-import org.openscience.cdk.qsar.model.QSARModelException;
-
-import java.util.HashMap;
-
-/**
- * A modeling class that provides a PLS regression model.
- *
- * When instantiated this class ensures that the R/Java interface has been
- * initialized. The response and independent variables can be specified at construction
- * time or via the setParameters method.
- * The actual fitting procedure is carried out by build.
- * NOTE: For this class to work, you must have the
- * pls.pcr
- * package installed in your R library.
- *
- * When building the PLS model, parameters such as whether cross validation is to be used, the type of
- * PLS algorithm etc can be specified by making calls to setParameters. This method can also
- * be used to set a new X matrix for prediction.
- * The following table lists the parameters that can be set and their
- * expected types. More detailed information is available in the R documentation.
- *
- *
- *
- *
- * | Name | Java Type | Default | Notes |
- *
- *
- *
- *
- * | X | Double[][] | None | Variables should be in the columns, observations in the rows |
- *
- *
- * | Y | Double[][] | None | Length should be equal to the rows of X. Variables should be in the columns, observations in the rows |
- *
- *
- * | newX | Double[][] | None | A 2D array of values to make predictions for. Variables should be in the columns, observations in the rows |
- *
- *
- * | ncomp | Integer[] | {1,rank(X)} | This can be an array of length 1 or 2. If there is only one element
- * then only the specified number of latent variables will be assessed during modeling. If 2 values are specified
- * then the model will use N1 to N2 latent variables where N1 and N2 are the first and second elements respectively |
- *
- *
- * | method | String | "SIMPLS" | The type of PLS algorithm to use (can be SIMPLS or kernelPLS) |
- *
- *
- * | validation | String | "none" | Indicates whether cross validation should be used. To enable cross validation set this to "CV" |
- *
- *
- * | grpsize | Integer | 0 | The group size for the "CV" validation. By default this is ignored and niter is used to determine the value of this argument |
- *
- *
- * | niter | Integer | 10 | The number of iterations in the cross-validation. Note that if grpsize is set to a non-zero value then the value of niter will be calculated from the value of grpsize |
- *
- *
- * | nlv | Integer | None | The number of latent variables to use during prediction. By default this does not need to be specified and will be obtained from the fitted model |
- *
- *
- *
- *
- *
- * In general the getFit* methods provide access to results from the fit and
- * getPredict* methods provide access to results from the prediction. In case validation is specified
- * then the results from the CV can be obtained via the getValidation* methods.
- * The values returned correspond to the various
- * values returned by the pls and
- * predict.mvr
- * functions in R.
- *
- * See {@link RModel} for details regarding the R and SJava environment.
- *
- * @author Rajarshi Guha
- * @cdk.require r-project
- * @cdk.module qsar
- * @cdk.githash
- *
- * @cdk.keyword partial least squares
- * @cdk.keyword PLS
- * @cdk.keyword regression
- * @deprecated
- */
-public class PLSRegressionModel extends RModel {
-
- private static int globalID = 0;
- private int currentID;
- private PLSRegressionModelFit modelfit = null;
- private PLSRegressionModelPredict modelpredict = null;
-
- private HashMap params = null;
- private int nvar = 0;
-
- private void setDefaults() {
- this.params.put("ncomp", new Boolean(false));
- this.params.put("method", "SIMPLS");
- this.params.put("validation", "none");
- this.params.put("grpsize", Integer.valueOf(0));
- this.params.put("niter", Integer.valueOf(10));
- this.params.put("nlv", new Boolean(false));
- }
- /**
- * Constructs a PLSRegressionModel object.
- *
- * The constructor simply instantiates the model ID. Dependent and independent variables
- * should be set via setParameters().
- */
- public PLSRegressionModel(){
- super();
-
- this.params = new HashMap();
-
- this.currentID = PLSRegressionModel.globalID;
- PLSRegressionModel.globalID++;
- this.setModelName("cdkPLSRegressionModel"+this.currentID);
- this.setDefaults();
- }
-
- /**
- * Constructs a PLSRegressionModel object.
- *
- * The constructor allows the user to specify the
- * dependent and independent variables. The length of the dependent variable
- * array should equal the number of rows of the independent variable matrix. If this
- * is not the case an exception will be thrown.
- *
- * @param xx An array of independent variables. The observations should be in the rows
- * and the variables should be in the columns
- * @param yy An array containing the dependent variable
- * @throws QSARModelException if the number of observations in x and y do not match
- */
- public PLSRegressionModel(double[][] xx, double[] yy) throws QSARModelException{
- super();
-
- this.params = new HashMap();
-
- this.currentID = PLSRegressionModel.globalID;
- PLSRegressionModel.globalID++;
- this.setModelName("cdkPLSRegressionModel"+this.currentID);
- this.setDefaults();
-
- int nrow = yy.length;
- this.nvar = xx[0].length;
-
- if (nrow != xx.length) {
- throw new QSARModelException("The number of values for the dependent variable does not match the number of rows of the design matrix");
- }
-
- Double[][] x = new Double[nrow][this.nvar];
- Double[][] y = new Double[nrow][1];
-
- for (int i = 0; i < nrow; i++) {
- y[i][1] = new Double(yy[i]);
- for (int j = 0; j < this.nvar; j++)
- x[i][j] = new Double(xx[i][j]);
- }
-
- params.put("X", x);
- params.put("Y", y);
- }
-
-
- /**
- * Constructs a PLSRegressionModel object.
- *
- * The constructor allows the user to specify the
- * dependent and independent variables. This constructor will accept a matrix
- * of Y values.
- *
- * The length of the dependent variable
- * array should equal the number of rows of the independent variable matrix. If this
- * is not the case an exception will be thrown.
- *
- * @param xx An array of independent variables. The observations should be in the rows
- * and the variables should be in the columns
- * @param yy A 2D array containing the dependent variable
- * @throws QSARModelException if the number of observations in x and y do not match
- */
- public PLSRegressionModel(double[][] xx, double[][] yy) throws QSARModelException{
- super();
-
- this.params = new HashMap();
-
- this.currentID = PLSRegressionModel.globalID;
- PLSRegressionModel.globalID++;
- this.setModelName("cdkPLSRegressionModel"+this.currentID);
- this.setDefaults();
-
- int nrow = yy.length;
- int ncoly = yy[0].length;
- this.nvar = xx[0].length;
-
- if (nrow != xx.length) {
- throw new QSARModelException("The number of values for the dependent variable does not match the number of rows of the design matrix");
- }
-
- Double[][] x = new Double[nrow][this.nvar];
- Double[][] y = new Double[nrow][ncoly];
- //Double[] wts = new Double[nrow];
-
- for (int i = 0; i < nrow; i++) {
- for (int j = 0; j < ncoly; j++) {
- y[i][j] = new Double(yy[i][j]);
- }
- }
- for (int i = 0; i < nrow; i++) {
- for (int j = 0; j < this.nvar; j++)
- x[i][j] = new Double(xx[i][j]);
- }
- params.put("X", x);
- params.put("Y", y);
- }
-
- protected void finalize() {
- revaluator.voidEval("rm("+this.getModelName()+",pos=1)");
- }
-
-
-
- /**
- * Fits a PLS model.
- *
- * This method calls the R function to fit a PLS model
- * using the specified dependent and independent variables. If an error
- * occurs in the R session, an exception is thrown.
- */
- public void build() throws QSARModelException {
- // lets do some checks in case stuff was set via setParameters()
- Double[][] x,y;
- x = (Double[][])this.params.get("X");
- y = (Double[][])this.params.get("Y");
- if (this.nvar == 0) this.nvar = x[0].length;
- else {
- if (y.length != x.length) {
- throw new QSARModelException("Number of observations does no match number of rows in the design matrix");
- }
- }
-
- // lets build the model
- try {
- this.modelfit = (PLSRegressionModelFit)revaluator.call("buildPLS",
- new Object[]{ getModelName(), this.params });
- } catch (Exception re) {
- throw new QSARModelException(re.toString());
- }
- }
- /**
- * Uses a fitted model to predict the response for new observations.
- *
- * This function uses a previously fitted model to obtain predicted values
- * for a new set of observations. If the model has not been fitted prior to this
- * call an exception will be thrown. Use setParameters
- * to set the values of the independent variable for the new observations.
- */
- public void predict() throws QSARModelException {
- if (this.modelfit == null)
- throw new QSARModelException("Before calling predict() you must fit the model using build()");
-
- Double[][] newx = (Double[][])this.params.get(new String("newX"));
- if (newx[0].length != this.nvar) {
- throw new QSARModelException("Number of independent variables used for prediction must match those used for fitting");
- }
-
- try {
- this.modelpredict = (PLSRegressionModelPredict)revaluator.call("predictPLS",
- new Object[]{ getModelName(), this.params });
- } catch (Exception re) {
- throw new QSARModelException(re.toString());
- }
- }
-
- /**
- * Loads a PLSRegressionModel object from disk in to the current session.
- *
- * @param fileName The disk file containing the model
- * @throws QSARModelException if the model being loaded is not a PLS regression model
- * object
- */
- public void loadModel(String fileName) throws QSARModelException {
- // should probably check that the filename does exist
- Object model = (Object)revaluator.call("loadModel", new Object[]{ (Object)fileName });
- String modelName = (String)revaluator.call("loadModel.getName", new Object[] { (Object)fileName });
-
- if (model.getClass().getName().equals("org.openscience.cdk.qsar.model.R.PLSRegressionModelFit")) {
- this.modelfit = (PLSRegressionModelFit)model;
- this.setModelName(modelName);
- } else throw new QSARModelException("The loaded model was not a PLSRegressionModel");
- }
- /**
- * Loads an PLSRegressionModel object from a serialized string into the current session.
- *
- * @param serializedModel A String containing the serialized version of the model
- * @param modelName A String indicating the name of the model in the R session
- * @throws QSARModelException if the model being loaded is not a PLS regression model
- * object
- */
- public void loadModel(String serializedModel, String modelName) throws QSARModelException {
- // should probably check that the fileName does exist
- Object model = (Object)revaluator.call("unserializeModel", new Object[]{ (Object)serializedModel, (Object)modelName });
- String modelname = modelName;
-
- if (model.getClass().getName().equals("org.openscience.cdk.qsar.model.R.PLSRegressionModelFit")) {
- this.modelfit =(PLSRegressionModelFit)model;
- this.setModelName(modelname);
- } else throw new QSARModelException("The loaded model was not a PLSRegressionModel");
- }
-
-
-
- /**
- * Sets parameters required for building a PLS model or using one for prediction.
- *
- * This function allows the caller to set the various parameters available
- * for the pls() and predict.mvr() R routines. See the R help pages for the details of the available
- * parameters.
- *
- * @param key A String containing the name of the parameter as described in the
- * R help pages
- * @param obj An Object containing the value of the parameter
- * @throws QSARModelException if the type of the supplied value does not match the
- * expected type
- */
- public void setParameters(String key, Object obj) throws QSARModelException {
- // since we know the possible values of key we should check the coresponding
- // objects and throw errors if required. Note that this checking can't really check
- // for values (such as number of variables in the X matrix to build the model and the
- // X matrix to make new predictions) - these should be checked in functions that will
- // use these parameters. The main checking done here is for the class of obj and
- // some cases where the value of obj is not dependent on what is set before it
-
- if (key.equals("Y")) {
- if (!(obj instanceof Double[])) {
- throw new QSARModelException("The class of the 'Y' object must be Double[][]");
- }
- }
- if (key.equals("X")) {
- if (!(obj instanceof Double[][])) {
- throw new QSARModelException("The class of the 'X' object must be Double[][]");
- }
- }
- if (key.equals("method")) {
- if (!(obj instanceof String)) {
- throw new QSARModelException("The class of the 'method' object must be String");
- }
- if (!(obj.equals("SIMPLS") || obj.equals("kernelPLS"))) {
- throw new QSARModelException("The value of method must be: SIMPLS or kernelPLS ");
- }
- }
- if (key.equals("validation")) {
- if (!(obj instanceof String)) {
- throw new QSARModelException("The class of the 'validation' object must be String");
- }
- if (!(obj.equals("none") || obj.equals("CV"))) {
- throw new QSARModelException("The value of validation must be: none or CV");
- }
- }
-
- if (key.equals("newX")) {
- if ( !(obj instanceof Double[][])) {
- throw new QSARModelException("The class of the 'newX' object must be Double[][]");
- }
- }
- if (key.equals("grpsize")) {
- if (!(obj instanceof Integer)) {
- throw new QSARModelException("The class of the 'grpsize' object must be Integer");
- }
- }
- if (key.equals("niter")) {
- if (!(obj instanceof Integer)) {
- throw new QSARModelException("The class of the 'niter' object must be Integer");
- }
- }
- if (key.equals("nlv")) {
- if (!(obj instanceof Integer)) {
- throw new QSARModelException("The class of the 'nlv' object must be Integer");
- }
- }
-
- if (key.equals("ncomp")) {
- if (!(obj instanceof Integer[])) {
- throw new QSARModelException("The class of the 'ncomp' object must be Integer[]");
- }
- Integer[] tmp = (Integer[])obj;
- if (tmp.length != 1 && tmp.length != 2) {
- throw new QSARModelException("The 'ncomp' array can have a length of 1 or 2. See documentation");
- }
- }
-
- this.params.put(key,obj);
- }
-
-
-
- /* interface to fit object */
-
- /**
- * The method used to build the PLS model.
- *
- * @return String containing 'SIMPLS' or 'kernelPLS'
- */
- public String getFitMethod() {
- return(this.modelfit.getMethod());
- }
-
-
- /**
- * Returns the fit NComp value.
- *
- * @return An array of integers indicating the number of components
- * (latent variables)
- */
- public int[] getFitNComp() {
- return(this.modelfit.getNComp());
- }
-
- /**
- * Gets the coefficents.
- *
- * The return value is a 3D array. The first dimension corresponds
- * to the specific number of LV's (1 or 2 or 3 and so on). The second
- * dimension corresponds to the independent variables and the third
- * dimension corresponds to the Y variables.
- *
- * @return double[][][] containing the coefficients
- */
- public double[][][] getFitB() {
- return(this.modelfit.getB());
- }
-
- /**
- * Get the Root Mean Square (RMS) error for the fit.
- *
- * @return A 2-dimensional array of RMS errors.
- */
- public double[][] getFitRMS() {
- return(this.modelfit.getTrainingRMS());
- }
-
- /**
- * Get the predicted Y's.
- *
- * Each set of latent variables is used to make predictions for all the
- * Y variables.
- *
- * @return A 3-dimensional array of doubles. The first dimension corresponds
- * to the set of latent variables and the remaining two correspond to the
- * Y's themselves.
- */
- public double[][][] getFitYPred() {
- return(this.modelfit.getTrainingYPred());
- }
-
- /**
- * Get the X loadings.
- *
- * @return A 2-dimensional array of doubles containing the X loadings
- */
- public double[][] getFitXLoading() {
- return(this.modelfit.getXLoading());
- }
- /**
- * Get the Y loadings.
- *
- * @return A 2-dimensional array of doubles containing the Y loadings
- */
- public double[][] getFitYLoading() {
- return(this.modelfit.getYLoading());
- }
- /**
- * Get the X scores.
- *
- * @return A 2-dimensional array of doubles containing the X scores
- */
- public double[][] getFitXScores() {
- return(this.modelfit.getXScores());
- }
- /**
- * Get the Y scores.
- *
- * @return A 2-dimensional array of doubles containing the Y scores
- */
- public double[][] getFitYScores() {
- return(this.modelfit.getYScores());
- }
- /**
- * Indicates whether CV was used to build the model.
- *
- * @return A boolean indicating whether CV was used
- */
- public boolean getFitWasValidated() {
- return(this.modelfit.wasValidated());
- }
-
-
- /**
- * The number of iterations used during CV.
- *
- * @return An int value indicating the number of iterations in CV
- */
- public int getValidationIter() {
- return(this.modelfit.getValidationIter());
- }
- /**
- * The number of latent variables suggested by CV.
- *
- * @return An int value indicating the number of LV's
- */
- public int getValidationLV() {
- return(this.modelfit.getValidationLV());
- }
-
- /**
- * Get the R^2 value for validation.
- *
- * @return A 2-dimensional array of doubles
- */
- public double[][] getValidationR2() {
- return(this.modelfit.getValidationR2());
- }
- /**
- * Get the RMS value for validation.
- *
- * @return A 2-dimensional array of doubles
- */
- public double[][] getValidationRMS() {
- return(this.modelfit.getValidationRMS());
- }
- /**
- * Get the standard deviation of the RMS errrors for validation.
- *
- * @return A 2-dimensional array of doubles
- */
- public double[][] getValidationRMSsd() {
- return(this.modelfit.getValidationRMSSD());
- }
- /**
- * Get the predicted Y values from validation.
- *
- * @return A 2-dimensional array of doubles
- */
- public double[][][] getValidationYPred() {
- return(this.modelfit.getValidationYPred());
- }
-
-
-
-
- /* interface to predict object */
-
- /**
- * Returns the predicted values for the prediction set.
- *
- * This function only returns meaningful results if the predict
- * method of this class has been called.
- *
- * @return A double[][] containing the predicted values
- */
- public double[][] getPredictPredicted() {
- return(this.modelpredict.getPredictions());
- }
-}
diff --git a/src/main/org/openscience/cdk/qsar/model/R/PLSRegressionModelFit.java b/src/main/org/openscience/cdk/qsar/model/R/PLSRegressionModelFit.java
deleted file mode 100644
index cff1dab..0000000
--- a/src/main/org/openscience/cdk/qsar/model/R/PLSRegressionModelFit.java
+++ /dev/null
@@ -1,211 +0,0 @@
-/* $RCSfile$
- * $Author$
- * $Date$
- * $Revision$
- *
- * Copyright (C) 2004-2008 Rajarshi Guha
- *
- * Contact: cdk-devel@lists.sourceforge.net
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-package org.openscience.cdk.qsar.model.R;
-
-/**
- * A class that wraps the return value from R function, pls.
- *
- * This is an internal class used by R to return the result of
- * the call to
- * pls.
- * As a result it should not be instantiated by the user. The actual modeling
- * class, PLSRegressionModel, provides acess to the various
- * fields of this object.
- *
- *
- * @author Rajarshi Guha
- * @cdk.require r-project
- * @cdk.module qsar
- * @cdk.githash
- * @deprecated
- */
-
-class V2M {
- static double[][] VectorToMatrix(double[] v, int nrow, int ncol) {
- double[][] m = new double[nrow][ncol];
- for (int i = 0; i < ncol; i++) {
- for (int j = 0; j < nrow; j++) {
- m[j][i] = v[j + i*nrow];
- }
- }
- return(m);
- }
- static double[][][] VectorToCube(double[] v, int d1, int d2, int d3) {
- // d2 ~ nrow, d3 ~ ncol
- double[][][] m = new double[d1][d2][d3];
- for (int k = 0; k < d1; k++) {
- for (int i = 0; i < d3; i++) {
- for (int j = 0; j < d2; j++) {
- m[k][j][i] = v[j + i*d2 + k*d2*d3];
- }
- }
- }
- return(m);
- }
-}
-/*
- ncase tells us how many latent variable cases are being considered.
- So if ncase == 1, the model only considered 1 LV.
- if ncase == 2, the model considered the cases of 1 LV and 2 LV
-
- ncomp will contain the number of latent variables for each case. So
- if pls() was called with 2:3 there are 2 cases, the first case considered
- 2 LV's, the second case considered 3 LV's.
-
- But note that xscores, yscores, xload and yload will have the number
- of columns equal to the max value of ncomp. So even if ncomp contains 2:3
- these arrays will have 3 columns (for the three cases - 1LV, 2LV & 3LV)
-
- npvar is the number of Y variables
-
- rms - ncase x npvar
- yscores - nobs x max(ncomp)
- xscores - nobs x max(nncomp)
- yload - npvar x max(ncomp)
- xload - nvar x max(ncomp)
- ypred - ncase x nobs x npvar
- B - ncase x nvar x npvar
-*/
-class PLSTraining {
- double[][] rms = null;
- double[][] xscores = null;
- double[][] xload = null;
- double[][] yscores = null;
- double[][] yload = null;
- double[][][] B = null;
- double[][][] ypred = null;
-
- PLSTraining(int[] ncomp, double[] B, double[] ypred,
- double[] rms,
- double[] xscores, double[] xload,
- double[] yscores, double[] yload) {
-
- int ncase = ncomp.length;
- int nobs = xscores.length / ncase;
- int nvar = xload.length / ncase;
- int npvar = yload.length / ncase;
-
- int maxncomp = -999999;
- for (int i = 0; i < ncomp.length; i++) {
- if (ncomp[i] > maxncomp) maxncomp = ncomp[i];
- }
-
- this.rms = V2M.VectorToMatrix(rms, ncase, npvar);
- this.xscores = V2M.VectorToMatrix(xscores, nobs, maxncomp);
- this.yscores = V2M.VectorToMatrix(yscores, nobs, maxncomp);
- this.yload = V2M.VectorToMatrix(yload, npvar, maxncomp);
- this.xload = V2M.VectorToMatrix(xload, nvar, maxncomp);
- this.ypred = V2M.VectorToCube(ypred, ncase, nobs, npvar);
- this.B = V2M.VectorToCube(B, ncase, nvar, npvar);
- }
-}
-
-/*
- * npvar is the number of Y variables
- * rms - ncase x npvar
- * rmssd - ncase x npvar
- * r2 - ncase x npvar
- * ypred - ncase x nobs x npvar
- */
-class PLSValidation {
- double[][][] ypred = null;
- int niter, nlv;
- double[][] rms = null;
- double[][] rmssd = null;
- double[][] r2 = null;
-
- PLSValidation(int[] ncomp, int nobs, int niter, int nlv,
- double[] ypred, double[] rms, double[] rmssd, double[] r2) {
-
- int ncase = ncomp.length;
- int npvar = rms.length / ncase;
-
- this.niter = niter;
- this.nlv = nlv;
- this.rms = V2M.VectorToMatrix(rms, ncase, npvar);
- this.rmssd = V2M.VectorToMatrix(rmssd, ncase, npvar);
- this.r2 = V2M.VectorToMatrix(r2, ncase, npvar);;
- this.ypred = V2M.VectorToCube(ypred, ncase, nobs, npvar);
- }
-}
-
-public class PLSRegressionModelFit {
- int nobs, nvar, npvar, ncase;
- int[] ncomp = null;
- String method;
- PLSTraining train = null;
- PLSValidation valid = null;
-
- public PLSRegressionModelFit(int nobs, int nvar, int npred,
- int[] ncomp, String method) {
-
- this.nobs = nobs;
- this.nvar = nvar;
- this.npvar = npred;
- this.ncase = ncomp.length;
- this.method = method;
-
- this.ncomp = new int[this.ncase];
- for (int i = 0; i < this.ncase; i++) this.ncomp[i] = ncomp[i];
- }
-
- public void setTrainingData(double[] B, double[] ypred, double[] rms,
- double[] xscores, double[] yscores,
- double[] xload, double[] yload) {
- this.train = new PLSTraining(this.ncomp, B, ypred, rms, xscores, yscores, xload, yload);
- }
- public void setValidationData(int niter, int nlv,
- double[] ypred, double[] rms, double[] rmssd, double[] r2) {
- this.valid = new PLSValidation(this.ncomp, this.nobs, niter, nlv, ypred, rms, rmssd, r2);
- }
-
-
- public boolean wasValidated() {
- if (this.valid != null) return(true);
- else return(false);
- }
-
- public int[] getNComp() { return this.ncomp; }
- public String getMethod() { return this.method; }
-
- public double[][][] getB() { return this.train.B; }
- public double[][][] getTrainingYPred() { return this.train.ypred; }
- public double[][] getTrainingRMS() { return this.train.rms; }
- public double[][] getXScores() { return this.train.xscores; }
- public double[][] getYScores() { return this.train.yscores; }
- public double[][] getXLoading() { return this.train.xload; }
- public double[][] getYLoading() { return this.train.yload; }
-
- public double[][][] getValidationYPred() { return this.valid.ypred; }
- public double[][] getValidationRMS() { return this.valid.rms; }
- public double[][] getValidationRMSSD() { return this.valid.rmssd; }
- public double[][] getValidationR2() { return this.valid.r2; }
- public int getValidationLV() { return this.valid.nlv; }
- public int getValidationIter() { return this.valid.niter; }
-
-}
-
-
-
diff --git a/src/main/org/openscience/cdk/qsar/model/R/PLSRegressionModelPredict.java b/src/main/org/openscience/cdk/qsar/model/R/PLSRegressionModelPredict.java
deleted file mode 100644
index fbdcef1..0000000
--- a/src/main/org/openscience/cdk/qsar/model/R/PLSRegressionModelPredict.java
+++ /dev/null
@@ -1,83 +0,0 @@
-/* $RCSfile$
- * $Author$
- * $Date$
- * $Revision$
- *
- * Copyright (C) 2004-2008 Rajarshi Guha
- *
- * Contact: cdk-devel@lists.sourceforge.net
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-package org.openscience.cdk.qsar.model.R;
-
-/**
- * A class that wraps the return value from the R function, predict.mvr.
- *
- * This is an internal class used by R to return the result of
- * the call to predict.mvr.
- * As a result it should not be instantiated by the user. The actual modeling
- * class, PLSRegressionModel, provides acess to the various
- * fields of this object.
- *
- *
- * @author Rajarshi Guha
- * @cdk.require r-project
- * @cdk.module qsar
- * @cdk.githash
- * @deprecated
- */
-public class PLSRegressionModelPredict {
- double[][] preds = null;
-
- private double[][] VectorToMatrix(double[] v, int nrow, int ncol) {
- double[][] m = new double[nrow][ncol];
- for (int i = 0; i < ncol; i++) {
- for (int j = 0; j < nrow; j++) {
- m[j][i] = v[j + i*nrow];
- }
- }
- return(m);
- }
-
- /**
- * Constructor to contain the results of a PLS prediction.
- *
- * This class should not be instantiated directly and is really
- * only meant to be instantiated from an R session
- *
- * @param ncol The number of predicted variables
- * @param preds A 1-dimensional array of predicted values
- */
- public PLSRegressionModelPredict(int ncol, double[] preds) {
- this.preds = VectorToMatrix(preds, preds.length/ncol, ncol);
- }
-
- /**
- * Get the predicted values.
- *
- * This method returns the predicted values obtained by using new data
- * with a previously built PLS regression model
- *
- * @return A 2-dimensional array of predictions, columns correspond to the
- * predicted variables
- */
- public double[][] getPredictions() {
- return(this.preds);
- }
-}
-
-
diff --git a/src/main/org/openscience/cdk/qsar/model/R/RModel.java b/src/main/org/openscience/cdk/qsar/model/R/RModel.java
deleted file mode 100644
index b057296..0000000
--- a/src/main/org/openscience/cdk/qsar/model/R/RModel.java
+++ /dev/null
@@ -1,345 +0,0 @@
-/* $RCSfile$
- * $Author$
- * $Date$
- * $Revision$
- *
- * Copyright (C) 2004-2008 Rajarshi Guha
- *
- * Contact: cdk-devel@lists.sourceforge.net
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-package org.openscience.cdk.qsar.model.R;
-
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileWriter;
-import java.io.InputStreamReader;
-import java.io.StringWriter;
-
-import org.omegahat.R.Java.REvaluator;
-import org.omegahat.R.Java.ROmegahatInterpreter;
-import org.openscience.cdk.qsar.model.IModel;
-import org.openscience.cdk.qsar.model.QSARModelException;
-import org.openscience.cdk.tools.ILoggingTool;
-import org.openscience.cdk.tools.LoggingToolFactory;
-
-/** Base class for modeling classes that use R as the backend.
- *
- * This cannot be directly instantiated as its sole function is
- * to initialize the SJava system and source R matcher/converter
- * functions into the loaded R session. The class variable revaluator
- * can be accessed from subclasses to make calls to the R session.
- *
- * Any class that builds models using R should be a subclass of this.
- *
- * An important feature to note when using the R backend is that the SJava
- * initialization must be done only once in a Java thread. As a result
- * when any model class based on RModel is instantiated the constructor for the
- * super class (i.e., Rmodel) makes sure that SJava is not already initialized.
- *
- * By default the intialization uses a temporary file which is sourced in the
- * R session. In some cases, such as web applications, temporary files might be
- * problematic. In this case the R backend can be initialized via strings. To
- * do this the application should specify -DinitRFromString=true on the command
- * line. Note that this approach will be slightly slower compared to initializsation
- * via a temporary file.
- *
- * NOTE: For the R backend to work, ensure that R is correctly installed
- * and that SJava is also installed, using the -c option. Finally, ensure
- * that the R_HOME environment variable points to the R installation.
- *
- *
- * @author Rajarshi Guha
- * @cdk.require r-project
- * @cdk.module qsar
- * @cdk.githash
- * @deprecated
- */
-public abstract class RModel implements IModel {
-
- private String modelName = null;
-
- /**
- * The object that performs the calls to the R engine.
- */
- public static REvaluator revaluator = null;
- /**
- * This object represents an instance of the R interpreter.
- *
- * Due to the design of R, only one interpreter can be instantiated in a given
- * thread. That is, the underlying R engine is not thread safe. As a result
- * care must be taken to have only one instance of the interpreter.
- */
- public static ROmegahatInterpreter interp = null;
-
- /**
- * A boolean that indicates whether the R/Java subsystem has been initialized or not.
- */
- private static boolean doneInit = false;
- private static ILoggingTool logger =
- LoggingToolFactory.createLoggingTool(RModel.class);
-
- private void loadRFunctions(REvaluator evaluator) {
- String scriptLocator = "org/openscience/cdk/qsar/model/data/cdkSJava.R";
- try {
- File scriptFile = File.createTempFile("XXXXX",".R");
- scriptFile.deleteOnExit();
-
- InputStreamReader reader = new InputStreamReader(
- this.getClass().getClassLoader().getResourceAsStream(scriptLocator));
- BufferedReader inFile = new BufferedReader(reader);
-
- FileWriter outFile = new FileWriter(scriptFile);
- BufferedWriter outBuffer = new BufferedWriter(outFile);
- String inputLine;
- while ( (inputLine = inFile.readLine()) != null) {
- outBuffer.write(inputLine,0,inputLine.length());
- outBuffer.newLine();
- }
- outBuffer.close();
- inFile.close();
- outFile.close();
-
- evaluator.voidEval("source(\""+scriptFile.getAbsolutePath()+"\")");
-
- } catch (Exception exception) {
- logger.error("Could not load CDK-SJava R script: ", scriptLocator);
- logger.debug(exception);
- }
- }
-
- private void loadRFunctionsAsStrings(REvaluator evaluator) {
- String[] scripts = {
- "init_1.R",
- "lm_2.R",
- "cnn_3.R", "cnn_4.R",
- "pls_5.R",
- "register_999.R"
- };
- String scriptPrefix = "org/openscience/cdk/qsar/model/data/";
- for (int i = 0; i < scripts.length; i++) {
-
- String scriptLocator = scriptPrefix + scripts[i];
- try {
- InputStreamReader reader = new InputStreamReader(
- this.getClass().getClassLoader().getResourceAsStream(scriptLocator));
- BufferedReader inFile = new BufferedReader(reader);
-
- StringWriter sw = new StringWriter();
- String inputLine;
- while ( (inputLine = inFile.readLine()) != null) {
- sw.write(inputLine);
- sw.write("\n");
- }
- sw.close();
-
- evaluator.voidEval("eval(parse(text=\""+sw.toString()+"\"))");
-
- } catch (Exception exception) {
- logger.error("Could not load CDK-SJava R scripts: ", scriptLocator);
- logger.debug(exception);
- }
-
- }
- }
-
-
- /**
- * Initializes SJava and R with the specified command line arguments (see R documentation).
- *
- * This constructor will initialize the R session via a temporary file
- *
- * @param args A String[] containing the command line parameters as elements
- */
- public RModel(String[] args) {
- String initRFromString = System.getProperty("initRFromString");
- boolean useDisk = true;
- if (initRFromString != null && initRFromString.equals("true")) {
- useDisk = false;
- }
-
- if (!doneInit) {
- RModel.interp = new ROmegahatInterpreter(ROmegahatInterpreter.fixArgs(args), false);
- RModel.revaluator = new REvaluator();
-
- if (useDisk) {
- loadRFunctions(RModel.revaluator);
- logger.info("Initializing from disk");
- } else {
- loadRFunctionsAsStrings(RModel.revaluator);
- logger.info("Initializing from strings");
- }
-
- doneInit = true;
- logger.info("SJava initialized");
- } else {
- logger.info("SJava already initialized");
- }
- }
-
- /**
- * Initializes SJava with the --vanilla, -q, --slave flags.
- *
- * This constructor will initialize the R session via a temporary file
- */
- public RModel() {
- String[] args = {"--vanilla","-q", "--slave"};
- String initRFromString = System.getProperty("initRFromString");
- boolean useDisk = true;
- if (initRFromString != null && initRFromString.equals("true")) {
- useDisk = false;
- }
-
- if (!doneInit) {
- RModel.interp = new ROmegahatInterpreter(ROmegahatInterpreter.fixArgs(args), false);
- RModel.revaluator = new REvaluator();
-
- if (useDisk) {
- loadRFunctions(RModel.revaluator);
- logger.info("Initializing from disk");
- } else {
- loadRFunctionsAsStrings(RModel.revaluator);
- logger.info("Initializing from strings");
- }
-
- doneInit = true;
- logger.info("SJava initialized");
- } else {
- logger.info("SJava already initialized");
- }
- }
-
-
- /**
- * Saves a R model to disk.
- *
- * This function can be used to save models built in a session, and then loaded
- * again in a different session.
- *
- * @param modelname The name of the model as returned by \code{getModelName}.
- * @param filename The file to which the model should be saved
- * @throws QSARModelException if the R session cannot save the model
- * @see #loadModel
- */
- public static void saveModel(String modelname, String filename) throws QSARModelException {
- if (filename.equals("") || filename == null) {
- filename = modelname+".rda";
- }
- //Boolean result = null;
- try {
- revaluator.call("saveModel",
- new Object[] { (Object)modelname, (Object)filename });
- } catch (Exception e) {
- System.out.println("Caught the exception");
- throw new QSARModelException("Error saving model");
- }
- }
-
-
- /**
- * Get the name of the model.
- *
- * This function returns the name of the variable that the actual
- * model is stored in within the R session. In general this is
- * not used for the end user. In the future this might be changed
- * to a private method.
- *
- * @return A String containing the name of the R variable
- * @see #setModelName
- */
- public String getModelName() {
- return(this.modelName);
- }
-
- /**
- * Set the name of the model.
- *
- * Ordinarily the user does not need to call this function as each model
- * is assigned a unique ID at instantiation. However, if a user saves a model
- * to disk and then later loads it, the loaded
- * model may overwrite a model in that session. In this situation, this method
- * can be used to assign a name to the model.
- *
- * @param newName The name of the model
- * @see #getModelName
- * @see #saveModel
- * @see #loadModel
- *
- */
- public void setModelName(String newName) {
- if (this.modelName != null && this.modelName.equals(newName)) return;
- String oldName = this.modelName;
- if (oldName != null) {
- revaluator.voidEval("if ('"+oldName+"' %in% ls()) {"+newName+"<-"+oldName+";rm("+oldName+")}");
- }
- this.modelName = newName;
- }
-
- abstract public void build() throws QSARModelException;
- abstract public void predict() throws QSARModelException;
-
- /**
- * Specifies the parameters value.
- *
- * @param key A String representing the name of the parameter (corresponding to the
- * name described in the R manpages)
- * @param obj The value of the parameter
- * @throws QSARModelException if the parameters are of the wrong type for the given modeling function
- *
- */
- abstract public void setParameters(String key, Object obj) throws QSARModelException;
-
- /**
- * Abstract method to handle loading R models.
- *
- * This method can be used to load a previously saved R model object. Since
- * the user can save any arbitrary R object, checks must be made that the
- * object being returned is an instance of one of the current modeling classes.
- *
- * This is best achieved by forcing each modeling class to write its own loader.
- *
- * @param fileName The file containing the R object to load
- * @throws QSARModelException if the R session could not load the object or if the loaded model
- * does not correspond to the class that it was loaded from
- * @see #saveModel
- */
- abstract public void loadModel(String fileName) throws QSARModelException;
- /**
- * Abstract method to handle loading R models that were previously serialized.
- *
- * This method can be used to load a previously serialized R model object (usinging
- * serialize()). Since
- * the user can save any arbitrary R object, checks must be made that the
- * object being returned is an instance of one of the current modeling classes.
- * This is best achieved by forcing each modeling class to write its own loader.
- *
- * In addition
- * objects saved using serialize() do not have a name. As a result a name for the object must
- * be specified when using this method.
- *
- * @param serializedModel A String containing the ASCII sreialized R object
- * @param modelName The name of the model. (Within the R session, the model will be assigned to
- * a variable of this name)
- * @throws QSARModelException if the R session could not load the object or if the loaded model
- * does not correspond to the class that it was loaded from
- * @see #saveModel
- */
- abstract public void loadModel(String serializedModel, String modelName) throws QSARModelException;
-}
-
-
diff --git a/src/main/org/openscience/cdk/qsar/model/R2/CNNRegressionModel.java b/src/main/org/openscience/cdk/qsar/model/R2/CNNRegressionModel.java
deleted file mode 100644
index 9837085..0000000
--- a/src/main/org/openscience/cdk/qsar/model/R2/CNNRegressionModel.java
+++ /dev/null
@@ -1,673 +0,0 @@
-/* $RCSfile$
- * $Author$
- * $Date$
- * $Revision$
- *
- * Copyright (C) 2004-2008 Rajarshi Guha
- *
- * Contact: cdk-devel@lists.sourceforge.net
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-package org.openscience.cdk.qsar.model.R2;
-
-import java.io.File;
-import java.util.HashMap;
-
-import org.openscience.cdk.qsar.model.QSARModelException;
-import org.openscience.cdk.tools.ILoggingTool;
-import org.openscience.cdk.tools.LoggingToolFactory;
-import org.rosuda.JRI.RBool;
-import org.rosuda.JRI.REXP;
-import org.rosuda.JRI.RList;
-
-/**
- * A modeling class that provides a computational neural network regression model.
- *
- * When instantiated this class ensures that the R/Java interface has been
- * initialized. The response and independent variables can be specified at construction
- * time or via the setParameters method.
- * The actual fitting procedure is carried out by build after which
- * the model may be used to make predictions, via predict. An example of the use
- * of this class is shown below:
- *
- * double[][] x;
- * double[] y;
- * Double[] wts;
- * Double[][] newx;
- * ...
- * try {
- * CNNRegressionModel cnnrm = new CNNRegressionModel(x,y,3);
- * cnnrm.setParameters("Wts",wts);
- * cnnrm.build();
- *
- * double fitValue = cnnrm.getFitValue();
- *
- * cnnrm.setParameters("newdata", newx);
- * cnnrm.setParameters("type", "raw");
- * cnnrm.predict();
- *
- * double[][] preds = cnnrm.getPredictPredicted();
- * } catch (QSARModelException qme) {
- * System.out.println(qme.toString());
- * }
- *
- * The above code snippet builds a 3-3-1 CNN model.
- * Multiple output neurons are easily
- * specified by supplying a matrix for y (i.e., double[][]) with the output variables
- * in the columns.
- *
- * Nearly all the arguments to
- * nnet() are
- * supported via the setParameters method. The table below lists the names of the arguments,
- * the expected type of the argument and the default setting for the arguments supported by this wrapper class.
- *
- *
- *
- *
- * | Name | Java Type | Default | Notes |
- *
- *
- *
- * | x | Double[][] | None | This must be set by the caller via the constructors or via setParameters |
- * | y | Double[][] | None | This must be set by the caller via the constructors or via setParameters |
- * | weights | Double[] | rep(1,nobs) | The default case weights is a vector of 1's equal in length to the number of observations, nobs |
- * | size | Integer | None | This must be set by the caller via the constructors or via setParameters |
- * | subset | Integer[] | 1:nobs | This is supposed to be an index vector specifying which observations are to be used in building the model. The default indicates that all should be used |
- * | Wts | Double[] | runif(1,nwt) | The initial weight vector is set to a random vector of length equal to the number of weights if not set by the user |
- * | mask | Boolean[] | rep(TRUE,nwt) | All weights are to be optimized unless otherwise specified by the user |
- * | linout | Boolean | TRUE | Since this class performs regression this need not be changed |
- * | entropy | Boolean | FALSE | |
- * | softmax | Boolean | FALSE | |
- * | censored | Boolean | FALSE | |
- * | skip | Boolean | FALSE | |
- * | rang | Double | 0.7 | |
- * | decay | Double | 0.0 | |
- * | maxit | Integer | 100 | |
- * | Hess | Boolean | FALSE | |
- * | trace | Boolean | TRUE | |
- * | MaxNWts | Integer | 1000 | |
- * | abstol | Double | 1.0e-4 | |
- * | reltol | Double | 1.0e-8 | |
- *
- *
- *
- *
- * The values returned correspond to the various
- * values returned by the nnet and
- * predict.nnet functions
- * in R
- *
- * See {@link org.openscience.cdk.qsar.model.R.RModel} for details regarding the R and Java environment.
- *
- * @author Rajarshi Guha
- * @cdk.require r-project
- * @cdk.module qsar
- * @cdk.githash
- * @cdk.keyword neural network
- * @cdk.keyword R
- */
-
-public class CNNRegressionModel extends RModel {
- public static int globalID = 0;
- private int noutput = 0;
- private int nvar = 0;
-
- private double[][] modelPredict = null;
-
- private static ILoggingTool logger =
- LoggingToolFactory.createLoggingTool(CNNRegressionModel.class);
-
- private void setDefaults() {
- // lets set the default values of the arguments that are specified
- // to have default values in ?nnet
-
- // these params are vectors that depend on user defined stuff
- // so as a default we set them to FALSE so R can check if these
- // were not set
- this.params.put("subset", Boolean.FALSE);
- this.params.put("mask", Boolean.FALSE);
- this.params.put("Wts", Boolean.FALSE);
- this.params.put("weights", Boolean.FALSE);
-
- this.params.put("linout", Boolean.TRUE); // we want only regression
- this.params.put("entropy", Boolean.FALSE);
- this.params.put("softmax", Boolean.FALSE);
- this.params.put("censored", Boolean.FALSE);
- this.params.put("skip", Boolean.FALSE);
- this.params.put("rang", new Double(0.7));
- this.params.put("decay", new Double(0.0));
- this.params.put("maxit", Integer.valueOf(100));
- this.params.put("Hess", Boolean.FALSE);
- this.params.put("trace", Boolean.FALSE); // no need to see output
- this.params.put("MaxNWts", Integer.valueOf(1000));
- this.params.put("abstol", new Double(1.0e-4));
- this.params.put("reltol", new Double(1.0e-8));
- }
-
- /**
- * Constructs a CNNRegressionModel object.
- *
- * This constructor allows the user to simply set up an instance of a CNN
- * regression modeling class. This constructor simply sets the name for this
- * instance. It is expected all the relevent parameters for modeling will be
- * set at a later point.
- *
- * Other parameters that are required to be set should be done via
- * calls to setParameters. A number of parameters are set to the
- * defaults as specified in the manpage for
- * nnet.
- */
- public CNNRegressionModel() throws QSARModelException {
- super();
- params = new HashMap();
- int currentID = CNNRegressionModel.globalID;
- CNNRegressionModel.globalID++;
- setModelName("cdkCNNModel" + currentID);
- setDefaults();
-
-
- }
-
-
- /**
- * Constructs a CNNRegressionModel object.
- *
- * This constructor allows the user to specify the dependent and
- * independent variables along with the number of hidden layer neurons.
- * This constructor is suitable for cases when there is a single output
- * neuron. If the number of rows of the design matrix is not equal to
- * the number of observations in y an exception will be thrown.
- *
- * Other parameters that are required to be set should be done via
- * calls to setParameters. A number of parameters are set to the
- * defaults as specified in the manpage for
- * nnet.
- *
- * @param x An array of independent variables. Observations should be in
- * the rows and variables in the columns.
- * @param y An array (single column) of observed values
- * @param size The number of hidden layer neurons
- * @throws QSARModelException if the number of observations in x and y do not match
- */
- public CNNRegressionModel(double[][] x, double[] y, int size) throws QSARModelException {
- super();
- params = new HashMap();
- int currentID = CNNRegressionModel.globalID;
- CNNRegressionModel.globalID++;
- setModelName("cdkCNNModel" + currentID);
-
- int nrow = y.length;
- int ncol = x[0].length;
-
- if (nrow != x.length) {
- throw new QSARModelException("The number of values for the dependent variable does not match the number of rows of the design matrix");
- }
-
- nvar = ncol;
- noutput = 1;
-
- Double[][] xx = new Double[nrow][ncol];
- Double[][] yy = new Double[nrow][1];
-
- for (int i = 0; i < nrow; i++) {
- yy[i][0] = new Double(y[i]);
- for (int j = 0; j < ncol; j++) {
- xx[i][j] = new Double(x[i][j]);
- }
- }
- params.put("x", xx);
- params.put("y", yy);
- params.put("size", Integer.valueOf(size));
- setDefaults();
- }
-
- /**
- * Constructs a CNNRegressionModel object.
- *
- * This constructor allows the user to specify the dependent and
- * independent variables along with the number of hidden layer neurons.
- * This constructor is suitable for cases when there are multiple output
- * neuron. If the number of rows of the design matrix is not equal to
- * the number of observations in y an exception will be thrown.
- *
- * Other parameters that are required to be set should be done via
- * calls to setParameters. A number of parameters are set to the
- * defaults as specified in the manpage for
- * nnet.
- *
- * @param x An array of independent variables. Observations should be in
- * the rows and variables in the columns.
- * @param y An array (multiple columns) of observed values
- * @param size The number of hidden layer neurons
- * @throws QSARModelException if the number of observations in x and y do not match
- */
- public CNNRegressionModel(double[][] x, double[][] y, int size) throws QSARModelException {
- super();
- params = new HashMap();
- int currentID = CNNRegressionModel.globalID;
- CNNRegressionModel.globalID++;
- setModelName("cdkCNNModel" + currentID);
-
- int nrow = y.length;
- int ncol = x[0].length;
-
- if (nrow != x.length) {
- throw new QSARModelException("The number of values for the dependent variable does not match the number of rows of the design matrix");
- }
-
- nvar = ncol;
- noutput = y[0].length;
-
- Double[][] xx = new Double[nrow][ncol];
- Double[][] yy = new Double[nrow][noutput];
-
- for (int i = 0; i < nrow; i++) {
- for (int j = 0; j < ncol; j++) {
- xx[i][j] = new Double(x[i][j]);
- }
- }
- for (int i = 0; i < nrow; i++) {
- for (int j = 0; j < noutput; j++) {
- yy[i][j] = new Double(y[i][j]);
- }
- }
- params.put("x", xx);
- params.put("y", yy);
- params.put("size", Integer.valueOf(size));
- setDefaults();
- }
-
-
- /**
- * Sets parameters required for building a CNN model or using one for prediction.
- *
- * This function allows the caller to set the various parameters available
- * for the
- * nnet
- * and
- * predict.nnet
- * R routines. See the R help pages for the details of the available
- * parameters.
- *
- * @param key A String containing the name of the parameter as described in the
- * R help pages
- * @param obj An Object containing the value of the parameter
- * @throws QSARModelException if the type of the supplied value does not match the
- * expected type
- */
- public void setParameters(String key, Object obj) throws QSARModelException {
- // since we know the possible values of key we should check the coresponding
- // objects and throw errors if required. Note that this checking can't really check
- // for values (such as number of variables in the X matrix to build the model and the
- // X matrix to make new predictions) - these should be checked in functions that will
- // use these parameters. The main checking done here is for the class of obj and
- // some cases where the value of obj is not dependent on what is set before it
-
- if (key.equals("y")) {
- if (!(obj instanceof Double[][])) {
- throw new QSARModelException("The class of the 'y' object must be Double[][]");
- } else {
- noutput = ((Double[][]) obj)[0].length;
- }
- }
- if (key.equals("x")) {
- if (!(obj instanceof Double[][])) {
- throw new QSARModelException("The class of the 'x' object must be Double[][]");
- } else {
- nvar = ((Double[][]) obj)[0].length;
- }
- }
- if (key.equals("weights")) {
- if (!(obj instanceof Double[])) {
- throw new QSARModelException("The class of the 'weights' object must be Double[]");
- }
- }
- if (key.equals("size")) {
- if (!(obj instanceof Integer)) {
- throw new QSARModelException("The class of the 'size' object must be Integer");
- }
- }
- if (key.equals("subset")) {
- if (!(obj instanceof Integer[])) {
- throw new QSARModelException("The class of the 'size' object must be Integer[]");
- }
- }
- if (key.equals("Wts")) {
- if (!(obj instanceof Double[])) {
- throw new QSARModelException("The class of the 'Wts' object must be Double[]");
- }
- }
- if (key.equals("mask")) {
- if (!(obj instanceof Boolean[])) {
- throw new QSARModelException("The class of the 'mask' object must be Boolean[]");
- }
- }
- if (key.equals("linout") ||
- key.equals("entropy") ||
- key.equals("softmax") ||
- key.equals("censored") ||
- key.equals("skip") ||
- key.equals("Hess") ||
- key.equals("trace")) {
- if (!(obj instanceof Boolean)) {
- throw new QSARModelException("The class of the 'trace|skip|Hess|linout|entropy|softmax|censored' object must be Boolean");
- }
- }
- if (key.equals("rang") ||
- key.equals("decay") ||
- key.equals("abstol") ||
- key.equals("reltol")) {
- if (!(obj instanceof Double)) {
- throw new QSARModelException("The class of the 'reltol|abstol|decay|rang' object must be Double");
- }
- }
- if (key.equals("maxit") ||
- key.equals("MaxNWts")) {
- if (!(obj instanceof Integer)) {
- throw new QSARModelException("The class of the 'maxit|MaxNWts' object must be Integer");
- }
- }
-
- if (key.equals("newdata")) {
- if (!(obj instanceof Double[][])) {
- throw new QSARModelException("The class of the 'newdata' object must be Double[][]");
- }
- }
- params.put(key, obj);
- }
-
- /**
- * Fits a CNN regression model.
- *
- * This method calls the R function to fit a CNN regression model
- * to the specified dependent and independent variables. If an error
- * occurs in the R session, an exception is thrown.
- *
- * Note that, this method should be called prior to calling the various get
- * methods to obtain information regarding the fit.
- */
- public void build() throws QSARModelException {
- Double[][] x;
- Double[][] y;
- x = (Double[][]) this.params.get("x");
- y = (Double[][]) this.params.get("y");
- if (x.length != y.length)
- throw new QSARModelException("Number of observations does not match number of rows in the design matrix");
- if (nvar == 0) nvar = x[0].length;
-
- // lets build the model
- String paramVarName = loadParametersIntoRSession();
- String cmd = "buildCNN(\"" + getModelName() + "\", " + paramVarName + ")";
- REXP ret = rengine.eval(cmd);
- if (ret == null) {
- CNNRegressionModel.logger.debug("Error in buildCNN");
- throw new QSARModelException("Error in buildCNN");
- }
-
- // remove the parameter list
- rengine.eval("rm(" + paramVarName + ")");
-
- // save the model object on the Java side
- modelObject = ret.asList();
- }
-
- /**
- * Uses a fitted model to predict the response for new observations.
- *
- * This function uses a previously fitted model to obtain predicted values
- * for a new set of observations. If the model has not been fitted prior to this
- * call an exception will be thrown. Use setParameters
- * to set the values of the independent variable for the new observations and the
- * interval type.
- *
- * @throws org.openscience.cdk.qsar.model.QSARModelException
- * if the model has not been built prior to a call
- * to this method. Also if the number of independent variables specified for prediction
- * is not the same as specified during model building
- */
- public void predict() throws QSARModelException {
-
- if (modelObject == null)
- throw new QSARModelException("Before calling predict() you must fit the model using build()");
-
- Double[][] newx = (Double[][]) params.get("newdata");
- if (newx[0].length != nvar) {
- throw new QSARModelException("Number of independent variables used for prediction must match those used for fitting");
- }
-
- String pn = loadParametersIntoRSession();
- REXP ret = rengine.eval("predicCNN(\"" + getModelName() + "\", " + pn + ")");
- if (ret == null) throw new QSARModelException("Error occured in prediction");
-
- // remove the parameter list
- rengine.eval("rm(" + pn + ")");
-
- modelPredict = ret.asDoubleMatrix();
- }
-
- /**
- * Get the matrix of predicted values obtained from predict.nnet.
- *
- * @return The result of the prediction.
- */
- public double[][] getPredictions() {
- return modelPredict;
- }
-
- /**
- * Returns an RList object summarizing the nnet regression model.
- *
- * The return object can be queried via the RList methods to extract the
- * required components.
- *
- * @return A summary for the nnet regression model
- * @throws org.openscience.cdk.qsar.model.QSARModelException
- * if the model has not been built prior to a call
- * to this method
- */
- public RList summary() throws QSARModelException {
- if (modelObject == null)
- throw new QSARModelException("Before calling summary() you must fit the model using build()");
-
- REXP ret = rengine.eval("summary(" + getModelName() + ")");
- if (ret == null) {
- logger.debug("Error in summary()");
- throw new QSARModelException("Error in summary()");
- }
- return ret.asList();
- }
-
-
- /**
- * Loads a 'nnet' object from disk in to the current session.
- *
- * @param fileName The disk file containing the model
- * @throws org.openscience.cdk.qsar.model.QSARModelException
- * if the model being loaded is not a 'nnet' model
- * object or the file does not exist
- */
- public void loadModel(String fileName) throws QSARModelException {
- File f = new File(fileName);
- if (!f.exists()) throw new QSARModelException(fileName + " does not exist");
-
- rengine.assign("tmpFileName", fileName);
- REXP ret = rengine.eval("loadModel(tmpFileName)");
- if (ret == null) throw new QSARModelException("Model could not be loaded");
-
- String name = ret.asList().at("name").asString();
- if (!isOfClass(name, "nnet")) {
- removeObject(name);
- throw new QSARModelException("Loaded object was not of class \'nnet\'");
- }
-
- modelObject = ret.asList().at("model").asList();
- setModelName(name);
- nvar = (int) getN()[0];
- noutput = (int) getN()[2];
- }
-
- /**
- * Loads a 'nnet' object from a serialized string into the current session.
- *
- * @param serializedModel A String containing the serialized version of the model
- * @param modelName A String indicating the name of the model in the R session
- * @throws org.openscience.cdk.qsar.model.QSARModelException
- * if the model being loaded is not a 'nnet' model
- * object
- */
- public void loadModel(String serializedModel, String modelName) throws QSARModelException {
- rengine.assign("tmpSerializedModel", serializedModel);
- rengine.assign("tmpModelName", modelName);
- REXP ret = rengine.eval("unserializeModel(tmpSerializedModel, tmpModelName)");
-
- if (ret == null) throw new QSARModelException("Model could not be unserialized");
-
- String name = ret.asList().at("name").asString();
- if (!isOfClass(name, "nnet")) {
- removeObject(name);
- throw new QSARModelException("Loaded object was not of class \'nnet\'");
- }
-
- modelObject = ret.asList().at("model").asList();
- setModelName(name);
- nvar = (int) getN()[0];
- noutput = (int) getN()[2];
- }
-
-// Autogenerated code: assumes that 'modelObject' is
-// a RList object
-
-
- /**
- * Gets the censored field of an 'nnet' object.
- *
- * @return The value of the censored field
- */
- public RBool getCensored() {
- return modelObject.at("censored").asBool();
- }
-
- /**
- * Gets the conn field of an 'nnet' object.
- *
- * @return The value of the conn field
- */
- public double[] getConn() {
- return modelObject.at("conn").asDoubleArray();
- }
-
- /**
- * Gets the decay field of an 'nnet' object.
- *
- * @return The value of the decay field
- */
- public double getDecay() {
- return modelObject.at("decay").asDouble();
- }
-
- /**
- * Gets the entropy field of an 'nnet' object.
- *
- * @return The value of the entropy field
- */
- public RBool getEntropy() {
- return modelObject.at("entropy").asBool();
- }
-
- /**
- * Gets the fitted.values field of an 'nnet' object.
- *
- * @return The value of the fitted.values field
- */
- public double[][] getFittedValues() {
- return modelObject.at("fitted.values").asDoubleMatrix();
- }
-
- /**
- * Gets the n field of an 'nnet' object.
- *
- * @return The value of the n field
- */
- public double[] getN() {
- return modelObject.at("n").asDoubleArray();
- }
-
- /**
- * Gets the nconn field of an 'nnet' object.
- *
- * @return The value of the nconn field
- */
- public double[] getNconn() {
- return modelObject.at("nconn").asDoubleArray();
- }
-
- /**
- * Gets the nsunits field of an 'nnet' object.
- *
- * @return The value of the nsunits field
- */
- public double getNsunits() {
- return modelObject.at("nsunits").asDouble();
- }
-
- /**
- * Gets the nunits field of an 'nnet' object.
- *
- * @return The value of the nunits field
- */
- public double getNunits() {
- return modelObject.at("nunits").asDouble();
- }
-
- /**
- * Gets the residuals field of an 'nnet' object.
- *
- * @return The value of the residuals field
- */
- public double[][] getResiduals() {
- return modelObject.at("residuals").asDoubleMatrix();
- }
-
- /**
- * Gets the softmax field of an 'nnet' object.
- *
- * @return The value of the softmax field
- */
- public RBool getSoftmax() {
- return modelObject.at("softmax").asBool();
- }
-
- /**
- * Gets the value field of an 'nnet' object.
- *
- * @return The value of the value field
- */
- public double getValue() {
- return modelObject.at("value").asDouble();
- }
-
- /**
- * Gets the wts field of an 'nnet' object.
- *
- * @return The value of the wts field
- */
- public double[] getWts() {
- return modelObject.at("wts").asDoubleArray();
- }
-
-
-}
diff --git a/src/main/org/openscience/cdk/qsar/model/R2/LinearRegressionModel.java b/src/main/org/openscience/cdk/qsar/model/R2/LinearRegressionModel.java
deleted file mode 100644
index eb98d52..0000000
--- a/src/main/org/openscience/cdk/qsar/model/R2/LinearRegressionModel.java
+++ /dev/null
@@ -1,570 +0,0 @@
-/* $RCSfile$
- * $Author$
- * $Date$
- * $Revision$
- *
- * Copyright (C) 2004-2008 Rajarshi Guha
- *
- * Contact: cdk-devel@lists.sourceforge.net
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-package org.openscience.cdk.qsar.model.R2;
-
-import java.io.File;
-import java.util.HashMap;
-
-import org.openscience.cdk.qsar.model.QSARModelException;
-import org.openscience.cdk.tools.ILoggingTool;
-import org.openscience.cdk.tools.LoggingToolFactory;
-import org.rosuda.JRI.REXP;
-import org.rosuda.JRI.RList;
-
-/**
- * A modeling class that provides a linear least squares regression model.
- *
- * When instantiated this class ensures that the R/Java interface has been
- * initialized. The response and independent variables can be specified at construction
- * time or via the setParameters method. The actual fitting procedure is carried out by build after which
- * the model may be used to make predictions.
- *
- * Currently, the design of the class is quite sparse as it does not allow subsetting,
- * variable names, setting of contrasts and so on.
- * It is also assumed that the values of all the variables are defined (i.e., not such that
- * they are NA
- * in an R session).
- * The use of
- * this class is shown in the following code snippet
- *
- * double[][] x;
- * double[] y;
- * try {
- * LinearRegressionModel lrm = new LinearRegressionModel(x,y);
- * lrm.build();
- * lrm.setParameters("newdata", newx);
- * lrm.setParameters("interval", "confidence");
- * lrm.predict();
- * } catch (QSARModelException qme) {
- * System.out.println(qme.toString());
- * }
- * double[] fitted = lrm.getFittedValues()
- * double[] predicted = lrm.getModelPredict().asList.at("fit").asDoubleArray();
- *
- * Note that when making predictions, the new X matrix and interval type can be set by calls
- * to setParameters(). In general, the arguments for lm() and predict.lm() can be set via
- * calls to setParameters(). The following table lists the parameters that can be set and their
- * expected types. More detailed informationis available in the R documentation.
- *
- *
- *
- *
- * | Name | Java Type | Notes |
- *
- *
- *
- *
- * | x | Double[][] | |
- *
- *
- * | y | Double[] | Length should be equal to the rows of x |
- *
- *
- * | weights | Double[] | Length should be equal to rows of x |
- *
- *
- * | newdata | Double[][] | Number of columns should be the same as in x |
- *
- *
- * | interval | String | Can be 'confidence' or 'predicton' |
- *
- *
- *
- *
- * In general the getFit* methods provide access to results from the fit
- * and getPredict* methods provide access to results from the prediction (i.e.,
- * prediction using the model on new data). The values returned correspond to the various
- * values returned by the lm
- * and predict.lm
- * functions in R.
- *
- * See {@link RModel} for details regarding the R and rJava environment.
- *
- * @author Rajarshi Guha
- * @cdk.require r-project
- * @cdk.module qsar
- * @cdk.githash
- * @cdk.keyword linear regression
- * @cdk.keyword R
- */
-
-public class LinearRegressionModel extends org.openscience.cdk.qsar.model.R2.RModel {
-
- private static int globalID = 0;
- private int nvar = 0;
-
- private RList modelPredict = null;
-
- private static ILoggingTool logger =
- LoggingToolFactory.createLoggingTool(LinearRegressionModel.class);
-
- /**
- * Constructs a LinearRegressionModel object.
- *
- * The constructor simply instantiates the model ID. Dependent and independent variables
- * should be set via setParameters().
- *
- * An important feature of the current implementation is that all the
- * independent variables are used during the fit. Furthermore no subsetting is possible.
- * As a result when setting these via setParameters() the caller should specify only
- * the variables and observations that will be used for the fit.
- */
- public LinearRegressionModel() throws QSARModelException {
- super();
- params = new HashMap();
- int currentID = LinearRegressionModel.globalID;
- org.openscience.cdk.qsar.model.R2.LinearRegressionModel.globalID++;
- this.setModelName("cdkLMModel" + currentID);
- }
-
- /**
- * Constructs a LinearRegressionModel object.
- *
- * The constructor allows the user to specify the
- * dependent and independent variables. The length of the dependent variable
- * array should equal the number of rows of the independent variable matrix. If this
- * is not the case an exception will be thrown.
- *
- * An important feature of the current implementation is that all the
- * independent variables are used during the fit. Furthermore no subsetting is possible.
- * As a result when creating an instance of this object the caller should specify only
- * the variables and observations that will be used for the fit.
- *
- * @param xx An array of independent variables. The observations should be in the rows
- * and the variables should be in the columns
- * @param yy an array containing the dependent variable
- * @throws org.openscience.cdk.qsar.model.QSARModelException
- * if the number of observations in x and y do not match
- */
- public LinearRegressionModel(double[][] xx, double[] yy) throws QSARModelException {
- super();
-
- params = new HashMap();
- int currentID = LinearRegressionModel.globalID;
- LinearRegressionModel.globalID++;
- this.setModelName("cdkLMModel" + currentID);
-
- int nrow = yy.length;
- this.nvar = xx[0].length;
-
- if (nrow != xx.length) {
- throw new QSARModelException("The number of values for the dependent variable does not match the number of rows of the design matrix");
- }
-
- Double[][] x = new Double[nrow][this.nvar];
- Double[] y = new Double[nrow];
- Double[] weights = new Double[nrow];
-
- for (int i = 0; i < nrow; i++) {
- y[i] = new Double(yy[i]);
- weights[i] = new Double(1.0);
- }
- for (int i = 0; i < nrow; i++) {
- for (int j = 0; j < this.nvar; j++)
- x[i][j] = new Double(xx[i][j]);
- }
-
- params.put("x", x);
- params.put("y", y);
- params.put("weights", weights);
- }
-
-
- /**
- * Constructs a LinearRegressionModel object.
- *
- * The constructor allows the user to specify the
- * dependent and independent variables as well as weightings for
- * the observations.
- *
- * The length of the dependent variable
- * array should equal the number of rows of the independent variable matrix. If this
- * is not the case an exception will be thrown.
- *
- * An important feature of the current implementation is that all the
- * independent variables are used during the fit. Furthermore no subsetting is possible.
- * As a result when creating an instance of this object the caller should specify only
- * the variables and observations that will be used for the fit.
- *
- * @param xx An array of independent variables. The observations should be in the rows
- * and the variables should be in the columns
- * @param yy an array containing the dependent variable
- * @param weights Specifies the weights for each observation. Unit weights are equivilant
- * to OLS
- * @throws org.openscience.cdk.qsar.model.QSARModelException
- * if the number of observations in x and y do not match
- */
- public LinearRegressionModel(double[][] xx, double[] yy, double[] weights) throws QSARModelException {
- super();
-
- params = new HashMap();
-
- int currentID = LinearRegressionModel.globalID;
- org.openscience.cdk.qsar.model.R2.LinearRegressionModel.globalID++;
- this.setModelName("cdkLMModel" + currentID);
-
- int nrow = yy.length;
- this.nvar = xx[0].length;
-
- if (nrow != xx.length) {
- throw new QSARModelException("The number of values for the dependent variable does not match the number of rows of the design matrix");
- }
- if (nrow != weights.length) {
- throw new QSARModelException("The length of the weight vector does not match the number of rows of the design matrix");
- }
-
- Double[][] x = new Double[nrow][this.nvar];
- Double[] y = new Double[nrow];
- Double[] wts = new Double[nrow];
-
- for (int i = 0; i < nrow; i++) {
- y[i] = new Double(yy[i]);
- wts[i] = new Double(weights[i]);
- }
- for (int i = 0; i < nrow; i++) {
- for (int j = 0; j < this.nvar; j++)
- x[i][j] = new Double(xx[i][j]);
- }
- params.put("x", x);
- params.put("y", y);
- params.put("weights", wts);
- }
-
- /**
- * Fits a linear regression model.
- *
- * This method calls the R function to fit a linear regression model
- * to the specified dependent and independent variables. If an error
- * occurs in the R session, an exception is thrown.
- *
- * Note that, this method should be called prior to calling the various get
- * methods to obtain information regarding the fit.
- */
- public void build() throws QSARModelException {
- // lets do some checks in case stuff was set via setParameters()
- Double[][] x;
- Double[] y, weights;
- x = (Double[][]) this.params.get("x");
- y = (Double[]) this.params.get("y");
- weights = (Double[]) this.params.get("weights");
- if (this.nvar == 0) this.nvar = x[0].length;
- else {
- if (y.length != x.length) {
- throw new QSARModelException("Number of observations does no match number of rows in the design matrix");
- }
- if (weights.length != y.length) {
- throw new QSARModelException("The weight vector must have the same length as the number of observations");
- }
- }
-
- // lets build the model
- String paramVarName = loadParametersIntoRSession();
- String cmd = "buildLM(\"" + getModelName() + "\", " + paramVarName + ")";
- REXP ret = rengine.eval(cmd);
- if (ret == null) {
- logger.debug("Error in buildLM");
- throw new QSARModelException("Error in buildLM");
- }
-
- // remove the parameter list
- rengine.eval("rm(" + paramVarName + ")");
-
- // save the model object on the Java side
- modelObject = ret.asList();
- }
-
-
- /**
- * Sets parameters required for building a linear model or using one for prediction.
- *
- * This function allows the caller to set the various parameters available
- * for the lm() and predict.lm() R routines. See the R help pages for the details of the available
- * parameters.
- *
- * @param key A String containing the name of the parameter as described in the
- * R help pages
- * @param obj An Object containing the value of the parameter
- * @throws org.openscience.cdk.qsar.model.QSARModelException
- * if the type of the supplied value does not match the
- * expected type
- */
- public void setParameters(String key, Object obj) throws QSARModelException {
- // since we know the possible values of key we should check the coresponding
- // objects and throw errors if required. Note that this checking can't really check
- // for values (such as number of variables in the X matrix to build the model and the
- // X matrix to make new predictions) - these should be checked in functions that will
- // use these parameters. The main checking done here is for the class of obj and
- // some cases where the value of obj is not dependent on what is set before it
-
- if (key.equals("y")) {
- if (!(obj instanceof Double[])) {
- throw new QSARModelException("The class of the 'y' object must be Double[]");
- }
- }
- if (key.equals("x")) {
- if (!(obj instanceof Double[][])) {
- throw new QSARModelException("The class of the 'x' object must be Double[][]");
- }
- }
- if (key.equals("weights")) {
- if (!(obj instanceof Double[])) {
- throw new QSARModelException("The class of the 'weights' object must be Double[]");
- }
- }
- if (key.equals("interval")) {
- if (!(obj instanceof String)) {
- throw new QSARModelException("The class of the 'interval' object must be String");
- }
- if (!(obj.equals("confidence") || obj.equals("prediction"))) {
- throw new QSARModelException("The type of interval must be: prediction or confidence");
- }
- }
- if (key.equals("newdata")) {
- if (!(obj instanceof Double[][])) {
- throw new QSARModelException("The class of the 'newdata' object must be Double[][]");
- }
- }
- this.params.put(key, obj);
- }
-
-
- /**
- * Uses a fitted model to predict the response for new observations.
- *
- * This function uses a previously fitted model to obtain predicted values
- * for a new set of observations. If the model has not been fitted prior to this
- * call an exception will be thrown. Use setParameters
- * to set the values of the independent variable for the new observations and the
- * interval type.
- *
- * @throws org.openscience.cdk.qsar.model.QSARModelException
- * if the model has not been built prior to a call
- * to this method. Also if the number of independent variables specified for prediction
- * is not the same as specified during model building
- */
- public void predict() throws QSARModelException {
-
- if (modelObject == null)
- throw new QSARModelException("Before calling predict() you must fit the model using build()");
-
- Double[][] newx = (Double[][]) params.get("newdata");
- if (newx[0].length != nvar) {
- throw new QSARModelException("Number of independent variables used for prediction must match those used for fitting");
- }
-
- String pn = loadParametersIntoRSession();
- REXP ret = rengine.eval("predictLM(\"" + getModelName() + "\", " + pn + ")");
- if (ret == null) throw new QSARModelException("Error occured in prediction");
-
- // remove the parameter list
- rengine.eval("rm(" + pn + ")");
-
- modelPredict = ret.asList();
- }
-
- /**
- * Get the R object obtained from predict.lm().
- *
- * @return The result of the prediction. Contains a number of fields corresponding to
- * predicted values, SE and other items depending on the parameters that we set.
- * Note that the call to predict.lm() is performde with se.fit = TRUE
- */
- public RList getModelPredict() {
- return modelPredict;
- }
-
- /**
- * Returns an RList object summarizing the linear regression model.
- *
- * The return object can be queried via the RList methods to extract the
- * required components.
- *
- * @return A summary for the linear regression model
- * @throws org.openscience.cdk.qsar.model.QSARModelException
- * if the model has not been built prior to a call
- * to this method
- */
- public RList summary() throws QSARModelException {
- if (modelObject == null)
- throw new QSARModelException("Before calling summary() you must fit the model using build()");
-
- REXP ret = rengine.eval("summary(" + getModelName() + ")");
- if (ret == null) {
- logger.debug("Error in summary()");
- throw new QSARModelException("Error in summary()");
- }
- return ret.asList();
- }
-
-
- /**
- * Loads an LinearRegressionModel object from disk in to the current session.
- *
- * @param fileName The disk file containing the model
- * @throws org.openscience.cdk.qsar.model.QSARModelException
- * if the model being loaded is not a linear regression model
- * object or the file does not exist
- */
- public void loadModel(String fileName) throws QSARModelException {
- File f = new File(fileName);
- if (!f.exists()) throw new QSARModelException(fileName + " does not exist");
-
- rengine.assign("tmpFileName", fileName);
- REXP ret = rengine.eval("loadModel(tmpFileName)");
- if (ret == null) throw new QSARModelException("Model could not be loaded");
-
- String name = ret.asList().at("name").asString();
- if (!isOfClass(name, "lm")) {
- removeObject(name);
- throw new QSARModelException("Loaded object was not of class \'lm\'");
- }
-
- modelObject = ret.asList().at("model").asList();
- setModelName(name);
- nvar = getCoefficients().length - 1; // since the intercept is also returned
- }
-
- /**
- * Loads an LinearRegressionModel object from a serialized string into the current session.
- *
- * @param serializedModel A String containing the serialized version of the model
- * @param modelName A String indicating the name of the model in the R session
- * @throws org.openscience.cdk.qsar.model.QSARModelException
- * if the model being loaded is not a linear regression model
- * object
- */
- public void loadModel(String serializedModel, String modelName) throws QSARModelException {
- rengine.assign("tmpSerializedModel", serializedModel);
- rengine.assign("tmpModelName", modelName);
- REXP ret = rengine.eval("unserializeModel(tmpSerializedModel, tmpModelName)");
-
- if (ret == null) throw new QSARModelException("Model could not be unserialized");
-
- String name = ret.asList().at("name").asString();
- if (!isOfClass(name, "lm")) {
- removeObject(name);
- throw new QSARModelException("Loaded object was not of class \'lm\'");
- }
-
- modelObject = ret.asList().at("model").asList();
- setModelName(name);
- nvar = getCoefficients().length - 1; // as the intercept is also returned
- }
-
-// Autogenerated code: assumes that 'modelObject' is
-// a RList object
-
-
- /**
- * Gets the assign field of an 'lm' object.
- *
- * @return The value of the assign field
- */
- public int[] getAssign() {
- return modelObject.at("assign").asIntArray();
- }
-
- /**
- * Gets the coefficients field of an 'lm' object.
- *
- * @return The value of the coefficients field
- */
- public double[] getCoefficients() {
- return modelObject.at("coefficients").asDoubleArray();
- }
-
- /**
- * Gets the df.residual field of an 'lm' object.
- *
- * @return The value of the df.residual field
- */
- public int getDfResidual() {
- return modelObject.at("df.residual").asInt();
- }
-
- /**
- * Gets the effects field of an 'lm' object.
- *
- * @return The value of the effects field
- */
- public double[] getEffects() {
- return modelObject.at("effects").asDoubleArray();
- }
-
- /**
- * Gets the fitted.values field of an 'lm' object.
- *
- * @return The value of the fitted.values field
- */
- public double[] getFittedValues() {
- return modelObject.at("fitted.values").asDoubleArray();
- }
-
- /**
- * Gets the model field of an 'lm' object.
- *
- * @return The value of the model field
- */
- public RList getModel() {
- return modelObject.at("model").asList();
- }
-
- /**
- * Gets the qr field of an 'lm' object.
- *
- * @return The value of the qr field
- */
- public RList getQr() {
- return modelObject.at("qr").asList();
- }
-
- /**
- * Gets the rank field of an 'lm' object.
- *
- * @return The value of the rank field
- */
- public int getRank() {
- return modelObject.at("rank").asInt();
- }
-
- /**
- * Gets the residuals field of an 'lm' object.
- *
- * @return The value of the residuals field
- */
- public double[] getResiduals() {
- return modelObject.at("residuals").asDoubleArray();
- }
-
- /**
- * Gets the xlevels field of an 'lm' object.
- *
- * @return The value of the xlevels field
- */
- public RList getXlevels() {
- return modelObject.at("xlevels").asList();
- }
-
-
-}
diff --git a/src/main/org/openscience/cdk/qsar/model/R2/RModel.java b/src/main/org/openscience/cdk/qsar/model/R2/RModel.java
deleted file mode 100644
index b68dd2e..0000000
--- a/src/main/org/openscience/cdk/qsar/model/R2/RModel.java
+++ /dev/null
@@ -1,594 +0,0 @@
-/* $RCSfile$
- * $Author$
- * $Date$
- * $Revision$
- *
- * Copyright (C) 2004-2008 Rajarshi Guha
- *
- * Contact: cdk-devel@lists.sourceforge.net
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-package org.openscience.cdk.qsar.model.R2;
-
-import java.awt.FileDialog;
-import java.awt.Frame;
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileWriter;
-import java.io.InputStreamReader;
-import java.io.StringWriter;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.Random;
-import java.util.Set;
-
-import org.openscience.cdk.qsar.model.IModel;
-import org.openscience.cdk.qsar.model.QSARModelException;
-import org.openscience.cdk.tools.ILoggingTool;
-import org.openscience.cdk.tools.LoggingToolFactory;
-import org.rosuda.JRI.REXP;
-import org.rosuda.JRI.RList;
-import org.rosuda.JRI.RMainLoopCallbacks;
-import org.rosuda.JRI.Rengine;
-
-/**
- * Base class for the R-CDK interface.
- *
- * This class provides the basis for all classes that wish to interface with
- * R functions from a CDK program.
- *
- * Since the R engine is multi-threaded only one instance of the R session can exist
- * for a given Java process. This implies that initialization must be perfored exactly once
- * within a Java process. This class ensure that this occurs.
- *
- * In addition, this class loads some helper functions into the R session. The loading
- * can be via a temporary file (the default) or via a String, which may be useful in
- * webservice scenarios.
- *
- * Requirement The class (and implementing subclasses) is dependent on the
- * JRI library. This provides an interface to R
- * for Java code. Though the rJava for R
- * includes JRI, the code here is only dependent on JRI and does not attempt to
- * go from R to Java. Hence rJava is not a requirement. To compile this code, the CDK
- * includes the JRI jar file. However to run the code, the JRI native library (libjri.so
- * on Linux) must be located in the users LD_LIBRARY_PATH. Also the versions of the JRI Java
- * API and native library should match and this is checked for.
- *
- * Currently the CDK uses JRI 0.3 (available from here)
- *
- *
- * Implementation Notes
- *
- * - If the user requires other initializations the only way to do so at
- * this point is to edit
helper.R or perform the initialization by hand
- * - An implementing class must call
super()
- * - Though this class provides a field to store the R model object as a
- *
RList the actual R variable will remain in the R session. This is useful
- * for saving the model as a .Rda file at one point. Also by storing the model on the R
- * side we do not not need to make repeated queries on the model via eval().
- * - Subclasses of this class are generally Java front-ends to a specific R model type
- * (such as linear regression, CNN etc.). Thus each subclass should provide getter methods
- * for the various components of such an object. Since this is tedious to do by hand,
- * you can use the
stubs.R script that comes with the CDK distribution to
- * generate source code for the getter methods for the individual components of an R model
- * object. Note, that the script currently ignores objects of classes 'call'
- * and 'formula'.
- *
- *
- * NOTE: For the R backend to work, ensure that R is correctly installed.
- * Other requirements are
- *
- * - LD_LIBRARY_PATH should include the directory that contains
libjri.so as well
- * as the dierctory that contains libR.so
- * - R_HOME should be set to the appropriate location
- *
- *
- * @author Rajarshi Guha
- * @cdk.require r-project
- * @cdk.require JRI.jar
- * @cdk.module qsar
- * @cdk.githash
- * @cdk.keyword R
- * @cdk.keyword JRI
- */
-public abstract class RModel implements IModel {
- private String modelName = null;
- protected RList modelObject = null;
- protected HashMap params = null;
-
- /**
- * The object that performs the calls to the R engine.
- */
- protected static Rengine rengine = null;
-
- /**
- * A boolean that indicates whether the R/Java subsystem has been initialized or not.
- */
- private static boolean doneInit = false;
- private static ILoggingTool logger =
- LoggingToolFactory.createLoggingTool(RModel.class);
-
- private void checkEnvironmentVariables() throws QSARModelException {
- String rhome = System.getenv("R_HOME");
- String ldlibrarypath = System.getenv("LD_LIBRARY_PATH");
- if (rhome == null || rhome.length() == 0 ||
- ldlibrarypath == null || ldlibrarypath.length() == 0) {
- throw new QSARModelException(
- "Cannot find R: R_HOME and LD_LIBRARY_PATH are not set."
- );
- }
- }
-
- private void initRengine(String[] args, boolean useDisk) throws QSARModelException {
- if (!doneInit) {
- rengine = new Rengine(args, false, new TextConsole());
- if (!rengine.waitForR()) {
- throw new QSARModelException("Could not load rJava");
- } else {
- logger.debug("Started R");
- }
- doneInit = true;
- if (useDisk) {
- loadRFunctions(rengine);
- logger.info("Initializing from disk");
- } else {
- loadRFunctionsAsStrings(rengine);
- logger.info("Initializing from strings");
- }
- logger.info("rJava initialized");
- } else {
- logger.info("rjava already intialized");
- }
- }
-
- private void loadRFunctions(Rengine engine) {
- // File.separator is used to be system independent
- // Fix me: After creating a jar file it don't work on a windwos OS
- // but within eclipse it won't work on while working with '/' on windows OS
- // No idea how to solve this
-
- // String scriptLocator = "org" + File.separator + "openscience" +
- // File.separator + "cdk" + File.separator + "qsar" + File.separator +
- // "model" + File.separator + "data" + File.separator + "helper.R";
- String scriptLocator = "org/openscience/cdk/qsar/model/data/helper.R";
- try {
- File scriptFile = File.createTempFile("XXXXX", ".R");
- scriptFile.deleteOnExit();
-
- InputStreamReader reader = new InputStreamReader(
- this.getClass().getClassLoader().getResourceAsStream(scriptLocator));
- BufferedReader inFile = new BufferedReader(reader);
-
- FileWriter outFile = new FileWriter(scriptFile);
- BufferedWriter outBuffer = new BufferedWriter(outFile);
- String inputLine;
- while ((inputLine = inFile.readLine()) != null) {
- outBuffer.write(inputLine, 0, inputLine.length());
- outBuffer.newLine();
- }
- outBuffer.close();
- inFile.close();
- outFile.close();
- // Necessary for windows user, R needs a '/' in the path of a file even on windows
- String path = scriptFile.getAbsolutePath();
- path = path.replaceAll("\\\\", "/");
- engine.eval("source(\"" + path + "\")");
-
- } catch (Exception exception) {
- logger.error("Could not load helper R script for JRI: ", scriptLocator);
- logger.debug(exception);
- }
- }
-
- private void loadRFunctionsAsStrings(Rengine evaluator) {
- String[] scripts = {
- "helper.R",
- };
- String scriptPrefix = "org/openscience/cdk/qsar/model/data/";
- for (int i = 0; i < scripts.length; i++) {
-
- String scriptLocator = scriptPrefix + scripts[i];
- try {
- InputStreamReader reader = new InputStreamReader(
- this.getClass().getClassLoader().getResourceAsStream(scriptLocator));
- BufferedReader inFile = new BufferedReader(reader);
-
- StringWriter sw = new StringWriter();
- String inputLine;
- while ((inputLine = inFile.readLine()) != null) {
- sw.write(inputLine);
- sw.write("\n");
- }
- sw.close();
-
- evaluator.eval("eval(parse(text=\"" + sw.toString() + "\"))");
-
- } catch (Exception exception) {
- logger.error("Could not load CDK-rJava R scripts: ", scriptLocator);
- logger.debug(exception);
- }
-
- }
- }
-
- /**
- * Initializes R with the --vanilla, --quiet, --slave flags.
- *
- * This constructor will initialize the R session via a temporary file or
- * from a String depending on whether the symbol initRFromString
- * is specified on the command line
- */
- public RModel() throws QSARModelException {
- checkEnvironmentVariables();
- // check that the JRI jar and .so match
- if (!Rengine.versionCheck()) {
- logger.debug("API version of the JRI library does not match that of the native binary");
- throw new QSARModelException("API version of the JRI library does not match that of the native binary");
- }
-
- params = new HashMap();
- String[] args = {"--vanilla", "--quiet", "--slave"};
-
- String initRFromString = System.getProperty("initRFromString");
- boolean useDisk = true;
- if (initRFromString != null && initRFromString.equals("true")) {
- useDisk = false;
- }
- initRengine(args, useDisk);
- }
-
-
- /**
- * Saves a R model to disk.
- *
- * This function can be used to save models built in a session, and then loaded
- * again in a different session.
- *
- * @param modelName The name of the model as returned by \code{getModelName}.
- * @param fileName The file to which the model should be saved
- * @throws QSARModelException if the R session cannot save the model
- * @see #loadModel
- */
- public void saveModel(String modelName, String fileName) throws QSARModelException {
- if (fileName == null || fileName.equals("")) {
- fileName = modelName + ".rda";
- }
- rengine.assign("tmpModelName", modelName);
- rengine.assign("tmpFileName", fileName);
- REXP result = rengine.eval("saveModel(tmpModelName, tmpFileName)");
- if (result == null) {
- logger.debug("Error in 'saveModel(tmpModelName, tmpFileName)'");
- throw new QSARModelException("Error saving model");
- }
- }
-
-
- /**
- * Get the name of the model.
- *
- * This function returns the name of the variable that the actual
- * model is stored in within the R session. In general this is
- * not used for the end user. In the future this might be changed
- * to a private method.
- *
- * @return A String containing the name of the R variable
- * @see #setModelName
- */
- public String getModelName() {
- return (this.modelName);
- }
-
- /**
- * Set the name of the model.
- *
- * Ordinarily the user does not need to call this function as each model
- * is assigned a unique ID at instantiation. However, if a user saves a model
- * to disk and then later loads it, the loaded
- * model may overwrite a model in that session. In this situation, this method
- * can be used to assign a name to the model.
- *
- * @param newName The name of the model
- * @see #getModelName
- * @see #saveModel
- * @see #loadModel
- */
- public void setModelName(String newName) {
- if (this.modelName != null && this.modelName.equals(newName)) return;
- String oldName = this.modelName;
- if (oldName != null) {
- rengine.eval("if ('" + oldName + "' %in% ls()) {" + newName + "<-" + oldName + ";rm(" + oldName + ")}");
- }
- this.modelName = newName;
- }
-
- /**
- * Get the instance of the Rengine.
- *
- * In case the R engine has not been initialized, it is initialized before
- * returning the object.
- *
- * @return The Rengine object
- */
- public static Rengine getRengine() {
- return rengine;
- }
-
- /**
- * Get the actual model object.
- *
- * @return An RList object representation of the model.
- */
- public RList getModelObject() {
- return modelObject;
- }
-
- /**
- * Get a unique String value.
- *
- * This method can be used to get unique variable names for use in an R session. The
- * String is generated from a combination of the prefix, the system time and a random
- * portion.
- *
- * @param prefix Any value. If empty or null, "var" is used.
- * @return A unique String value
- */
- public String getUniqueVariableName(String prefix) {
- if (prefix == null || prefix.equals("")) prefix = "var";
- Random rnd = new Random();
- long uid = ((System.currentTimeMillis() >>> 16) << 16) + rnd.nextLong();
- return prefix + String.valueOf(Math.abs(uid)).trim();
- }
-
- /**
- * Loads the parameters for a model into a list object in the R session.
- *
- * The method assigns the list to a (relatively) unique variable name and returns
- * the variable name to the caller so that the list can be accessed later on.
- *
- * @return
- * @throws QSARModelException if there are any problems within the R session.
- */
- protected String loadParametersIntoRSession() throws QSARModelException {
- REXP result;
- Set keys = params.keySet();
- String paramVariableName = getUniqueVariableName("param");
-
- for (Iterator iterator = keys.iterator(); iterator.hasNext();) {
- String name = (String) iterator.next();
- Object value = params.get(name);
-
- if (value instanceof Integer) {
- logger.debug("Assigning a Integer");
- Integer tmp1 = (Integer) value;
- int[] tmp2 = new int[]{tmp1.intValue()};
- rengine.assign(name, tmp2);
- } else if (value instanceof String) {
- logger.debug("Assigning a String");
- rengine.assign(name, (String) value);
- } else if (value instanceof Boolean) {
- logger.debug("Assigning a Boolean");
- Boolean tmp1 = (Boolean) value;
- if (tmp1.booleanValue()) result = rengine.eval(name + "<- TRUE");
- else result = rengine.eval(name + "<- FALSE");
- if (result == null) throw new QSARModelException("Error assigning a boolean");
- } else if (value instanceof Double) {
- logger.debug("Assigning a Double");
- Double tmp1 = (Double) value;
- double[] tmp2 = new double[]{tmp1.doubleValue()};
- rengine.assign(name, tmp2);
- } else if (value instanceof Integer[]) {
- logger.debug("Assigning a Integer[]");
- Integer[] tmp1 = (Integer[]) value;
- int[] tmp2 = new int[tmp1.length];
- for (int i = 0; i < tmp1.length; i++) tmp2[i] = tmp1[i].intValue();
- rengine.assign(name, tmp2);
- } else if (value instanceof Double[]) {
- logger.debug("Assigning a Double[]");
- Double[] tmp1 = (Double[]) value;
- double[] tmp2 = new double[tmp1.length];
- for (int i = 0; i < tmp1.length; i++) tmp2[i] = tmp1[i].doubleValue();
- rengine.assign(name, tmp2);
- } else if (value instanceof Integer[][]) {
- logger.debug("Assigning a Integer[][]");
- Integer[][] tmp1 = (Integer[][]) value;
- int nrow = tmp1.length;
- int ncol = tmp1[0].length;
- int[] tmp2 = new int[nrow * ncol];
- for (int i = 0; i < ncol; i++) {
- for (int j = 0; j < nrow; j++) {
- tmp2[i * nrow + j] = (tmp1[j][i]).intValue();
- }
- }
- rengine.assign(name, tmp2);
- result = rengine.eval(name + "<- matrix(" + name + ", nrow=" + nrow + ")");
- if (result == null) throw new QSARModelException("Error assigning a int[][]");
- } else if (value instanceof Double[][]) {
- logger.debug("Assigning a Double[][]");
- Double[][] tmp1 = (Double[][]) value;
- int nrow = tmp1.length;
- int ncol = tmp1[0].length;
- double[] tmp2 = new double[nrow * ncol];
- for (int i = 0; i < ncol; i++) {
- for (int j = 0; j < nrow; j++) {
- tmp2[i * nrow + j] = (tmp1[j][i]).doubleValue();
- }
- }
- rengine.assign(name, tmp2);
- result = rengine.eval(name + "<- matrix(" + name + ", nrow=" + nrow + ")");
- if (result == null) throw new QSARModelException("Error assigning a double[][]");
- }
- }
-
- // make the list command
- String cmd = paramVariableName + " <- list(";
- for (Iterator iterator = keys.iterator(); iterator.hasNext();) {
- String name = (String) iterator.next();
- cmd = cmd + name + " = " + name + ", ";
- }
- cmd = cmd + ")";
-
- // now eval the command
- result = rengine.eval(cmd);
- if (result == null) throw new QSARModelException("Error making the parameter list");
-
- // now lets remove all the variables we had assigned
- for (Iterator iterator = keys.iterator(); iterator.hasNext();) {
- String name = (String) iterator.next();
- rengine.eval("rm(" + name + ")");
- }
-
- return paramVariableName;
- }
-
- /**
- * Checks whether the class of a named object is of the specified class.
- *
- *
- * @param objectName The name of the R variable holding the object to check
- * @param objectClass The class to check for
- * @return true if the object is of the specified class, false if the object is not
- * of the specified class or the R command to obtain the class failed
- */
- public boolean isOfClass(String objectName, String objectClass) {
- REXP klass = rengine.eval("class(" + objectName + ")");
- if (klass == null) {
- return false;
- }
- return klass.asString().equals(objectClass);
- }
-
- /**
- * Removes an object from the R session.
- *
- * @param objectName The name of the R variable to remove
- * @throws QSARModelException if the 'rm' command failed
- */
- public void removeObject(String objectName) throws QSARModelException {
- REXP ret = rengine.eval("rm(\"" + objectName + "\")");
- if (ret == null) throw new QSARModelException("Error removing \'" + objectName + "\'");
- }
-
- /**
- * Abstract method to handle loading R models.
- *
- * This method can be used to load a previously saved R model object. Since
- * the user can save any arbitrary R object, checks must be made that the
- * object being returned is an instance of one of the current modeling classes.
- *
- * This is best achieved by forcing each modeling class to write its own loader.
- *
- * @param fileName The file containing the R object to load
- * @throws org.openscience.cdk.qsar.model.QSARModelException
- * if the R session could not load the object or if the loaded model
- * does not correspond to the class that it was loaded from
- * @see #saveModel
- */
- abstract public void loadModel(String fileName) throws QSARModelException;
-
- /**
- * Abstract method to handle loading R models that were previously serialized.
- *
- * This method can be used to load a previously serialized R model object (usinging
- * serialize()). Since
- * the user can save any arbitrary R object, checks must be made that the
- * object being returned is an instance of one of the current modeling classes.
- * This is best achieved by forcing each modeling class to write its own loader.
- *
- * In addition
- * objects saved using serialize() do not have a name. As a result a name for the object must
- * be specified when using this method.
- *
- * @param serializedModel A String containing the ASCII sreialized R object
- * @param modelName The name of the model. (Within the R session, the model will be assigned to
- * a variable of this name)
- * @throws QSARModelException if the R session could not load the object or if the loaded model
- * does not correspond to the class that it was loaded from
- * @see #saveModel
- */
- abstract public void loadModel(String serializedModel, String modelName) throws QSARModelException;
-
- /**
- * Specifies the parameters value.
- *
- * @param key A String representing the name of the parameter (corresponding to the
- * name described in the R manpages)
- * @param obj The value of the parameter
- * @throws QSARModelException if the parameters are of the wrong type for the given modeling function
- */
- abstract public void setParameters(String key, Object obj) throws QSARModelException;
-
- abstract public void build() throws QSARModelException;
-
- abstract public void predict() throws QSARModelException;
-
- protected void finalize() {
- rengine.eval("rm(\"" + getModelName() + "\",pos=1)");
- }
-
- ;
-
-
- class TextConsole implements RMainLoopCallbacks {
- public void rWriteConsole(Rengine re, String text) {
- System.out.print(text);
- }
-
- public void rBusy(Rengine re, int which) {
- System.out.println("rBusy(" + which + ")");
- }
-
- public String rReadConsole(Rengine re, String prompt, int addToHistory) {
- System.out.print(prompt);
- try {
- BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
- String s = br.readLine();
- return (s == null || s.length() == 0) ? s : s + "\n";
- } catch (Exception e) {
- System.out.println("jriReadConsole exception: " + e.getMessage());
- }
- return null;
- }
-
- public void rShowMessage(Rengine re, String message) {
- System.out.println("rShowMessage \"" + message + "\"");
- }
-
- public String rChooseFile(Rengine re, int newFile) {
- FileDialog fd = new FileDialog(new Frame(), (newFile == 0) ? "Select a file" : "Select a new file", (newFile == 0) ? FileDialog.LOAD : FileDialog.SAVE);
- fd.pack();
- fd.setVisible(true);
- String res = null;
- if (fd.getDirectory() != null) res = fd.getDirectory();
- if (fd.getFile() != null) res = (res == null) ? fd.getFile() : (res + fd.getFile());
- return res;
- }
-
- public void rFlushConsole(Rengine re) {
- }
-
- public void rLoadHistory(Rengine re, String filename) {
- }
-
- public void rSaveHistory(Rengine re, String filename) {
- }
-
- public void rWriteConsole(Rengine arg0, String message, int arg2) {
- System.out.println("rShowMessage \"" + message + "\"");
- }
- }
-
-}
diff --git a/src/main/org/openscience/cdk/qsar/model/data/cdkSJava.R b/src/main/org/openscience/cdk/qsar/model/data/cdkSJava.R
deleted file mode 100644
index 0de3ec9..0000000
--- a/src/main/org/openscience/cdk/qsar/model/data/cdkSJava.R
+++ /dev/null
@@ -1,451 +0,0 @@
-#
-# Copyright (C) 2004-2007 The Chemistry Development Kit (CDK) project
-#
-# Contact: cdk-devel@lists.sourceforge.net
-#
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public License
-# as published by the Free Software Foundation; either version 2.1
-# of the License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-
-
-# Basically the idea is to be able to pass an arbitrary Java object
-# to an R session. For this to work, the object should be converted to
-# a valid R object within the R session.
-#
-# How does R know how to convert a Java object it recieves? This is done
-# by a matcher function. This looks at the class name of the object and if
-# it matches the class name in the matcher function, the converter is called
-#
-# The converter then accesses any methods for the Java object or uses the methods
-# provided by SJava to extract information from the Java object to create an R
-# object.
-#
-# After implementing matcher and converter functions they should be registered
-# with SJava using setJavaFunctionConverter()
-#
-# So the flow when calling an R function *from* a Java program and passing
-# an arbitrary Java object is:
-#
-# 1. The R function recieves the Java object
-# 2. Runs it through the matcher functions SJava knows about
-# 3. If a matcher function returns TRUE the corresponding converter function
-# is called. The return value if an R object (vector, data.frame, list etc)
-# 4. The function then works with the object as usual
-# 5. If no matcher was found in (2) then the R function will see the object
-# as an AnonymousOmegahatReference
-#
-# If the R function that was called from the Java session returns the recieved
-# object then Java will see it as a R object. So if the converter for a Java
-# vector turns it into a numeric() and returns it Java will get the object back
-# as a double[] which can be printed by ROmegahatInterpreter.show()
-#
-#
-#
-# Passing an arbitrary R object back to Java is done similarly. In this case
-# the converter function will call some Java function that creaates a
-# AnonymousOmegahatReference (or named) from the R object (possibly by
-# calling methods of the class). The matcher function uses the inherits function
-# in R to determine whether the R object is of the proper class. So in this case
-# the flow is :
-#
-# 1. Java calls a R function which does some calculation and returns an R object
-# 2. SJava looks for a matcher that matches the R class of the return value
-# and calls the corresponding converter function with the R object
-# 3. The converter will generally return a Java object containing the information
-# from the R object.
-#
-# For primitives such as vectors, this process is not required. But if we want
-# to return say a lm or nnet object we would create a Java class that contains
-# setter and getter methods. The R converter would create a new instance of this
-# wrapping class and set the fields with the values from the R object and return this
-# Java object which will then be passed back to the Java calling program
-
-
-require(SJava)
-if (!isJavaInitialized()) {
- .JavaInit()
-}
-library(nnet)
-#library(pls.pcr)
-
-saveModel <- function(modelname, filename) {
- resp <- try( do.call('save',list(modelname,file=filename)), silent=TRUE )
-}
-
-loadModel <- function(filename) {
- modelname <- load(filename, .GlobalEnv)
- get(modelname)
-}
-loadModel.getName <- function(filename) {
- modelname <- load(filename)
- modelname
-}
-unserializeModel <- function(modelstr, modelname) {
- zzz <- paste(paste(modelstr, sep='', collapse='\n'), '\n', sep='', collapse='')
- assign(modelname, unserialize(zzz), pos=1)
- get(modelname)
-}
-
-summaryModel <- function(modelname) {
- summary(get(modelname))
-}
-
-hashmap.to.list <- function(params) {
- keys <- unlist(params$keySet()$toArray())
- paramlist <- list()
- cnt <- 1
- for (key in keys) {
- paramlist[[cnt]] <- params$get(key)
- cnt <- cnt+1
- }
- names(paramlist) <- keys
- paramlist
-}
-
-#############################################
-# Linear regression fit/predict converters
-#############################################
-lmFitConverter <-
-function(obj,...)
-{
- .JNew('org.openscience.cdk.qsar.model.R.LinearRegressionModelFit',
- obj$coefficients, obj$residuals,
- obj$fitted, obj$rank, obj$df.residual)
-}
-lmPredictConverter <- function(preds,...) {
- .JNew('org.openscience.cdk.qsar.model.R.LinearRegressionModelPredict',
- preds$fit[,1], preds$se.fit, preds$fit[,2], preds$fit[,3],
- preds$df, preds$residual.scale)
-}
-lmSummaryConverter <- function(sumry,...) {
- .JNew('org.openscience.cdk.qsar.model.R.LinearRegressionModelSummary',
- sumry$residuals, sumry$coeff,
- sumry$sigma, sumry$r.squared, sumry$adj.r.squared,
- sumry$df[2], sumry$fstatistic,
- attr(sumry$coeff, 'dimnames')[[1]],
- attr(sumry$coeff, 'dimnames')[[2]])
-}
-
-#############################################
-# CNN regression fit/predict converters
-#############################################
-cnnSummaryConverter <-
-function(obj,...)
-{
- .JNew('org.openscience.cdk.qsar.model.R.CNNRegressionModelSummary',
- obj$n, obj$entropy, obj$softmax, obj$censored, obj$value, obj$residuals)
-}
-cnnFitConverter <-
-function(obj,...)
-{
- noutput <- ncol(obj$fitted)
- nobs <- nrow(obj$fitted)
- if ('Hessian' %in% names(obj)) {
- .JNew('org.openscience.cdk.qsar.model.R.CNNRegressionModelFit',
- noutput,nobs, obj$wts, obj$fitted, obj$residuals, obj$value, obj$Hessian)
- } else {
- .JNew('org.openscience.cdk.qsar.model.R.CNNRegressionModelFit',
- noutput, nobs,obj$wts, obj$fitted, obj$residuals, obj$value)
- }
-}
-cnnClassFitConverter <-
-function(obj,...)
-{
- noutput <- ncol(obj$fitted)
- nobs <- nrow(obj$fitted)
- if ('Hessian' %in% names(obj)) {
- .JNew('org.openscience.cdk.qsar.model.R.CNNClassificationModelFit',
- noutput,nobs, obj$wts, obj$fitted, obj$residuals, obj$value, obj$Hessian)
- } else {
- .JNew('org.openscience.cdk.qsar.model.R.CNNClassificationModelFit',
- noutput, nobs,obj$wts, obj$fitted, obj$residuals, obj$value)
- }
-}
-cnnPredictConverter <-
-function(obj,...) {
- # The obj we get is actually a 'matrix' but we set its class
- # to cnnregprediction so that SJava would send it specifically
- # to us. So we should convert obj back to class 'matrix' so
- # that SJava can send it correctly to the Java side
- class(obj) <- 'matrix'
- .JNew('org.openscience.cdk.qsar.model.R.CNNRegressionModelPredict',
- ncol(obj), obj)
-}
-cnnClassPredictConverter <-
-function(obj,...) {
- # The obj we get is actually a 'matrix' but we set its class
- # to cnnclsprediction so that SJava would send it specifically
- # to us. So we should convert obj back to class 'matrix' so
- # that SJava can send it correctly to the Java side
- if (class(obj[1]) == 'numeric') {
- class(obj) <- 'matrix'
- .JNew('org.openscience.cdk.qsar.model.R.CNNClassificationModelPredict',
- ncol(obj), obj)
- } else if (class(obj[1]) == 'character') {
- class(obj) <- 'character'
- .JNew('org.openscience.cdk.qsar.model.R.CNNClassificationModelPredict', obj)
- }
-}
-
-
-#############################################
-# PLS fit/predict converter
-#############################################
-plsFitConverter <-
-function(obj,...) {
- tmp <- .JNew('org.openscience.cdk.qsar.model.R.PLSRegressionModelFit',
- obj$nobj, obj$nvar, obj$npred, obj$ncomp, obj$method)
- tmp$setTrainingData(
- obj$training$B, obj$training$Ypred, obj$training$RMS,
- obj$training$Xscores, obj$training$Xload,
- obj$training$Yscores, obj$training$Yload)
- tmp$PLSRegressionModelSetTrain()
- if ('validat' %in% names(obj)) {
- # Add validation fields
- tmp$setValidationData(
- obj$valid$niter, obj$valid$nLV,
- obj$valid$Ypred, obj$valid$RMS, obj$valid$RMS.sd, obj$valid$R2)
- }
- tmp
-}
-plsPredictConverter <-
-function(obj,...) {
- class(obj) <- 'matrix'
- .JNew('org.openscience.cdk.qsar.model.R.PLSRegressionModelPredict',ncol(obj),obj)
-}
-
-#############################################
-# Register the fit/predict converter funcs
-#############################################
-setJavaFunctionConverter(lmFitConverter, function(x,...){inherits(x,'lm')},
- description='lm fit object to Java',
- fromJava=F)
-setJavaFunctionConverter(lmPredictConverter, function(x,...){inherits(x,'lmregprediction')},
- description='lm predict object to Java',
- fromJava=F)
-setJavaFunctionConverter(lmSummaryConverter, function(x,...){inherits(x,'summary.lm')},
- description='lm summary object to Java',
- fromJava=F)
-setJavaFunctionConverter(cnnClassFitConverter, function(x,...){inherits(x,'nnet.formula')},
- description='cnn (nnet) classification fit object to Java',
- fromJava=F)
-setJavaFunctionConverter(cnnSummaryConverter, function(x,...){inherits(x,'summary.nnet')},
- description='cnn (nnet) summary object to Java',
- fromJava=F)
-setJavaFunctionConverter(cnnFitConverter, function(x,...){inherits(x,'nnet')},
- description='cnn (nnet) fit object to Java',
- fromJava=F)
-setJavaFunctionConverter(cnnClassPredictConverter, function(x,...){inherits(x,'cnnclsprediction')},
- description='cnn (nnet) classification predict object to Java',
- fromJava=F)
-setJavaFunctionConverter(cnnPredictConverter, function(x,...){inherits(x,'cnnregprediction')},
- description='cnn (nnet) predict object to Java',
- fromJava=F)
-setJavaFunctionConverter(plsFitConverter, function(x,...){inherits(x,'mvr')},
- description='pls/pcr fit object to Java',
- fromJava=F)
-setJavaFunctionConverter(plsPredictConverter, function(x,...){inherits(x,'plsregressionprediction')},
- description='pls/pcr predict object to Java',
- fromJava=F)
-
-buildLM <- function(modelname, params) {
- # params is a java.util.HashMap containing the parameters
- # we need to extract them and add them to this environment
- paramlist <- hashmap.to.list(params)
- attach(paramlist)
-
- # x will come in as a double[][]
- x <- matrix(unlist(x), nrow=length(x), byrow=TRUE)
-
- # assumes y ~ all columns of x
- d <- data.frame(y=y,x)
- assign(modelname, lm(y~., d, weights=weights), pos=1)
- detach(paramlist)
- get(modelname)
-}
-
-predictLM <- function( modelname, params) {
- # params is a java.util.HashMap containing the parameters
- # we need to extract them and add them to this environment
- paramlist <- hashmap.to.list(params)
- attach(paramlist)
-
- newx <- data.frame( matrix(unlist(newdata), nrow=length(newdata), byrow=TRUE) )
- names(newx) <- names(get(modelname)$coef)[-1]
- if (interval == '' || !(interval %in% c('confidence','prediction')) ) {
- interval = 'confidence'
- }
- preds <- predict( get(modelname), newx, se.fit = TRUE, interval=interval);
- class(preds) <- 'lmregprediction'
-
- detach(paramlist)
- preds
-}
-
-buildCNN <- function(modelname, params) {
- paramlist <- hashmap.to.list(params)
- attach(paramlist)
-
- x <- matrix(unlist(x), nrow=length(x), byrow=TRUE)
- y <- matrix(unlist(y), nrow=length(y), byrow=TRUE)
- if (nrow(x) != nrow(y)) {
- stop('The number of observations in x & y dont match')
- }
-
- ninput <- ncol(x)
- nhidden <- size
- noutput <- ncol(y)
- nwt <- (ninput*nhidden) + (nhidden*noutput) + nhidden + noutput
-
- if (class(weights) == 'logical' && !weights) weights <- rep(1, nrow(y))
- if (class(subset) == 'logical' && !subset) subset <- 1:nrow(y)
- if (class(Wts) == 'logical' && !Wts) { Wts <- runif(nwt) }
- if (class(mask) == 'logical' && !mask) { mask <- rep(TRUE, nwt) }
-
- assign(modelname,
- nnet(x,y,weights=weights,size=size,Wts=Wts,mask=mask,linout=linout,
- entropy=entropy,softmax=softmax,censored=censored,skip=skip,rang=rang,
- decay=decay,maxit=maxit,Hess=Hess,trace=trace,MaxNWts=MaxNWts,
- abstol=abstol,reltol=reltol), pos=1)
-
- detach(paramlist)
- get(modelname)
-}
-
-
-buildCNNClass <- function(modelname, params) {
- paramlist <- hashmap.to.list(params)
- attach(paramlist)
-
- x <- matrix(unlist(x), nrow=length(x), byrow=TRUE)
- y <- factor(unlist(y)) # y will come in as a single vector
- if (nrow(x) != length(y)) { stop('The number of observations in x & y dont match') }
-
- ninput <- ncol(x)
- nhidden <- size
- if (length(levels(y)) == 2) noutput <- 1
- else noutput = length(levels(y))
-
- nwt <- (ninput*nhidden) + (nhidden*noutput) + nhidden + noutput
- if (class(weights) == 'logical' && !weights) weights <- rep(1, length(y))
- if (class(subset) == 'logical' && !subset) subset <- 1:length(y)
- if (class(Wts) == 'logical' && !Wts) { Wts <- runif(nwt) }
- if (class(mask) == 'logical' && !mask) { mask <- rep(TRUE, nwt) }
-
-
- assign(modelname,
- nnet(y~., data=data.frame(y=y,x=x),weights=weights,size=size,Wts=Wts,mask=mask,linout=linout,
- softmax=softmax,censored=censored,skip=skip,rang=rang,
- decay=decay,maxit=maxit,Hess=Hess,trace=trace,MaxNWts=MaxNWts,
- abstol=abstol,reltol=reltol), pos=1)
-
- detach(paramlist)
- get(modelname)
-}
-
-predictCNN <- function(modelname, params) {
- # Since buildCNN should have been called before this
- # we dont bother loading the nnet library
- paramlist <- hashmap.to.list(params)
- attach(paramlist)
-
- newx <- data.frame( matrix(unlist(newdata), nrow=length(newdata), byrow=TRUE) )
- names(newx) <- get(modelname)$coefnames
- if (type == '' || !(type %in% c('raw','class')) ) {
- type = 'raw'
- }
-
-
- preds <- predict( get(modelname), newdata=newx, type=type);
- class(preds) <- 'cnnregprediction'
-
- detach(paramlist)
- preds
-}
-predictCNNClass <- function(modelname, params) {
- # Since buildCNNClass should have been called before this
- # we dont bother loading the nnet library
- paramlist <- hashmap.to.list(params)
- attach(paramlist)
-
- newx <- data.frame( matrix(unlist(newdata), nrow=length(newdata), byrow=TRUE) )
- names(newx) <- get(modelname)$coefnames
- if (type == '' || !(type %in% c('raw','class')) ) {
- type = 'raw'
- }
-
- preds <- predict( get(modelname), newdata=newx, type=type);
- class(preds) <- 'cnnclsprediction'
- detach(paramlist)
- preds
-}
-
-buildPLS <- function(modelname, params) {
- library(pls.pcr)
- paramlist <- hasmap.to.list(params)
- attach(paramlist)
-
- x <- matrix(unlist(x), nrow=length(x), byrow=TRUE)
- y <- matrix(unlist(y), nrow=length(y), byrow=TRUE)
- if (nrow(x) != nrow(y)) { stop('The number of observations in x & y dont match') }
-
- if (!ncomp) {
- ncomp <- 1:ncol(x)
- } else {
- ncomp <- unlist(ncomp)
- }
-
- if (!(method %in% c('PCR','SIMPLS','kernelPLS'))) {
- stop('Invalid methopd specification')
- }
- if (!(validation %in% c('none','CV'))) {
- stop('Invalid validation sepcification')
- }
-
- if (niter == 0 && validation == 'CV') {
- niter = nrow(y)
- }
-
-
- # We should do this since when both grpsize and niter are specified niter
- # is used. So if grpsize comes in as 0 (which will be the default setting)
- # we specify only niter and if not zero we use grpsize and ignore niter
- if (grpsize != 0) {
- assign(modelname,
- pls(x=x,y=y,ncomp=ncomp,method=method,validation=validation,grpsize=grpsize),
- pos=1)
- } else {
- assign(modelname,
- pls(x=x,y=y,ncomp=ncomp,method=method,validation=validation,niter=niter),
- pos=1)
- }
- detach(paramlist)
- get(modelname)
-}
-predictPLS <- function(modelname, params) {
- paramlist <- hashmap.to.list(params)
- attach(paramlist)
-
- newX <- matrix(unlist(newX), nrow=length(x), byrow=TRUE)
- model <- get(modelname)
- if (ncol(newX) != model$nvar) {
- stop('The number of independent variables in the new data does not match that specified during building')
- }
- if (nlv == FALSE) {
- preds <- predict(model, newX)
- } else {
- preds <- predict(model, newX, nlv)
- }
- class(preds) <- 'plsregressionprediction'
- preds
-}
diff --git a/src/main/org/openscience/cdk/qsar/model/data/cnn_3.R b/src/main/org/openscience/cdk/qsar/model/data/cnn_3.R
deleted file mode 100644
index 732c923..0000000
--- a/src/main/org/openscience/cdk/qsar/model/data/cnn_3.R
+++ /dev/null
@@ -1,83 +0,0 @@
-#############################################
-# CNN regression fit/predict converters
-#############################################
-cnnSummaryConverter <-
-function(obj,...)
-{
- .JNew('org.openscience.cdk.qsar.model.R.CNNRegressionModelSummary',
- obj$n, obj$entropy, obj$softmax, obj$censored, obj$value, obj$residuals)
-}
-
-cnnFitConverter <-
-function(obj,...)
-{
- noutput <- ncol(obj$fitted)
- nobs <- nrow(obj$fitted)
- if ('Hessian' %in% names(obj)) {
- .JNew('org.openscience.cdk.qsar.model.R.CNNRegressionModelFit',
- noutput,nobs, obj$wts, obj$fitted, obj$residuals, obj$value, obj$Hessian)
- } else {
- .JNew('org.openscience.cdk.qsar.model.R.CNNRegressionModelFit',
- noutput, nobs,obj$wts, obj$fitted, obj$residuals, obj$value)
- }
-}
-cnnPredictConverter <-
-function(obj,...) {
- # The obj we get is actually a 'matrix' but we set its class
- # to cnnregprediction so that SJava would send it specifically
- # to us. So we should convert obj back to class 'matrix' so
- # that SJava can send it correctly to the Java side
- class(obj) <- 'matrix'
- .JNew('org.openscience.cdk.qsar.model.R.CNNRegressionModelPredict',
- ncol(obj), obj)
-}
-buildCNN <- function(modelname, params) {
- library(nnet)
- paramlist <- hashmap.to.list(params)
- attach(paramlist)
-
- x <- matrix(unlist(x), nrow=length(x), byrow=TRUE)
- y <- matrix(unlist(y), nrow=length(y), byrow=TRUE)
- if (nrow(x) != nrow(y)) {
- stop('The number of observations in x & y dont match')
- }
-
- ninput <- ncol(x)
- nhidden <- size
- noutput <- ncol(y)
- nwt <- (ninput*nhidden) + (nhidden*noutput) + nhidden + noutput
-
- if (class(weights) == 'logical' && !weights) weights <- rep(1, nrow(y))
- if (class(subset) == 'logical' && !subset) subset <- 1:nrow(y)
- if (class(Wts) == 'logical' && !Wts) { Wts <- runif(nwt) }
- if (class(mask) == 'logical' && !mask) { mask <- rep(TRUE, nwt) }
-
- assign(modelname,
- nnet(x,y,weights=weights,size=size,Wts=Wts,mask=mask,linout=linout,
- entropy=entropy,softmax=softmax,censored=censored,skip=skip,rang=rang,
- decay=decay,maxit=maxit,Hess=Hess,trace=trace,MaxNWts=MaxNWts,
- abstol=abstol,reltol=reltol), pos=1)
-
- detach(paramlist)
- get(modelname)
-}
-predictCNN <- function(modelname, params) {
- # Since buildCNN should have been called before this
- # we dont bother loading the nnet library
- paramlist <- hashmap.to.list(params)
- attach(paramlist)
-
- newx <- data.frame( matrix(unlist(newdata), nrow=length(newdata), byrow=TRUE) )
- names(newx) <- get(modelname)$coefnames
- if (type == '' || !(type %in% c('raw','class')) ) {
- type = 'raw'
- }
-
-
- preds <- predict( get(modelname), newdata=newx, type=type);
- class(preds) <- 'cnnregprediction'
-
- detach(paramlist)
- preds
-}
-
diff --git a/src/main/org/openscience/cdk/qsar/model/data/cnn_4.R b/src/main/org/openscience/cdk/qsar/model/data/cnn_4.R
deleted file mode 100644
index ea3050b..0000000
--- a/src/main/org/openscience/cdk/qsar/model/data/cnn_4.R
+++ /dev/null
@@ -1,80 +0,0 @@
-#############################################
-# CNN classification fit/predict converters
-#############################################
-cnnClassFitConverter <-
-function(obj,...)
-{
- noutput <- ncol(obj$fitted)
- nobs <- nrow(obj$fitted)
- if ('Hessian' %in% names(obj)) {
- .JNew('org.openscience.cdk.qsar.model.R.CNNClassificationModelFit',
- noutput,nobs, obj$wts, obj$fitted, obj$residuals, obj$value, obj$Hessian)
- } else {
- .JNew('org.openscience.cdk.qsar.model.R.CNNClassificationModelFit',
- noutput, nobs,obj$wts, obj$fitted, obj$residuals, obj$value)
- }
-}
-cnnClassPredictConverter <-
-function(obj,...) {
- # The obj we get is actually a 'matrix' but we set its class
- # to cnnclsprediction so that SJava would send it specifically
- # to us. So we should convert obj back to class 'matrix' so
- # that SJava can send it correctly to the Java side
- if (class(obj[1]) == 'numeric') {
- class(obj) <- 'matrix'
- .JNew('org.openscience.cdk.qsar.model.R.CNNClassificationModelPredict',
- ncol(obj), obj)
- } else if (class(obj[1]) == 'character') {
- class(obj) <- 'character'
- .JNew('org.openscience.cdk.qsar.model.R.CNNClassificationModelPredict', obj)
- }
-}
-buildCNNClass <- function(modelname, params) {
- library(nnet)
- paramlist <- hashmap.to.list(params)
- attach(paramlist)
-
- x <- matrix(unlist(x), nrow=length(x), byrow=TRUE)
- y <- factor(unlist(y)) # y will come in as a single vector
- if (nrow(x) != length(y)) { stop('The number of observations in x & y dont match') }
-
- ninput <- ncol(x)
- nhidden <- size
- if (length(levels(y)) == 2) noutput <- 1
- else noutput = length(levels(y))
-
- nwt <- (ninput*nhidden) + (nhidden*noutput) + nhidden + noutput
- if (class(weights) == 'logical' && !weights) weights <- rep(1, length(y))
- if (class(subset) == 'logical' && !subset) subset <- 1:length(y)
- if (class(Wts) == 'logical' && !Wts) { Wts <- runif(nwt) }
- if (class(mask) == 'logical' && !mask) { mask <- rep(TRUE, nwt) }
-
-
- assign(modelname,
- nnet(y~., data=data.frame(y=y,x=x),weights=weights,size=size,Wts=Wts,mask=mask,linout=linout,
- softmax=softmax,censored=censored,skip=skip,rang=rang,
- decay=decay,maxit=maxit,Hess=Hess,trace=trace,MaxNWts=MaxNWts,
- abstol=abstol,reltol=reltol), pos=1)
-
- detach(paramlist)
- get(modelname)
-}
-
-predictCNNClass <- function(modelname, params) {
- # Since buildCNNClass should have been called before this
- # we dont bother loading the nnet library
- paramlist <- hashmap.to.list(params)
- attach(paramlist)
-
- newx <- data.frame( y=1, x=matrix(unlist(newdata), nrow=length(newdata), byrow=TRUE) )
- names(newx) <- get(modelname)$coefnames
- if (type == '' || !(type %in% c('raw','class')) ) {
- type = 'raw'
- }
-
- preds <- predict( get(modelname), newdata=newx, type=type);
- class(preds) <- 'cnnclsprediction'
- detach(paramlist)
- preds
-}
-
diff --git a/src/main/org/openscience/cdk/qsar/model/data/helper.R b/src/main/org/openscience/cdk/qsar/model/data/helper.R
deleted file mode 100644
index db8c20a..0000000
--- a/src/main/org/openscience/cdk/qsar/model/data/helper.R
+++ /dev/null
@@ -1,141 +0,0 @@
-#
-# Copyright (C) 2004-2007 The Chemistry Development Kit (CDK) project
-#
-# Contact: cdk-devel@lists.sourceforge.net
-#
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public License
-# as published by the Free Software Foundation; either version 2.1
-# of the License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-
-
-# load some common packages that will always be installed
-
-library(MASS)
-library(nnet)
-
-# some helper functions
-saveModel <- function(modelname, filename) {
- resp <- try( do.call('save',list(modelname,file=filename)), silent=TRUE )
-}
-
-loadModel <- function(filename) {
- modelname <- load(filename, .GlobalEnv)
- list(model=get(modelname) , name=modelname)
-}
-
-unserializeModel <- function(modelstr, modelname) {
- zzz <- paste(paste(modelstr, sep='', collapse='\n'), '\n', sep='', collapse='')
- assign(modelname, unserialize(zzz), pos=1)
- list(model=get(modelname) , name=modelname)
-}
-
-buildLM <- function(modelname, paramlist) {
- attach(paramlist)
-
- # assumes y ~ all columns of x
- d <- data.frame(y=y,x)
- assign(modelname, lm(y~., d, weights=weights), pos=1)
- detach(paramlist)
- get(modelname)
-}
-
-predictLM <- function( modelname, paramlist) {
- attach(paramlist)
-
- newx <- data.frame( newdata )
- names(newx) <- names(get(modelname)$coef)[-1]
- if (interval == '' || !(interval %in% c('confidence','prediction')) ) {
- interval = 'confidence'
- }
- preds <- predict( get(modelname), newx, se.fit = TRUE, interval=interval);
- detach(paramlist)
- preds
-}
-
-buildCNN <- function(modelname, paramlist) {
- attach(paramlist)
- if (nrow(x) != nrow(y)) {
- stop('The number of observations in x & y dont match')
- }
-
- ninput <- ncol(x)
- nhidden <- size
- noutput <- ncol(y)
- nwt <- (ninput*nhidden) + (nhidden*noutput) + nhidden + noutput
-
- if (class(weights) == 'logical' && !weights) weights <- rep(1, nrow(y))
- if (class(subset) == 'logical' && !subset) subset <- 1:nrow(y)
- if (class(Wts) == 'logical' && !Wts) { Wts <- runif(nwt) }
- if (class(mask) == 'logical' && !mask) { mask <- rep(TRUE, nwt) }
-
- assign(modelname,
- nnet(x,y,weights=weights,size=size,Wts=Wts,mask=mask,linout=linout,
- entropy=entropy,softmax=softmax,censored=censored,skip=skip,rang=rang,
- decay=decay,maxit=maxit,Hess=Hess,trace=trace,MaxNWts=MaxNWts,
- abstol=abstol,reltol=reltol), pos=1)
-
- detach(paramlist)
- get(modelname)
-}
-
-
-buildCNNClass <- function(modelname, paramlist) {
-
- attach(paramlist)
- y <- factor(unlist(y)) # y will come in as a single vector
- if (nrow(x) != length(y)) { stop('The number of observations in x & y dont match') }
-
- ninput <- ncol(x)
- nhidden <- size
- if (length(levels(y)) == 2) noutput <- 1
- else noutput = length(levels(y))
-
- nwt <- (ninput*nhidden) + (nhidden*noutput) + nhidden + noutput
- if (class(weights) == 'logical' && !weights) weights <- rep(1, length(y))
- if (class(subset) == 'logical' && !subset) subset <- 1:length(y)
- if (class(Wts) == 'logical' && !Wts) { Wts <- runif(nwt) }
- if (class(mask) == 'logical' && !mask) { mask <- rep(TRUE, nwt) }
-
-
- assign(modelname,
- nnet(y~., data=data.frame(y=y,x=x),weights=weights,size=size,Wts=Wts,mask=mask,linout=linout,
- softmax=softmax,censored=censored,skip=skip,rang=rang,
- decay=decay,maxit=maxit,Hess=Hess,trace=trace,MaxNWts=MaxNWts,
- abstol=abstol,reltol=reltol), pos=1)
-
- detach(paramlist)
- get(modelname)
-}
-
-predictCNN <- function(modelname, paramlist) {
- attach(paramlist)
- newx <- data.frame( newdata )
- names(newx) <- get(modelname)$coefnames
- if (type == '' || !(type %in% c('raw','class')) ) {
- type = 'raw'
- }
- preds <- predict( get(modelname), newdata=newx, type=type);
- detach(paramlist)
- preds
-}
-predictCNNClass <- function(modelname, paramlist) {
- attach(paramlist)
- newx <- data.frame( newdata )
- names(newx) <- get(modelname)$coefnames
- if (type == '' || !(type %in% c('raw','class')) ) {
- type = 'raw'
- }
- preds <- predict( get(modelname), newdata=newx, type=type);
- detach(paramlist)
- preds
-}
diff --git a/src/main/org/openscience/cdk/qsar/model/data/init_1.R b/src/main/org/openscience/cdk/qsar/model/data/init_1.R
deleted file mode 100644
index c78aa5f..0000000
--- a/src/main/org/openscience/cdk/qsar/model/data/init_1.R
+++ /dev/null
@@ -1,42 +0,0 @@
-require(SJava)
-if (!isJavaInitialized()) {
- .JavaInit()
-}
-library(nnet)
-library(pls.pcr)
-options(show.error.messages=FALSE)
-
-saveModel <- function(modelname, filename) {
- resp <- try( do.call('save',list(modelname,file=filename)), silent=TRUE )
-}
-
-loadModel <- function(filename) {
- modelname <- load(filename, .GlobalEnv)
- get(modelname)
-}
-loadModel.getName <- function(filename) {
- modelname <- load(filename)
- modelname
-}
-unserializeModel <- function(modelstr, modelname) {
- zzz <- paste(paste(modelstr, sep='', collapse='\n'), '\n', sep='', collapse='')
- assign(modelname, unserialize(zzz), pos=1)
- get(modelname)
-}
-summaryModel <- function(modelname) {
- summary(get(modelname))
-}
-
-
-hashmap.to.list <- function(params) {
- keys <- unlist(params$keySet()$toArray())
- paramlist <- list()
- cnt <- 1
- for (key in keys) {
- paramlist[[cnt]] <- params$get(key)
- cnt <- cnt+1
- }
- names(paramlist) <- keys
- paramlist
-}
-
diff --git a/src/main/org/openscience/cdk/qsar/model/data/lm_2.R b/src/main/org/openscience/cdk/qsar/model/data/lm_2.R
deleted file mode 100644
index b53bb5c..0000000
--- a/src/main/org/openscience/cdk/qsar/model/data/lm_2.R
+++ /dev/null
@@ -1,59 +0,0 @@
-#############################################
-# Linear regression fit/predict converters
-#############################################
-lmFitConverter <-
-function(obj,...)
-{
- .JNew('org.openscience.cdk.qsar.model.R.LinearRegressionModelFit',
- obj$coefficients, obj$residuals,
- obj$fitted, obj$rank, obj$df.residual)
-}
-lmPredictConverter <- function(preds,...) {
- .JNew('org.openscience.cdk.qsar.model.R.LinearRegressionModelPredict',
- preds$fit[,1], preds$se.fit, preds$fit[,2], preds$fit[,3],
- preds$df, preds$residual.scale)
-}
-lmSummaryConverter <- function(sumry,...) {
- .JNew('org.openscience.cdk.qsar.model.R.LinearRegressionModelSummary',
- sumry$residuals, sumry$coeff,
- sumry$sigma, sumry$r.squared, sumry$adj.r.squared,
- sumry$df[2], sumry$fstatistic,
- attr(sumry$coeff, 'dimnames')[[1]],
- attr(sumry$coeff, 'dimnames')[[2]])
-}
-
-buildLM <- function(modelname, params) {
- # params is a java.util.HashMap containing the parameters
- # we need to extract them and add them to this environment
- paramlist <- hashmap.to.list(params)
- attach(paramlist)
-
- # x will come in as a double[][]
- x <- matrix(unlist(x), nrow=length(x), byrow=TRUE)
-
- # assumes y ~ all columns of x
- d <- data.frame(y=y,x)
- assign(modelname, lm(y~., d, weights=weights), pos=1)
- detach(paramlist)
- get(modelname)
-}
-
-predictLM <- function( modelname, params) {
- # params is a java.util.HashMap containing the parameters
- # we need to extract them and add them to this environment
- paramlist <- hashmap.to.list(params)
- attach(paramlist)
-
- newx <- data.frame( matrix(unlist(newdata), nrow=length(newdata), byrow=TRUE) )
- names(newx) <- names(get(modelname)$coef)[-1]
- if (interval == '' || !(interval %in% c('confidence','prediction')) ) {
- interval = 'confidence'
- }
- preds <- predict( get(modelname), newx, se.fit = TRUE, interval=interval);
- class(preds) <- 'lmregprediction'
-
- detach(paramlist)
- preds
-}
-
-
diff --git a/src/main/org/openscience/cdk/qsar/model/data/pls_5.R b/src/main/org/openscience/cdk/qsar/model/data/pls_5.R
deleted file mode 100644
index 9789c4c..0000000
--- a/src/main/org/openscience/cdk/qsar/model/data/pls_5.R
+++ /dev/null
@@ -1,84 +0,0 @@
-#############################################
-# PLS fit/predict converter
-#############################################
-plsFitConverter <-
-function(obj,...) {
- tmp <- .JNew('org.openscience.cdk.qsar.model.R.PLSRegressionModelFit',
- obj$nobj, obj$nvar, obj$npred, obj$ncomp, obj$method)
- tmp$setTrainingData(
- obj$training$B, obj$training$Ypred, obj$training$RMS,
- obj$training$Xscores, obj$training$Xload,
- obj$training$Yscores, obj$training$Yload)
- tmp$PLSRegressionModelSetTrain()
- if ('validat' %in% names(obj)) {
- # Add validation fields
- tmp$setValidationData(
- obj$valid$niter, obj$valid$nLV,
- obj$valid$Ypred, obj$valid$RMS, obj$valid$RMS.sd, obj$valid$R2)
- }
- tmp
-}
-plsPredictConverter <-
-function(obj,...) {
- class(obj) <- 'matrix'
- .JNew('org.openscience.cdk.qsar.model.R.PLSRegressionModelPredict',ncol(obj),obj)
-}
-buildPLS <- function(modelname, params) {
- library(pls.pcr)
- paramlist <- hasmap.to.list(params)
- attach(paramlist)
-
- x <- matrix(unlist(x), nrow=length(x), byrow=TRUE)
- y <- matrix(unlist(y), nrow=length(y), byrow=TRUE)
- if (nrow(x) != nrow(y)) { stop('The number of observations in x & y dont match') }
-
- if (!ncomp) {
- ncomp <- 1:ncol(x)
- } else {
- ncomp <- unlist(ncomp)
- }
-
- if (!(method %in% c('PCR','SIMPLS','kernelPLS'))) {
- stop('Invalid methopd specification')
- }
- if (!(validation %in% c('none','CV'))) {
- stop('Invalid validation sepcification')
- }
-
- if (niter == 0 && validation == 'CV') {
- niter = nrow(y)
- }
-
-
- # We should do this since when both grpsize and niter are specified niter
- # is used. So if grpsize comes in as 0 (which will be the default setting)
- # we specify only niter and if not zero we use grpsize and ignore niter
- if (grpsize != 0) {
- assign(modelname,
- pls(x=x,y=y,ncomp=ncomp,method=method,validation=validation,grpsize=grpsize),
- pos=1)
- } else {
- assign(modelname,
- pls(x=x,y=y,ncomp=ncomp,method=method,validation=validation,niter=niter),
- pos=1)
- }
- detach(paramlist)
- get(modelname)
-}
-predictPLS <- function(modelname, params) {
- paramlist <- hashmap.to.list(params)
- attach(paramlist)
-
- newX <- matrix(unlist(newX), nrow=length(x), byrow=TRUE)
- model <- get(modelname)
- if (ncol(newX) != model$nvar) {
- stop('The number of independent variables in the new data does not match that specified during building')
- }
- if (nlv == FALSE) {
- preds <- predict(model, newX)
- } else {
- preds <- predict(model, newX, nlv)
- }
- class(preds) <- 'plsregressionprediction'
- preds
-}
diff --git a/src/main/org/openscience/cdk/qsar/model/data/register_999.R b/src/main/org/openscience/cdk/qsar/model/data/register_999.R
deleted file mode 100644
index 810adc8..0000000
--- a/src/main/org/openscience/cdk/qsar/model/data/register_999.R
+++ /dev/null
@@ -1,31 +0,0 @@
-setJavaFunctionConverter(lmFitConverter, function(x,...){inherits(x,'lm')},
- description='lm fit object to Java',
- fromJava=F)
-setJavaFunctionConverter(lmPredictConverter, function(x,...){inherits(x,'lmregprediction')},
- description='lm predict object to Java',
- fromJava=F)
-setJavaFunctionConverter(lmSummaryConverter, function(x,...){inherits(x,'summary.lm')},
- description='lm summary object to Java',
- fromJava=F)
-setJavaFunctionConverter(cnnClassFitConverter, function(x,...){inherits(x,'nnet.formula')},
- description='cnn (nnet) classification fit object to Java',
- fromJava=F)
-setJavaFunctionConverter(cnnSummaryConverter, function(x,...){inherits(x,'summary.nnet')},
- description='cnn (nnet) summary object to Java',
- fromJava=F)
-setJavaFunctionConverter(cnnFitConverter, function(x,...){inherits(x,'nnet')},
- description='cnn (nnet) fit object to Java',
- fromJava=F)
-setJavaFunctionConverter(cnnClassPredictConverter, function(x,...){inherits(x,'cnnclsprediction')},
- description='cnn (nnet) classification predict object to Java',
- fromJava=F)
-setJavaFunctionConverter(cnnPredictConverter, function(x,...){inherits(x,'cnnregprediction')},
- description='cnn (nnet) predict object to Java',
- fromJava=F)
-setJavaFunctionConverter(plsFitConverter, function(x,...){inherits(x,'mvr')},
- description='pls/pcr fit object to Java',
- fromJava=F)
-setJavaFunctionConverter(plsPredictConverter, function(x,...){inherits(x,'plsregressionprediction')},
- description='pls/pcr predict object to Java',
- fromJava=F)
-
diff --git a/src/main/org/openscience/chemojava/libio/openbabel/OpenBabelConvert.java b/src/main/org/openscience/chemojava/libio/openbabel/OpenBabelConvert.java
deleted file mode 100644
index acdbfa5..0000000
--- a/src/main/org/openscience/chemojava/libio/openbabel/OpenBabelConvert.java
+++ /dev/null
@@ -1,186 +0,0 @@
-/* $RCSfile$
- * $Author: egonw $
- * $Date: 2008-03-22 14:36:05 +0100 (Sat, 22 Mar 2008) $
- * $Revision: 10494 $
- *
- * Copyright (C) 2004-2007 Miguel Rojas
- *
- * Contact: cdk-devel@lists.sourceforge.net
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-package org.openscience.chemojava.libio.openbabel;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.InputStreamReader;
-
-import org.openscience.cdk.tools.LoggingTool;
-
-/**
- * File writer thats convert input files with OpenBabel.
- * It has the option to obtain the file as ChemFile.
- * First, it's necessary that you install correct Openbabel.
- * It was tested with OpenBabel-1.100.2. More information in
- * http://openbabel.sourceforge.net/babel.shtml.
- *
- * Currently supported types:
- *
- *
- * | alc(Alchemy file) | prep(Amber PREP file) | bs(Ball & Stick file) |
- *
- *
- * | caccrt(Cacao Cartesian file) | cacint(Cacao Internal file) | cache(CAChe MolStruct file) |
- *
- *
- * | ccc(CCC file) | c3d1(Chem3D Cartesian 1 file) | c3d2(Chem3D Cartesian 2 file) |
- *
- * | ct(ChemDraw Connection Table file) | cht(Chemtool file) | cml(Chemical Markup Language file) |
- *
- * | crk2d(CRK2D: Chemical Resource Kit 2D file) | crk3d(CRK3D: Chemical Resource Kit 3D file) | cssr(CSD CSSR file) |
- *
- * | box(Dock 3.5 Box file) | dmol(DMol3 Coordinates file) | feat(Feature file) |
- *
- * | fh(Fenske-Hall Z-Matrix file) | gam(GAMESS Output file) | gamin(GAMESS Input file) |
- *
- * | gamout(GAMESS Output file) | gcart(Gaussian Cartesian file) | gau(Gaussian Input file) |
- *
- * | gpr(Ghemical Project file) | mm1gp(Ghemical MM file) | qm1gp(Ghemical QM file) |
- *
- * | gr96a(GROMOS96(A)file) | gr96n(GROMOS96(nm)file) | hin(HyperChem HIN file) |
- *
- * | jout(Jaguar Output file) | bin(OpenEye Binary file) | mmd(MacroModel file) |
- *
- * | mmod(MacroModel file) | out(MacroModel file) | dat(MacroModel file) |
- *
- * | car(MSI Biosym/Insight II CAR file) | sdf(MDL Isis SDF file) | sd(MDL Isis SDF file) |
- *
- * | mdl(MDL Molfile file) | mol(MDL Molfile file) | mopcrt(MOPAC Cartesian file |
- *
- * | mopout(MOPAC Output file) | mmads(MMADS file) | mpqc(MPQC file) |
- *
- * | bgf(MSI BGF file) | nwo(NWChem Output file | pdb(PDB file) |
- *
- * | ent(PDB file) | pqs(PQS file) | qcout(Q-Chem Output file) |
- *
- * | res(ShelX file) | ins(ShelX file) | smi(SMILES file) |
- *
- * | fix(SMILES Fix file) | report(Report file) | pov(POV-Ray Output file) |
- *
- *
- * | mol2(Sybyl Mol2 file) | unixyz(UniChem XYZ file) | vmol(ViewMol file) |
- *
- * | | |
- *
- * | xed(XED file) | xyz(XYZ file) | zin(ZINDO Input file) |
- *
- * | | |
- *
- *
- *
- * @author Miguel Rojas
- * @cdk.module libio-openbabel
- * @cdk.svnrev $Revision: 10494 $
- */
-public class OpenBabelConvert {
-
- /* PATH to babel */
- private String pathToBabel = null;
-
- private final static LoggingTool logger = new LoggingTool(OpenBabelConvert.class);
-
- /**
- * Constructor of the ConvertOpenBabel
- *
- * @param path String which set the path of the progam OpenBabel. It will necessary
- * for windows systems.
- */
- public OpenBabelConvert() throws Exception {
- this(null);
- }
-
- /**
- * Constructor of the ConvertOpenBabel
- *
- * @param path String which set the path of the progam OpenBabel. It will necessary
- * for windows systems.
- */
- public OpenBabelConvert(String path) throws Exception {
- pathToBabel = getPath(path);
- }
-
- /**
- * Call the babel program.
- */
- public void convert(File inputFile, String inputType,
- File outputFile, String outputType,
- String addOptions) {
- try {
- String[] args = new String[6];
- args[0] = pathToBabel;
- args[1] = "-i" + inputType;
- args[2] = inputFile.getCanonicalPath();
- args[3] = "-o" + outputType;
- args[4] = outputFile.getCanonicalPath();
- args[5] = addOptions == null ? "" : addOptions;
-
- Process p = Runtime.getRuntime().exec(args);
- BufferedReader r = new BufferedReader(
- new InputStreamReader(p.getInputStream()));
- String x;
- while ((x = r.readLine()) != null) {
- System.out.println(x);
- }
- r.close();
- p.waitFor();
- } catch (Exception e) {
- System.err.println(e);
- System.err.println("There is some problem with babel. Check: ");
- System.err.println("PATH: " + pathToBabel);
- }
- }
-
- /**
- * Searches the babel executable from a set up reasonable picks.
- *
- * @param suggestedPath
- * @return
- */
- private static String getPath(String suggestedPath) throws Exception {
- if (suggestedPath != null) {
- File suggestion = new File(suggestedPath);
- if (suggestion.exists()) {
- return suggestedPath;
- }
- }
- String[] possibilities = {
- "C:/Programme/openbabel-2.0.0awins/babel.exe", // likely??
- "/usr/bin/babel", // most POSIX systems
- "/usr/local/bin/babel" // private installation
- };
- File path = null;
- for (int i=0; i
- * 2009 Egon Willighagen
- *
- * Contact: cdk-devel@lists.sourceforge.net
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-package org.openscience.chemojava.libio.weka;
-
-import java.io.BufferedReader;
-import java.io.FileReader;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.Reader;
-import java.io.StringReader;
-import java.util.Vector;
-
-import weka.classifiers.Classifier;
-import weka.core.Instance;
-import weka.core.Instances;
-
-
-/**
- * Weka class is a library which use the program WEKA: a Machine Learning Project.
- * To inizalizate weka class is typically done like:
- * Classifier lr = new LinearRegression();
- * weka.setDataset("/some/where/dataTraining.arff", lr);
- * String testARFF = "/some/where/dataTest.arff";
- * double[] result = weka.getPrediction(testARFF);
- *
- * You have also the possibility to introduce directly values, done like:
- *
- * Classifier lr = new LinearRegression();
- * String[] attrib = {"X1","X2","X3","Y" };
- * int[] typAttrib = {Weka.NUMERIC,Weka.NUMERIC,Weka.NUMERIC,Weka.NUMERIC, };
- * weka.setDataset(attrib, typAttrib, y, x, lr);
- * double[] resultY = weka.getPrediction(testX);
- *
- *
- * @author Miguel Rojas
- * @cdk.created 2006-05-23
- * @cdk.license GPL
- * @cdk.module qsarweka
- * @cdk.svnrev $Revision: 9162 $
- * @cdk.keyword weka, machine learning
- * @cdk.depends weka.jar
- */
-public class Weka {
-
- public static final int NUMERIC = 0;
- public static final int NOMINAL = 1;
- public static final int REGULAR = 2;
- public static final int DATA = 3;
- public static final int STRING = 4;
-
- /** type of classifier*/
- private Classifier classifier = null;
- /** Class for handling an ordered set of weighted instances*/
- private Instances instances;
- /**String with the attribut class*/
- private String[] classAttrib = null;
-
- /**
- * Constructor of the Weka
- */
- public Weka() {
- }
-
- /**
- * Set the file format arff to analize which contains the dataset and the type of classifier.
- *
- *
- * @param pathTable Path of the dataset file format arff to train
- * @param classifier Type of Classifier
- * @return The Instances value
- * @throws Exception
- */
- public Instances setDataset(InputStream table, Object object) throws Exception{
- if (object instanceof Classifier) this.classifier = (Classifier) object;
- // OK, a bit dirty, but we cannot be sure we can rewind, so we cache
- // the content here locally as String
- String tableContent = streamAsString(table);
- BufferedReader insr = new BufferedReader(
- new StringReader(tableContent)
- );
- this.classAttrib = extractClass(insr);
- return createInstance(new StringReader(tableContent));
- }
-
- private String streamAsString(InputStream input) {
- StringBuffer out = new StringBuffer();
- try {
- byte[] b = new byte[4096];
- for (int n; (n = input.read(b)) != -1;) {
- out.append(new String(b, 0, n));
- }
- } catch (IOException e) {
- e.printStackTrace();
- }
- return out.toString();
- }
-
- private Instances createInstance(Reader insr) throws Exception{
- instances = new Instances(insr);
- instances.setClassIndex(instances.numAttributes() - 1);
- if (classifier != null) classifier.buildClassifier(instances);
- return instances;
- }
- /**
- * Extract the class name attribute manually from the file
- *
- * @param input The BufferedReader
- * @return Array with the class attributes
- */
- private String[] extractClass(BufferedReader input) {
- Vector attribV = new Vector();
- String[] classAttrib = null;
- String line;
- try {
- while ((line = input.readLine()) != null) {
- if(line.startsWith("@attribute class {")){
- int strlen = line.length();
- String line_ = null;
- out:
- for (int i = 0; i < strlen; i++){
- switch(line.charAt(i)){
- case '{':
- line_ = line.substring(i);
- break out;
- }
- }
- StringBuffer edited = new StringBuffer();
- strlen = line_.length();
- edited = new StringBuffer();
- for (int i = 0; i < strlen; i++){
- switch(line_.charAt(i)){
- case '"':
- break;
- case ',':
- attribV.add(edited.toString());
- edited = new StringBuffer();
- break;
- case '{':
- break;
- case '}':
- attribV.add(edited.toString());
- break;
- default:
- edited.append(line_.charAt(i));
- }
- }
-
- }
- }
- if(attribV.size() > 0){
- classAttrib = new String[attribV.size()];
- attribV.copyInto(classAttrib);
- }
- } catch (IOException e) {
- e.printStackTrace();
- }
- return classAttrib;
- }
- /**
- *
- * Set the array which contains the dataset and the type of classifier. This method
- * will be used for classifier which work with numerical values.
- *
- * @param attrib String with the attribut names
- * @param typAttrib Attribute type: NUMERICAL or NOMINAL.
- * @param y An array containing the dependent variable. It is possible numeric or string.
- * @param x An array of independent variables. The observations should be in the rows
- * and the variables should be in the columns
- * @param classifier Type of Classifier
- * @return The Instances value
- * @throws Exception
- */
- public Instances setDataset(String[] attrib, int[] typAttrib, Object[]y, Object[][] x, Classifier classifier) throws Exception{
- return setDataset(attrib, typAttrib ,null,y,x,classifier);
- }
- /**
- *
- * Set the array which contains the dataset and the type of classifier.This method
- * will be used for classifier which work with String values.
- *
- * @param attrib String with the attribut names.
- * @param typAttrib Attribute type: NUMERICAL or NOMINAL.
- * @param classAttrib String with the attribut class.
- * @param y An array containing the dependent variable. It is possible numeric or string.
- * @param x An array of independent variables. The observations should be in the rows
- * and the variables should be in the columns
- * @param classifier Type of classifier
- * @return The Instances value
- * @throws Exception
- */
- public Instances setDataset(String[] attrib, int[] typAttrib, String[] classAttrib, Object[]y, Object[][] x, Object object) throws Exception{
- if (object instanceof Classifier) this.classifier = (Classifier) object;
- this.classAttrib = classAttrib;
- Reader reader = createAttributes(attrib,typAttrib,classAttrib,y,x);
- instances = new Instances(reader);
- instances.setClassIndex(instances.numAttributes() - 1);
- if (object instanceof Classifier) classifier.buildClassifier(instances);
- return instances;
- }
- /**
- * Return of the predicted value
- *
- * @param value An array of independent variables which contians the values with whose to test
- * @return Result of the prediction
- * @throws Exception
- */
- public Object[] getPrediction(Object[][] value) throws Exception{
- Object[] object = new Object[value.length];
- for(int j = 0 ; j < value.length ; j++){
- Instance instance = new Instance(instances.numAttributes());
- instance.setDataset(instances);
- for(int i = 0 ; i < value[0].length ; i++){
- if(instance.attribute(i).isNumeric())
- instance.setValue(i, ((Double)value[j][i]).doubleValue());
- else if(instance.attribute(i).isString())
- instance.setValue(i, ""+value[j][i]);
- }
- instance.setValue(value[0].length, 0.0);
- double result = classifier.classifyInstance(instance);
- if(classAttrib != null){
- object[j] = classAttrib[(new Double(result)).intValue()];
- }
- else
- object[j] = new Double(result);
- }
- return object;
- }
- /**
- * Return of the predicted value. The file is found into src.
- *
- * @param pathARFF path of the file format arff which contains the values with whose to test.
- * @return Result of the prediction.
- * @throws Exception
- */
- public Object[] getPredictionCDK(String pathARFF) throws Exception{
- InputStream ins = this.getClass().getClassLoader().getResourceAsStream(pathARFF);
- Reader insr = new InputStreamReader(ins);
- return createObjects(new BufferedReader(insr));
- }
-
- /**
- * Return of the predicted value.
- *
- * @param pathARFF path of the file format arff which contains the values with whose to test.
- * @return Result of the prediction.
- * @throws Exception
- */
- public Object[] getPrediction(String pathARFF) throws Exception{
- BufferedReader br = new BufferedReader(new FileReader(pathARFF));
-
- return createObjects(br);
- }
- /**
- * initiate the object.
- *
- * @param br The BufferedReader
- * @return An Array of objects: classAttrib and Double
- * @throws Exception
- */
- private Object[] createObjects(BufferedReader br) throws Exception{
- Instances test = new Instances(br);
- Object[] object = new Object[test.numInstances()];
- for(int i = 0 ; i < test.numInstances(); i++){
- double result = classifier.classifyInstance(test.instance(i));
- if(classAttrib != null)
- object[i] = classAttrib[(new Double(result)).intValue()];
- else
- object[i] = new Double(result);
- }
- return object;
- }
- /**
- * create a Reader with necessary attributes to initiate a Instances for weka.
- *
- * @param attrib String with the attribut class
- * @param typAttrib Attribute type: NOMINAL or NUMERIC.
- * @param y An array containing the independent variable.
- * @param x An array of dependent variables.
- * @return The Reader containing the attributes
- * @throws IOException
- */
- private Reader createAttributes(String[] attrib, int[] typAttrib, String[] classAttrib, Object[]y, Object[][] x) throws IOException{
- String string ="@relation table1 \n";
- for(int i = 0; i < attrib.length ; i++){
- string += ("@attribute "+attrib[i]);
- if(typAttrib[i] == NUMERIC)
- string += " numeric \n";
- else if(typAttrib[i] == NOMINAL)
- string += " string \n";
- else if(typAttrib[i] == DATA)
- string += " data \n";
- else if(typAttrib[i] == REGULAR)
- string += " regular \n";
- else if(typAttrib[i] == STRING)
- string += " string \n";
- }
-
- if(classAttrib != null){
- string += "@attribute class ";
- string += "{";
- for(int i = 0; i < classAttrib.length ; i++){
- string += (classAttrib[i]);
- if(i != classAttrib.length -1)
- string += ",";
- }
- string += "}\n";
- }
-
- string += ("@data ");
- if(x != null && y != null){
- for(int j = 0 ; j < x.length; j++){
- for(int i = 0 ; i < x[0].length ; i++){
- if (x[j][i] instanceof Double) {
- if (((Double)x[j][i]).isNaN()) {
- string += "?,";
- continue;
- }
- }
- string += x[j][i]+",";
- }
- string += y[j]+ ", \n";
- }
- }
- if(x != null && y == null){
- for(int j = 0 ; j < x.length; j++){
- for(int i = 0 ; i < x[0].length ; i++){
- if (x[j][i] instanceof Double) {
- if (((Double)x[j][i]).isNaN()) {
- string += "?,";
- continue;
- }
- }
- string += x[j][i]+",";
- }
- string += "\n";
- }
- }
- return (Reader)new StringReader(string);
- }
-
- /**
- * get the value which belongs this position in the classification
- * @param result Position in the classification
- * @return Real value
- */
- /*private double[] getValue(double[] result) {
- Instance instance = instances.instance(0);
- instance.numClasses();
- return null;
- }*/
-}
diff --git a/src/main/org/openscience/chemojava/qsar/model/weka/DensityBasedClustererModel.java b/src/main/org/openscience/chemojava/qsar/model/weka/DensityBasedClustererModel.java
deleted file mode 100644
index 37052c5..0000000
--- a/src/main/org/openscience/chemojava/qsar/model/weka/DensityBasedClustererModel.java
+++ /dev/null
@@ -1,335 +0,0 @@
-/* $RCSfile$
- * $Author: mariobaseda $
- * $Date: 2007-01-03 17:57:14 +0100 (We, 03 Jan 2007) $
- * $Revision: 5602 $
- *
- * Copyright (C) 2007 by Mario Baseda
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- * All we ask is that proper credit is given for our work, which includes
- * - but is not limited to - adding the above copyright notice to the beginning
- * of your source code files, and to any copyright notice that you may distribute
- * with programs based on this work.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-package org.openscience.chemojava.qsar.model.weka;
-
-import weka.core.Instance;
-import weka.core.Instances;
-import weka.clusterers.DensityBasedClusterer;
-import weka.clusterers.EM;
-
-import org.openscience.chemojava.libio.weka.Weka;
-import org.openscience.cdk.qsar.model.QSARModelException;
-
-import java.io.BufferedReader;
-import java.io.FileReader;
-
-/**
- * Abstract clustering model that produces an estimate of the membership in each cluster using the Weka library.
- * The use of this class is shown in the following code snippet
- *
- * try {
- * DensityBasedClusterer dbc = new EM();
- * dbc.setData(attrib, typAttrib, classAttrib, data);
- * dbc.build();
- *
- * } catch (Exception e) {
- * System.out.println(e.toString());
- * }
- *
- *
- * Other option is set the data from a file format arff.
- *
- * DensityBasedClusterer dbc = new EM();
- * dbc.setData("/some/where/dataTraining.arff");
- *
- *
- * @author Mario Baseda
- * @cdk.require weka.jar
- * @cdk.license GPL
- * @cdk.module qsarweka
- * @cdk.svnrev $Revision: 9162 $
- * @see Weka
- *
- * @cdk.keyword clusterers, DensityBasedClusterer
- */
-public class DensityBasedClustererModel {
- /** An instance containing the data which should be classifier as arff file.*/
- private static Instances data;
- /**new instance of clusterer*/
- private DensityBasedClusterer dbc = new EM();
- private Weka weka = new Weka();
- /**An Array Object containing the independent variable*/
- private Object[][] newX = null;
- /**A String specifying the path of the file, format arff,
- * which contains the new independent values*/
- private String pathNewX = null;
-
- /**
- * Constructor of the NaiveBayesModel object
- */
- public DensityBasedClustererModel(){}
-
- /**
- * Set the variable data to the arff file
- *
- * @param filename The path of the file, format arff
- * @throws Exception if the parameters are of the wrong type for the given modeling function
- *
- */
- public void setData(String filename)throws Exception {
- data = weka.setDataset(
- this.getClass().getClassLoader().getResourceAsStream(filename),
- null
- );
- }
-
- /**
- * Parses a given list of data to an arff file, and set the variable data on it.
- *
- * @param attrib A string array containing the attributs
- * @param typAttrib Attribute type: NUMERICAL or NOMINAL.
- * @param classAttrib String with a list of the attribut class.
- * @param y An array containing the dependent variable (class value).
- * @param x A 2D array containing the independent variable (for example: qsar results).
- * @throws Exception if it is unable to parse the data
- *
- */
- public void setData(String[] attrib, int[] typAttrib, String[] classAttrib, Object[] y, Object[][] x) throws Exception{
- data = weka.setDataset(attrib,typAttrib,classAttrib,y,x,null);
- }
-
- /**
- * Builds the model.
- *
- * @throws Exception if errors occur in data types. See
- * the corresponding method in subclasses of this class for further details.
- */
- public void build() throws Exception {
- try {
-// build the model
- dbc.buildClusterer(data);
- } catch (Exception e) {
- e.printStackTrace();
- }
- return;
- }
-
- /**Returns the prior probability of each cluster
- * @return A double[] containing the prior probability of each cluster
- * @throws Exception
- */
- public double[] clusterPriors() throws Exception{
- return dbc.clusterPriors();
- }
-
- /**
- * Specifies the new parameters as arff file.
- *
- * @param path A String specifying the path of the file, format arff, which contains
- * the new values.
- * @throws QSARModelException if the parameters are of the wrong type for the given modeling function
- *
- */
- public void setParameters(String path) throws QSARModelException {
- this.pathNewX = path;
- }
-
- /**
- * Specifies the new parameters as 2D array object.
- *
- * @param newX A 2D Array Object containing the new values.
- * @throws QSARModelException if the parameters are of the wrong type for the given modeling function
- */
- public void setParameters(Object[][] newX) throws QSARModelException {
- this.newX = newX;
- }
-
- /** Returns the cluster probability distribution for an instance
- *
- * @return object An Object[][] containing the cluster probability distribution for an instance
- * @throws Exception if distribution could not be computed successfully
- */
- public Object[][] distributionForInstance() throws Exception{
- Object[][] object = null;
- try{
- if(pathNewX != null){
- BufferedReader br = new BufferedReader(new FileReader(pathNewX));
- Instances test = new Instances(br);
- object = new Object[test.numInstances()][];
- for(int i = 0 ; i < test.numInstances(); i++){
- double[] result = dbc.distributionForInstance(test.instance(i));
- object[i] = new Object[result.length];
- for (int z = 0; z < result.length; z++){
- object[i][z] = new Double(result[z]);
- }
- }
- }
- else if(newX != null){
- object = new Object[newX.length][];
- for(int j = 0 ; j < newX.length ; j++){
- Instance instance = new Instance(data.numAttributes());
- instance.setDataset(data);
- for(int i = 0 ; i < newX[0].length ; i++){
- if(instance.attribute(i).isNumeric())
- instance.setValue(i, ((Double)newX[j][i]).doubleValue());
- else if(instance.attribute(i).isString())
- instance.setValue(i, ""+newX[j][i]);
- }
- instance.setValue(newX[0].length, 0.0);
- double[] result = dbc.distributionForInstance(instance);
- object[j] = new Object[result.length];
- for (int z = 0; z < result.length; z++){
- object[j][z] = new Double(result[z]);
- }
- }
- }
- } catch ( Exception e){
- e.printStackTrace();
- }
- return object;
- }
-
- /** Computes the density for a given instance
- *
- * @return object An Object[] containing the density
- * @throws Exception if the density could not be computed successfully
- */
- public Object[] logDensityForInstance() throws Exception{
- Object[] object = null;
- try{
- if(pathNewX != null){
- BufferedReader br = new BufferedReader(new FileReader(pathNewX));
- Instances test = new Instances(br);
- object = new Object[test.numInstances()];
- for(int i = 0 ; i < test.numInstances(); i++){
- double result = dbc.logDensityForInstance(test.instance(i));
- object[i] = new Double(result);
- }
- }
- else if(newX != null){
- object = new Object[newX.length];
- for(int j = 0 ; j < newX.length ; j++){
- Instance instance = new Instance(data.numAttributes());
- instance.setDataset(data);
- for(int i = 0 ; i < newX[0].length ; i++){
- if(instance.attribute(i).isNumeric())
- instance.setValue(i, ((Double)newX[j][i]).doubleValue());
- else if(instance.attribute(i).isString())
- instance.setValue(i, ""+newX[j][i]);
- }
- instance.setValue(newX[0].length, 0.0);
- double result = dbc.logDensityForInstance(instance);
-
-
- object[j] = new Double(result);
- }
- }
- } catch ( Exception e){
- e.printStackTrace();
- }
- return object;
- }
-
- /** Computes the log of the conditional density (per cluster) for a given instance
- *
- * @return object An Object [][] containing the instance to compute the density for
- * @throws Exception if the density could not be computed successfully
- */
- public Object[][] logDensityPerClusterForInstance() throws Exception{
- Object[][] object = null;
- try{
- if(pathNewX != null){
- BufferedReader br = new BufferedReader(new FileReader(pathNewX));
- Instances test = new Instances(br);
- object = new Object[test.numInstances()][];
- for(int i = 0 ; i < test.numInstances(); i++){
- double[] result = dbc.logDensityPerClusterForInstance(test.instance(i));
- object[i] = new Object[result.length];
- for (int z = 0; z < result.length; z++){
- object[i][z] = new Double(result[z]);
- }
- }
- }
- else if(newX != null){
- object = new Object[newX.length][];
- for(int j = 0 ; j < newX.length ; j++){
- Instance instance = new Instance(data.numAttributes());
- instance.setDataset(data);
- for(int i = 0 ; i < newX[0].length ; i++){
- if(instance.attribute(i).isNumeric())
- instance.setValue(i, ((Double)newX[j][i]).doubleValue());
- else if(instance.attribute(i).isString())
- instance.setValue(i, ""+newX[j][i]);
- }
- instance.setValue(newX[0].length, 0.0);
- double[] result = dbc.logDensityPerClusterForInstance(instance);
- object[j] = new Object[result.length];
- for (int z = 0; z < result.length; z++){
- object[j][z] = new Double(result[z]);
- }
- }
- }
- } catch ( Exception e){
- e.printStackTrace();
- }
- return object;
- }
-
- /** Returns the logs of the joint densities for a given instance
- *
- * @return object An Object[][] containing the array of values
- * @throws Exception if values could not be computed
- */
- public Object[][] logJointDensitiesForInstance() throws Exception{
- Object[][] object = null;
- try{
- if(pathNewX != null){
- BufferedReader br = new BufferedReader(new FileReader(pathNewX));
- Instances test = new Instances(br);
- object = new Object[test.numInstances()][];
- for(int i = 0 ; i < test.numInstances(); i++){
- double[] result = dbc.logJointDensitiesForInstance(test.instance(i));
- object[i] = new Object[result.length];
- for (int z = 0; z < result.length; z++){
- object[i][z] = new Double(result[z]);
- }
- }
- }
- else if(newX != null){
- object = new Object[newX.length][];
- for(int j = 0 ; j < newX.length ; j++){
- Instance instance = new Instance(data.numAttributes());
- instance.setDataset(data);
- for(int i = 0 ; i < newX[0].length ; i++){
- if(instance.attribute(i).isNumeric())
- instance.setValue(i, ((Double)newX[j][i]).doubleValue());
- else if(instance.attribute(i).isString())
- instance.setValue(i, ""+newX[j][i]);
- }
- instance.setValue(newX[0].length, 0.0);
- double[] result = dbc.logJointDensitiesForInstance(instance);
- object[j] = new Object[result.length];
- for (int z = 0; z < result.length; z++){
- object[j][z] = new Double(result[z]);
- }
- }
- }
- } catch ( Exception e){
- e.printStackTrace();
- }
- return object;
- }
-}
diff --git a/src/main/org/openscience/chemojava/qsar/model/weka/EMCluster.java b/src/main/org/openscience/chemojava/qsar/model/weka/EMCluster.java
deleted file mode 100644
index 5f19b88..0000000
--- a/src/main/org/openscience/chemojava/qsar/model/weka/EMCluster.java
+++ /dev/null
@@ -1,335 +0,0 @@
-/* $RCSfile$
- * $Author: mariobaseda $
- * $Date: 2007-01-03 17:57:14 +0100 (We, 03 Jan 2007) $
- * $Revision: 5602 $
- *
- * Copyright (C) 2007 by Mario Baseda
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- * All we ask is that proper credit is given for our work, which includes
- * - but is not limited to - adding the above copyright notice to the beginning
- * of your source code files, and to any copyright notice that you may distribute
- * with programs based on this work.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-package org.openscience.chemojava.qsar.model.weka;
-
-import java.io.BufferedReader;
-import java.io.FileReader;
-
-import org.openscience.chemojava.libio.weka.Weka;
-import org.openscience.cdk.qsar.model.QSARModelException;
-
-import weka.clusterers.EM;
-import weka.core.Instance;
-import weka.core.Instances;
-
-/**
- * EM assigns a probability distribution to each instance which indicates the probability of it
- * belonging to each of the clusters by using Weka library.
- * EM can decide how many clusters to create by cross validation, or you may specify apriori
- * how many clusters to generate.
- *
- * The use of this class is shown in the following code snippet
- *
- * try {
- * EM em = new EM();
- * em.setOptions(options);
- * em.setData(attrib, typAttrib, classAttrib, data);
- * em.build();
- *
- * } catch (Exception e) {
- * System.out.println(e.toString());
- * }
- *
- * Other option is set the data from a file format arff.
- *
- * EM em = new EM();
- * em.setData("/some/where/dataTraining.arff");
- *
- * Valid options are (according weka library):
- * -V: Verbose
- * -N: Specify the number of clusters to generate. If omitted, EM will use cross
- * validation to select the number of clusters automatically
- * -I: Terminate after this many iterations if EM has not converged.
- * -S: Specify random number seed
- * -M: Set the minimum allowable standard deviation for normal density calculation.
- *
- * @author Mario Baseda
- * @cdk.require weka.jar
- * @cdk.license GPL
- * @cdk.module qsarweka
- * @cdk.svnrev $Revision: 9162 $
- * @see Weka
- *
- * @cdk.keyword clusterers
- * @cdk.keyword EM
- */
-public class EMCluster {
- /** An instance containing the data which should be clustering as arff file.*/
- private static Instances data;
- /**Array of strings containing the options*/
- private String[] options;
- /**new instance of clusterer*/
- private EM em = new EM();
- Weka weka = new Weka();
- /**An Array Object containing the independent variable*/
- private Object[][] newX = null;
- /**A String specifying the path of the file, format arff,
- * which contains the new independent values*/
- private String pathNewX = null;
- /** results of the density*/
- private Object[][] results = null;
-
- /**
- * Constructor of the EMCluster object
- */
- public EMCluster(){}
-
- /**
- * Parses a given list of options. The parameters are determited from weka.
- *
- * Valid options are (according weka library):
- * -V: Verbose
- * -N: Specify the number of clusters to generate. If omitted, EM will use cross
- * validation to select the number of clusters automatically
- * -I: Terminate after this many iterations if EM has not converged.
- * -S: Specify random number seed
- * -M: Set the minimum allowable standard deviation for normal density calculation.
- *
- * @param options An Array of strings containing the options
- * @throws Exception if the options are of the wrong type for the given modeling function
- *
- */
- public void setOptions(String[] options) throws Exception{
- this.options = options;
- }
-
- /**
- * Set the variable data to the arff file
- *
- * @param filename The path of the file, format arff
- * @throws Exception if the parameters are of the wrong type for the given modeling function
- *
- */
- public void setData(String filename)throws Exception{
- data = weka.setDataset(
- this.getClass().getClassLoader().getResourceAsStream(filename),
- null
- );
- }
-
- /**
- * Parses a given list of data to an arff file, and set the variable data on it.
- *
- * @param attrib A string array containing the attributs
- * @param typAttrib Attribute type: NUMERICAL or NOMINAL.
- * @param classAttrib String with a list of the attribut classes.
- * @param y An array containing the dependent variable (class value).
- * @param x A 2D array containing the independent variable (for example: qsar results).
- * @throws Exception if it is unable to parse the data
- *
- */
- public void setData(String[] attrib, int[] typAttrib, String[] classAttrib, Object[] y, Object[][] x) throws Exception{
- data = weka.setDataset(attrib,typAttrib,classAttrib,y,x,null);
- }
-
- /**
- * Builds the cluster.
- *
- * @throws Exception if errors occur in data types. See
- * the corresponding method in subclasses of this class for further details.
- */
- public void build() throws Exception {
- try {
- if(options != null)
-// set the options
- em.setOptions(options);
-// build the clusterer
- em.buildClusterer(data);
- } catch (Exception e) {
- e.printStackTrace();
- }
- return;
- }
-
- /**
- * Specifies the new parameters as arff file.
- *
- * @param path A String specifying the path of the file, format arff, which contains
- * the new values.
- * @throws QSARModelException if the parameters are of the wrong type for the given modeling function
- *
- */
- public void setParameters(String path) throws QSARModelException {
- this.pathNewX = path;
- }
-
- /**
- * Specifies the new parameters as 2D array object.
- *
- * @param newX A 2D Array Object containing the new values.
- * @throws QSARModelException if the parameters are of the wrong type for the given modeling function
- */
- public void setParameters(Object[][] newX) throws QSARModelException {
- this.newX = newX;
- }
-
- /** Computes the log of the conditional density (per cluster) for a given instance
- * This function only returns meaningful results if the build
- * method of this class has been called.
- *
- * @throws Exception if the density could not be computed successfully
- */
- public void logDensityPerClusterForInstance() throws Exception{
- try{
- if(pathNewX != null){
- BufferedReader br = new BufferedReader(new FileReader(pathNewX));
- Instances test = new Instances(br);
- results = new Object[test.numInstances()][];
- for(int i = 0 ; i < test.numInstances(); i++){
- double[] result = em.distributionForInstance(test.instance(i));
- results[i] = new Object[result.length];
- for (int z = 0; z < result.length; z++){
- results[i][z] = new Double(result[z]);
- }
- }
- }
- else if(newX != null){
- results = new Object[newX.length][];
- for(int j = 0 ; j < newX.length ; j++){
- Instance instance = new Instance(data.numAttributes());
- instance.setDataset(data);
- for(int i = 0 ; i < newX[0].length ; i++){
- if(instance.attribute(i).isNumeric())
- instance.setValue(i, ((Double)newX[j][i]).doubleValue());
- else if(instance.attribute(i).isString())
- instance.setValue(i, ""+newX[j][i]);
- }
- instance.setValue(newX[0].length, 0.0);
- double[] result = em.distributionForInstance(instance);
- results[j] = new Object[result.length];
- for (int z = 0; z < result.length; z++){
- results[j][z] = new Double(result[z]);
- }
- }
- }
- } catch ( Exception e){
- e.printStackTrace();
- }
- }
-
- /**
- * Returns the log of the conditional density (per cluster) for a given instance.
- * This function only returns meaningful results if the logDensityPerClusterForInstance
- * method of this class has been called.
- * @return An Object[][] containing the density
- */
- public Object[][] getLogDensityPerClusterForInstance(){
- return results;
- }
-
- /**
- * Returns the number of clusters.
- *
- * @return the number of builded cluster as integer
- * @throws Exception if number of clusters could not be returned successfully
- */
- public int numberOfCluster() throws Exception{
- return em.numberOfClusters();
- }
-
- /**
- * Returns the cluster priors.
- *
- * @return the prior probability for each cluster as double array
- */
- public double[] clusterPriors() {
- return em.getClusterPriors();
- }
-
- /**
- * Return the normal distributions for the cluster models
- *
- * @return the normal distributions for the cluster models as double 3D array
- */
- public double[][][] getClusterModelsNumericAtts(){
- return em.getClusterModelsNumericAtts();
- }
-
- /**
- * Return the priors for the clusters
- *
- * @return the prior for the clusters as double array
- */
- public double[] getClusterPriors() {
- return em.getClusterPriors();
- }
-
- /**
- * Get debug mode
- *
- * @return true or false
- */
- public boolean getDebug() {
- return em.getDebug();
- }
-
- /**
- * Get the maximum number of iterations
- *
- * @return the number of iterations as integer
- */
- public int getMaxIterations() {
- return em.getMaxIterations();
- }
-
- /**
- * Get the minimum allowable standard deviation.
- *
- * @return the minumum allowable standard deviation as double
- */
- public double getMinStdDev() {
- return em.getMinStdDev();
- }
-
- /**
- * Get the number of clusters
- *
- * @return the number of clusters as integer
- */
- public int getNumClusters() {
- return em.getNumClusters();
- }
-
- /**
- * Gets the current settings of EM
- *
- * @return an array of strings containing the options
-
- */
- public String[] getOptions() {
- return em.getOptions();
- }
-
- /**
- * Get the random number seed
- *
- * @return the seed as integer
-
- */
- public int getSeed() {
- return em.getSeed();
- }
-}
diff --git a/src/main/org/openscience/chemojava/qsar/model/weka/FilterSupervisedResample.java b/src/main/org/openscience/chemojava/qsar/model/weka/FilterSupervisedResample.java
deleted file mode 100644
index f0921d7..0000000
--- a/src/main/org/openscience/chemojava/qsar/model/weka/FilterSupervisedResample.java
+++ /dev/null
@@ -1,203 +0,0 @@
-/* $RCSfile$
- * $Author: mariobaseda $
- * $Date: 2007-01-03 17:57:14 +0100 (We, 03 Jan 2007) $
- * $Revision: 5602 $
- *
- * Copyright (C) 2007 by Mario Baseda
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- * All we ask is that proper credit is given for our work, which includes
- * - but is not limited to - adding the above copyright notice to the beginning
- * of your source code files, and to any copyright notice that you may distribute
- * with programs based on this work.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-package org.openscience.chemojava.qsar.model.weka;
-
-import org.openscience.chemojava.libio.weka.Weka;
-
-import weka.core.Instance;
-import weka.core.Instances;
-import weka.filters.supervised.instance.Resample;
-
-/**
- * Produces a random subsample of a dataset using sampling with
- * replacement by using Weka library. The original dataset must fit entirely in memory.
- * The number of instances in the generated dataset may be specified.
- * The dataset must have a nominal class attribute.
- * If not, use the unsupervised version. The filter can be made to
- * maintain the class distribution in the subsample, or to bias the
- * class distribution toward a uniform distribution. When used in batch
- * mode, subsequent batches are not resampled
- *
- * The use of this class is shown in the following code snippet
- *
- * try {
- * FilterSupervisedResample filter = new FilterSupervisedResample();
- * filter.setOptions(options);
- * filter.setData(attrib, typAttrib, classAttrib, data);
- * filter.build();
- *
- * } catch (Exception e) {
- * System.out.println(e.toString());
- * }
- *
- * Other option is set the data from a file format arff.
- *
- * FilterSupervisedResample filter = new FilterSupervisedResample();
- * filter.setData("/some/where/dataTraining.arff");
- *
- * Valid options are (according weka library):
- * -S num: Specify the random number seed (default 1).
- * -B num: Specify a bias towards uniform class distribution.
- * 0 = distribution in input data, 1 = uniform class distribution (default 0).
- * -Z percent: Specify the size of the output dataset, as a percentage of the input dataset (default 100).
- *
- *
- *
- * @author Mario Baseda
- * @cdk.require weka.jar
- * @cdk.license GPL
- * @cdk.module qsarweka
- * @cdk.svnrev $Revision: 9162 $
- * @see Weka
- *
- * @cdk.keyword Filter
- * @cdk.keyword SupervisedFilter, Resample
- */
-public class FilterSupervisedResample {
- /** An instance containing the data as arff file.*/
- private static Instances data;
- /**Array of strings containing the options*/
- private String[] options;
- private Resample filter = new Resample();
- Weka weka = new Weka();
-
- /**
- * Constructor of the FilterSupervisedResample object
- */
- public FilterSupervisedResample(){}
-
- /**
- * Parses a given list of options. The parameters are determited from weka.
- *
- * Valid options are (according weka library):
- * -S num: Specify the random number seed (default 1).
- * -B num: Specify a bias towards uniform class distribution.
- * 0 = distribution in input data, 1 = uniform class distribution (default 0).
- * -Z percent: Specify the size of the output dataset, as a percentage of the input dataset (default 100).
- *
- * @param options An Array of strings containing the options
- * @throws Exception if the options are of the wrong type for the given modeling function
- *
- */
- public void setOptions(String[] options) throws Exception{
- this.options = options;
- }
- /**
- * Set the variable data to the arff file
- *
- * @param filename The path of the file, format arff
- * @throws Exception if the parameters are of the wrong type
- *
- */
- public void setData(String filename)throws Exception{
- data = weka.setDataset(
- this.getClass().getClassLoader().getResourceAsStream(filename),
- null
- );
- }
-
- /**
- * Parses a given list of data to an arff file, and set the variable data on it.
- *
- * @param attrib A string array containing the attributs
- * @param typAttrib Attribute type: NUMERICAL or NOMINAL.
- * @param classAttrib String with a list of the attribut classes.
- * @param y An array containing the dependent variable (class value).
- * @param x A 2D array containing the independent variable (for example: qsar results).
- * @throws Exception if it is unable to parse the data
- *
- */
- public void setData(String[] attrib, int[] typAttrib, String[] classAttrib, Object[] y, Object[][] x) throws Exception{
- data = weka.setDataset(attrib,typAttrib,classAttrib,y,x,null);
- }
-
- /**
- * The execute method for the supervised resample filter
- *
- * @throws Exception if errors occur in data types. See
- * the corresponding method in subclasses of this class for further details.
- */
- public void build() throws Exception {
- try {
- if(options != null)
-// set the options
- filter.setOptions(options);
-// setInputFormat
- filter.setInputFormat(data);
- for (int i = 0; i < data.numInstances(); i++) {
- filter.input(data.instance(i));
- }
-// Signify that this batch of input to the filter is finished
- filter.batchFinished();
- Instances newData = filter.getOutputFormat();
- Instance processed;
- while ((processed = filter.output()) != null) {
- newData.add(processed);
- }
- } catch (Exception exception){
- exception.printStackTrace();
- }
- }
-
- /**
- * Returns the bias towards a uniform class. A value of 0 leaves
- * the class distribution as-is, a value of 1 ensures the class
- * distributions are uniform in the output data.
- *
- * @return the current bias as double
- */
- public double getBiasToUniformClass(){
- return filter.getBiasToUniformClass();
- }
-
- /**
- * Returns the current settings of the filter.
- *
- * @return an array of strings containing the options
- */
- public String[] getOptions(){
- return filter.getOptions();
- }
-
- /**
- * Returns the random number seed.
- *
- * @return the random number seed as integer
- */
- public int getRandomSeed(){
- return filter.getRandomSeed();
- }
-
- /**
- * Returns the subsample size as a percentage of the original set.
- *
- * @return the subsample size as double
- */
- public double getSampleSizePercent(){
- return filter.getSampleSizePercent();
- }
-
-}
diff --git a/src/main/org/openscience/chemojava/qsar/model/weka/FilterUnSupervisedRandomize.java b/src/main/org/openscience/chemojava/qsar/model/weka/FilterUnSupervisedRandomize.java
deleted file mode 100644
index 8a71eb5..0000000
--- a/src/main/org/openscience/chemojava/qsar/model/weka/FilterUnSupervisedRandomize.java
+++ /dev/null
@@ -1,168 +0,0 @@
-/* $RCSfile$
- * $Author: mariobaseda $
- * $Date: 2007-01-03 17:57:14 +0100 (We, 03 Jan 2007) $
- * $Revision: 5602 $
- *
- * Copyright (C) 2007 by Mario Baseda
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- * All we ask is that proper credit is given for our work, which includes
- * - but is not limited to - adding the above copyright notice to the beginning
- * of your source code files, and to any copyright notice that you may distribute
- * with programs based on this work.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-package org.openscience.chemojava.qsar.model.weka;
-
-import org.openscience.chemojava.libio.weka.Weka;
-
-import weka.core.Instance;
-import weka.core.Instances;
-import weka.filters.unsupervised.instance.Randomize;
-
-/**
- * This filter randomly shuffles the order of instances passed through it.
- * The random number generator is reset with the seed value whenever setInputFormat() is called.
- *
- * The use of this class is shown in the following code snippet
- *
- * try {
- * FilterUnSupervisedRandomize filter = new FilterUnSupervisedRandomize();
- * filter.setOptions(options);
- * filter.setData(attrib, typAttrib, classAttrib, data);
- * filter.build();
- *
- * } catch (Exception e) {
- * System.out.println(e.toString());
- * }
- *
- * Other option is set the data from a file format arff.
- *
- * FilterUnSupervisedRandomize filter = new FilterUnSupervisedRandomize();
- * filter.setData("/some/where/dataTraining.arff");
- *
- * Valid options are (according weka library):
- * -S num: Specify the random number seed (default 42).
- *
- * @author Mario Baseda
- * @cdk.require weka.jar
- * @cdk.license GPL
- * @cdk.module qsarweka
- * @cdk.svnrev $Revision: 9162 $
- * @see Weka
- *
- * @cdk.keyword Filter
- * @cdk.keyword UnSupervisedFilter, Randomize
- */
-public class FilterUnSupervisedRandomize {
- /** An instance containing the data as arff file.*/
- private static Instances data;
- /**Array of strings containing the options*/
- private String[] options;
- private Randomize filter = new Randomize();
- Weka weka = new Weka();
-
- /**
- * Constructor of the FilterUnSupervisedRandomize object
- */
- public FilterUnSupervisedRandomize(){}
-
- /**
- * Parses a given list of options. The parameters are determited from weka.
- *
- * Valid options are (according weka library):
- * -S num: Specify the random number seed (default 42).
- *
- * @param options An Array of strings containing the options
- * @throws Exception if the options are of the wrong type for the given modeling function
- *
- */
- public void setOptions(String[] options) throws Exception{
- this.options = options;
- }
- /**
- * Set the variable data to the arff file
- *
- * @param filename The path of the file, format arff
- * @throws Exception if the parameters are of the wrong type for the given modeling function
- *
- */
- public void setData(String filename)throws Exception{
- data = weka.setDataset(
- this.getClass().getClassLoader().getResourceAsStream(filename),
- null
- );
- }
-
- /**
- * Parses a given list of data to an arff file, and set the variable data on it.
- *
- * @param attrib A string array containing the attributs
- * @param typAttrib Attribute type: NUMERICAL or NOMINAL.
- * @param classAttrib String with a list of the attribut classes.
- * @param y An array containing the dependent variable (class value).
- * @param x A 2D array containing the independent variable (for example: qsar results).
- * @throws Exception if it is unable to parse the data
- *
- */
- public void setData(String[] attrib, int[] typAttrib, String[] classAttrib, Object[] y, Object[][] x) throws Exception{
- data = weka.setDataset(attrib,typAttrib,classAttrib,y,x,null);
- }
-
- /**
- * The execute method for the unsupervised randomize filter
- *
- * @throws Exception if errors occur in data types. See
- * the corresponding method in subclasses of this class for further details.
- */
- public void build() throws Exception {
- try {
- if(options != null)
-// set the options
- filter.setOptions(options);
-// setInputFormat
- filter.setInputFormat(data);
- for (int i = 0; i < data.numInstances(); i++) {
- filter.input(data.instance(i));
- }
-// Signify that this batch of input to the filter is finished
- filter.batchFinished();
- Instances newData = filter.getOutputFormat();
- Instance processed;
- while ((processed = filter.output()) != null) {
- newData.add(processed);
- }
- } catch (Exception exception){
- exception.printStackTrace();
- }
- }
-
- /**
- * Returns the current settings of the filter.
- *
- * @return an array of strings containing the options
- */
- public String[] getOptions(){
- return filter.getOptions();
- }
-
- /**
- * Returns the random number seed.
- *
- * @return the random number seed as integer
- */
- public int getRandomSeed(){
- return filter.getRandomSeed();
- }
-}
diff --git a/src/main/org/openscience/chemojava/qsar/model/weka/FilterUnSupervisedResample.java b/src/main/org/openscience/chemojava/qsar/model/weka/FilterUnSupervisedResample.java
deleted file mode 100644
index fdcfc67..0000000
--- a/src/main/org/openscience/chemojava/qsar/model/weka/FilterUnSupervisedResample.java
+++ /dev/null
@@ -1,185 +0,0 @@
-/* $RCSfile$
- * $Author: mariobaseda $
- * $Date: 2007-01-03 17:57:14 +0100 (We, 03 Jan 2007) $
- * $Revision: 5602 $
- *
- * Copyright (C) 2007 by Mario Baseda
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- * All we ask is that proper credit is given for our work, which includes
- * - but is not limited to - adding the above copyright notice to the beginning
- * of your source code files, and to any copyright notice that you may distribute
- * with programs based on this work.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-package org.openscience.chemojava.qsar.model.weka;
-
-import org.openscience.chemojava.libio.weka.Weka;
-
-import weka.core.Instance;
-import weka.core.Instances;
-import weka.filters.unsupervised.instance.Resample;
-
-/**
- * Produces a random subsample of a dataset using sampling with replacement.
- * The original dataset must fit entirely in memory. The number of instances
- * in the generated dataset may be specified. When used in batch mode, subsequent
- * batches are not resampled
- *
- * The use of this class is shown in the following code snippet
- *
- * try {
- * FilterUnSupervisedResample filter = new FilterUnSupervisedResample();
- * filter.setOptions(options);
- * filter.setData(attrib, typAttrib, classAttrib, data);
- * filter.build();
- *
- * } catch (Exception e) {
- * System.out.println(e.toString());
- * }
- *
- * Other option is set the data from a file format arff.
- *
- * FilterUnSupervisedResample filter = new FilterUnSupervisedResample();
- * filter.setData("/some/where/dataTraining.arff");
- *
- * Valid options are (according weka library):
- * -S num: Specify the random number seed (default 1).
- * -Z percent: Specify the size of the output dataset, as a percentage of the input dataset (default 100).
- *
- *
- *
- * @author Mario Baseda
- * @cdk.require weka.jar
- * @cdk.license GPL
- * @cdk.module qsarweka
- * @cdk.svnrev $Revision: 9162 $
- * @see Weka
- *
- * @cdk.keyword Filter
- * @cdk.keyword UnSupervisedFilter, Resample
- */
-public class FilterUnSupervisedResample {
- /** An instance containing the data as arff file.*/
- private static Instances data;
- /**Array of strings containing the options*/
- private String[] options;
- private Resample filter = new Resample();
- Weka weka = new Weka();
-
- /**
- * Constructor of the FilterUnSupervisedResample object
- */
- public FilterUnSupervisedResample(){}
-
- /**
- * Parses a given list of options. The parameters are determited from weka.
- *
- * Valid options are (according weka library):
- * -S num: Specify the random number seed (default 1).
- * -B num: Specify a bias towards uniform class distribution.
- * 0 = distribution in input data, 1 = uniform class distribution (default 0).
- * -Z percent: Specify the size of the output dataset, as a percentage of the input dataset (default 100).
- *
- * @param options An Array of strings containing the options
- * @throws Exception if the options are of the wrong type for the given modeling function
- *
- */
- public void setOptions(String[] options) throws Exception{
- this.options = options;
- }
- /**
- * Set the variable data to the arff file
- *
- * @param filename The path of the file, format arff
- * @throws Exception if the parameters are of the wrong type for the given modeling function
- *
- */
- public void setData(String filename)throws Exception{
- data = weka.setDataset(
- this.getClass().getClassLoader().getResourceAsStream(filename),
- null
- );
- }
-
- /**
- * Parses a given list of data to an arff file, and set the variable data on it.
- *
- * @param attrib A string array containing the attributs
- * @param typAttrib Attribute type: NUMERICAL or NOMINAL.
- * @param classAttrib String with a list of the attribut classes.
- * @param y An array containing the dependent variable (class value).
- * @param x A 2D array containing the independent variable (for example: qsar results).
- * @throws Exception if it is unable to parse the data
- *
- */
- public void setData(String[] attrib, int[] typAttrib, String[] classAttrib, Object[] y, Object[][] x) throws Exception{
- data = weka.setDataset(attrib,typAttrib,classAttrib,y,x,null);
- }
-
- /**
- * The execute method for the unsupervised resample filter
- *
- * @throws Exception if errors occur in data types. See
- * the corresponding method in subclasses of this class for further details.
- */
- public void build() throws Exception {
- try {
- if(options != null)
-// set the options
- filter.setOptions(options);
-// setInputFormat
- filter.setInputFormat(data);
- for (int i = 0; i < data.numInstances(); i++) {
- filter.input(data.instance(i));
- }
-// Signify that this batch of input to the filter is finished
- filter.batchFinished();
- Instances newData = filter.getOutputFormat();
- Instance processed;
- while ((processed = filter.output()) != null) {
- newData.add(processed);
- }
- } catch (Exception exception){
- exception.printStackTrace();
- }
- }
-
- /**
- * Returns the current settings of the filter.
- *
- * @return an array of strings containing the options
- */
- public String[] getOptions(){
- return filter.getOptions();
- }
-
- /**
- * Returns the random number seed.
- *
- * @return the random number seed as integer
- */
- public int getRandomSeed(){
- return filter.getRandomSeed();
- }
-
- /**
- * Returns the subsample size as a percentage of the original set.
- *
- * @return the subsample size as double
- */
- public double getSampleSizePercent(){
- return filter.getSampleSizePercent();
- }
-}
diff --git a/src/main/org/openscience/chemojava/qsar/model/weka/IWekaModel.java b/src/main/org/openscience/chemojava/qsar/model/weka/IWekaModel.java
deleted file mode 100644
index d5a7224..0000000
--- a/src/main/org/openscience/chemojava/qsar/model/weka/IWekaModel.java
+++ /dev/null
@@ -1,93 +0,0 @@
-/* $Revision: 6228 $ $Author: egonw $ $Date: 2006-05-11 18:34:42 +0200 (Thu, 11 May 2006) $
- *
- * Copyright (C) 2006-2007 Miguel Rojas
- *
- * Contact: cdk-devel@lists.sourceforge.net
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-package org.openscience.chemojava.qsar.model.weka;
-
-import org.openscience.cdk.qsar.model.IModel;
-import org.openscience.cdk.qsar.model.QSARModelException;
-
-/** Base class for modeling classes that use weka methods as the backend.
- *
- * This cannot be directly instantiated as its sole function is
- * to initialize the weka algorithms.
- * Any class that builds models using weka algorithms should be a subclass of this.
- *
- * @author Miguel Rojas
- * @cdk.module qsarweka
- * @cdk.svnrev $Revision: 9162 $
- */
-public interface IWekaModel extends IModel {
-
- /**
- * Parses a given list of options. The parameters are determited from weka. And are specific for each
- * algorithm.
- *
- * @param options An Array of strings containing the options
- * @throws QSARModelException if the options are of the wrong type for the given modeling function
- *
- */
- abstract public void setOptions(String[] options) throws QSARModelException;
-
- /**
- * Get the current settings of the classifier. The parameters are determited from weka. And are specific for each
- * algorithm.
- *
- * @return An Array of strings containing the options
- * @throws QSARModelException if the options are of the wrong type for the given modeling function
- *
- */
- abstract public String[] getOptions() throws QSARModelException;
-
-
- /**
- * Specifies the parameters to predict. In this case will be the dependent varibles.
- * It's found into cdk.src
- *
- * @param path A String specifying the path of the file, format arff, which contians
- * the dependent values with whose to predict. It's found into cdk.src
- * @throws QSARModelException if the parameters are of the wrong type for the given modeling function
- *
- */
- abstract public void setParametersCDK(String path) throws QSARModelException;
-
-
- /**
- * Specifies the parameters to predict. In this case will be the independent varibles.
- *
- * @param x A Array Object containing the independent variable.
- * @throws QSARModelException if the parameters are of the wrong type for the given modeling function
- */
- abstract public void setParameters(Object[][] x) throws QSARModelException;
-
-
- /**
- * Returns the predicted values for the prediction set.
- *
- * This function only returns meaningful results if the predict
- * method of this class has been called.
- *
- * @return A Object[] containing the predicted values
- */
- abstract public Object[] getPredictPredicted();
-
-
-}
-
-
diff --git a/src/main/org/openscience/chemojava/qsar/model/weka/J48WModel.java b/src/main/org/openscience/chemojava/qsar/model/weka/J48WModel.java
deleted file mode 100644
index dca9fc5..0000000
--- a/src/main/org/openscience/chemojava/qsar/model/weka/J48WModel.java
+++ /dev/null
@@ -1,296 +0,0 @@
-/* $Revision: 6228 $ $Author: egonw $ $Date: 2006-05-11 18:34:42 +0200 (Thu, 11 May 2006) $
- *
- * Copyright (C) 2006-2007 Miguel Rojas
- *
- * Contact: cdk-devel@lists.sourceforge.net
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-package org.openscience.chemojava.qsar.model.weka;
-
-import org.openscience.chemojava.libio.weka.Weka;
-import org.openscience.cdk.qsar.model.QSARModelException;
-
-import weka.classifiers.trees.J48;
-/**
- * A modeling class that provides the Quinlan's model C4.5 known as J48
- * using Weka library.
- * The use of this class is shown in the following code snippet
- *
- * try {
- * J48WModel j48 = new J48WModel(typAttrib,classAttrib,x,y); or J48WModel j48 = new J48WModel(typAttrib,classAttrib,x,y,attrib);
- * j48.setOptions(options);
- * j48.build();
- * j48.setParameters(newX);
- * j48.predict();
- * String[] predictedvalues = (String[])j48.getPredictPredicted();
- * } catch (QSARModelException qme) {
- * System.out.println(qme.toString());
- * }
- *
- * Other option is set the data from a file format arff.
- *
- * J48WModel j48 = new J48WModel("/some/where/dataTraining.arff");
- *
- * Note that when making predictions, the new X matrix must be set by calls
- * to setParameters(). The following table lists the parameters that can be set and their
- * expected types.
- *
- *
- *
- *
- * | Name | Java Type | Notes |
- *
- *
- *
- *
- * | x | Double[][] | |
- *
- *
- * | y | Double[] | Length should be equal to the rows of x |
- *
- *
- * | newX | Double[][] | Number of columns should be the same as in x |
- *
- *
- * | typAttrib | String[] | Length should be equal to the rows of x |
- *
- *
- * | classAttrib | String[] | Length should be equal to number of different classes |
- *
- *
- *
- *
- * Valid options are (according weka library):
- * -U: Use unpruned tree.
- * -C confidence: Set confidence threshold for pruning. (Defalult:0.25)
- * -M number: Set minimum number of instances per leaf.(Default 2)
- * -R: Use reduced error pruning. No subte raising is performed.
- * -N number: Set number of folds for reduced error pruning. One fold is used
- * as the pruning set.(Deafult:3)
- * -B: Use binary splits for nominal attributes
- * -S: Don't perform subtree raising
- * -L: Do not clean up alfter the tree has been built
- * -A: If set, Laplace smoothing is used for predicted probabilities
- * -Q:The seed for reduced-error pruning
- *
- * @author Miguel Rojas
- * @cdk.require weka.jar
- * @cdk.license GPL
- * @cdk.module qsarweka
- * @cdk.svnrev $Revision: 9162 $
- * @see Weka
- *
- * @cdk.keyword decision and regression trees
- * @cdk.keyword J48
- */
-public class J48WModel implements IWekaModel{
- /**Dependent variable */
- private Object[] y;
- /**Independent variable */
- private Object[][] x;
- private Weka weka = null;
- /**Array of strings containing the options*/
- private String[] options;
- /**A String specifying the path of the file, format arff,
- * which contians the variables and attributes with whose to test.*/
- private String pathTest = null;
- /** results of the prediction*/
- private String[] results = null;
- /**A Array Object containing the independent variable*/
- private Object[][] newX = null;
- /**A String specifying the path of the file, format arff,
- * which contians the independent values with whose to predict.*/
- private String pathNewX = null;
- /**Attribute type: NUMERICAL or NOMINAL*/
- private int[] typAttrib;
- /**String with the attribut class*/
- private String[] classAttrib;
- private boolean cdkResource;
- /** String with the attributs*/
- private String[] attrib;
- /** Boolean if the attributs was set*/
- private boolean setAttrib = false;
-
- /**
- * Constructor of the J48WModel object from varibles
- *
- * @param typAttrib Attribute type: NUMERICAL or NOMINAL.
- * @param classAttrib String with the attribut class.
- * @param y An array containing the dependent variable.
- * @param x An double array containing the independent variable.
- */
- public J48WModel(int[] typAttrib, String[] classAttrib, Object[] y, Object[][] x){
- this.typAttrib = typAttrib;
- this.classAttrib = classAttrib;
- this.y = y;
- this.x = x;
- }
-
- /**
- * Constructor of the J48WModel object from varibles
- *
- * @param typAttrib Attribute type: NUMERICAL or NOMINAL.
- * @param classAttrib String with a list of the attribut class.
- * @param y An array containing the dependent variable (class value).
- * @param x A 2D array containing the independent variable (for example: qsar results).
- * @param attrib A string array containing the attributs
- */
- public J48WModel(int[] typAttrib, String[] classAttrib, Object[] y, Object[][] x, String[] attrib){
- this.typAttrib = typAttrib;
- this.classAttrib = classAttrib;
- this.y = y;
- this.x = x;
- this.attrib = attrib;
- setAttrib = true;
- }
- /**
- * Constructor of the J48WModel object from file. Default the file is found into cdk.src
- *
- * @param True, if the file is found into cdk.src resource
- * @param pathTest Path of the dataset file format arff to train
- */
- public J48WModel(boolean cdkResource, String pathTest){
- this.pathTest = pathTest;
- this.cdkResource = cdkResource;
- }
-
- /**
- * Parses a given list of options. The parameters are determited from weka.
- *
- * Valid options are (according weka library):
- * -U: Use unpruned tree.
- * -C confidence: Set confidence threshold for pruning. (Defalult:0.25)
- * -M number: Set minimum number of instances per leaf.(Default 2)
- * -R: Use reduced error pruning. No subte raising is performed.
- * -N number: Set number of folds for reduced error pruning. One fold is used
- * as the pruning set.(Deafult:3)
- * -B: Use binary splits for nominal attributes
- * -S: Don't perform subtree raising
- * -L: Do not clean up alfter the tree has been built
- * -A: If set, Laplace smoothing is used for predicted probabilities
- * -Q:The seed for reduced-error pruning
- *
- * @param options An Array of strings containing the options
- * @throws QSARModelException if the options are of the wrong type for the given modeling function
- *
- */
- public void setOptions(String[] options) throws QSARModelException {
- this.options = options;
- }
- /**
- * Get the current settings of the classifier. The parameters are determited from weka. And are specific for each
- * algorithm.
- *
- * @return An Array of strings containing the options
- * @throws QSARModelException if the options are of the wrong type for the given modeling function
- *
- */
- public String[] getOptions() throws QSARModelException {
- return options;
- }
- /**
- * Builds (trains) the model.
- *
- * @throws QSARModelException if errors occur in data types, calls to the R session. See
- * the corresponding method in subclasses of this class for further details.
- */
- public void build() throws QSARModelException {
- weka = new Weka();
- try {
- J48 j48 = new J48();
- if(options != null)
- j48.setOptions(options);
-
- if(pathTest != null){
- weka.setDataset(
- this.getClass().getClassLoader().getResourceAsStream(pathTest),
- j48
- );
- }else{
- if (!(setAttrib)){
- this.attrib = new String[x[0].length];
- for(int i = 0 ; i < x[0].length; i++){
- attrib[i] = "X"+i;
- }
- }
- weka.setDataset(attrib,typAttrib,classAttrib,y,x,j48);
- }
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
- /**
- * Specifies the parameters to predict. In this case will be the dependent varibles.
- * It's found into cdk.src
- *
- * @param path A String specifying the path of the file, format arff, which contians
- * the dependent values with whose to predict. It's found into cdk.src
- * @throws QSARModelException if the parameters are of the wrong type for the given modeling function
- *
- */
- public void setParametersCDK(String path) throws QSARModelException {
- this.pathNewX = path;
- }
- /**
- * Specifies the parameters to predict. In this case will be the independent varibles.
- *
- * @param newX A 2D array Object containing the independent variable.
- * @throws QSARModelException if the parameters are of the wrong type for the given modeling function
- */
- public void setParameters(Object[][] newX) throws QSARModelException {
- this.newX = newX;
- }
-
- /**
- * Makes predictions using a previously built model.
- *
- * @throws QSARModelException if errors occur in data types, calls to the R session. See
- * the corresponding method in subclasses of this class for further details.
- */
- public void predict() throws QSARModelException {
- try{
- if(pathNewX != null){
- Object[] object = weka.getPredictionCDK(pathNewX);
- results = new String[object.length];
- for(int i = 0 ; i < object.length; i++){
- results[i] = (String)object[i];
- }
- }
- else if(newX != null){
- Object[] object = weka.getPrediction(newX);
- results = new String[object.length];
- for(int i = 0 ; i < results.length; i++){
- results[i] = (String)object[i];
- }
- }
-
- } catch ( Exception e){
- e.printStackTrace();
- }
- }
- /**
- * Returns the predicted values for the prediction set.
- *
- * This function only returns meaningful results if the predict
- * method of this class has been called.
- *
- * @return A String[] containing the predicted values
- */
- public Object[] getPredictPredicted() {
- return results;
- }
-
-}
diff --git a/src/main/org/openscience/chemojava/qsar/model/weka/KmeansCluster.java b/src/main/org/openscience/chemojava/qsar/model/weka/KmeansCluster.java
deleted file mode 100644
index 550a5b7..0000000
--- a/src/main/org/openscience/chemojava/qsar/model/weka/KmeansCluster.java
+++ /dev/null
@@ -1,301 +0,0 @@
-/* $RCSfile$
- * $Author: mariobaseda $
- * $Date: 2007-01-03 17:57:14 +0100 (We, 03 Jan 2007) $
- * $Revision: 5602 $
- *
- * Copyright (C) 2007 by Mario Baseda
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- * All we ask is that proper credit is given for our work, which includes
- * - but is not limited to - adding the above copyright notice to the beginning
- * of your source code files, and to any copyright notice that you may distribute
- * with programs based on this work.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-package org.openscience.chemojava.qsar.model.weka;
-
-import java.io.BufferedReader;
-import java.io.FileReader;
-
-import org.openscience.chemojava.libio.weka.Weka;
-import org.openscience.cdk.qsar.model.QSARModelException;
-
-import weka.clusterers.Clusterer;
-import weka.clusterers.SimpleKMeans;
-import weka.core.Instance;
-import weka.core.Instances;
-
-/**
- * A clustering class that provides the k means Cluster
- * using Weka library.
- * The use of this class is shown in the following code snippet
- *
- * try {
- * KmeansCluster kMeans = new KmeansCluster();
- * kMeans.setOptions(options);
- * kMeans.setData(attrib, typAttrib, classAttrib, data);
- * kMeans.build();
- *
- * } catch (Exception e) {
- * System.out.println(e.toString());
- * }
- *
- * Other option is set the data from a file format arff.
- *
- * KmeansCluster kMeans = new KmeansCluster();
- * kMeans.setData("/some/where/dataTraining.arff");
- *
- * Valid options are (according weka library):
- * -N: Specify the number of clusters to generate.
- * -S: Specify random number seed.
- *
- * @author Mario Baseda
- * @cdk.require weka.jar
- * @cdk.license GPL
- * @cdk.module qsarweka
- * @cdk.svnrev $Revision: 9162 $
- * @see Weka
- *
- * @cdk.keyword clusterers
- * @cdk.keyword SimpleKMeans
- */
-public class KmeansCluster{ // implements IWekaModel{
- /** An instance containing the data which should be clustering as arff file.*/
- private static Instances data;
- /**Array of strings containing the options*/
- private String[] options;
- /**new instance of clusterer*/
- private SimpleKMeans kMeans = new SimpleKMeans();
- Weka weka = new Weka();
- /**An Array Object containing the independent variable*/
- private Object[][] newX = null;
- /**A String specifying the path of the file, format arff,
- * which contains the new independent values*/
- private String pathNewX = null;
- /** results of the classifying*/
- private Object[] results = null;
-
- /**
- * Constructor of the KmeansCluster object
- */
- public KmeansCluster(){}
-
- /**
- * Parses a given list of options. The parameters are determited from weka.
- *
- * Valid options are (according weka library):
- * -N: Specify the number of clusters to generate.
- * -S: Specify random number seed.
- *
- * @param options An Array of strings containing the options
- * @throws Exception if the options are of the wrong type for the given modeling function
- *
- */
- public void setOptions(String[] options) throws Exception{
- this.options = options;
- }
- /**
- * Set the variable data to the arff file
- *
- * @param filename The path of the file, format arff
- * @throws Exception if the parameters are of the wrong type for the given modeling function
- *
- */
- public void setData(String filename)throws Exception{
- data = weka.setDataset(
- this.getClass().getClassLoader().getResourceAsStream(filename),
- null
- );
- }
-
- /**
- * Parses a given list of data to an arff file, and set the variable data on it.
- *
- * @param attrib A string array containing the attributs
- * @param typAttrib Attribute type: NUMERICAL or NOMINAL.
- * @param classAttrib String with a list of the attribut classes.
- * @param y An array containing the dependent variable (class value).
- * @param x A 2D array containing the independent variable (for example: qsar results).
- * @throws Exception if it is unable to parse the data
- *
- */
- public void setData(String[] attrib, int[] typAttrib, String[] classAttrib, Object[] y, Object[][] x) throws Exception{
- data = weka.setDataset(attrib,typAttrib,classAttrib,y,x,null);
- }
-
- /**
- * Specifies the new parameters as arff file.
- *
- * @param path A String specifying the path of the file, format arff, which contains
- * the new values.
- * @throws QSARModelException if the parameters are of the wrong type for the given modeling function
- *
- */
- public void setParameters(String path) throws QSARModelException {
- this.pathNewX = path;
- }
-
- /**
- * Specifies the new parameters as 2D array object.
- *
- * @param newX A 2D array Object containing the new values.
- * @throws QSARModelException if the parameters are of the wrong type for the given modeling function
- */
- public void setParameters(Object[][] newX) throws QSARModelException {
- this.newX = newX;
- }
-
- /**
- * Builds the cluster.
- *
- * @throws Exception if errors occur in data types. See
- * the corresponding method in subclasses of this class for further details.
- */
- public void build() throws Exception {
- try {
- if(options != null)
-// set the options
- kMeans.setOptions(options);
-// build the clusterer
- kMeans.buildClusterer(data);
- } catch (Exception e) {
- e.printStackTrace();
- }
- return;
- }
-
- /** Classifies a given instance.
- * This function only returns meaningful results if the build
- * method of this class has been called.
- *
- * @throws Exception if instance could not be classified successfully
- */
- public void clusterInstance() throws Exception{
- try{
- if(pathNewX != null){
- BufferedReader br = new BufferedReader(new FileReader(pathNewX));
- Instances test = new Instances(br);
- results = new Object[test.numInstances()];
- for(int i = 0 ; i < test.numInstances(); i++){
- results[i] = new Double(kMeans.clusterInstance(test.instance(i)));
- }
- }
- else if(newX != null){
- results = new Object[newX.length];
- for(int j = 0 ; j < newX.length ; j++){
- Instance instance = new Instance(data.numAttributes());
- instance.setDataset(data);
- for(int i = 0 ; i < newX[0].length ; i++){
- if(instance.attribute(i).isNumeric())
- instance.setValue(i, ((Double)newX[j][i]).doubleValue());
- else if(instance.attribute(i).isString())
- instance.setValue(i, ""+newX[j][i]);
- }
- instance.setValue(newX[0].length, 0.0);
- results[j] = new Double(kMeans.clusterInstance(instance));
- }
- }
- } catch ( Exception e){
- e.printStackTrace();
- }
- }
-
- /**
- * Returns the number of the assigned cluster if the class is enumerated, otherwise the predicted value.
- * This function only returns meaningful results if the clusterInstance
- * method of this class has been called.
- * @return An Object[] containing the assigned cluster as Double values
- * if the class is enumerated, otherwise the predicted values.
- */
- public Object[] getClusterInstance(){
- return results;
- }
-
- /**
- * Returns the number of clusters.
- *
- * @return the number of builded cluster as integer
- * @throws Exception
- */
- public int numberOfCluster() throws Exception{
- return kMeans.numberOfClusters();
- }
-
- /**
- * Returns the cluster sum of squared errors.
- *
- * @return the sum of squared errors of the cluster as double
- */
- public double getSquaredError(){
- return kMeans.getSquaredError();
- }
-
- /**
- * Returns the cluster centroid values.
- *
- * @return the cluster centroid values as 2D double array
- * @throws Exception
- */
- public double[][] getClusterCentroids() throws Exception{
- double[][] results = new double[kMeans.numberOfClusters()][];
- for (int x = 0; x < kMeans.numberOfClusters(); x++){
- results[x] = new double[kMeans.getClusterCentroids().instance(x).numValues()];
- results[x] = kMeans.getClusterCentroids().instance(x).toDoubleArray();
- }
- return results;
- }
-
- /**
- * Returns the cluster standard deviations.
- *
- * @return the cluster standard deviations as 2D double array
- * @throws Exception
- */
- public double[][] getClusterStandardDevs() throws Exception{
- double[][] results = new double[kMeans.numberOfClusters()][];
- for (int x = 0; x < kMeans.numberOfClusters(); x++){
- results[x] = new double[kMeans.getClusterStandardDevs().instance(x).numValues()];
- results[x] = kMeans.getClusterStandardDevs().instance(x).toDoubleArray();
- }
- return results;
- }
-
- /**
- * Returns the cluster sizes.
- *
- * @return the cluster sizes as integer array
- */
- public int[] getClusterSizes() {
- return kMeans.getClusterSizes();
- }
-
- /**
- * Returns the cluster.
- *
- * @return the builded Clusterer
- */
- public Clusterer getClusterer(){
- return kMeans;
- }
-
- /**
- * Gets the current settings of the model
- *
- * @return an array of strings containing the options
-
- */
- public String[] getOptions() {
- return kMeans.getOptions();
- }
-}
diff --git a/src/main/org/openscience/chemojava/qsar/model/weka/LinearRegressionWModel.java b/src/main/org/openscience/chemojava/qsar/model/weka/LinearRegressionWModel.java
deleted file mode 100644
index 5c7f16a..0000000
--- a/src/main/org/openscience/chemojava/qsar/model/weka/LinearRegressionWModel.java
+++ /dev/null
@@ -1,268 +0,0 @@
-/* $Revision: 6228 $ $Author: egonw $ $Date: 2006-05-11 18:34:42 +0200 (Thu, 11 May 2006) $
- *
- * Copyright (C) 2006-2007 Miguel Rojas
- *
- * Contact: cdk-devel@lists.sourceforge.net
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-package org.openscience.chemojava.qsar.model.weka;
-
-import org.openscience.chemojava.libio.weka.Weka;
-import org.openscience.cdk.qsar.model.QSARModelException;
-
-import weka.classifiers.functions.LinearRegression;
-/**
- * A modeling class that provides a linear least squares regression model using Weka library.
- * The use of this class is shown in the following code snippet
- *
- * try {
- * LinearRegressionWModel lrm = new LinearRegressionWModel(x,y); or LinearRegressionWModel lrm = new LinearRegressionWModel(typAttrib,classAttrib,x,y,attrib);
- * lrm.setOptions(options);
- * lrm.build();
- * lrm.setParameters(newX);
- * lrm.predict();
- * Double[] predictedvalues = (Double[])lrm.getPredictPredicted();
- *
- * } catch (QSARModelException qme) {
- * System.out.println(qme.toString());
- * }
- *
- * Other option is set the data from a file format arff.
- *
- * LinearRegressionWModel lrm = new LinearRegressionWModel("/some/where/dataTraining.arff");
- *
- * Note that when making predictions, the new X matrix must be set by calls
- * to setParameters(). The following table lists the parameters that can be set and their
- * expected types.
- *
- *
- *
- *
- * | Name | Java Type | Notes |
- *
- *
- *
- *
- * | x | Double[][] | |
- *
- *
- * | y | Double[] | Length should be equal to the rows of x |
- *
- *
- * | newX | Double[][] | Number of columns should be the same as in x |
- *
- *
- *
- *
- *
- * Valid options are (according weka library):
- * -D: Produce debugging output.
- * -S num: Set the attriute selection method to use. 1=None, 2=Greedy(default 0=M5' method)
- *
-C: Do no try to eleminate colinear attributes
- * -R num: The ridge parameter(default 1.0e-8)
- *
- * @author Miguel Rojas
- * @cdk.require weka.jar
- * @cdk.license GPL
- * @cdk.module qsarweka
- * @cdk.svnrev $Revision: 9162 $
- * @see Weka
- *
- * @cdk.keyword regression, linear
- */
-public class LinearRegressionWModel implements IWekaModel{
- /**Dependent variable */
- private Object[] y;
- /**Independent variable */
- private Object[][] x;
- private Weka weka = null;
- /**Array of strings containing the options*/
- private String[] options;
- /**A String specifying the path of the file, format arff,
- * which contians the variables and attributes with whose to test.*/
- private String pathTest = null;
- /** results of the prediction*/
- private Double[] results;
- /**A Array Object containing the independent variable*/
- private Object[][] newX = null;
- /**A String specifying the path of the file, format arff,
- * which contians the independent values with whose to predict.*/
- private String pathNewX = null;
- /**Attribute type: NUMERICAL or NOMINAL*/
- private int[] typAttrib;
- /** String with the attributs*/
- private String[]attrib;
- /** Boolean if the attributs was set*/
- private boolean setAttrib = false;
-
- /**
- * Constructor of the LinearRegressionWModel object from varibles
- * @param y An array containing the dependent variable.
- * @param x An double array containing the independent variable.
- */
- public LinearRegressionWModel(Object[] y, Object[][] x){
- this.y = y;
- this.x = x;
- }
-
- /**
- * Constructor of the LinearRegressionWModel object from varibles
- *
- * @param y An array containing the dependent variable (class value).
- * @param x A 2D array containing the independent variable (for example: qsar results).
- * @param typAttrib An integer array containing the attribute type
- * @param attrib A string array containing the attributs
- */
- public LinearRegressionWModel(int[] typAttrib, Object[] y, Object[][] x, String[] attrib){
- this.y = y;
- this.x = x;
- this.typAttrib = typAttrib;
- this.attrib = attrib;
- setAttrib = true;
- }
- /**
- * Constructor of the LinearRegressionWModel object from file
- * @param pathTest Path of the dataset file format arff to train
- */
- public LinearRegressionWModel(String pathTest){
- this.pathTest = pathTest;
- }
-
- /**
- * Parses a given list of options. The parameters are determited from weka.
- *
- * Valid options are (according weka library):
- * -D: Produce debugging output.
- * -S num: Set the attriute selection method to use. 1=None, 2=Greedy(default 0=M5' method)
- *
-C: Do no try to eleminate colinear attributes
- * -R num: The ridge parameter(default 1.0e-8)
- *
- * @param options An Array of strings containing the options
- * @throws QSARModelException if the options are of the wrong type for the given modeling function
- *
- */
- public void setOptions(String[] options) throws QSARModelException {
- this.options = options;
- }
- /**
- * Get the current settings of the classifier. The parameters are determited from weka. And are specific for each
- * algorithm.
- *
- * @return An Array of strings containing the options
- * @throws QSARModelException if the options are of the wrong type for the given modeling function
- *
- */
- public String[] getOptions() throws QSARModelException {
- return options;
- }
- /**
- * Builds (trains) the model.
- *
- * @throws QSARModelException if errors occur in data types, calls to the R session. See
- * the corresponding method in subclasses of this class for further details.
- */
- public void build() throws QSARModelException {
- weka = new Weka();
- try {
- LinearRegression lr = new LinearRegression();
- if(options != null)
- lr.setOptions(options);
-
- if(pathTest != null){
- weka.setDataset(
- this.getClass().getClassLoader().getResourceAsStream(pathTest),
- lr
- );
- }else{
- if (!(setAttrib)){
- this.attrib = new String[x[0].length+1];
- this.typAttrib = new int[x[0].length+1];
- for(int i = 0 ; i < x[0].length; i++){
- attrib[i] = "X"+i;
- typAttrib[i] = Weka.NUMERIC;
- }
- attrib[x[0].length] = "Y";
- typAttrib[x[0].length] = Weka.NUMERIC;
- }
- weka.setDataset(attrib,typAttrib,y,x,lr);
- }
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
- /**
- * Specifies the parameters to predict. In this case will be the dependent varibles.
- * It's found into cdk.src
- *
- * @param path A String specifying the path of the file, format arff, which contians
- * the dependent values with whose to predict. It's found into cdk.src
- * @throws QSARModelException if the parameters are of the wrong type for the given modeling function
- *
- */
- public void setParametersCDK(String path) throws QSARModelException {
- this.pathNewX = path;
- }
- /**
- * Specifies the parameters to predict. In this case will be the independent varibles.
- *
- * @param newX A 2D array Object containing the independent variable.
- * @throws QSARModelException if the parameters are of the wrong type for the given modeling function
- */
- public void setParameters(Object[][] newX) throws QSARModelException {
- this.newX = newX;
- }
-
- /**
- * Makes predictions using a previously built model.
- *
- * @throws QSARModelException if errors occur in data types, calls to the R session. See
- * the corresponding method in subclasses of this class for further details.
- */
- public void predict() throws QSARModelException {
- try{
- if(pathNewX != null){
- Object[] object = weka.getPredictionCDK(pathNewX);
- results = new Double[object.length];
- for(int i = 0 ; i < object.length; i++){
- results[i] = (Double)object[i];
- }
- }
- else if(newX != null){
- Object[] object = weka.getPrediction(newX);
- results = new Double[object.length];
- for(int i = 0 ; i < object.length; i++){
- results[i] = (Double)object[i];
- }
- }
-
- } catch ( Exception e){
- e.printStackTrace();
- }
- }
- /**
- * Returns the predicted values for the prediction set.
- *
- * This function only returns meaningful results if the predict
- * method of this class has been called.
- *
- * @return A Double[] containing the predicted values
- */
- public Object[] getPredictPredicted() {
- return results;
- }
-
-
-}
diff --git a/src/main/org/openscience/chemojava/qsar/model/weka/MultilayerPerceptronModel.java b/src/main/org/openscience/chemojava/qsar/model/weka/MultilayerPerceptronModel.java
deleted file mode 100644
index ac9adb7..0000000
--- a/src/main/org/openscience/chemojava/qsar/model/weka/MultilayerPerceptronModel.java
+++ /dev/null
@@ -1,368 +0,0 @@
-/* $RCSfile$
- * $Author: mariobaseda $
- * $Date: 2007-01-03 17:57:14 +0100 (We, 03 Jan 2007) $
- * $Revision: 5602 $
- *
- * Copyright (C) 2007 by Mario Baseda
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- * All we ask is that proper credit is given for our work, which includes
- * - but is not limited to - adding the above copyright notice to the beginning
- * of your source code files, and to any copyright notice that you may distribute
- * with programs based on this work.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-package org.openscience.chemojava.qsar.model.weka;
-
-import weka.core.Instance;
-import weka.core.Instances;
-import weka.classifiers.functions.MultilayerPerceptron;
-
-import org.openscience.chemojava.libio.weka.Weka;
-import org.openscience.cdk.qsar.model.QSARModelException;
-
-import java.io.BufferedReader;
-import java.io.FileReader;
-
-/**
- * A modelling class that uses backpropagation to classify instances using Weka library.
- * The use of this class is shown in the following code snippet
- *
- * try {
- * MultilayerPerceptron mp = new MultilayerPerceptron();
- * mp.setOptions(options);
- * mp.setData(attrib, typAttrib, classAttrib, data);
- * mp.build();
- *
- * } catch (Exception e) {
- * System.out.println(e.toString());
- * }
- *
- * Other option is set the data from a file format arff.
- *
- * MultilayerPerceptron mp = new MultilayerPerceptron();
- * mp.setData("/some/where/dataTraining.arff");
- *
- * Valid options are (according weka library):
- * -L num: Set the learning rate. (default 0.3)
- * -M num: Set the momentum (default 0.2)
- * -N num: Set the number of epochs to train through. (default 500)
- * -V num: Set the percentage size of the validation set from the training to use
- * (default 0 (no validation set is used, instead num of epochs is used)
- * -S num: Set the seed for the random number generator. (default 0)
- * -E num: Set the threshold for the number of consequetive errors allowed
- * during validation testing. (default 20)
- * -G: Bring up a GUI for the neural net.
- * -A: Do not automatically create the connections in the net. (can only be used if -G is specified)
- * -B: Do Not automatically preprocess the instances with a nominal to binary filter
- * -H str: Set the number of nodes to be used on each layer. Each number represents
- * its own layer and the num of nodes on that layer. Each number should be comma seperated.
- * There are also the wildcards 'a', 'i', 'o', 't' (default 4)
- * -C: Do not automatically Normalize the class if it's numeric.
- * -I: Do not automatically Normalize the attributes
- * -R: Do not allow the network to be automatically reset
- * -D: Cause the learning rate to decay as training is done
- *
- * @author Mario Baseda
- * @cdk.require weka.jar
- * @cdk.license GPL
- * @cdk.module qsarweka
- * @cdk.svnrev $Revision: 9162 $
- * @see Weka
- *
- * @cdk.keyword classifiers, funktions, MultilayerPerceptron
- */
-public class MultilayerPerceptronModel {
- /** An instance containing the data which should be classifier as arff file.*/
- private static Instances data;
- /**Array of strings containing the options*/
- private String[] options;
- /**new instance of classifier*/
- private MultilayerPerceptron mp = new MultilayerPerceptron();
- private Weka weka = new Weka();
- /**An Array Object containing the independent variable*/
- private Object[][] newX = null;
- /**A String specifying the path of the file, format arff,
- * which contains the new independent values*/
- private String pathNewX = null;
- /** An array object which contains the results of the prediction*/
- private Object[][] object = null;
-
-
- /**
- * Constructor of the MultilayerPerceptron object
- */
- public MultilayerPerceptronModel(){}
-
- /**
- * Set the variable data to the arff file
- *
- * @param filename The path of the file, format arff
- * @throws Exception if the parameters are of the wrong type for the given modeling function
- *
- */
- public void setData(String filename)throws Exception{
- data = weka.setDataset(
- this.getClass().getClassLoader().getResourceAsStream(filename),
- null
- );
- }
-
- /**
- * Parses a given list of data to an arff file, and set the variable data on it.
- *
- * @param typAttrib Attribute type: NUMERICAL or NOMINAL.
- * @param classAttrib String with a list of the attribut classes.
- * @param y An array containing the dependent variable (class value).
- * @param x A 2D array containing the independent variable (for example: qsar results).
- * @param attrib A string array containing the attributs
- * @throws Exception if it is unable to parse the data
- *
- */
- public void setData(String[] attrib, int[] typAttrib, String[] classAttrib, Object[] y, Object[][] x) throws Exception{
- data = weka.setDataset(attrib,typAttrib,classAttrib,y,x,null);
- }
-
- /**
- * Parses a given list of options. The parameters are determited from weka.
- *
- * Valid options are (according weka library):
- * -L num: Set the learning rate. (default 0.3)
- * -M num: Set the momentum (default 0.2)
- * -N num: Set the number of epochs to train through. (default 500)
- * -V num: Set the percentage size of the validation set from the training to use
- * (default 0 (no validation set is used, instead num of epochs is used)
- * -S num: Set the seed for the random number generator. (default 0)
- * -E num: Set the threshold for the number of consequetive errors allowed
- * during validation testing. (default 20)
- * -G: Bring up a GUI for the neural net.
- * -A: Do not automatically create the connections in the net. (can only be used if -G is specified)
- * -B: Do Not automatically preprocess the instances with a nominal to binary filter
- * -H str: Set the number of nodes to be used on each layer. Each number represents
- * its own layer and the num of nodes on that layer. Each number should be comma seperated.
- * There are also the wildcards 'a', 'i', 'o', 't' (default 4)
- * -C: Do not automatically Normalize the class if it's numeric.
- * -I: Do not automatically Normalize the attributes
- * -R: Do not allow the network to be automatically reset
- * -D: Cause the learning rate to decay as training is done
- *
- * @param options An Array of strings containing the options
- * @throws QSARModelException if the options are of the wrong type for the given modeling function
- *
- */
- public void setOptions(String[] options) throws QSARModelException{
- this.options = options;
- }
-
- /**
- * Builds the model.
- *
- * @throws Exception if errors occur in data types. See
- * the corresponding method in subclasses of this class for further details.
- */
- public void build() throws Exception {
- try {
- if(options != null)
-// set the options
- mp.setOptions(options);
-// build the model
- mp.buildClassifier(data);
- } catch (Exception e) {
- e.printStackTrace();
- }
- return;
- }
-
- /**
- * Specifies the new parameters as arff file.
- *
- * @param path A String specifying the path of the file, format arff, which contians
- * the new values.
- * @throws QSARModelException if the parameters are of the wrong type for the given modeling functionn
- *
- */
- public void setParameters(String path) throws QSARModelException {
- this.pathNewX = path;
- }
-
- /**
- * Specifies the new parameters as 2D array object.
- *
- * @param newX A 2D array Object containing the new values.
- * @throws QSARModelException if the parameters are of the wrong type for the given modeling function
- */
- public void setParameters(Object[][] newX) throws QSARModelException {
- this.newX = newX;
- }
-
-
- /** Predict the class of an instance once a classification model has been built with the buildClassifier call
- *
- * @throws Exception
- */
- public void predict() throws Exception{
- try{
- if(pathNewX != null){
- BufferedReader br = new BufferedReader(new FileReader(pathNewX));
- Instances test = new Instances(br);
- this.object = new Object[test.numInstances()][];
- for(int i = 0 ; i < test.numInstances(); i++){
- double[] result = mp.distributionForInstance(test.instance(i));
- this.object[i] = new Object[result.length];
- for (int z = 0; z < result.length; z++){
- this.object[i][z] = new Double(result[z]);
- }
- }
- }
- else if(newX != null){
- this.object = new Object[newX.length][];
- for(int j = 0 ; j < newX.length ; j++){
- Instance instance = new Instance(data.numAttributes());
- instance.setDataset(data);
- for(int i = 0 ; i < newX[0].length ; i++){
- if(instance.attribute(i).isNumeric())
- instance.setValue(i, ((Double)newX[j][i]).doubleValue());
- else if(instance.attribute(i).isString())
- instance.setValue(i, ""+newX[j][i]);
- }
- instance.setValue(newX[0].length, 0.0);
- double[] result = mp.distributionForInstance(instance);
- this.object[j] = new Object[result.length];
- for (int z = 0; z < result.length; z++){
- this.object[j][z] = new Double(result[z]);
- }
- }
- }
- } catch ( Exception e){
- e.printStackTrace();
- }
- }
-
- /**
- * Returns the probabilities of each class type.
- *
- * This function only returns meaningful results if the predict
- * method of this class has been called.
- *
- * @return An Object[][] containing the probabilities of each class type
- */
- public Object[][] getPredictPredicted() {
- return this.object;
- }
-
- /**Get the hidden layers
- * @return A string containig the hidden layers
- */
- public String getHiddenLayers(){
- return mp.getHiddenLayers();
- }
-
- /**Get the value of the learning rate
- * @return A double containig the value of the learning rate
- */
- public double getLearningRate(){
- return mp.getLearningRate();
- }
-
- /**Get the value of the momentum
- * @return A double containig the value of the momentum
- */
- public double getMomentum(){
- return mp.getMomentum();
- }
-
- /**Get the value of auto build
- * @return true or false
- */
- public boolean getAutoBuild(){
- return mp.getAutoBuild();
- }
-
- /**Get the value of decay
- * @return true or false
- */
- public boolean getDecay(){
- return mp.getDecay();
- }
-
- /**Get the value of GUI
- * @return true or false
- */
- public boolean getGUI(){
- return mp.getGUI();
- }
-
- /**Get the value of NominalToBinaryFilter
- * @return true or false
- */
- public boolean getNominalToBinaryFilter(){
- return mp.getNominalToBinaryFilter();
- }
-
- /**Get the value of NormalizeAttributes
- * @return true or false
- */
- public boolean getNormalizeAttributes(){
- return mp.getNormalizeAttributes();
- }
-
- /**Get the value of NormalizeNumericClass
- * @return true or false
- */
- public boolean getNormalizeNumericClass(){
- return mp.getNormalizeNumericClass();
- }
-
- /**Gets the current settings of NeuralNet
- * @return A String[] containig the settings of NeuralNet
- */
- public String[] getOptions(){
- return mp.getOptions();
- }
-
- /**Get the value of the random seed
- * @return A long containig the value of the random seed
- */
- public long getRandomSeed(){
- return mp.getRandomSeed();
- }
-
- /**Get the number of epochs to train through
- * @return An double containig the number of epochs to train through
- */
- public double getTrainingTime(){
- return mp.getTrainingTime();
- }
-
- /**Get the percentage size of the validation set
- * @return An double containig the percentage size of the validation seth
- */
- public double getValidationSetSize(){
- return mp.getValidationSetSize();
- }
-
- /**Get the threshold used for validation testing
- * @return An double containig the threshold used for validation testing
- */
- public double getValidationThreshold(){
- return mp.getValidationThreshold();
- }
-
- /**Get the flag for reseting the network
- * @return true or false
- */
- public boolean getReset(){
- return mp.getReset();
- }
-}
diff --git a/src/main/org/openscience/chemojava/qsar/model/weka/NaiveBayesModel.java b/src/main/org/openscience/chemojava/qsar/model/weka/NaiveBayesModel.java
deleted file mode 100644
index f92268e..0000000
--- a/src/main/org/openscience/chemojava/qsar/model/weka/NaiveBayesModel.java
+++ /dev/null
@@ -1,292 +0,0 @@
-/* $RCSfile$
- * $Author: mariobaseda $
- * $Date: 2007-01-03 17:57:14 +0100 (We, 03 Jan 2007) $
- * $Revision: 5602 $
- *
- * Copyright (C) 2007 by Mario Baseda
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- * All we ask is that proper credit is given for our work, which includes
- * - but is not limited to - adding the above copyright notice to the beginning
- * of your source code files, and to any copyright notice that you may distribute
- * with programs based on this work.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-package org.openscience.chemojava.qsar.model.weka;
-
-import weka.core.Instance;
-import weka.core.Instances;
-import weka.classifiers.bayes.NaiveBayes;
-
-import org.openscience.chemojava.libio.weka.Weka;
-import org.openscience.cdk.qsar.model.QSARModelException;
-
-import java.io.BufferedReader;
-import java.io.FileReader;
-
-/**
- * A modelling class for a Naive Bayes classifier using estimator classes from the Weka library.
- * Numeric estimator precision values are chosen based on analysis of the training data.
- * For this reason, the classifier is not an UpdateableClassifier.
- * The use of this class is shown in the following code snippet
- *
- * try {
- * NaiveBayes nb = new NaivesBayes();
- * nb.setOptions(options);
- * nb.setData(attrib, typAttrib, classAttrib, data);
- * nb.build();
- *
- * } catch (Exception e) {
- * System.out.println(e.toString());
- * }
- *
- *
- * !!!Other option is set the data from a file format arff.!!!
- * !!!!!!!!!!!!!THIS OPTION SUPPLIES AN UNSUSPECTED BUG BY CALCULATING THE PROBABILITIES!!!!!!!!!!!!!!
- *
- * !!!MultilayerPerceptron mp = new MultilayerPerceptron();!!!
- * !!!mp.setData("/some/where/dataTraining.arff");!!!
- * !!!!!!!!!!!!!THIS OPTION SUPPLIES AN UNSUSPECTED BUG BY CALCULATING THE PROBABILITIES!!!!!!!!!!!!!!
- *
- * Valid options are (according weka library):
- * -K: Use kernel estimation for modelling numeric attributes rather than a single normal distribution.
- * -D: Use supervised discretization to process numeric attributes
- *
- * @author Mario Baseda
- * @cdk.require weka.jar
- * @cdk.license GPL
- * @cdk.module qsarweka
- * @cdk.svnrev $Revision: 9162 $
- * @see Weka
- *
- * @cdk.keyword classifiers, bayes, NaiveBayes
- */
-public class NaiveBayesModel {
- /** An instance containing the data which should be classifier as arff file.*/
- private static Instances data;
- /**Array of strings containing the options*/
- private String[] options;
- /**new instance of classifier*/
- private NaiveBayes nb = new NaiveBayes();
- private Weka weka = new Weka();
- /**An Array Object containing the independent variable*/
- private Object[][] newX = null;
- /**A String specifying the path of the file, format arff,
- * which contains the new independent values*/
- private String pathNewX = null;
- /** An array object which contains the results of the probabilities*/
- private Object[][] object = null;
-
-
- /**
- * Constructor of the NaiveBayesModel object
- */
- public NaiveBayesModel(){}
-
- /**
- * Set the variable data to the arff file
- *
- * @param filename The path of the file, format arff
- * @throws Exception if the parameters are of the wrong type for the given modeling function
- *
- */
- public void setData(String filename)throws Exception{
- data = weka.setDataset(
- this.getClass().getClassLoader().getResourceAsStream(filename),
- null
- );
- }
-
- /**
- * Parses a given list of data to an arff file, and set the variable data on it.
- *
- * @param typAttrib Attribute type: NUMERICAL or NOMINAL.
- * @param classAttrib String with a list of the attribut classes.
- * @param y An array containing the dependent variable (class value).
- * @param x A 2D array containing the independent variable (for example: qsar results).
- * @param attrib A string array containing the attributs
- * @throws Exception if it is unable to parse the data
- *
- */
- public void setData(String[] attrib, int[] typAttrib, String[] classAttrib, Object[] y, Object[][] x) throws Exception{
- data = weka.setDataset(attrib,typAttrib,classAttrib,y,x,null);
- }
-
- /**
- * Parses a given list of options. The parameters are determited from weka.
- *
- * Valid options are (according weka library):
- * -K: Use kernel estimation for modelling numeric attributes rather than a single normal distribution.
- * -D: Use supervised discretization to process numeric attributes
- *
- * @param options An Array of strings containing the options
- * @throws QSARModelException if the options are of the wrong type for the given modeling function
- *
- */
- public void setOptions(String[] options) throws QSARModelException{
- this.options = options;
- }
-
- /**
- * Builds the model.
- *
- * @throws Exception if errors occur in data types. See
- * the corresponding method in subclasses of this class for further details.
- */
- public void build() throws Exception {
- try {
- if(options != null)
-// set the options
- nb.setOptions(options);
-// build the model
- nb.buildClassifier(data);
- } catch (Exception e) {
- e.printStackTrace();
- }
- return;
- }
-
- /**
- * Specifies the new parameters as arff file.
- *
- * @param path A String specifying the path of the file, format arff, which contains
- * the new values.
- * @throws QSARModelException if the parameters are of the wrong type for the given modeling function
- *
- */
- public void setParameters(String path) throws QSARModelException {
- this.pathNewX = path;
- }
-
- /**
- * Specifies the new parameters as 2D array object.
- *
- * @param newX A 2D array Object containing the new values.
- * @throws QSARModelException if the parameters are of the wrong type for the given modeling function
- */
- public void setParameters(Object[][] newX) throws QSARModelException {
- this.newX = newX;
- }
-
-
- /** Calculates the class membership probabilities for the given instance once a classification
- * model has been built with the buildClassifier call
- *
- * @throws Exception
- */
- public void probabilities() throws Exception{
- try{
- if(pathNewX != null){
-// This supplies an unsuspected bug (Array index out of bounce error)
- BufferedReader br = new BufferedReader(new FileReader(pathNewX));
- Instances test = new Instances(br);
- this.object = new Object[test.numInstances()][];
- for(int i = 0 ; i < test.numInstances(); i++){
- // Here occures the bug
- double[] result = nb.distributionForInstance(test.instance(i));
- this.object[i] = new Object[result.length];
- for (int z = 0; z < result.length; z++){
- this.object[i][z] = new Double(result[z]);
- }
- }
- }
- else if(newX != null){
- this.object = new Object[newX.length][];
- for(int j = 0 ; j < newX.length ; j++){
- Instance instance = new Instance(data.numAttributes());
- instance.setDataset(data);
- for(int i = 0 ; i < newX[0].length ; i++){
- if(instance.attribute(i).isNumeric())
- instance.setValue(i, ((Double)newX[j][i]).doubleValue());
- else if(instance.attribute(i).isString())
- instance.setValue(i, ""+newX[j][i]);
- }
- instance.setValue(newX[0].length, 0.0);
- double[] result = nb.distributionForInstance(instance);
- this.object[j] = new Object[result.length];
- for (int z = 0; z < result.length; z++){
- this.object[j][z] = new Double(result[z]);
- }
- }
- }
- } catch ( Exception e){
- e.printStackTrace();
- }
- }
-
- /**
- * Returns the probabilities of each class type.
- *
- * This function only returns meaningful results if the probabilities
- * method of this class has been called.
- *
- * @return An Object[][] containing the probabilities of each class type
- */
- public Object[][] getProbabilities() {
- return this.object;
- }
-
- /** Updates the classifier with the given instance once a classification model has been built with the buildClassifier call
- *
- * @throws Exception
- */
- public void updateClassifier() throws Exception{
- try{
- if(pathNewX != null){
- BufferedReader br = new BufferedReader(new FileReader(pathNewX));
- Instances test = new Instances(br);
- for(int i = 0 ; i < test.numInstances(); i++){
- nb.updateClassifier(test.instance(i));
- }
- }
- else if(newX != null){
- for(int j = 0 ; j < newX.length ; j++){
- Instance instance = new Instance(data.numAttributes());
- instance.setDataset(data);
- for(int i = 0 ; i < newX[0].length ; i++){
- if(instance.attribute(i).isNumeric())
- instance.setValue(i, ((Double)newX[j][i]).doubleValue());
- else if(instance.attribute(i).isString())
- instance.setValue(i, ""+newX[j][i]);
- }
- instance.setValue(newX[0].length, 0.0);
- nb.updateClassifier(instance);
- }
- }
- } catch ( Exception e){
- e.printStackTrace();
- }
- }
-
- /**Gets the current settings of the classifier
- * @return A String[] containig the settings of NeuralNet
- */
- public String[] getOptions(){
- return nb.getOptions();
- }
-
- /**Gets if kernel estimator is being used
- * @return true or false
- */
- public boolean getUseKernelEstimator(){
- return nb.getUseKernelEstimator();
- }
-
- /**Get whether supervised discretization is to be used
- * @return true or false
- */
- public boolean getUseSupervisedDiscretization(){
- return nb.getUseSupervisedDiscretization();
- }
-}
diff --git a/src/main/org/openscience/chemojava/qsar/model/weka/SMOModel.java b/src/main/org/openscience/chemojava/qsar/model/weka/SMOModel.java
deleted file mode 100644
index a3a2b6b..0000000
--- a/src/main/org/openscience/chemojava/qsar/model/weka/SMOModel.java
+++ /dev/null
@@ -1,395 +0,0 @@
-/* $RCSfile$
- * $Author: mariobaseda $
- * $Date: 2007-01-03 17:57:14 +0100 (We, 03 Jan 2007) $
- * $Revision: 5602 $
- *
- * Copyright (C) 2007 by Mario Baseda
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- * All we ask is that proper credit is given for our work, which includes
- * - but is not limited to - adding the above copyright notice to the beginning
- * of your source code files, and to any copyright notice that you may distribute
- * with programs based on this work.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-package org.openscience.chemojava.qsar.model.weka;
-
-import weka.core.Instance;
-import weka.core.Instances;
-import weka.classifiers.functions.SMO;
-import org.openscience.chemojava.libio.weka.Weka;
-import org.openscience.cdk.qsar.model.QSARModelException;
-
-import java.io.BufferedReader;
-import java.io.FileReader;
-
-/**
- * A modelling class that provides the John C. Platt's sequential minimal optimization algorithm (SMO)
- * for training a support vector classifier using Weka library.
- * The use of this class is shown in the following code snippet
- *
- * try {
- * SMO smo = new SMO();
- * smo.setOptions(options);
- * smo.setData(attrib, typAttrib, classAttrib, data);
- * smo.build();
- *
- * } catch (Exception e) {
- * System.out.println(e.toString());
- * }
- *
- * Other option is set the data from a file format arff.
- *
- * SMO smo = new SMO();
- * smo.setData("/some/where/dataTraining.arff");
- *
- * Valid options are (according weka library):
- * -C num: The complexity constant C. (default 1)
- * -E num: The exponent for the polynomial kernel. (default 1)
- * -G num: Gamma for the RBF kernel. (default 0.01)
- * -N <0|1|2>: Whether to 0=normalize/1=standardize/2=neither. (default 0=normalize)
- * -F: Feature-space normalization (only for non-linear polynomial kernels).
- * -O: Use lower-order terms (only for non-linear polynomial kernels).
- * -R: Use the RBF kernel. (default poly)
- * -A num: Sets the size of the kernel cache. Should be a prime number. (default 250007, use 0 for full cache)
- * -L num: Sets the tolerance parameter. (default 1.0e-3)
- * -P num: Sets the epsilon for round-off error. (default 1.0e-12)
- * -M: Fit logistic models to SVM outputs.
- * -V num: Number of folds for cross-validation used to generate data for logistic models. (default -1, use training data)
- * -W num: Random number seed for cross-validation. (default 1)
-
-
- *
- * @author Mario Baseda
- * @cdk.require weka.jar
- * @cdk.license GPL
- * @cdk.module qsarweka
- * @cdk.svnrev $Revision: 9162 $
- * @see Weka
- *
- * @cdk.keyword classifiers, funktions, SMO
- */
-public class SMOModel {
- /** An instance containing the data which should be classifier as arff file.*/
- private static Instances data;
- /**Array of strings containing the options*/
- private String[] options;
- /**new instance of classifier*/
- private SMO smo = new SMO();
- private Weka weka = new Weka();
- /**An Array Object containing the independent variable*/
- private Object[][] newX = null;
- /**A String specifying the path of the file, format arff,
- * which contains the new independent values*/
- private String pathNewX = null;
- /** An array object which contains the probabilitiesof the new values*/
- private Object[][] object = null;
-
-
- /**
- * Constructor of the SMO object
- */
- public SMOModel(){}
-
- /**
- * Parses a given list of options. The parameters are determited from weka.
- *
- * Valid options are (according weka library):
- * Valid options are (according weka library):
- * -C num: The complexity constant C. (default 1)
- * -E num: The exponent for the polynomial kernel. (default 1)
- * -G num: Gamma for the RBF kernel. (default 0.01)
- * -N <0|1|2>: Whether to 0=normalize/1=standardize/2=neither. (default 0=normalize)
- * -F: Feature-space normalization (only for non-linear polynomial kernels).
- * -O: Use lower-order terms (only for non-linear polynomial kernels).
- * -R: Use the RBF kernel. (default poly)
- * -A num: Sets the size of the kernel cache. Should be a prime number. (default 250007, use 0 for full cache)
- * -L num: Sets the tolerance parameter. (default 1.0e-3)
- * -P num: Sets the epsilon for round-off error. (default 1.0e-12)
- * -M: Fit logistic models to SVM outputs.
- * -V num: Number of folds for cross-validation used to generate data for logistic models. (default -1, use training data)
- * -W num: Random number seed for cross-validation. (default 1)
- *
- * @param options An Array of strings containing the options
- * @throws QSARModelException if the options are of the wrong type for the given modeling function
- *
- */
- public void setOptions(String[] options) throws QSARModelException{
- this.options = options;
- }
-
- /**
- * Set the variable data to the arff file
- *
- * @param filename The path of the file, format arff
- * @throws Exception if the parameters are of the wrong type for the given modeling function
- *
- */
- public void setData(String filename)throws Exception{
- data = weka.setDataset(
- this.getClass().getClassLoader().getResourceAsStream(filename),
- null
- );
- }
-
- /**
- * Parses a given list of data to an arff file, and set the variable data on it.
- *
- * @param typAttrib Attribute type: NUMERICAL or NOMINAL.
- * @param classAttrib String with a list of the attribut classes.
- * @param y An array containing the dependent variable (class value).
- * @param x A 2D array containing the independent variable (for example: qsar results).
- * @param attrib A string array containing the attributs
- * @throws Exception if it is unable to parse the data
- *
- */
- public void setData(String[] attrib, int[] typAttrib, String[] classAttrib, Object[] y, Object[][] x) throws Exception{
- data = weka.setDataset(attrib,typAttrib,classAttrib,y,x,null);
- }
-
- /**
- * Builds the model.
- *
- * @throws Exception if errors occur in data types. See
- * the corresponding method in subclasses of this class for further details.
- */
- public void build() throws Exception {
- try {
- if(options != null)
-// set the options
- smo.setOptions(options);
-// build the model
- smo.buildClassifier(data);
- } catch (Exception e) {
- e.printStackTrace();
- }
- return;
- }
-
- /**
- * Specifies the new parameters as arff file.
- *
- * @param path A String specifying the path of the file, format arff, which contians
- * the new values.
- * @throws QSARModelException if the parameters are of the wrong type for the given modeling function
- *
- */
- public void setParameters(String path) throws QSARModelException {
- this.pathNewX = path;
- }
-
- /**
- * Specifies the new parameters as 2D array object.
- *
- * @param newX A 2D array Object containing the new values.
- * @throws QSARModelException if the parameters are of the wrong type for the given modeling function
- */
- public void setParameters(Object[][] newX) throws QSARModelException {
- this.newX = newX;
- }
-
-
- /** Estimates the probabilities for the given instance, which was built from the new parameters before.
- *
- * @throws Exception
- */
- public void probabilities() throws Exception{
- try{
- if(pathNewX != null){
- BufferedReader br = new BufferedReader(new FileReader(pathNewX));
- Instances test = new Instances(br);
- this.object = new Object[test.numInstances()][];
- for(int i = 0 ; i < test.numInstances(); i++){
- double[] result = smo.distributionForInstance(test.instance(i));
- this.object[i] = new Object[result.length];
- for (int z = 0; z < result.length; z++){
- this.object[i][z] = new Double(result[z]);
- }
- }
- }
- else if(newX != null){
- this.object = new Object[newX.length][];
- for(int j = 0 ; j < newX.length ; j++){
- Instance instance = new Instance(data.numAttributes());
- instance.setDataset(data);
- for(int i = 0 ; i < newX[0].length ; i++){
- if(instance.attribute(i).isNumeric())
- instance.setValue(i, ((Double)newX[j][i]).doubleValue());
- else if(instance.attribute(i).isString())
- instance.setValue(i, ""+newX[j][i]);
- }
- instance.setValue(newX[0].length, 0.0);
- double[] result = smo.distributionForInstance(instance);
- this.object[j] = new Object[result.length];
- for (int z = 0; z < result.length; z++){
- this.object[j][z] = new Double(result[z]);
- }
- }
- }
- } catch ( Exception e){
- e.printStackTrace();
- }
- }
-
- /**
- * Returns the probabilities of the new parameters.
- *
- * This function only returns meaningful results if the probabilities
- * method of this class has been called.
- *
- * @return An Object[][] containing the probabilities values as Double
- */
- public Object[][] getProbabilities() {
- return this.object;
- }
-
- /** Returns the attribute names
- * @return A String[][][] containing the attribute names
- */
- public String[][][] attributeNames(){
- return smo.attributeNames();
- }
-
- /** Returns the bias of each binary SMO
- * @return A double[][] containing the bias of each binary SMO
- */
- public double[][] bias(){
- return smo.bias();
- }
-
- /**Returns the class attribute names
- * @return A String[] containing the class attribute names
- */
- public String[] classAttributeNames(){
- return smo.classAttributeNames();
- }
-
- /**Get the value of buildLogisticModels
- * @return true or false
- */
- public boolean getBuildLogisticModels(){
- return smo.getBuildLogisticModels();
- }
-
- /**Get the value of C
- * @return A double containig the value of C
- */
- public double getC(){
- return smo.getC();
- }
-
- /**Get the size of the kernel cache
- * @return An integer containing the size of the kernel cache
- */
- public int getCacheSize(){
- return smo.getCacheSize();
- }
-
- /**Get the value of epsilon
- * @return A double containing the value of epsilon
- */
- public double getEpsilon(){
- return smo.getEpsilon();
- }
-
- /**Get the value of exponent
- * @return A double containing the value of exponent
- */
- public double getExponent(){
- return smo.getExponent();
- }
-
- /**Check whether feature spaces is being normalized.
- * @return true or false
- */
- public boolean getFeatureSpaceNormalization() throws Exception{
- return smo.getFeatureSpaceNormalization();
- }
-
- /**Get the value of gamma
- * @return A double containing the value of gamma
- */
- public double getGamma(){
- return smo.getGamma();
- }
-
- /**Check whether lower-order terms are being used
- * @return true or false
- */
- public boolean getLowerOrderTerms(){
- return smo.getLowerOrderTerms();
- }
-
- /**Get the value of numFolds.
- * @return An integer containing the value of numFolds
- */
- public int getNumFolds(){
- return smo.getNumFolds();
- }
-
- /**Get the value of randomSeed
- * @return An integer containing the value of randomSeed
- */
- public int getRandomSeed(){
- return smo.getRandomSeed();
- }
-
- /**Get the value of tolerance parameter
- * @return A double containig the value of tolerance parameter
- */
- public double getToleranceParameter(){
- return smo.getToleranceParameter();
- }
-
- /**Check if the RBF kernel is to be used
- * @return true or false
- */
- public boolean getUserRBF(){
- return smo.getUseRBF();
- }
-
- /**Return the number of class attribute values
- * @return An integer containing the class attribute values
- */
- public int numClassAttributeValues(){
- return smo.numClassAttributeValues();
- }
-
- /**Returns the indices in sparse format
- * @return An int[][][] containing the indices in sparse format
- */
- public int[][][] sparseIndices(){
- return smo.sparseIndices();
- }
-
- /**Returns the weights in sparse format.
- * @return A double[][][] containing the weights in sparse format
- */
- public double[][][] sparseWeights(){
- return smo.sparseWeights();
- }
-
- /**
- * Get the current settings of the classifier. The parameters are determited from weka. And are specific for each
- * algorithm.
- *
- * @return An Array of strings containing the options
- * @throws QSARModelException if the options are of the wrong type for the given modeling function
- *
- */
- public String[] getOptions() throws QSARModelException{
- return this.options;
- }
-}
-
diff --git a/src/main/org/openscience/chemojava/qsar/model/weka/SMOregModel.java b/src/main/org/openscience/chemojava/qsar/model/weka/SMOregModel.java
deleted file mode 100644
index c067ff7..0000000
--- a/src/main/org/openscience/chemojava/qsar/model/weka/SMOregModel.java
+++ /dev/null
@@ -1,289 +0,0 @@
-/* $RCSfile$
- * $Author: mariobaseda $
- * $Date: 2007-01-03 17:57:14 +0100 (We, 03 Jan 2007) $
- * $Revision: 5602 $
- *
- * Copyright (C) 2007 by Mario Baseda
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- * All we ask is that proper credit is given for our work, which includes
- * - but is not limited to - adding the above copyright notice to the beginning
- * of your source code files, and to any copyright notice that you may distribute
- * with programs based on this work.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-package org.openscience.chemojava.qsar.model.weka;
-
-import weka.core.Instance;
-import weka.core.Instances;
-import weka.classifiers.functions.SMOreg;
-import org.openscience.chemojava.libio.weka.Weka;
-import org.openscience.cdk.qsar.model.QSARModelException;
-
-import java.io.BufferedReader;
-import java.io.FileReader;
-
-/**
- * A modelling class that provides the Alex J.Smola and Bernhard Scholkopf
- * sequential minimal optimization algorithm for training a support vector regression
- * using Weka library (polynomial or RBF kernels). This implementation globally replaces all missing
- * values and transforms nominal attributes into binary ones. It also normalizes all
- * attributes by default.
- * The use of this class is shown in the following code snippet
- *
- * try {
- * SMOreg smoreg = new SMOreg();
- * smo.setData(attrib, typAttrib, classAttrib, data); or smo.setData(x, y);
- * smo.build();
- *
- * } catch (Exception e) {
- * System.out.println(e.toString());
- * }
- *
- * Other option is set the data from a file format arff.
- *
- * SMO smo = new SMO();
- * smo.setData("/some/where/dataTraining.arff");
- *
- *
- * @author Mario Baseda
- * @cdk.require weka.jar
- * @cdk.license GPL
- * @cdk.module qsarweka
- * @cdk.svnrev $Revision: 9162 $
- * @see Weka
- *
- * @cdk.keyword classifiers, funktions, SMOreg
- */
-public class SMOregModel {
- /** An instance containing the data which should be classifier as arff file.*/
- private static Instances data;
- /**new instance of classifier*/
- private SMOreg smoreg = new SMOreg();
- private Weka weka = new Weka();
- /**An Array Object containing the independent variable*/
- private Object[][] newX = null;
- /**A String specifying the path of the file, format arff,
- * which contains the new independent values*/
- private String pathNewX = null;
- /** An array object which contains the classifacation of the new values*/
- private Object[] object = null;
-
- /**
- * Constructor of the SMOregModel object
- */
- public SMOregModel(){}
-
- /**
- * Set the variable data to the arff file
- *
- * @param filename The path of the file, format arff
- * @throws Exception if the parameters are of the wrong type for the given modeling function
- *
- */
- public void setData(String filename)throws Exception{
- data = weka.setDataset(
- this.getClass().getClassLoader().getResourceAsStream(filename),
- null
- );
- }
-
- /**
- * Parses a given list of data to an arff file, and set the variable data on it.
- * @param y An array containing the dependent variable.
- * @param x An double array containing the independent variable.
- */
- public void setData(Object[] y, Object[][] x) throws Exception{
- String[] attrib = new String[x[0].length+1];
- int[] typAttrib = new int[x[0].length+1];
- for(int i = 0 ; i < x[0].length; i++){
- attrib[i] = "X"+i;
- typAttrib[i] = Weka.NUMERIC;
- }
- attrib[x[0].length] = "Y";
- typAttrib[x[0].length] = Weka.NUMERIC;
- data = weka.setDataset(attrib,typAttrib,y,x,null);
- }
-
- /**
- * Parses a given list of data to an arff file, and set the variable data on it.
- *
- * @param typAttrib Attribute type: NUMERICAL or NOMINAL.
- * @param classAttrib String with a list of the attribut classes.
- * @param y An array containing the dependent variable (class value).
- * @param x A 2D array containing the independent variable (for example: qsar results).
- * @param attrib A string array containing the attributs
- * @throws Exception if it is unable to parse the data
- *
- */
- public void setData(int[] typAttrib, Object[] y, Object[][] x, String[] attrib) throws Exception{
- data = weka.setDataset(attrib,typAttrib,y,x,null);
- }
-
- /**
- * Builds the model.
- *
- * @throws Exception if errors occur in data types. See
- * the corresponding method in subclasses of this class for further details.
- */
- public void build() throws Exception {
- try {
-// build the model
- smoreg.buildClassifier(data);
- } catch (Exception e) {
- e.printStackTrace();
- };
- return;
- }
-
- /**
- * Specifies the new parameters as arff file.
- *
- * @param path A String specifying the path of the file, format arff, which contains
- * the new values.
- * @throws QSARModelException if the parameters are of the wrong type for the given modeling function
- *
- */
- public void setParameters(String path) throws QSARModelException {
- this.pathNewX = path;
- }
-
- /**
- * Specifies the new parameters as 2D array object.
- *
- * @param newX A 2D array Object containing the new values.
- * @throws QSARModelException if the parameters are of the wrong type for the given modeling function
- */
- public void setParameters(Object[][] newX) throws QSARModelException {
- this.newX = newX;
- }
-
- /** Classifies a given instance, which was built from the new parameters before.
- *
- * @throws Exception
- */
- public void classifyInstance() throws Exception{
- try{
- if(pathNewX != null){
- BufferedReader br = new BufferedReader(new FileReader(pathNewX));
- Instances test = new Instances(br);
- this.object = new Object[test.numInstances()];
- for(int i = 0 ; i < test.numInstances(); i++){
- double result = smoreg.classifyInstance(test.instance(i));
- this.object[i] = new Double(result);
- }
- }
- else if(newX != null){
- this.object = new Object[newX.length];
- for(int j = 0 ; j < newX.length ; j++){
- Instance instance = new Instance(data.numAttributes());
- instance.setDataset(data);
- for(int i = 0 ; i < newX[0].length ; i++){
- if(instance.attribute(i).isNumeric())
- instance.setValue(i, ((Double)newX[j][i]).doubleValue());
- else if(instance.attribute(i).isString())
- instance.setValue(i, ""+newX[j][i]);
- }
- instance.setValue(newX[0].length, 0.0);
- double result = smoreg.classifyInstance(instance);
- this.object[j] = new Double(result);
- }
- }
- } catch ( Exception e){
- e.printStackTrace();
- }
- }
-
- /**
- * Returns the classification of the new parameters.
- *
- * This function only returns meaningful results if the classifyInstance
- * method of this class has been called.
- *
- * @return An Object[] containing the classification values as Double
- */
- public Object[] getClassification() {
- return this.object;
- }
-
- /**Get the value of C
- * @return A double containig the value of C
- */
- public double getC(){
- return smoreg.getC();
- }
-
- /**Get the size of the kernel cache
- * @return An integer containing the size of the kernel cache
- */
- public int getCacheSize(){
- return smoreg.getCacheSize();
- }
-
- /**Get the value of epsilon
- * @return A double containing the value of epsilon
- */
- public double getEpsilon(){
- return smoreg.getEpsilon();
- }
-
- /**Get the value of exponent
- * @return A double containing the value of exponent
- */
- public double getExponent(){
- return smoreg.getExponent();
- }
-
- /**Check whether feature spaces is being normalized.
- * @return true or false
- */
- public boolean getFeatureSpaceNormalization() throws Exception{
- return smoreg.getFeatureSpaceNormalization();
- }
-
- /**Get the value of gamma
- * @return A double containing the value of gamma
- */
- public double getGamma(){
- return smoreg.getGamma();
- }
-
- /**Check whether lower-order terms are being used
- * @return true or false
- */
- public boolean getLowerOrderTerms(){
- return smoreg.getLowerOrderTerms();
- }
-
- /**Get the value of tolerance parameter
- * @return A double containig the value of tolerance parameter
- */
- public double getToleranceParameter(){
- return smoreg.getToleranceParameter();
- }
-
- /**Check if the RBF kernel is to be used
- * @return true or false
- */
- public boolean getUserRBF(){
- return smoreg.getUseRBF();
- }
-
- /**Get the value of eps
- * @return A double containing the value of eps
- */
- public double getEps(){
- return smoreg.getEps();
- }
-}
diff --git a/src/main/org/openscience/chemojava/qsar/model/weka/SimpleLinearRegressionModel.java b/src/main/org/openscience/chemojava/qsar/model/weka/SimpleLinearRegressionModel.java
deleted file mode 100644
index d2f2a2e..0000000
--- a/src/main/org/openscience/chemojava/qsar/model/weka/SimpleLinearRegressionModel.java
+++ /dev/null
@@ -1,237 +0,0 @@
-/* $RCSfile$
- * $Author$
- * $Date$
- * $Revision$
- *
- * Copyright (C) 2007 by Mario Baseda
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- * All we ask is that proper credit is given for our work, which includes
- * - but is not limited to - adding the above copyright notice to the beginning
- * of your source code files, and to any copyright notice that you may distribute
- * with programs based on this work.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-package org.openscience.chemojava.qsar.model.weka;
-
-import org.openscience.chemojava.libio.weka.Weka;
-import org.openscience.cdk.qsar.model.QSARModelException;
-
-import weka.classifiers.functions.SimpleLinearRegression;
-
-/**
- * A modeling class that provides a simple linear least squares regression model using Weka library.
- * The use of this class is shown in the following code snippet
- *
- * try {
- * SimpleLinearRegressionModel slrm = new SimpleLinearRegressionModel(x,y); or SimpleLinearRegressionModel slrm = new SimpleLinearRegressionModel(typAttrib,classAttrib,x,y,attrib);
- * slrm.build();
- * slrm.setParameters(newX);
- * slrm.predict();
- * Double[] predictedvalues = (Double[])slrm.getPredictPredicted();
- *
- * } catch (QSARModelException qme) {
- * System.out.println(qme.toString());
- * }
- *
- * Other option is set the data from a file format arff.
- *
- * SimpleLinearRegressionModel slrm = new SimpleLinearRegressionModel("/some/where/dataTraining.arff");
- *
- * Note that when making predictions, the new X matrix must be set by calls
- * to setParameters(). The following table lists the parameters that can be set and their
- * expected types.
- *
- *
- *
- *
- * | Name | Java Type | Notes |
- *
- *
- *
- *
- * | x | Double[][] | |
- *
- *
- * | y | Double[] | Length should be equal to the rows of x |
- *
- *
- * | newX | Double[][] | Number of columns should be the same as in x |
- *
- *
- *
- *
- *
- *
- * @author Mario Baseda
- * @cdk.require weka.jar
- * @cdk.license GPL
- * @cdk.module qsarweka
- * @cdk.svnrev $Revision: 9162 $
- * @see Weka
- *
- * @cdk.keyword regression, simple linear
- */
-public class SimpleLinearRegressionModel {
- /**Dependent variable */
- private Object[] y;
- /**Independent variable */
- private Object[][] x;
- private Weka weka = null;
- /**A String specifying the path of the file, format arff,
- * which contians the variables and attributes with whose to test.*/
- private String pathTest = null;
- /** results of the prediction*/
- private Double[] results;
- /**A Array Object containing the independent variable*/
- private Object[][] newX = null;
- /**A String specifying the path of the file, format arff,
- * which contians the independent values with whose to predict.*/
- private String pathNewX = null;
- /**Attribute type: NUMERICAL or NOMINAL*/
- private int[] typAttrib;
- /** String with the attributs*/
- private String[]attrib;
- /** Boolean if the attributs was set*/
- private boolean setAttrib = false;
-
- /**
- * Constructor of the SimpleLinearRegressionModel object from varibles
- * @param y An array containing the dependent variable.
- * @param x An double array containing the independent variable.
- */
- public SimpleLinearRegressionModel(Object[] y, Object[][] x){
- this.y = y;
- this.x = x;
- }
-
- /**
- * Constructor of the SimpleLinearRegressionModel object from varibles
- * @param y An array containing the dependent variable (class value).
- * @param x A 2D array containing the independent variable (for example: qsar results).
- * @param typAttrib An integer array containing the attribute type
- * @param attrib A string array containing the attributs
- */
- public SimpleLinearRegressionModel(int[] typAttrib, Object[] y, Object[][] x, String[] attrib){
- this.y = y;
- this.x = x;
- this.typAttrib = typAttrib;
- this.attrib = attrib;
- setAttrib = true;
- }
-
- /**
- * Constructor of the SimpleLinearRegressionModel object from file
- * @param pathTest Path of the dataset file format arff to train
- */
- public SimpleLinearRegressionModel(String pathTest){
- this.pathTest = pathTest;
- }
-
- /**
- * Builds (trains) the model.
- *
- * @throws QSARModelException if errors occur in data types, calls to the R session. See
- * the corresponding method in subclasses of this class for further details.
- */
- public void build() throws QSARModelException {
- weka = new Weka();
- try {
- SimpleLinearRegression slr = new SimpleLinearRegression();
-
- if(pathTest != null){
- weka.setDataset(
- this.getClass().getClassLoader().getResourceAsStream(pathTest),
- slr
- );
- }else{
- if (!(setAttrib)){
- this.attrib = new String[x[0].length+1];
- this.typAttrib = new int[x[0].length+1];
- for(int i = 0 ; i < x[0].length; i++){
- attrib[i] = "X"+i;
- typAttrib[i] = Weka.NUMERIC;
- }
- attrib[x[0].length] = "Y";
- typAttrib[x[0].length] = Weka.NUMERIC;
- }
- weka.setDataset(attrib,typAttrib,y,x,slr);
- }
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
-
- /**
- * Specifies the parameters to predict. In this case will be the dependent varibles.
- *
- * @param path A String specifying the path of the file, format arff, which contians
- * the dependent values with whose to predict.
- * @throws QSARModelException if the parameters are of the wrong type for the given modeling function
- *
- */
- public void setParameters(String path) throws QSARModelException {
- this.pathNewX = path;
- }
-
- /**
- * Specifies the parameters to predict. In this case will be the independent varibles.
- *
- * @param newX A 2D array Object containing the independent variable.
- * @throws QSARModelException if the parameters are of the wrong type for the given modeling function
- */
- public void setParameters(Object[][] newX) throws QSARModelException {
- this.newX = newX;
- }
-
- /**
- * Makes predictions using a previously built model.
- *
- * @throws QSARModelException if errors occur in data types, calls to the R session. See
- * the corresponding method in subclasses of this class for further details.
- */
- public void predict() throws QSARModelException {
- try{
- if(pathNewX != null){
- Object[] object = weka.getPrediction(pathNewX);
- results = new Double[object.length];
- for(int i = 0 ; i < object.length; i++){
- results[i] = (Double)object[i];
- }
- }
- else if(newX != null){
- Object[] object = weka.getPrediction(newX);
- results = new Double[object.length];
- for(int i = 0 ; i < object.length; i++){
- results[i] = (Double)object[i];
- }
- }
-
- } catch ( Exception e){
- e.printStackTrace();
- }
- }
-
- /**
- * Returns the predicted values for the prediction set.
- *
- * This function only returns meaningful results if the predict
- * method of this class has been called.
- *
- * @return A Double[] containing the predicted values
- */
- public Object[] getPredictPredicted() {
- return results;
- }
-}
diff --git a/src/test/org/openscience/chemojava/libio/openbabel/OpenBabelConvertTest.java b/src/test/org/openscience/chemojava/libio/openbabel/OpenBabelConvertTest.java
deleted file mode 100644
index a7a8543..0000000
--- a/src/test/org/openscience/chemojava/libio/openbabel/OpenBabelConvertTest.java
+++ /dev/null
@@ -1,91 +0,0 @@
-/* $RCSfile$
- * $Author: egonw $
- * $Date: 2008-03-22 16:05:21 +0100 (Sat, 22 Mar 2008) $
- * $Revision: 10503 $
- *
- * Copyright (C) 2004-2007 Miguel Rojas
- *
- * Contact: cdk-devel@slists.sourceforge.net
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- * All we ask is that proper credit is given for our work, which includes
- * - but is not limited to - adding the above copyright notice to the beginning
- * of your source code files, and to any copyright notice that you may distribute
- * with programs based on this work.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- * */
-package org.openscience.chemojava.libio.openbabel;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.FileReader;
-import java.io.InputStream;
-
-import org.junit.Assert;
-import org.junit.Test;
-import org.openscience.cdk.CDKTestCase;
-import org.openscience.cdk.tools.LoggingTool;
-import org.openscience.chemojava.libio.openbabel.OpenBabelConvert;
-
-/**
- * TestCase for the convertor using OpenBabel.
- *
- * @cdk.module nocompile
- *
- * @author Miguel Rojas
- */
-public class OpenBabelConvertTest extends CDKTestCase {
-
- private static LoggingTool logger = new LoggingTool(OpenBabelConvert.class);
-
- @Test public void test5_Hexen_3_one() throws Exception {
- String filenameInput = "data/mdl/540545.mol";
- InputStream ins = this.getClass().getClassLoader().getResourceAsStream(filenameInput);
- File fileOutput = File.createTempFile("540545.", ".mol");
- FileOutputStream outs = new FileOutputStream(fileOutput);
- try {
- byte[] buf = new byte[1024];
- int i = 0;
- while ((i = ins.read(buf)) != -1) {
- outs.write(buf, 0, i);
- }
- } catch (Exception e) {
- throw e;
- } finally {
- if (ins != null) ins.close();
- if (outs != null) outs.close();
- }
-
- logger.info("Testing: " + fileOutput.getAbsolutePath());
- System.out.println("testing: " + fileOutput.getAbsolutePath());
-
- OpenBabelConvert convertOB = new OpenBabelConvert();
-
- File tmpFile = File.createTempFile("540545.", ".cml");
- System.out.println("testing: " + tmpFile.getAbsolutePath());
- convertOB.convert(fileOutput, "mol", tmpFile, "cml", "-h");
-
- BufferedReader reader = new BufferedReader(new FileReader(tmpFile));
- String line = reader.readLine();
- int lineCount = 0;
- while (line != null) {
- System.out.println("Line: " + line);
- lineCount++;
- line = reader.readLine();
- }
- Assert.assertTrue(lineCount > 0);
- }
-
-}
diff --git a/src/test/org/openscience/chemojava/libio/weka/WekaTest.java b/src/test/org/openscience/chemojava/libio/weka/WekaTest.java
deleted file mode 100644
index ffc50e1..0000000
--- a/src/test/org/openscience/chemojava/libio/weka/WekaTest.java
+++ /dev/null
@@ -1,143 +0,0 @@
-/* $RCSfile$
- * $Author: egonw $
- * $Date: 2006-05-01 10:49:56 +0200 (Mo, 01 Mai 2006) $
- * $Revision: 6096 $
- *
- * Copyright (C) 2004-2007 Miguel Rojas
- *
- * Contact: cdk-devel@slists.sourceforge.net
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- * All we ask is that proper credit is given for our work, which includes
- * - but is not limited to - adding the above copyright notice to the beginning
- * of your source code files, and to any copyright notice that you may distribute
- * with programs based on this work.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- * */
-package org.openscience.chemojava.libio.weka;
-
-import java.io.InputStream;
-
-import org.junit.Assert;
-import org.junit.Test;
-import org.openscience.cdk.CDKTestCase;
-
-import weka.classifiers.functions.LinearRegression;
-import weka.classifiers.trees.J48;
-
-/**
- * TestCase for Weka class.
- *
- * @cdk.module test-qsarweka
- */
-public class WekaTest extends CDKTestCase {
-
- /**
- * A unit test for JUnit. Test and prediction using file arff format,
- * algorithm = Linear Regression.
- */
- @Test public void test1() throws Exception {
- LinearRegression lr = new LinearRegression();
- String[] options = new String[4];
- options[0] = "-U";
- options[1] = "0";
- options[2] = "-R";
- options[3] = "0.00000008";
- lr.setOptions(options);
- Weka weka = new Weka();
- InputStream stream = this.getClass().getClassLoader().
- getResourceAsStream("data/arff/Table1.arff");
- weka.setDataset(stream, lr);
- Object[] result = weka.getPredictionCDK("data/arff/Table2.arff");
- Assert.assertNotNull(result);
- }
-
- /**
- * A unit test for JUnit. Test using file arrf and prediction using Array,
- * algorithm = Linear Regression.
- */
- @Test public void test2() throws Exception {
- LinearRegression lr = new LinearRegression();
- String[] options = new String[4];
- options[0] = "-U";
- options[1] = "0";
- options[2] = "-R";
- options[3] = "0.00000008";
- lr.setOptions(options);
- Weka weka = new Weka();
- InputStream stream = this.getClass().getClassLoader().
- getResourceAsStream("data/arff/Table1.arff");
- weka.setDataset(stream, lr);
- Object[][] testX = {{new Double(2),new Double(2)},
- {new Double(5),new Double(5)}
- };
- Object[] result = weka.getPrediction(testX);
- Assert.assertNotNull(result);
- }
-
- /**
- * A unit test for JUnit. Test and prediction using Array, algorithm =
- * Linear Regression.
- */
- @Test public void test3() throws Exception {
- LinearRegression lr = new LinearRegression();
- String[] attrib = {"X2","X1", "Y" };
- int[] typAttrib = {Weka.NUMERIC,Weka.NUMERIC,Weka.NUMERIC};
- double[][] x = {{ 1,1},{3,3},{4,4},{6,6} };
- Double[][] xD = new Double[x.length][x[0].length];
- for(int i = 0 ; i< xD.length; i++)
- for(int j = 0 ; j < xD[i].length ; j++)
- xD[i][j] = new Double(x[i][j]);
-
- double[] y = { 0,2,3,5};
- Double[] yD = new Double[y.length];
- for(int i = 0 ; i< yD.length; i++)
- yD[i] = new Double(y[i]);
-
- Weka weka = new Weka();
- weka.setDataset(attrib, typAttrib, yD, xD, lr);
- Double[][] testX = {{new Double(2),new Double(2)},
- {new Double(5),new Double(5)}
- };
- Object[] result = weka.getPrediction(testX);
- Assert.assertNotNull(result);
- }
-
- /**
- * A unit test for JUnit. Test prediction using Array, algorithm = J48
- */
- @Test public void test4() throws Exception {
- String[] options = new String[1];
- options[0] = "-U";
- J48 j48 = new J48();
- j48.setOptions(options);
-
- String[] attrib = {"aX","bX","cX"};
- int[] typAttrib = {Weka.NUMERIC,Weka.NUMERIC,Weka.NUMERIC};
- String[] classAttrib = {"A_","B_","C_"};
- double[][] x = {{10,10 ,10 },{10 , -10 , -10},{-10 , -10 , -10},
- {11,11 ,11 },{11 , -11 , -11},{-11 , -11 , -11}};
- Double[][] xD = new Double[x.length][x[0].length];
- for(int i = 0 ; i< xD.length; i++)
- for(int j = 0 ; j < xD[i].length ; j++)
- xD[i][j] = new Double(x[i][j]);
- String[] y = { "A_","B_" ,"C_","A_","B_" ,"C_"};
- Weka weka = new Weka();
- weka.setDataset(attrib, typAttrib, classAttrib, y, xD, j48);
- Double[][] testX = {{new Double(11),new Double(-11),new Double(-11)},
- {new Double(-10),new Double(-10),new Double(-10)}};
- Object[] resultY = weka.getPrediction(testX);
- Assert.assertNotNull(resultY);
- }
-}
diff --git a/src/test/org/openscience/chemojava/modulesuites/MqsarwekaTests.java b/src/test/org/openscience/chemojava/modulesuites/MqsarwekaTests.java
deleted file mode 100644
index 440cf86..0000000
--- a/src/test/org/openscience/chemojava/modulesuites/MqsarwekaTests.java
+++ /dev/null
@@ -1,21 +0,0 @@
-package org.openscience.chemojava.modulesuites;
-
-import org.junit.runner.RunWith;
-import org.junit.runners.Suite;
-import org.junit.runners.Suite.SuiteClasses;
-import org.openscience.chemojava.libio.weka.WekaTest;
-
-/**
- * TestSuite that runs all the tests for the CDK reaction module.
- *
- * @cdk.module test-qsarweka
- * @cdk.depends weka.jar
- * @cdk.depends junit.jar
- */
-@RunWith(Suite.class)
-@SuiteClasses({
- WekaTest.class
-})
-public class MqsarwekaTests {
-
-}
diff --git a/src/test/org/openscience/chemojava/qsar/model/weka/DensityBasedClustererModelTest.java b/src/test/org/openscience/chemojava/qsar/model/weka/DensityBasedClustererModelTest.java
deleted file mode 100644
index d3af2ab..0000000
--- a/src/test/org/openscience/chemojava/qsar/model/weka/DensityBasedClustererModelTest.java
+++ /dev/null
@@ -1,85 +0,0 @@
-/* $RCSfile$
- * $Author: mariobaseda $
- * $Date: 2007-01-03 17:57:14 +0100 (We, 03 Jan 2007) $
- * $Revision: 5602 $
- *
- * Copyright (C) 2007 by Mario Baseda
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- * All we ask is that proper credit is given for our work, which includes
- * - but is not limited to - adding the above copyright notice to the beginning
- * of your source code files, and to any copyright notice that you may distribute
- * with programs based on this work.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-package org.openscience.chemojava.qsar.model.weka;
-
-import org.junit.Assert;
-import org.junit.Test;
-import org.openscience.cdk.CDKTestCase;
-import org.openscience.chemojava.libio.weka.Weka;
-
-/**
- * TestSuite that runs a test for the DensityBasedClustererModel
- *
- * @author Mario Baseda
- * @cdk.module test-qsar
- */
-public class DensityBasedClustererModelTest extends CDKTestCase{
-
- /**
- * @throws Exception
- */
- @Test public void testDensityBasedClustererModel() throws Exception{
- DensityBasedClustererModel test = new DensityBasedClustererModel();
- int[] typAttrib = {Weka.NUMERIC, Weka.NUMERIC, Weka.NUMERIC};
- String[] classAttrib = {"A_", "B_", "C_"};
- double[][] x = {{10, 10, 10}, {10, -10, -10}, {-10, -10, -10},
- {11, 11, 11}, {11, -11, -11}, {-11, -11, -11}};
- Double[][] xD = new Double[x.length][x[0].length];
- for (int i = 0; i < xD.length; i++)
- for (int j = 0; j < xD[i].length; j++)
- xD[i][j] = new Double(x[i][j]);
- String[] y = {"A_", "B_", "C_", "A_", "B_", "C_"};
- String[] attrib = {"X1", "X2", "X3"};
- test.setData(attrib, typAttrib, classAttrib, y, xD);
- //test.setData(attrib, typAttrib, null, null, xD);
- test.build();
- Double[][] newx = {
- {new Double(99), new Double(89), new Double(79)},
- {new Double(19), new Double(29), new Double(39)},
- };
- test.setParameters(newx);
- Assert.assertNotNull (test.distributionForInstance());
- Assert.assertNotNull (test.logDensityForInstance());
- Assert.assertNotNull (test.logDensityPerClusterForInstance());
- Assert.assertNotNull (test.logJointDensitiesForInstance());
- Assert.assertNotNull (test.clusterPriors());
- }
-
- /**
- * @throws Exception
- */
-// public void testDensityBasedClustererModel2() throws Exception{
-// DensityBasedClustererModel test = new DensityBasedClustererModel();
-// test.setData("X:\\cdk\\src\\data\\arff\\Table3.arff");
-// test.build();
-// test.setParameters("X:\\cdk\\src\\data\\arff\\Table4.arff");
-// assertNotNull (test.distributionForInstance());
-// assertNotNull (test.logDensityForInstance());
-// assertNotNull (test.logDensityPerClusterForInstance());
-// assertNotNull (test.logJointDensitiesForInstance());
-// assertNotNull (test.clusterPriors());
-// }
-}
diff --git a/src/test/org/openscience/chemojava/qsar/model/weka/EMClusterTest.java b/src/test/org/openscience/chemojava/qsar/model/weka/EMClusterTest.java
deleted file mode 100644
index 30566f3..0000000
--- a/src/test/org/openscience/chemojava/qsar/model/weka/EMClusterTest.java
+++ /dev/null
@@ -1,98 +0,0 @@
-/* $RCSfile$
- * $Author: mariobaseda $
- * $Date: 2007-01-03 17:57:14 +0100 (We, 03 Jan 2007) $
- * $Revision: 5602 $
- *
- * Copyright (C) 2007 by Mario Baseda
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- * All we ask is that proper credit is given for our work, which includes
- * - but is not limited to - adding the above copyright notice to the beginning
- * of your source code files, and to any copyright notice that you may distribute
- * with programs based on this work.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-package org.openscience.chemojava.qsar.model.weka;
-
-import org.junit.Assert;
-import org.junit.Test;
-import org.openscience.cdk.CDKTestCase;
-import org.openscience.chemojava.libio.weka.Weka;
-
-/**
- * TestSuite that runs a test for the EMCluster
- *
- * @author Mario Baseda
- * @cdk.module test-qsar
- */
-public class EMClusterTest extends CDKTestCase{
-
- /**
- * @throws Exception
- */
- @Test public void testEMCluster() throws Exception{
-// String[] options = {"-N", "3"};
- EMCluster test = new EMCluster();
-// test.setOptions(options);
- int[] typAttrib = {Weka.NUMERIC, Weka.NUMERIC, Weka.NUMERIC};
- String[] classAttrib = {"A_", "B_", "C_"};
- double[][] x = {{10, 10, 10}, {10, -10, -10}, {-10, -10, -10},
- {11, 11, 11}, {11, -11, -11}, {-11, -11, -11}};
- Double[][] xD = new Double[x.length][x[0].length];
- for (int i = 0; i < xD.length; i++)
- for (int j = 0; j < xD[i].length; j++)
- xD[i][j] = new Double(x[i][j]);
- String[] y = {"A_", "B_", "C_", "A_", "B_", "C_"};
- String[] attrib = {"X1", "X2", "X3"};
- test.setData(attrib, typAttrib, classAttrib, y, xD);
- test.build();
- Double[][] newx = {
- {new Double(99), new Double(89), new Double(79)},
- {new Double(19), new Double(29), new Double(39)},
- };
- test.setParameters(newx);
- test.logDensityPerClusterForInstance();
- Assert.assertNotNull (test.clusterPriors());
- Assert.assertNotNull (test.getClusterModelsNumericAtts());
- Assert.assertNotNull (test.getClusterPriors());
- Assert.assertNotNull (test.getLogDensityPerClusterForInstance());
- Assert.assertNotNull (test.getClusterModelsNumericAtts());
- Assert.assertEquals(100, test.getSeed());
- Assert.assertEquals(-1, test.getNumClusters());
- Assert.assertEquals(1.0E-6, test.getMinStdDev(), 0.001);
- Assert.assertEquals(100, test.getMaxIterations());
- Assert.assertEquals(1, test.numberOfCluster());
- }
-
- /**
- * @throws Exception
- */
-// public void testEMCluster2() throws Exception{
-// EMCluster test = new EMCluster();
-// test.setData("X:\\cdk\\src\\data\\arff\\Table3.arff");
-// test.build();
-// test.setParameters("X:\\cdk\\src\\data\\arff\\Table4.arff");
-// test.logDensityPerClusterForInstance();
-// assertNotNull (test.clusterPriors());
-// assertNotNull (test.getClusterModelsNumericAtts());
-// assertNotNull (test.getClusterPriors());
-// assertNotNull (test.getLogDensityPerClusterForInstance());
-// assertNotNull (test.getClusterModelsNumericAtts());
-// assertEquals(100, test.getSeed());
-// assertEquals(-1, test.getNumClusters());
-// assertEquals(1.0E-6, test.getMinStdDev(), 0.001);
-// assertEquals(100, test.getMaxIterations());
-// assertEquals(1, test.numberOfCluster());
-// }
-}
diff --git a/src/test/org/openscience/chemojava/qsar/model/weka/FilterSupervisedResampleTest.java b/src/test/org/openscience/chemojava/qsar/model/weka/FilterSupervisedResampleTest.java
deleted file mode 100644
index ecba9ec..0000000
--- a/src/test/org/openscience/chemojava/qsar/model/weka/FilterSupervisedResampleTest.java
+++ /dev/null
@@ -1,78 +0,0 @@
-/* $RCSfile$
- * $Author: mariobaseda $
- * $Date: 2007-01-03 17:57:14 +0100 (We, 03 Jan 2007) $
- * $Revision: 5602 $
- *
- * Copyright (C) 2007 by Mario Baseda
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- * All we ask is that proper credit is given for our work, which includes
- * - but is not limited to - adding the above copyright notice to the beginning
- * of your source code files, and to any copyright notice that you may distribute
- * with programs based on this work.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-package org.openscience.chemojava.qsar.model.weka;
-
-import org.junit.Assert;
-import org.junit.Test;
-import org.openscience.cdk.CDKTestCase;
-import org.openscience.chemojava.libio.weka.Weka;
-
-/**
- * TestSuite that runs a test for the FilterSupervisedResample
- *
- * @author Mario Baseda
- * @cdk.module test-qsar
- */
-public class FilterSupervisedResampleTest extends CDKTestCase{
-
- /**
- * @throws Exception
- */
- @Test public void testFilterSupervisedResample() throws Exception{
- //String[] options = {"-S", "1"};
- FilterSupervisedResample test = new FilterSupervisedResample();
- //test.setOptions(options);
- int[] typAttrib = {Weka.NUMERIC, Weka.NUMERIC, Weka.NUMERIC};
- String[] classAttrib = {"A_", "B_", "C_"};
- double[][] x = {{10, 10, 10}, {10, -10, -10}, {-10, -10, -10},
- {11, 11, 11}, {11, -11, -11}, {-11, -11, -11}};
- Double[][] xD = new Double[x.length][x[0].length];
- for (int i = 0; i < xD.length; i++)
- for (int j = 0; j < xD[i].length; j++)
- xD[i][j] = new Double(x[i][j]);
- String[] y = {"A_", "B_", "C_", "A_", "B_", "C_"};
- String[] attrib = {"X1", "X2", "X3"};
- test.setData(attrib, typAttrib, classAttrib, y, xD);
- test.build();
- Assert.assertEquals(0.0, test.getBiasToUniformClass(), 0.001);
- Assert.assertEquals(1, test.getRandomSeed());
- Assert.assertEquals(100.0, test.getSampleSizePercent(), 0.001);
- }
-
- /**
- * @throws Exception
- */
-// public void testFilterSupervisedResample2() throws Exception{
-// //String[] options = {"-S", "1"};
-// FilterSupervisedResample test = new FilterSupervisedResample();
-// //test.setOptions(options);
-// test.setData("X:\\cdk\\src\\data\\arff\\Table3.arff");
-// test.build();
-// assertEquals(0.0, test.getBiasToUniformClass(), 0.001);
-// assertEquals(1, test.getRandomSeed());
-// assertEquals(100.0, test.getSampleSizePercent(), 0.001);
-// }
-}
diff --git a/src/test/org/openscience/chemojava/qsar/model/weka/FilterUnSupervisedRandomizeTest.java b/src/test/org/openscience/chemojava/qsar/model/weka/FilterUnSupervisedRandomizeTest.java
deleted file mode 100644
index 7cf9463..0000000
--- a/src/test/org/openscience/chemojava/qsar/model/weka/FilterUnSupervisedRandomizeTest.java
+++ /dev/null
@@ -1,74 +0,0 @@
-/* $RCSfile$
- * $Author: mariobaseda $
- * $Date: 2007-01-03 17:57:14 +0100 (We, 03 Jan 2007) $
- * $Revision: 5602 $
- *
- * Copyright (C) 2007 by Mario Baseda
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- * All we ask is that proper credit is given for our work, which includes
- * - but is not limited to - adding the above copyright notice to the beginning
- * of your source code files, and to any copyright notice that you may distribute
- * with programs based on this work.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-package org.openscience.chemojava.qsar.model.weka;
-
-import org.junit.Assert;
-import org.junit.Test;
-import org.openscience.cdk.CDKTestCase;
-import org.openscience.chemojava.libio.weka.Weka;
-
-/**
- * TestSuite that runs a test for the FilterUnSupervisedRandomize
- *
- * @author Mario Baseda
- * @cdk.module test-qsar
- */
-public class FilterUnSupervisedRandomizeTest extends CDKTestCase{
-
- /**
- * @throws Exception
- */
- @Test public void testFilterUnSupervisedRandomize() throws Exception{
- //String[] options = {"-S", "1"};
- FilterUnSupervisedRandomize test = new FilterUnSupervisedRandomize();
- //test.setOptions(options);
- int[] typAttrib = {Weka.NUMERIC, Weka.NUMERIC, Weka.NUMERIC};
- String[] classAttrib = {"A_", "B_", "C_"};
- double[][] x = {{10, 10, 10}, {10, -10, -10}, {-10, -10, -10},
- {11, 11, 11}, {11, -11, -11}, {-11, -11, -11}};
- Double[][] xD = new Double[x.length][x[0].length];
- for (int i = 0; i < xD.length; i++)
- for (int j = 0; j < xD[i].length; j++)
- xD[i][j] = new Double(x[i][j]);
- String[] y = {"A_", "B_", "C_", "A_", "B_", "C_"};
- String[] attrib = {"X1", "X2", "X3"};
- test.setData(attrib, typAttrib, classAttrib, y, xD);
- test.build();
- Assert.assertEquals(42, test.getRandomSeed());
- }
-
- /**
- * @throws Exception
- */
-// public void testFilterUnSupervisedRandomize2() throws Exception{
-// //String[] options = {"-S", "1"};
-// FilterUnSupervisedRandomize test = new FilterUnSupervisedRandomize();
-// //test.setOptions(options);
-// test.setData("X:\\cdk\\src\\data\\arff\\Table3.arff");
-// test.build();
-// assertEquals(42, test.getRandomSeed());
-// }
-}
diff --git a/src/test/org/openscience/chemojava/qsar/model/weka/FilterUnSupervisedResampleTest.java b/src/test/org/openscience/chemojava/qsar/model/weka/FilterUnSupervisedResampleTest.java
deleted file mode 100644
index 6f0182a..0000000
--- a/src/test/org/openscience/chemojava/qsar/model/weka/FilterUnSupervisedResampleTest.java
+++ /dev/null
@@ -1,77 +0,0 @@
-/* $RCSfile$
- * $Author: mariobaseda $
- * $Date: 2007-01-03 17:57:14 +0100 (We, 03 Jan 2007) $
- * $Revision: 5602 $
- *
- * Copyright (C) 2007 by Mario Baseda
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- * All we ask is that proper credit is given for our work, which includes
- * - but is not limited to - adding the above copyright notice to the beginning
- * of your source code files, and to any copyright notice that you may distribute
- * with programs based on this work.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-package org.openscience.chemojava.qsar.model.weka;
-
-import org.junit.Assert;
-import org.junit.Test;
-import org.openscience.cdk.CDKTestCase;
-import org.openscience.chemojava.libio.weka.Weka;
-
-/**
- * TestSuite that runs a test for the FilterUnSupervisedResample
- *
- * @author Mario Baseda
- * @cdk.module test-qsar
- */
-public class FilterUnSupervisedResampleTest extends CDKTestCase{
-
-
- /**
- * @throws Exception
- */
- @Test public void testFilterUnSupervisedResample() throws Exception{
- //String[] options = {"-S", "1"};
- FilterUnSupervisedResample test = new FilterUnSupervisedResample();
- //test.setOptions(options);
- int[] typAttrib = {Weka.NUMERIC, Weka.NUMERIC, Weka.NUMERIC};
- String[] classAttrib = {"A_", "B_", "C_"};
- double[][] x = {{10, 10, 10}, {10, -10, -10}, {-10, -10, -10},
- {11, 11, 11}, {11, -11, -11}, {-11, -11, -11}};
- Double[][] xD = new Double[x.length][x[0].length];
- for (int i = 0; i < xD.length; i++)
- for (int j = 0; j < xD[i].length; j++)
- xD[i][j] = new Double(x[i][j]);
- String[] y = {"A_", "B_", "C_", "A_", "B_", "C_"};
- String[] attrib = {"X1", "X2", "X3"};
- test.setData(attrib, typAttrib, classAttrib, y, xD);
- test.build();
- Assert.assertEquals(1, test.getRandomSeed());
- Assert.assertEquals(100.0, test.getSampleSizePercent(), 0.001);
- }
-
- /**
- * @throws Exception
- */
-// public void testFilterUnSupervisedResample2() throws Exception{
-// //String[] options = {"-S", "1"};
-// FilterUnSupervisedResample test = new FilterUnSupervisedResample();
-// //test.setOptions(options);
-// test.setData("X:\\cdk\\src\\data\\arff\\Table3.arff");
-// test.build();
-// assertEquals(1, test.getRandomSeed());
-// assertEquals(100.0, test.getSampleSizePercent(), 0.001);
-// }
-}
diff --git a/src/test/org/openscience/chemojava/qsar/model/weka/J48WModelTest.java b/src/test/org/openscience/chemojava/qsar/model/weka/J48WModelTest.java
deleted file mode 100644
index f1a7ab3..0000000
--- a/src/test/org/openscience/chemojava/qsar/model/weka/J48WModelTest.java
+++ /dev/null
@@ -1,96 +0,0 @@
-/* $RCSfile$
- * $Author: miguelrojasch $
- * $Date: 2006-05-11 14:25:07 +0200 (Do, 11 Mai 2006) $
- * $Revision: 6221 $
- *
- * Copyright (C) 2004-2007 Miguel Rojas
- *
- * Contact: cdk-devel@lists.sourceforge.net
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-package org.openscience.chemojava.qsar.model.weka;
-
-import org.junit.Assert;
-import org.junit.Test;
-import org.openscience.cdk.CDKTestCase;
-import org.openscience.cdk.exception.CDKException;
-import org.openscience.cdk.qsar.model.QSARModelException;
-import org.openscience.chemojava.libio.weka.Weka;
-
-/**
- * TestSuite that runs a test for the J48WModel
- *
- * @author Miguel Rojas
- * @cdk.module test-qsar
- */
-public class J48WModelTest extends CDKTestCase {
-
- /**
- * @throws CDKException
- * @throws Exception
- * @throws QSARModelException
- */
- @Test public void testJ48WModel1() throws Exception {
- int[] typAttrib = {Weka.NUMERIC, Weka.NUMERIC, Weka.NUMERIC};
- String[] classAttrib = {"A_", "B_", "C_"};
- double[][] x = {{10, 10, 10}, {10, -10, -10}, {-10, -10, -10},
- {11, 11, 11}, {11, -11, -11}, {-11, -11, -11}};
- Double[][] xD = new Double[x.length][x[0].length];
- for (int i = 0; i < xD.length; i++)
- for (int j = 0; j < xD[i].length; j++)
- xD[i][j] = new Double(x[i][j]);
- String[] y = {"A_", "B_", "C_", "A_", "B_", "C_"};
-
- J48WModel j48 = new J48WModel(typAttrib, classAttrib, y, xD);
- String[] options = new String[1];
- options[0] = "-U";
- j48.setOptions(options);
- j48.build();
-
- /* Test predictions */
- Double[][] testX = {{new Double(11), new Double(-11), new Double(-11)},
- {new Double(-10), new Double(-10), new Double(-10)}};
-
- j48.setParameters(testX);
- j48.predict();
-
- String[] preds = (String[]) j48.getPredictPredicted();
- Assert.assertEquals("B_", preds[0]);
- Assert.assertEquals("C_", preds[1]);
- }
-
- /**
- * @throws CDKException
- * @throws Exception
- * @throws QSARModelException
- */
- @Test public void testJ48WModel2() throws CDKException, java.lang.Exception, QSARModelException {
- J48WModel j48 = new J48WModel(true, "data/arff/Table3.arff");
- String[] options = new String[1];
- options[0] = "-U";
- j48.setOptions(options);
- j48.build();
- Double[][] testX = {{new Double(11), new Double(-11), new Double(-11)},
- {new Double(-10), new Double(-10), new Double(-10)}};
- j48.setParameters(testX);
- j48.predict();
- String[] preds = (String[]) j48.getPredictPredicted();
- Assert.assertEquals("B_", preds[0]);
- Assert.assertEquals("C_", preds[1]);
- }
-}
-
diff --git a/src/test/org/openscience/chemojava/qsar/model/weka/KmeansClusterTest.java b/src/test/org/openscience/chemojava/qsar/model/weka/KmeansClusterTest.java
deleted file mode 100644
index 5c97eb3..0000000
--- a/src/test/org/openscience/chemojava/qsar/model/weka/KmeansClusterTest.java
+++ /dev/null
@@ -1,106 +0,0 @@
-/* $RCSfile$
- * $Author: mariobaseda $
- * $Date: 2007-01-03 17:57:14 +0100 (We, 03 Jan 2007) $
- * $Revision: 5602 $
- *
- * Copyright (C) 2007 by Mario Baseda
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- * All we ask is that proper credit is given for our work, which includes
- * - but is not limited to - adding the above copyright notice to the beginning
- * of your source code files, and to any copyright notice that you may distribute
- * with programs based on this work.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-package org.openscience.chemojava.qsar.model.weka;
-
-import org.junit.Assert;
-import org.junit.Test;
-import org.openscience.cdk.CDKTestCase;
-import org.openscience.chemojava.libio.weka.Weka;
-
-/**
- * TestSuite that runs a test for the KmeansCluster
- *
- * @author Mario Baseda
- * @cdk.module test-qsar
- */
-public class KmeansClusterTest extends CDKTestCase {
-
- /**
- * @throws Exception
- */
- @Test public void testKmeansCluster_N() throws Exception{
- String[] options = {"-N", "3"};
- KmeansCluster test = new KmeansCluster();
- test.setOptions(options);
- int[] typAttrib = {Weka.NUMERIC, Weka.NUMERIC, Weka.NUMERIC};
- String[] classAttrib = {"A_", "B_", "C_"};
- double[][] x = {{10, 10, 10}, {10, -10, -10}, {-10, -10, -10},
- {11, 11, 11}, {11, -11, -11}, {-11, -11, -11}};
- Double[][] xD = new Double[x.length][x[0].length];
- for (int i = 0; i < xD.length; i++)
- for (int j = 0; j < xD[i].length; j++)
- xD[i][j] = new Double(x[i][j]);
- String[] y = {"A_", "B_", "C_", "A_", "B_", "C_"};
- String[] attrib = {"X1", "X2", "X3"};
- test.setData(attrib, typAttrib, classAttrib, y, xD);
- test.build();
- Double[][] newx = {
- {new Double(99), new Double(89), new Double(79)},
- {new Double(19), new Double(29), new Double(39)},
- };
- test.setParameters(newx);
- test.clusterInstance();;
- Assert.assertNotNull (test.getClusterInstance());
- Assert.assertNotNull (test.getClusterCentroids());
- Assert.assertEquals(3.0, Math.rint(test.getSquaredError()), 0.01);
- Assert.assertNotNull (test.getClusterStandardDevs());
- Assert.assertNotNull (test.getClusterSizes());
- Assert.assertEquals(3, test.numberOfCluster());
- }
-
- /**
- * @throws Exception
- */
- @Test public void testKmeansCluster_S() throws Exception{
- String[] options = {"-S", "4"};
- KmeansCluster test = new KmeansCluster();
- test.setOptions(options);
- int[] typAttrib = {Weka.NUMERIC, Weka.NUMERIC, Weka.NUMERIC};
- String[] classAttrib = {"A_", "B_", "C_"};
- double[][] x = {{10, 10, 10}, {10, -10, -10}, {-10, -10, -10},
- {11, 11, 11}, {11, -11, -11}, {-11, -11, -11}};
- Double[][] xD = new Double[x.length][x[0].length];
- for (int i = 0; i < xD.length; i++)
- for (int j = 0; j < xD[i].length; j++)
- xD[i][j] = new Double(x[i][j]);
- String[] y = {"A_", "B_", "C_", "A_", "B_", "C_"};
- String[] attrib = {"X1", "X2", "X3"};
- test.setData(attrib, typAttrib, classAttrib, y, xD);
- test.build();
- Double[][] newx = {
- {new Double(99), new Double(89), new Double(79)},
- {new Double(19), new Double(29), new Double(39)},
- };
- test.setParameters(newx);
- test.clusterInstance();
- Assert.assertNotNull (test.getClusterInstance());
- Assert.assertEquals(2, test.numberOfCluster());
- Assert.assertNotNull (test.getClusterCentroids());
- Assert.assertEquals(4.0, Math.rint(test.getSquaredError()), 0.01);
- Assert.assertNotNull (test.getClusterStandardDevs());
- Assert.assertNotNull (test.getClusterSizes());
- }
-}
diff --git a/src/test/org/openscience/chemojava/qsar/model/weka/LinearRegressionWModelTest.java b/src/test/org/openscience/chemojava/qsar/model/weka/LinearRegressionWModelTest.java
deleted file mode 100644
index 0dc11d4..0000000
--- a/src/test/org/openscience/chemojava/qsar/model/weka/LinearRegressionWModelTest.java
+++ /dev/null
@@ -1,278 +0,0 @@
-/* $RCSfile$
- * $Author: miguelrojasch $
- * $Date: 2006-05-11 14:25:07 +0200 (Do, 11 Mai 2006) $
- * $Revision: 6221 $
- *
- * Copyright (C) 2004-2007 Miguel Rojas
- *
- * Contact: cdk-devel@lists.sourceforge.net
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-package org.openscience.chemojava.qsar.model.weka;
-
-import org.junit.Assert;
-import org.junit.Test;
-import org.openscience.cdk.CDKTestCase;
-import org.openscience.cdk.exception.CDKException;
-import org.openscience.cdk.qsar.model.QSARModelException;
-
-/**
- * TestSuite that runs a test for the LinearRegressionWModel
- *
- * @author Miguel Rojas
- * @cdk.module test-qsar
- */
-public class LinearRegressionWModelTest extends CDKTestCase {
- /**
- * @throws CDKException
- * @throws Exception
- * @throws QSARModelException
- */
- @Test public void testLinearRegressionWModel1() throws Exception {
-
- double[][] x = {{1, 1}, {3, 3}, {4, 4}, {6, 6}};
- Double[][] xD = new Double[x.length][x[0].length];
- for (int i = 0; i < xD.length; i++)
- for (int j = 0; j < xD[i].length; j++)
- xD[i][j] = new Double(x[i][j]);
- double[] y = {0, 2, 3, 5};
-
- Double[] yD = new Double[y.length];
- for (int i = 0; i < yD.length; i++)
- yD[i] = new Double(y[i]);
-
- LinearRegressionWModel lrm = new LinearRegressionWModel(yD, xD);
- String[] options = new String[4];
- options[0] = "-U";
- options[1] = "0";
- options[2] = "-R";
- options[3] = "0.0001";
- lrm.setOptions(options);
- lrm.build();
-
- /* Test predictions */
- Double[][] newx = {
- {new Double(2), new Double(2)},
- {new Double(5), new Double(5)},
- };
-
- lrm.setParameters(newx);
- lrm.predict();
-
- Double[] preds = (Double[]) lrm.getPredictPredicted();
- Assert.assertEquals(1.0, (preds[0]).doubleValue(), 0.001);
- Assert.assertEquals(4.0, (preds[1]).doubleValue(), 0.001);
- }
-
- /**
- * @throws CDKException
- * @throws Exception
- * @throws QSARModelException
- */
- public void testLinearRegressionWModel2() throws CDKException, java.lang.Exception, QSARModelException {
- LinearRegressionWModel lrm = new LinearRegressionWModel("data/arff/Table1.arff");
- String[] options = new String[4];
- options[0] = "-U";
- options[1] = "0";
- options[2] = "-R";
- options[3] = "0.00000008";
- lrm.setOptions(options);
- lrm.build();
- lrm.setParametersCDK("data/arff/Table2.arff");
- lrm.predict();
- Double[] result = (Double[]) lrm.getPredictPredicted();
- Assert.assertNotNull(result);
- Assert.assertEquals(1.0, (result[0]).doubleValue(), 0.001);
- Assert.assertEquals(4.0, (result[1]).doubleValue(), 0.001);
- }
-// /**
-// *
-// * @throws CDKException
-// * @throws java.lang.Exception
-// * @throws QSARModelException
-// */
-// public void testLinearRegressionWModel3() throws CDKException, java.lang.Exception, QSARModelException {
-//
-// double[][] x = {{ 5.33029143313, 8.13257437501, 2.66720308462 },
-// { 3.29906147519, 5.06835102093, 6.47319431067 },
-// { 5.69553153292, 5.88043843898, 9.73312992111 },
-// { 5.29194559083, 6.78243188133, 3.2602449344 },
-// { 6.18105762768, 3.36588488672, 3.94539328809 },
-// { 1.32223357975, 8.78797039033, 7.77485740688 },
-// { 0.391740629966, 5.08060997023, 8.28722389016 },
-// { 4.27475126706, 8.52015977633, 7.21468545649 },
-// { 7.14131409262, 8.67086866827, 7.64228671009 },
-// { 8.55502719447, 5.25013245421, 5.73240025988 },
-// { 5.31791067667, 7.99313789208, 1.64835209014 },
-// { 9.03149835466, 1.94042287241, 9.28020345543 },
-// { 0.925468187342, 4.97155215507, 7.69457858258 },
-// { 9.16182426614, 4.74534182996, 6.58111071706 },
-// { 1.15220637861, 1.78078924823, 2.24407287943 },
-// { 9.24209878847, 7.87658524713, 2.38732162601 },
-// { 8.50715035908, 9.16453417058, 0.618727514944 },
-// { 5.84019865932, 5.20208546615, 6.61838858253 },
-// { 3.76256505014, 0.329738943471, 0.874419640166 },
-// { 9.96004184517, 9.14019090437, 4.90929645109 },
-// { 4.44743194213, 3.95642974577, 7.62629150218 },
-// { 1.24177865105, 1.48660423923, 1.20830798956 },
-// { 8.35590316383, 1.14743031542, 6.29868134513 },
-// { 6.12876561357, 4.63929392357, 5.87722199543 },
-// { 8.11829752127, 0.13950274139, 2.54723293455 },
-// { 4.40852772122, 5.07291389291, 0.100128243526 },
-// { 2.58403059855, 1.78831569742, 5.19817475725 },
-// { 8.04282601008, 3.76076347262, 1.43904088129 },
-// { 3.43713025153, 4.35105074191, 0.0189145485124 },
-// { 5.0236445539, 1.06317719489, 5.10306592945 },
-// { 9.77434875025, 9.0666617274, 6.99448050277 },
-// { 4.06797047248, 7.62659701718, 9.83152424086 },
-// { 6.48920287132, 0.156594507329, 5.46872113685 },
-// { 6.42883928789, 2.01940454563, 6.46523071259 },
-// { 1.16293901493, 5.15391581673, 3.56182526491 },
-// { 7.38000931385, 0.453325117578, 6.61031329357 },
-// { 9.32963370626, 2.12590745134, 0.405388324151 },
-// { 0.737255223472, 7.39059871721, 2.86079226118 },
-// { 6.85301380605, 2.1615949728, 8.87574040247 },
-// { 3.74156226774, 4.24620341057, 4.35371571862 },
-// { 2.18208535888, 3.53972126321, 3.59052000965 },
-// { 4.72006492073, 3.3574566235, 9.62444364758 },
-// { 2.84331278854, 1.74554945195, 4.51285607572 },
-// { 3.86999763691, 9.49323614413, 5.08797427552 },
-// { 7.43099014174, 6.2755590307, 4.57542355747 },
-// { 6.01320531795, 8.25706473123, 7.40439342966 },
-// { 6.46384266575, 3.51112862363, 9.47435948698 },
-// { 2.29011620065, 0.401145254435, 7.28671287627 },
-// { 5.7219136188, 4.43209346253, 0.0622901932013 },
-// { 4.30214056802, 1.68925570283, 7.89926376252 },
-// { 0.64305256706, 8.22063584536, 4.33019352991 },
-// { 6.44843380824, 9.10336359279, 6.8777037869 },
-// { 2.45354486215, 5.34166315571, 8.04822795875 },
-// { 9.13675572384, 8.19635101591, 6.85475060116 },
-// { 8.0492824201, 7.55216736195, 3.73472402973 },
-// { 4.40590062277, 5.27106603309, 2.59962025805 },
-// { 0.313960278741, 0.11866096726, 4.07985095305 },
-// { 0.462136466507, 0.415202739102, 1.03258083165 },
-// { 6.74723654049, 7.7080622951, 7.22322407979 },
-// { 1.97571555403, 3.18544339131, 5.56211977273 },
-// { 3.14021838165, 0.81551917817, 3.95156287418 },
-// { 4.05709817216, 2.98004731237, 5.8975379443 },
-// { 4.25420450429, 7.78663760941, 5.98061090504 },
-// { 6.2650372416, 7.96507652177, 6.43631309268 },
-// { 0.248308143147, 5.07557198176, 7.06413762375 },
-// { 2.83741089895, 0.652445391344, 3.32535947415 },
-// { 5.98115064142, 9.88913498552, 9.3923706794 },
-// { 3.45667026676, 1.37451287268, 2.35331272082 },
-// { 7.83964781871, 2.22111016571, 9.10723793073 },
-// { 0.509210152705, 7.97088780188, 6.17963669424 },
-// { 5.50910552235, 6.92372624674, 8.43151367671 },
-// { 9.94686419266, 5.16899669191, 1.77353096261 },
-// { 1.46501561342, 4.39317416608, 4.66752677391 },
-// { 7.34126711314, 1.50352255841, 7.42777093653 },
-// { 6.80122177161, 2.48753341584, 4.30535748793 },
-// { 3.43057685209, 9.11458889251, 8.1389601215 },
-// { 7.82076320157, 4.99727977399, 8.31875065375 },
-// { 8.62799832715, 5.67304190345, 1.40517550057 },
-// { 2.20910090066, 5.45236965227, 0.190013284925 },
-// { 8.27876352499, 3.23706166886, 6.23912802837 },
-// { 8.69440791615, 0.729194277167, 3.45645694332 },
-// { 8.30552885891, 2.53977734839, 0.498635632483 },
-// { 6.35009207052, 5.87727519703, 4.92604761655 },
-// { 2.21876644613, 3.85669457256, 9.44139826683 },
-// { 5.49181700898, 1.69048597254, 2.29475976286 },
-// { 3.79777411904, 0.437885574937, 8.10175192316 },
-// { 8.11720195104, 8.84115458961, 6.25490466144 },
-// { 4.58878775312, 5.51332276174, 3.85400216514 },
-// { 6.01729101329, 9.69817519935, 7.63607038602 },
-// { 4.14247512757, 9.633551519, 0.543555309265 },
-// { 1.69925453337, 4.77655288911, 0.950497583032 },
-// { 3.84897216241, 3.27769006984, 9.17922626403 },
-// { 2.79348258306, 4.38230737375, 7.26219595942 },
-// { 4.88988551153, 2.95206506434, 3.65797143803 },
-// { 1.91134803528, 0.829719567085, 1.73891604909 },
-// { 5.5514711696, 8.80684284298, 2.66911304157 },
-// { 2.95100011358, 0.832983961872, 4.19266815334 },
-// { 4.19942346415, 5.92478285192, 8.33053966924 },
-// { 3.11127058351, 3.25340097022, 7.07258377268 },
-// { 7.61105416732, 8.46642439572, 5.61730141222 }};
-// Double[][] xD = new Double[x.length][x[0].length];
-// for(int i = 0 ; i< xD.length; i++)
-// for(int j = 0 ; j < xD[i].length ; j++)
-// xD[i][j] = new Double(x[i][j]);
-//
-// double[] y = { 0.548279405588, 0.749557798438, 0.704786225556, 0.064272559019, 0.959196778261, 0.443650457811, 0.139588310157, 0.697614953528, 0.894633307417, 0.288986449536, 0.968020911596, 0.00941763156173, 0.803870693657, 0.457124742168, 0.728543899161, 0.88083354383, 0.624089352674, 0.470379461181, 0.86877991158, 0.622721685808, 0.0250057478044, 0.2376603194, 0.112920370051, 0.608780223601, 0.62741359624, 0.39753977229, 0.396823887458, 0.0259021311271, 0.433022176171, 0.94665816668, 0.788805032857, 0.831096752197, 0.981239642073, 0.72411413954, 0.585272152663, 0.694317542691, 0.890624533901, 0.244048473797, 0.422902339036, 0.597269134374, 0.911340032927, 0.00186723050398, 0.439586593554, 0.714613974993, 0.815341829936, 0.726336948414, 0.742772100572, 0.597295528478, 0.305955366581, 0.155579392014, 0.000873693540479, 0.339225424495, 0.433434106377, 0.109738110471, 0.0193980726758, 0.258795872246, 0.322462583569, 0.326807898424, 0.079866937163, 0.741776416238, 0.597174006951, 0.289816194377, 0.691182117374, 0.113315930392, 0.302120795811, 0.616653275971, 0.833480904688, 0.881803762099, 0.734675438389, 0.269429129873, 0.977225860294, 0.327410536298, 0.319292292397, 0.876227987007, 0.832930007711, 0.941552570764, 0.0433177729231, 0.333665283905, 0.889264621262, 0.367930824862, 0.143633644589, 0.0106269520474, 0.623817520313, 0.237853599409, 0.301794094647, 0.912166461213, 0.663976930266, 0.918081800984, 0.909573924607, 0.976541368479, 0.340915467396, 0.617160565805, 0.0315242385532, 0.869413665191, 0.695610662213, 0.144537534715, 0.619567870639, 0.159550199731, 0.536333432502, 0.837898880743 };
-// logger.debug("yl: "+y.length);
-// Double[] yD = new Double[y.length];
-// for(int i = 0 ; i< yD.length; i++)
-// yD[i] = new Double(y[i]);
-//
-// LinearRegressionWModel lrm = new LinearRegressionWModel(yD,xD);
-// String[] options = new String[4];
-// options[0] = "-U";
-// options[1] = "0";
-// options[2] = "-R";
-// options[3] = "0.0001";
-// lrm.setOptions(options);
-// lrm.build();
-//
-// /* Test predictions */
-// Double[][] newx = {
-// { new Double(9.81536768251), new Double(3.82849269659), new Double(7.22212024421) },
-// { new Double(0.197449829806), new Double(0.324130354642), new Double(2.8329420321) },
-//// { new Double(0.548460836141), new Double(7.28037586863), new Double(8.13728493983) },
-//// { new Double(1.76049278788), new Double(6.41731766803), new Double(5.53986167864) },
-//// { new Double(3.4541825491), new Double(9.78038580407), new Double(3.58954097059) }
-// };
-//
-// lrm.setParameters(newx);
-// lrm.predict();
-//
-// double[] preds = lrm.getPredictPredicted();
-// for(int i = 0; i < preds.length; i++)
-// logger.debug("result< "+i+"="+preds[i]);
-// assertTrue(preds != null);
-// assertEquals(0.5235362, preds[0], 0.001);/*result extracted from test LinearRegressionTest*/
-// assertEquals(0.5030381, preds[1], 0.0000001);
-// assertEquals(0.5184706, preds[2], 0.0000001);
-// assertEquals(0.5232108, preds[3], 0.0000001);
-// assertEquals(0.5436967, preds[4], 0.0000001);
-//
-// assertEquals(96, lrm.getPredictDF(), 0.1);
-//
-// }
-//
-// /**
-// *
-// * @throws CDKException
-// * @throws java.lang.Exception
-// * @throws QSARModelException
-// */
-// public void testLinearRegressionWModel_4() throws CDKException, java.lang.Exception, QSARModelException {
-// LinearRegressionWModel lrm = new LinearRegressionWModel("data/arff/LinearRegressionWeka_Test.arff");
-// String[] options = new String[4];
-// options[0] = "-U";
-// options[1] = "0";
-// options[2] = "-R";
-// options[3] = "0.00000008";
-// lrm.setOptions(options);
-// lrm.build();
-// lrm.setParameters("data/arff/LinearRegressionWeka_Prediction.arff");
-// lrm.predict();
-// double[] result = lrm.getPredictPredicted();
-// for(int i = 0; i < result.length; i++)
-// logger.debug("result< "+i+"="+result[i]);
-// assertNotNull(result);
-// }
-}
-
diff --git a/src/test/org/openscience/chemojava/qsar/model/weka/MultilayerPerceptronModelTest.java b/src/test/org/openscience/chemojava/qsar/model/weka/MultilayerPerceptronModelTest.java
deleted file mode 100644
index 289069c..0000000
--- a/src/test/org/openscience/chemojava/qsar/model/weka/MultilayerPerceptronModelTest.java
+++ /dev/null
@@ -1,101 +0,0 @@
-/* $RCSfile$
- * $Author$
- * $Date$
- * $Revision$
- *
- * Copyright (C) 2007 by Mario Baseda
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- * All we ask is that proper credit is given for our work, which includes
- * - but is not limited to - adding the above copyright notice to the beginning
- * of your source code files, and to any copyright notice that you may distribute
- * with programs based on this work.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-package org.openscience.chemojava.qsar.model.weka;
-
-import org.junit.Assert;
-import org.junit.Test;
-import org.openscience.cdk.CDKTestCase;
-import org.openscience.cdk.exception.CDKException;
-import org.openscience.cdk.qsar.model.QSARModelException;
-import org.openscience.chemojava.libio.weka.Weka;
-
-/**
- * TestSuite that runs a test for the MultilayerPerceptronModel
- *
- * @author Mario Baseda
- * @cdk.module test-qsar
- */
-public class MultilayerPerceptronModelTest extends CDKTestCase{
-
- /**
- * @throws CDKException
- * @throws Exception
- * @throws QSARModelException
- */
- @Test public void testMultilayerPerceptronModel() throws CDKException, java.lang.Exception, QSARModelException{
- MultilayerPerceptronModel test = new MultilayerPerceptronModel();
-// test.setOptions(new String[] {"-G"});
- int[] typAttrib = {Weka.NUMERIC, Weka.NUMERIC, Weka.NUMERIC};
- String[] classAttrib = {"A_", "B_", "C_"};
- double[][] x = {{10, 10, 10}, {10, -10, -10}, {-10, -10, -10},
- {11, 11, 11}, {11, -11, -11}, {-11, -11, -11}};
- Double[][] xD = new Double[x.length][x[0].length];
- for (int i = 0; i < xD.length; i++)
- for (int j = 0; j < xD[i].length; j++)
- xD[i][j] = new Double(x[i][j]);
- String[] y = {"A_", "B_", "C_", "A_", "B_", "C_"};
- String[] attrib = {"X1", "X2", "X3"};
- test.setData(attrib, typAttrib, classAttrib, y, xD);
- test.build();
- Double[][] newx = {
- {new Double(99), new Double(89), new Double(79)},
- {new Double(19), new Double(29), new Double(39)},
- };
- test.setParameters(newx);
- test.predict();
- Object[][] result = test.getPredictPredicted();
- Assert.assertNotNull(result);
- Assert.assertEquals ("a", test.getHiddenLayers());
- Assert.assertEquals (0.3, test.getLearningRate(), 0.01);
- Assert.assertEquals(0.2, test.getMomentum(), 0.01);
- Assert.assertEquals (0, test.getRandomSeed());
- Assert.assertEquals (500.0, test.getTrainingTime(), 0.01);
- Assert.assertEquals (0.0, test.getValidationSetSize(), 0.001);
- Assert.assertEquals (20.0, test.getValidationThreshold(), 0.01);
- }
-
- /**
- * @throws CDKException
- * @throws Exception
- * @throws QSARModelException
- */
-// public void testMultilayerPerceptronModel2() throws CDKException, java.lang.Exception, QSARModelException {
-// MultilayerPerceptronModel test = new MultilayerPerceptronModel();
-// test.setData("X:\\cdk\\src\\data\\arff\\Table1.arff");
-// test.build();
-// test.setParameters("X:\\cdk\\src\\data\\arff\\Table2.arff");
-// test.predict();
-// Object[][] result = test.getPredictPredicted();
-// assertNotNull(result);
-// assertEquals ("a", test.getHiddenLayers());
-// assertEquals (0.3, test.getLearningRate(), 0.01);
-// assertEquals(0.2, test.getMomentum(), 0.01);
-// assertEquals (0, test.getRandomSeed());
-// assertEquals (500.0, test.getTrainingTime(), 0.01);
-// assertEquals (0.0, test.getValidationSetSize(), 0.001);
-// assertEquals (20.0, test.getValidationThreshold(), 0.01);
-// }
-}
diff --git a/src/test/org/openscience/chemojava/qsar/model/weka/NaiveBayesModelTest.java b/src/test/org/openscience/chemojava/qsar/model/weka/NaiveBayesModelTest.java
deleted file mode 100644
index e1e28ce..0000000
--- a/src/test/org/openscience/chemojava/qsar/model/weka/NaiveBayesModelTest.java
+++ /dev/null
@@ -1,89 +0,0 @@
-/* $RCSfile$
- * $Author$
- * $Date$
- * $Revision$
- *
- * Copyright (C) 2007 by Mario Baseda
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- * All we ask is that proper credit is given for our work, which includes
- * - but is not limited to - adding the above copyright notice to the beginning
- * of your source code files, and to any copyright notice that you may distribute
- * with programs based on this work.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-package org.openscience.chemojava.qsar.model.weka;
-
-import org.junit.Assert;
-import org.junit.Test;
-import org.openscience.cdk.CDKTestCase;
-import org.openscience.cdk.exception.CDKException;
-import org.openscience.cdk.qsar.model.QSARModelException;
-import org.openscience.chemojava.libio.weka.Weka;
-
-/**
- * TestSuite that runs a test for the NaiveBayesModel
- *
- * @author Mario Baseda
- * @cdk.module test-qsar
- */
-public class NaiveBayesModelTest extends CDKTestCase{
-
- /**
- * @throws CDKException
- * @throws Exception
- * @throws QSARModelException
- */
- @Test public void testNaiveBayesModel() throws CDKException, java.lang.Exception, QSARModelException{
- NaiveBayesModel test = new NaiveBayesModel();
-// test.setOptions(new String[] {"-G"});
- int[] typAttrib = {Weka.NUMERIC, Weka.NUMERIC, Weka.NUMERIC};
- String[] classAttrib = {"A_", "B_", "C_"};
- double[][] x = {{10, 10, 10}, {10, -10, -10}, {-10, -10, -10},
- {11, 11, 11}, {11, -11, -11}, {-11, -11, -11}};
- Double[][] xD = new Double[x.length][x[0].length];
- for (int i = 0; i < xD.length; i++)
- for (int j = 0; j < xD[i].length; j++)
- xD[i][j] = new Double(x[i][j]);
- String[] y = {"A_", "B_", "C_", "A_", "B_", "C_"};
- String[] attrib = {"X1", "X2", "X3"};
- test.setData(attrib, typAttrib, classAttrib, y, xD);
- test.build();
- Double[][] newx = {
- {new Double(99), new Double(89), new Double(79)},
- {new Double(19), new Double(29), new Double(39)},
- };
- test.setParameters(newx);
- test.probabilities();
- Object[][] result = test.getProbabilities();
- test.updateClassifier();
- Assert.assertNotNull(result);
- }
-
- /**
- * @throws CDKException
- * @throws Exception
- * @throws QSARModelException
- */
-// public void testNaiveBayesModel2() throws CDKException, java.lang.Exception, QSARModelException {
-// NaiveBayesModel test = new NaiveBayesModel();
-// test.setData("X:\\cdk\\src\\data\\arff\\Table3.arff");
-// test.build();
-// test.setParameters("X:\\cdk\\src\\data\\arff\\Table3.arff");
-// test.probabilities();
-// Object[][] result = test.getProbabilities();
-// test.updateClassifier();
-// assertNotNull(result);
-// }
-}
diff --git a/src/test/org/openscience/chemojava/qsar/model/weka/QSARWekaModelTests.java b/src/test/org/openscience/chemojava/qsar/model/weka/QSARWekaModelTests.java
deleted file mode 100644
index d61effe..0000000
--- a/src/test/org/openscience/chemojava/qsar/model/weka/QSARWekaModelTests.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/* $RCSfile$
- * $Author: miguelrojasch $
- * $Date: 2006-05-11 14:25:07 +0200 (Do, 11 Mai 2006) $
- * $Revision: 6221 $
- *
- * Copyright (C) 2004-2007 Miguel Rojas
- *
- * Contact: cdk-devel@lists.sourceforge.net
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-package org.openscience.chemojava.qsar.model.weka;
-
-import org.junit.runner.RunWith;
-import org.junit.runners.Suite;
-import org.junit.runners.Suite.SuiteClasses;
-
-/**
- * TestSuite that runs all the tests for the CDK libio-weka module.
- *
- * @cdk.module test-qsar
- * @cdk.depends junit.jar
- */
-@RunWith(Suite.class)
-@SuiteClasses({
- J48WModelTest.class,
- LinearRegressionWModelTest.class,
- DensityBasedClustererModelTest.class,
- EMClusterTest.class,
- FilterSupervisedResampleTest.class,
- FilterUnSupervisedResampleTest.class,
- FilterSupervisedResampleTest.class,
- KmeansClusterTest.class,
- MultilayerPerceptronModelTest.class,
- NaiveBayesModelTest.class,
- SimpleLinearRegressionModelTest.class,
- SMOModelTest.class,
- SMOregModelTest.class
-})
-public class QSARWekaModelTests {
-
-}
diff --git a/src/test/org/openscience/chemojava/qsar/model/weka/SMOModelTest.java b/src/test/org/openscience/chemojava/qsar/model/weka/SMOModelTest.java
deleted file mode 100644
index 5f3181f..0000000
--- a/src/test/org/openscience/chemojava/qsar/model/weka/SMOModelTest.java
+++ /dev/null
@@ -1,123 +0,0 @@
-/* $RCSfile$
- * $Author$
- * $Date$
- * $Revision$
- *
- * Copyright (C) 2007 by Mario Baseda
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- * All we ask is that proper credit is given for our work, which includes
- * - but is not limited to - adding the above copyright notice to the beginning
- * of your source code files, and to any copyright notice that you may distribute
- * with programs based on this work.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-package org.openscience.chemojava.qsar.model.weka;
-
-import org.junit.Assert;
-import org.junit.Test;
-import org.openscience.cdk.exception.CDKException;
-import org.openscience.cdk.qsar.model.QSARModelException;
-import org.openscience.cdk.CDKTestCase;
-import org.openscience.chemojava.libio.weka.Weka;
-import org.openscience.chemojava.qsar.model.weka.SMOModel;
-
-/**
- * TestSuite that runs a test for the SMOModel
- *
- * @author Mario Baseda
- * @cdk.module test-qsar
- */
-public class SMOModelTest extends CDKTestCase{
-
- /**
- * @throws CDKException
- * @throws Exception
- * @throws QSARModelException
- */
- @Test public void testSMOModel() throws Exception{
- SMOModel test = new SMOModel();
- int[] typAttrib = {Weka.NUMERIC, Weka.NUMERIC, Weka.NUMERIC};
- String[] classAttrib = {"A_", "B_", "C_"};
- double[][] x = {{10, 10, 10}, {10, -10, -10}, {-10, -10, -10},
- {11, 11, 11}, {11, -11, -11}, {-11, -11, -11}};
- Double[][] xD = new Double[x.length][x[0].length];
- for (int i = 0; i < xD.length; i++)
- for (int j = 0; j < xD[i].length; j++)
- xD[i][j] = new Double(x[i][j]);
- String[] y = {"A_", "B_", "C_", "A_", "B_", "C_"};
- String[] attrib = {"X1", "X2", "X3"};
- test.setData(attrib, typAttrib, classAttrib, y, xD);
- test.build();
- Double[][] newx = {
- {new Double(99), new Double(89), new Double(79)},
- {new Double(19), new Double(29), new Double(39)},
- };
- test.setParameters(newx);
- test.probabilities();
- Object[][] result = test.getProbabilities();
- Assert.assertNotNull (test.attributeNames());
- Assert.assertNotNull (test.bias());
- Assert.assertNotNull(test.classAttributeNames());
- Assert.assertNotNull (test.sparseIndices());
- Assert.assertNotNull (test.sparseWeights());
- Assert.assertNotNull(result);
- Assert.assertEquals (false, test.getBuildLogisticModels());
- Assert.assertEquals (1.0, test.getC(), 0.001);
- Assert.assertEquals (250007, test.getCacheSize());
- Assert.assertEquals (1.0E-12, test.getEpsilon(), 0.001);
- Assert.assertEquals (1.0, test.getExponent(), 0.01);
- Assert.assertEquals (false, test.getFeatureSpaceNormalization());
- Assert.assertEquals (0.01, test.getGamma(), 0.001);
- Assert.assertEquals (false, test.getLowerOrderTerms());
- Assert.assertEquals (-1, test.getNumFolds());
- Assert.assertEquals (1, test.getRandomSeed());
- Assert.assertEquals (0.0010, test.getToleranceParameter(), 0.001);
- Assert.assertEquals (false, test.getUserRBF());
- Assert.assertEquals (3, test.numClassAttributeValues());
- }
-
- /**
- * @throws CDKException
- * @throws Exception
- * @throws QSARModelException
- */
-// public void testSMOModel2() throws CDKException, java.lang.Exception, QSARModelException {
-// SMOModel test = new SMOModel();
-// test.setData("X:\\cdk\\src\\data\\arff\\Table3.arff");
-// test.build();
-// test.setParameters("X:\\cdk\\src\\data\\arff\\Table4.arff");
-// test.probabilities();
-// Object[][] result = test.getProbabilities();
-// assertNotNull (test.attributeNames());
-// assertNotNull (test.bias());
-// assertNotNull(test.classAttributeNames());
-// assertNotNull (test.sparseIndices());
-// assertNotNull (test.sparseWeights());
-// assertNotNull(result);
-// assertEquals (false, test.getBuildLogisticModels());
-// assertEquals (1.0, test.getC(), 0.001);
-// assertEquals (250007, test.getCacheSize());
-// assertEquals (1.0E-12, test.getEpsilon(), 0.001);
-// assertEquals (1.0, test.getExponent(), 0.01);
-// assertEquals (false, test.getFeatureSpaceNormalization());
-// assertEquals (0.01, test.getGamma(), 0.001);
-// assertEquals (false, test.getLowerOrderTerms());
-// assertEquals (-1, test.getNumFolds());
-// assertEquals (1, test.getRandomSeed());
-// assertEquals (0.0010, test.getToleranceParameter(), 0.001);
-// assertEquals (false, test.getUserRBF());
-// assertEquals (3, test.numClassAttributeValues());
-// }
-}
diff --git a/src/test/org/openscience/chemojava/qsar/model/weka/SMOregModelTest.java b/src/test/org/openscience/chemojava/qsar/model/weka/SMOregModelTest.java
deleted file mode 100644
index 8e44ced..0000000
--- a/src/test/org/openscience/chemojava/qsar/model/weka/SMOregModelTest.java
+++ /dev/null
@@ -1,102 +0,0 @@
-/* $RCSfile$
- * $Author$
- * $Date$
- * $Revision$
- *
- * Copyright (C) 2007 by Mario Baseda
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- * All we ask is that proper credit is given for our work, which includes
- * - but is not limited to - adding the above copyright notice to the beginning
- * of your source code files, and to any copyright notice that you may distribute
- * with programs based on this work.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-package org.openscience.chemojava.qsar.model.weka;
-
-import org.junit.Assert;
-import org.junit.Test;
-import org.openscience.cdk.CDKTestCase;
-import org.openscience.cdk.exception.CDKException;
-import org.openscience.cdk.qsar.model.QSARModelException;
-
-/**
- * TestSuite that runs a test for the SMOregModel
- *
- * @author Mario Baseda
- * @cdk.module test-qsar
- */
-public class SMOregModelTest extends CDKTestCase{
-
- /**
- * @throws CDKException
- * @throws Exception
- * @throws QSARModelException
- */
- @Test public void testSMOregModel() throws Exception{
- SMOregModel test = new SMOregModel();
- double[][] x = {{10, 10, 10}, {10, -10, -10}, {-10, -10, -10},
- {11, 11, 11}, {11, -11, -11}, {-11, -11, -11}};
- Double[][] xD = new Double[x.length][x[0].length];
- for (int i = 0; i < xD.length; i++)
- for (int j = 0; j < xD[i].length; j++)
- xD[i][j] = new Double(x[i][j]);
- Object[] y = {new Double(100), new Double(200), new Double(300), new Double(100), new Double(200), new Double(300)};
- test.setData(y, xD);
- test.build();
- Double[][] newx = {
- {new Double(99), new Double(89), new Double(79)},
- {new Double(19), new Double(29), new Double(39)},
- };
- test.setParameters(newx);
- test.classifyInstance();
- Object[] result = test.getClassification();
- Assert.assertNotNull (result);
- Assert.assertEquals (1.0, test.getC(), 0.001);
- Assert.assertEquals (250007, test.getCacheSize());
- Assert.assertEquals (0.0010, test.getEpsilon(), 0.001);
- Assert.assertEquals (1.0, test.getExponent(), 0.01);
- Assert.assertEquals (false, test.getFeatureSpaceNormalization());
- Assert.assertEquals (0.01, test.getGamma(), 0.001);
- Assert.assertEquals (false, test.getLowerOrderTerms());
- Assert.assertEquals (0.0010, test.getToleranceParameter(), 0.001);
- Assert.assertEquals (false, test.getUserRBF());
- Assert.assertEquals (1.0E-12, test.getEps(), 0.001);
- }
-
- /**
- * @throws CDKException
- * @throws Exception
- * @throws QSARModelException
- */
-// public void testSMOregModel2() throws CDKException, java.lang.Exception, QSARModelException {
-// SMOregModel test = new SMOregModel();
-// test.setData("X:\\cdk\\src\\data\\arff\\Table1.arff");
-// test.build();
-// test.setParameters("X:\\cdk\\src\\data\\arff\\Table2.arff");
-// test.classifyInstance();
-// Object[] result = test.getClassification();
-// assertNotNull (result);
-// assertEquals (1.0, test.getC(), 0.001);
-// assertEquals (250007, test.getCacheSize());
-// assertEquals (0.0010, test.getEpsilon(), 0.001);;
-// assertEquals (1.0, test.getExponent(), 0.01);
-// assertEquals (false, test.getFeatureSpaceNormalization());
-// assertEquals (0.01, test.getGamma(), 0.001);
-// assertEquals (false, test.getLowerOrderTerms());
-// assertEquals (0.0010, test.getToleranceParameter(), 0.001);
-// assertEquals (false, test.getUserRBF());
-// assertEquals (1.0E-121, test.getEps(), 0.001);
-// }
-}
diff --git a/src/test/org/openscience/chemojava/qsar/model/weka/SimpleLinearRegressionModelTest.java b/src/test/org/openscience/chemojava/qsar/model/weka/SimpleLinearRegressionModelTest.java
deleted file mode 100644
index b4dfdab..0000000
--- a/src/test/org/openscience/chemojava/qsar/model/weka/SimpleLinearRegressionModelTest.java
+++ /dev/null
@@ -1,93 +0,0 @@
-/* $RCSfile$
- * $Author$
- * $Date$
- * $Revision$
- *
- * Copyright (C) 2007 by Mario Baseda
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- * All we ask is that proper credit is given for our work, which includes
- * - but is not limited to - adding the above copyright notice to the beginning
- * of your source code files, and to any copyright notice that you may distribute
- * with programs based on this work.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-package org.openscience.chemojava.qsar.model.weka;
-
-import org.junit.Assert;
-import org.junit.Test;
-import org.openscience.cdk.CDKTestCase;
-import org.openscience.cdk.exception.CDKException;
-import org.openscience.cdk.qsar.model.QSARModelException;
-
-/**
- * TestSuite that runs a test for the SimpleLinearRegressionModel
- *
- * @author Mario Baseda
- * @cdk.module test-qsar
- */
-public class SimpleLinearRegressionModelTest extends CDKTestCase{
-
- /**
- * @throws CDKException
- * @throws Exception
- * @throws QSARModelException
- */
- @Test public void testSimpleLinearRegressionModel1() throws Exception {
-
- double[][] x = {{1, 1}, {3, 3}, {4, 4}, {6, 6}};
- Double[][] xD = new Double[x.length][x[0].length];
- for (int i = 0; i < xD.length; i++)
- for (int j = 0; j < xD[i].length; j++)
- xD[i][j] = new Double(x[i][j]);
- double[] y = {0, 2, 3, 5};
-
- Double[] yD = new Double[y.length];
- for (int i = 0; i < yD.length; i++)
- yD[i] = new Double(y[i]);
-
- SimpleLinearRegressionModel slrm = new SimpleLinearRegressionModel(yD, xD);
- slrm.build();
-
- /* Test predictions */
- Double[][] newx = {
- {new Double(2), new Double(2)},
- {new Double(5), new Double(5)},
- };
-
- slrm.setParameters(newx);
- slrm.predict();
-
- Double[] preds = (Double[]) slrm.getPredictPredicted();
- Assert.assertEquals(1.0, (preds[0]).doubleValue(), 0.001);
- Assert.assertEquals(4.0, (preds[1]).doubleValue(), 0.001);
- }
-
- /**
- * @throws CDKException
- * @throws Exception
- * @throws QSARModelException
- */
-// public void testSimpleLinearRegressionWModel2() throws CDKException, java.lang.Exception, QSARModelException {
-// SimpleLinearRegressionModel slrm = new SimpleLinearRegressionModel("X:\\cdk\\src\\data\\arff\\Table1.arff");
-// String[] options = new String[4];
-// slrm.build();
-// slrm.setParameters("X:\\cdk\\src\\data\\arff\\Table2.arff");
-// slrm.predict();
-// Double[] result = (Double[]) slrm.getPredictPredicted();
-// assertNotNull(result);
-// assertEquals(1.0, (result[0]).doubleValue(), 0.001);
-// assertEquals(4.0, (result[1]).doubleValue(), 0.001);
-// }
-}