diff --git a/build.xml b/build.xml
index 77d91cf9..6f041439 100644
--- a/build.xml
+++ b/build.xml
@@ -29,11 +29,6 @@
-
-
-
-
-
@@ -44,19 +39,11 @@
-
-
-
-
-
-
-
-
-
-
+
+
@@ -82,13 +69,7 @@
-
-
-
-
-
-
diff --git a/capture-stream.diff b/capture-stream.diff
new file mode 100644
index 00000000..f713e26b
--- /dev/null
+++ b/capture-stream.diff
@@ -0,0 +1,54 @@
+diff -r 8d6fc90e7bbf src/share/classes/java/lang/invoke/InnerClassLambdaMetafactory.java
+--- a/src/share/classes/java/lang/invoke/InnerClassLambdaMetafactory.java Mon Dec 03 16:57:46 2012 +0100
++++ b/src/share/classes/java/lang/invoke/InnerClassLambdaMetafactory.java Wed Dec 19 11:04:00 2012 -0500
+@@ -29,6 +29,7 @@
+ import java.lang.reflect.Method;
+ import java.security.ProtectionDomain;
+ import java.util.concurrent.atomic.AtomicInteger;
++import java.util.concurrent.ConcurrentHashMap;
+ import jdk.internal.org.objectweb.asm.*;
+ import static jdk.internal.org.objectweb.asm.Opcodes.*;
+ import sun.misc.Unsafe;
+@@ -38,7 +39,7 @@
+ /**
+ * InnerClassLambdaMetafactory
+ */
+-/*non-public*/ final class InnerClassLambdaMetafactory extends AbstractValidatingLambdaMetafactory {
++public final class InnerClassLambdaMetafactory extends AbstractValidatingLambdaMetafactory {
+ private static final int CLASSFILE_VERSION = 51;
+ private static final Type TYPE_VOID = Type.getType(void.class);
+ private static final String METHOD_DESCRIPTOR_VOID = Type.getMethodDescriptor(Type.VOID_TYPE);
+@@ -156,6 +157,17 @@
+ }
+ }
+
++
++
++ private static final ConcurrentHashMap classByteArrayTable = new ConcurrentHashMap();
++
++ public static byte[] getBytesForClassName(String className) {
++ return classByteArrayTable.get(className);
++ }
++ public static void setBytesForClassName( String className, byte[] classBytes) {
++ classByteArrayTable.put(className,classBytes);
++ }
++
+ /**
+ * Generate a class file which implements the functional
+ * interface, define and return the class.
+@@ -227,7 +239,15 @@
+ }
+ }
+ );
++
+
++ // ecaspole 121211
++ System.out.println("spinInnerClass: lambdaClassName = " + lambdaClassName +
++ ", classBytes = " + classBytes +
++ ", loader = " + loader );
++
++ setBytesForClassName(lambdaClassName, classBytes);
++
+ return (Class extends T>) Unsafe.getUnsafe().defineClass(lambdaClassName, classBytes, 0, classBytes.length, loader, pd);
+ }
+
diff --git a/com.amd.aparapi.jni/src/cpp/aparapi.cpp b/com.amd.aparapi.jni/src/cpp/aparapi.cpp
index c56cb589..afcf6f2d 100644
--- a/com.amd.aparapi.jni/src/cpp/aparapi.cpp
+++ b/com.amd.aparapi.jni/src/cpp/aparapi.cpp
@@ -139,11 +139,13 @@ class KernelArg{
static jfieldID typeFieldID;
static jfieldID sizeInBytesFieldID;
static jfieldID numElementsFieldID;
+ static jfieldID fieldHolderFieldID;
public:
static jfieldID javaArrayFieldID;
public:
JNIContext *jniContext;
jobject argObj; // the Java KernelRunner.KernelArg object that we are mirroring.
+ jobject fieldHolder; // The java Object to pull this field from (for lambda it could be block, KernelRunner or lambda's this)
jobject javaArg; // global reference to the corresponding java KernelArg object we grabbed our own global reference so that the object won't be collected until we dispose!
char *name; // used for debugging printfs
jint type; // a bit mask determining the type of this arg
@@ -273,6 +275,7 @@ jfieldID KernelArg::typeFieldID=0;
jfieldID KernelArg::javaArrayFieldID=0;
jfieldID KernelArg::sizeInBytesFieldID=0;
jfieldID KernelArg::numElementsFieldID=0;
+jfieldID KernelArg::fieldHolderFieldID=0;
class JNIContext{
private:
@@ -454,6 +457,7 @@ KernelArg::KernelArg(JNIEnv *jenv, JNIContext *jniContext, jobject argObj):
if (argClazz == 0){
jclass c = jenv->GetObjectClass(argObj);
nameFieldID = jenv->GetFieldID(c, "name", "Ljava/lang/String;"); ASSERT_FIELD(name);
+ fieldHolderFieldID = jenv->GetFieldID(c, "fieldHolder", "Ljava/lang/Object;"); ASSERT_FIELD(fieldHolder);
typeFieldID = jenv->GetFieldID(c, "type", "I"); ASSERT_FIELD(type);
javaArrayFieldID = jenv->GetFieldID(c, "javaArray", "Ljava/lang/Object;"); ASSERT_FIELD(javaArray);
sizeInBytesFieldID = jenv->GetFieldID(c, "sizeInBytes", "I"); ASSERT_FIELD(sizeInBytes);
@@ -461,6 +465,9 @@ KernelArg::KernelArg(JNIEnv *jenv, JNIContext *jniContext, jobject argObj):
argClazz = c;
}
type = jenv->GetIntField(argObj, typeFieldID);
+
+ // Need to delete this
+ fieldHolder = jenv->NewGlobalRef( jenv->GetObjectField(argObj, fieldHolderFieldID) );
jstring nameString = (jstring)jenv->GetObjectField(argObj, nameFieldID);
const char *nameChars = jenv->GetStringUTFChars(nameString, NULL);
#ifdef _WIN32
@@ -482,18 +489,22 @@ cl_int KernelArg::setLocalBufferArg(JNIEnv *jenv, int argIdx, int argPos){
}
cl_int KernelArg::setPrimitiveArg(JNIEnv *jenv, int argIdx, int argPos){
cl_int status = CL_SUCCESS;
+
+ // Get the class of the object holding this field
+ jclass fieldHolderClass = jenv->GetObjectClass(fieldHolder);
+
if (isFloat()){
if (isStatic()){
- jfieldID fieldID = jenv->GetStaticFieldID(jniContext->kernelClass, name, "F");
- jfloat f = jenv->GetStaticFloatField(jniContext->kernelClass, fieldID);
+ jfieldID fieldID = jenv->GetStaticFieldID(fieldHolderClass, name, "F");
+ jfloat f = jenv->GetStaticFloatField(fieldHolderClass, fieldID);
if (config->isVerbose()){
fprintf(stderr, "clSetKernelArg static primitive float '%s' index=%d pos=%d value=%f\n",
name, argIdx, argPos, f);
}
status = clSetKernelArg(jniContext->kernel, argPos, sizeof(jfloat), &f);
}else{
- jfieldID fieldID = jenv->GetFieldID(jniContext->kernelClass, name, "F");
- jfloat f = jenv->GetFloatField(jniContext->kernelObject, fieldID);
+ jfieldID fieldID = jenv->GetFieldID(fieldHolderClass, name, "F");
+ jfloat f = jenv->GetFloatField(fieldHolder, fieldID);
if (config->isVerbose()){
fprintf(stderr, "clSetKernelArg primitive float '%s' index=%d pos=%d value=%f\n",
name, argIdx, argPos, f);
@@ -502,16 +513,16 @@ cl_int KernelArg::setPrimitiveArg(JNIEnv *jenv, int argIdx, int argPos){
}
}else if (isInt()){
if (isStatic()){
- jfieldID fieldID = jenv->GetStaticFieldID(jniContext->kernelClass, name, "I");
- jint i = jenv->GetStaticIntField(jniContext->kernelClass, fieldID);
+ jfieldID fieldID = jenv->GetStaticFieldID(fieldHolderClass, name, "I");
+ jint i = jenv->GetStaticIntField(fieldHolderClass, fieldID);
if (config->isVerbose()){
fprintf(stderr, "clSetKernelArg static primitive int '%s' index=%d pos=%d value=%d\n",
name, argIdx, argPos, i);
}
status = clSetKernelArg(jniContext->kernel, argPos, sizeof(jint), &i);
}else{
- jfieldID fieldID = jenv->GetFieldID(jniContext->kernelClass, name, "I");
- jint i = jenv->GetIntField(jniContext->kernelObject, fieldID);
+ jfieldID fieldID = jenv->GetFieldID(fieldHolderClass, name, "I");
+ jint i = jenv->GetIntField(fieldHolder, fieldID);
if (config->isVerbose()){
fprintf(stderr, "clSetKernelArg primitive int '%s' index=%d pos=%d value=%d\n",
name, argIdx, argPos, i);
@@ -520,16 +531,16 @@ cl_int KernelArg::setPrimitiveArg(JNIEnv *jenv, int argIdx, int argPos){
}
}else if (isBoolean()){
if (isStatic()){
- jfieldID fieldID = jenv->GetStaticFieldID(jniContext->kernelClass, name, "Z");
- jboolean z = jenv->GetStaticBooleanField(jniContext->kernelClass, fieldID);
+ jfieldID fieldID = jenv->GetStaticFieldID(fieldHolderClass, name, "Z");
+ jboolean z = jenv->GetStaticBooleanField(fieldHolderClass, fieldID);
if (config->isVerbose()){
fprintf(stderr, "clSetKernelArg static primitive boolean '%s' index=%d pos=%d value=%d\n",
name, argIdx, argPos, z);
}
status = clSetKernelArg(jniContext->kernel, argPos, sizeof(jboolean), &z);
}else{
- jfieldID fieldID = jenv->GetFieldID(jniContext->kernelClass, name, "Z");
- jboolean z = jenv->GetBooleanField(jniContext->kernelObject, fieldID);
+ jfieldID fieldID = jenv->GetFieldID(fieldHolderClass, name, "Z");
+ jboolean z = jenv->GetBooleanField(fieldHolder, fieldID);
if (config->isVerbose()){
fprintf(stderr, "clSetKernelArg primitive boolean '%s' index=%d pos=%d value=%d\n",
name, argIdx, argPos, z);
@@ -538,16 +549,16 @@ cl_int KernelArg::setPrimitiveArg(JNIEnv *jenv, int argIdx, int argPos){
}
}else if (isByte()){
if (isStatic()){
- jfieldID fieldID = jenv->GetStaticFieldID(jniContext->kernelClass, name, "B");
- jbyte b = jenv->GetStaticByteField(jniContext->kernelClass, fieldID);
+ jfieldID fieldID = jenv->GetStaticFieldID(fieldHolderClass, name, "B");
+ jbyte b = jenv->GetStaticByteField(fieldHolderClass, fieldID);
if (config->isVerbose()){
fprintf(stderr, "clSetKernelArg static primitive byte '%s' index=%d pos=%d value=%d\n",
name, argIdx, argPos, b);
}
status = clSetKernelArg(jniContext->kernel, argPos, sizeof(jbyte), &b);
}else{
- jfieldID fieldID = jenv->GetFieldID(jniContext->kernelClass, name, "B");
- jbyte b = jenv->GetByteField(jniContext->kernelObject, fieldID);
+ jfieldID fieldID = jenv->GetFieldID(fieldHolderClass, name, "B");
+ jbyte b = jenv->GetByteField(fieldHolder, fieldID);
if (config->isVerbose()){
fprintf(stderr, "clSetKernelArg primitive byte '%s' index=%d pos=%d value=%d\n",
name, argIdx, argPos, b);
@@ -556,16 +567,16 @@ cl_int KernelArg::setPrimitiveArg(JNIEnv *jenv, int argIdx, int argPos){
}
}else if (isLong()){
if (isStatic()){
- jfieldID fieldID = jenv->GetStaticFieldID(jniContext->kernelClass, name, "J");
- jlong j = jenv->GetStaticLongField(jniContext->kernelClass, fieldID);
+ jfieldID fieldID = jenv->GetStaticFieldID(fieldHolderClass, name, "J");
+ jlong j = jenv->GetStaticLongField(fieldHolderClass, fieldID);
if (config->isVerbose()){
fprintf(stderr, "clSetKernelArg static primitive long '%s' index=%d pos=%d value=%ld\n",
name, argIdx, argPos, j);
}
status = clSetKernelArg(jniContext->kernel, argPos, sizeof(jlong), &j);
}else{
- jfieldID fieldID = jenv->GetFieldID(jniContext->kernelClass, name, "J");
- jlong j = jenv->GetLongField(jniContext->kernelObject, fieldID);
+ jfieldID fieldID = jenv->GetFieldID(fieldHolderClass, name, "J");
+ jlong j = jenv->GetLongField(fieldHolder, fieldID);
if (config->isVerbose()){
fprintf(stderr, "clSetKernelArg primitive long '%s' index=%d pos=%d value=%ld\n",
name, argIdx, argPos, j);
@@ -574,16 +585,16 @@ cl_int KernelArg::setPrimitiveArg(JNIEnv *jenv, int argIdx, int argPos){
}
}else if (isDouble()){
if (isStatic()){
- jfieldID fieldID = jenv->GetStaticFieldID(jniContext->kernelClass, name, "D");
- jdouble d = jenv->GetStaticDoubleField(jniContext->kernelClass, fieldID);
+ jfieldID fieldID = jenv->GetStaticFieldID(fieldHolderClass, name, "D");
+ jdouble d = jenv->GetStaticDoubleField(fieldHolderClass, fieldID);
if (config->isVerbose()){
fprintf(stderr, "clSetKernelArg static primitive long '%s' index=%d pos=%d value=%lf\n",
name, argIdx, argPos, d);
}
status = clSetKernelArg(jniContext->kernel, argPos, sizeof(jdouble), &d);
}else{
- jfieldID fieldID = jenv->GetFieldID(jniContext->kernelClass, name, "D");
- jdouble d = jenv->GetDoubleField(jniContext->kernelObject, fieldID);
+ jfieldID fieldID = jenv->GetFieldID(fieldHolderClass, name, "D");
+ jdouble d = jenv->GetDoubleField(fieldHolder, fieldID);
if (config->isVerbose()){
fprintf(stderr, "clSetKernelArg primitive long '%s' index=%d pos=%d value=%lf\n",
name, argIdx, argPos, d);
@@ -791,6 +802,21 @@ jint updateNonPrimitiveReferences(JNIEnv *jenv, jobject jobj, JNIContext* jniCon
+// Re-points the KernelArgs of the given JNI context at a new holder object: for each of
+// the first argc args, the global ref held in fieldHolder is released and replaced by a
+// fresh global ref to newHolder. Always returns 0.
+JNI_JAVA(jint, KernelRunner, updateLambdaBlockJNI)
+   (JNIEnv *jenv, jobject jobj, jlong jniContextHandle, jobject newHolder, jint argc) {
+
+   JNIContext* jniContext = JNIContext::getJNIContext(jniContextHandle);
+
+   for(int argIdx=0; argIdx<argc; argIdx++){
+      // out with the old...
+      jenv->DeleteGlobalRef(jniContext->args[argIdx]->fieldHolder);
+      // in with the new!
+      jniContext->args[argIdx]->fieldHolder = jenv->NewGlobalRef(newHolder);
+   }
+
+   return 0;
+}
+
JNI_JAVA(jint, KernelRunner, runKernelJNI)
(JNIEnv *jenv, jobject jobj, jlong jniContextHandle, jobject _range, jboolean needSync, jint passes) {
if (config== NULL){
diff --git a/com.amd.aparapi.jni/src/cpp/jniHelper.cpp b/com.amd.aparapi.jni/src/cpp/jniHelper.cpp
index 3997bd5b..8437e3c4 100644
--- a/com.amd.aparapi.jni/src/cpp/jniHelper.cpp
+++ b/com.amd.aparapi.jni/src/cpp/jniHelper.cpp
@@ -234,7 +234,7 @@ jobject JNIHelper::getStaticFieldObject(JNIEnv *jenv, char *className, char *fie
return(NULL);
}
- jobject value = jenv->GetStaticObjectField(NULL, fieldId);
+ jobject value = jenv->GetStaticObjectField(theClass, fieldId);
if (value == NULL || jenv->ExceptionCheck()) {
jenv->ExceptionDescribe();
jenv->ExceptionClear();
diff --git a/com.amd.aparapi/build.xml b/com.amd.aparapi/build.xml
index d3017f52..85384ab1 100644
--- a/com.amd.aparapi/build.xml
+++ b/com.amd.aparapi/build.xml
@@ -18,9 +18,8 @@
OS Version: ${os.version}OS Arch: ${os.arch}
-
-
-
+
+
@@ -76,6 +75,11 @@
+
+
+
+
+
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/Annotations.java b/com.amd.aparapi/src/java/com/amd/aparapi/Annotations.java
deleted file mode 100644
index 39368a17..00000000
--- a/com.amd.aparapi/src/java/com/amd/aparapi/Annotations.java
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
-Copyright (c) 2010-2011, Advanced Micro Devices, Inc.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
-following conditions are met:
-
-Redistributions of source code must retain the above copyright notice, this list of conditions and the following
-disclaimer.
-
-Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
-disclaimer in the documentation and/or other materials provided with the distribution.
-
-Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products
-derived from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
-INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
-WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-If you use the software (in whole or in part), you shall adhere to all applicable U.S., European, and other export
-laws, including but not limited to the U.S. Export Administration Regulations ("EAR"), (15 C.F.R. Sections 730 through
-774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June 2000. Further, pursuant to Section 740.6 of the EAR,
-you hereby certify that, except pursuant to a license granted by the United States Department of Commerce Bureau of
-Industry and Security or as otherwise permitted pursuant to a License Exception under the U.S. Export Administration
-Regulations ("EAR"), you will not (1) export, re-export or release to a national of a country in Country Groups D:1,
-E:1 or E:2 any restricted technology, software, or source code you receive hereunder, or (2) export to Country Groups
-D:1, E:1 or E:2 the direct product of such technology or software, if such foreign produced direct product is subject
-to national security controls as identified on the Commerce Control List (currently found in Supplement 1 to Part 774
-of EAR). For the most current Country Group listings, or for additional information about the EAR or your obligations
-under those regulations, please refer to the U.S. Bureau of Industry and Security's website at http://www.bis.doc.gov/.
-
-*/
-package com.amd.aparapi;
-
-/**
- * A collection of annotations used at dev time to tag intent.
- *
- * We should be able to remove all of these before OpenSource release.
- *
- * @author gfrost
- */
-class Annotations{
-
- /**
- * Use this annotation to tag stuff that needs Java Doc added.
- *
- * @author gfrost
- */
- @interface DocMe {
- }
-
- /**
- * Use this annotation to tag fields that we think need to be removed (method/field/var).
- *
- * @author gfrost
- */
- @interface RemoveMe {
- }
-
- /**
- * Used to tag experimental features (methods/fields).
- *
- * Do not rely on anything tagged as experimental, it will probably be retracted/refactored.
- *
- * @author gfrost
- *
- */
- @interface Experimental {
- }
-
- /**
- * Used to tag unused features (methods/fields).
- *
- * Do not rely on anything tagged as unused, it will probably be retracted/refactored.
- *
- * @author gfrost
- *
- */
- @interface Unused {
- }
-}
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/Aparapi.java b/com.amd.aparapi/src/java/com/amd/aparapi/Aparapi.java
new file mode 100644
index 00000000..ad564815
--- /dev/null
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/Aparapi.java
@@ -0,0 +1,182 @@
+package com.amd.aparapi;
+
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+//import java.lang.invoke.InnerClassLambdaMetafactory;
+import java.lang.reflect.Field;
+import java.lang.reflect.Method;
+import java.util.concurrent.CyclicBarrier;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.BrokenBarrierException;
+import java.util.function.IntBlock;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import java.util.HashMap;
+import java.util.List;
+
+import com.amd.aparapi.ClassModel.ConstantPool.MethodEntry;
+import com.amd.aparapi.InstructionSet.AccessField;
+import com.amd.aparapi.InstructionSet.MethodCall;
+import com.amd.aparapi.InstructionSet.VirtualMethodCall;
+
+public class Aparapi{
+
+ private static Logger logger = Logger.getLogger(Config.getLoggerName());
+
+ public interface KernelI {
+ void run(int x);
+ }
+ public interface KernelII {
+ void run(int x, int y);
+ }
+ public interface KernelIII {
+ void run(int x, int y, int id);
+ }
+
+ public interface KernelSAM {
+ void run();
+ }
+
+   /**
+    * Waits on {@code barrier} via {@link CyclicBarrier#await()}.
+    * An interrupt is not propagated as an exception, but the thread's interrupt
+    * status is restored so callers can still observe it. A broken barrier is ignored.
+    *
+    * @param barrier the barrier to wait on
+    */
+   static void wait(CyclicBarrier barrier){
+      try {
+         barrier.await();
+      } catch (InterruptedException ex) {
+         // await() clears the interrupt status when it throws; re-assert it.
+         Thread.currentThread().interrupt();
+      } catch (BrokenBarrierException ignored) {
+         // Best effort: the barrier was broken by another party; nothing to undo here.
+      }
+   }
+
+/*
+ static public void forEach(int width, int height, KernelII kernel){
+ final int threads = Runtime.getRuntime().availableProcessors();
+ final CyclicBarrier barrier = new CyclicBarrier(threads+1);
+ for (int t=0; t{
+ for (int x=finalt*(width/threads); x<(finalt+1)*(width/threads); x++){
+ for (int y=0; y{
+ for (int x=finalt*(width/threads); x<(finalt+1)*(width/threads); x++){
+ kernel.run(x);
+ }
+ wait(barrier);
+ }).start();
+ }
+ wait(barrier);
+ }
+*/
+
+
+ static public void forEachJava(int jobSize, IntBlock block) {
+
+ // Single threaded solution
+ //for (int i=0; i{
+ for (int x=finalt*(width/threads); x<(finalt+1)*(width/threads); x++){
+ block.accept(x);
+ }
+ wait(barrier);
+ }).start();
+ }
+ wait(barrier);
+
+ }
+
+
+ static final ConcurrentHashMap kernels = new ConcurrentHashMap();
+ static final ConcurrentHashMap haveGoodKernel = new ConcurrentHashMap();
+
+
+   /**
+    * Dispatches {@code block} over a range of {@code jobSize}, using a {@link KernelRunner}
+    * when one is available and runnable, and {@code forEachJava} otherwise.
+    * Runners and their last known outcome are cached per block class.
+    *
+    * @param jobSize size passed to {@code Range.create} and to {@code forEachJava}
+    * @param block   the block to execute
+    */
+   static public void forEach(int jobSize, IntBlock block) {
+
+      // A new block object arrives on every call, so the caches are keyed by its class.
+      final Class<? extends IntBlock> blockClass = block.getClass();
+      KernelRunner runner = kernels.get(blockClass);
+      final Boolean knownOutcome = haveGoodKernel.get(blockClass);
+
+      try {
+
+         // Build a runner only when this block class has never been attempted before.
+         if (runner == null && knownOutcome == null) {
+            runner = new KernelRunner(block);
+         }
+
+         if (runner == null || !runner.getRunnable()) {
+            forEachJava(jobSize, block);
+            return;
+         }
+
+         final boolean success = runner.execute(block, Range.create(jobSize), 1);
+         if (success) {
+            kernels.put(blockClass, runner);
+            haveGoodKernel.put(blockClass, true);
+         }
+         // NOTE(review): when execute() reports failure no Java fallback runs on this call - confirm intended.
+         runner.setRunnable(success);
+         return;
+
+      } catch (AparapiException e) {
+         System.err.println(e);
+         e.printStackTrace();
+
+         if (logger.isLoggable(Level.FINE)) {
+            logger.fine("Kernel failed, try to revert to java.");
+         }
+
+         // Remember the failure so later calls for this block class skip kernel creation.
+         haveGoodKernel.put(blockClass, false);
+         if (runner != null) {
+            runner.setRunnable(false);
+         }
+      }
+
+      if (logger.isLoggable(Level.FINE)) {
+         logger.fine("Running java.");
+      }
+
+      forEachJava(jobSize, block);
+   }
+/*
+ static public void forEach(int[][] intArray, KernelIII kernel){
+ final int width = intArray.length;
+ final int threads = Runtime.getRuntime().availableProcessors();
+ final CyclicBarrier barrier = new CyclicBarrier(threads+1);
+ for (int t=0; t{
+ for (int x=finalt*(width/threads); x<(finalt+1)*(width/threads); x++){
+ int[] arr = intArray[x];
+ int arrLen = arr.length;
+ for (int y=0; y map = new HashMap();
+
+   /**
+    * Class-file transformer that records, without modifying them, the bytes of every class
+    * passed to it whose name contains "$$".
+    */
+   static class Transformer implements ClassFileTransformer{
+      @Override public byte[] transform(ClassLoader loader, String name, Class<?> clazz, ProtectionDomain domain, byte[] bytes){
+         if (name != null && name.contains("$$")){
+            // Stored under the dotted form of the name, which is what getBytes() looks up via Class.getName().
+            map.put(name.replace('/','.'), bytes);
+            System.out.println("+"+name+" length="+bytes.length);
+         }
+         // The incoming bytes are handed back unchanged.
+         return(bytes);
+      }
+   }
+ // Prints a marker line and registers a new Transformer with the supplied Instrumentation.
+ // NOTE(review): name and parameters match the java.lang.instrument agent entry point, but the
+ // method is not public - confirm how it is expected to be invoked.
+ static void premain(String agentArgs, Instrumentation inst){
+ System.out.println("inside premain!");
+ inst.addTransformer(new Transformer());
+ }
+
+   /**
+    * Looks up the class-file bytes stored in {@code map} under {@code clazz.getName()} and
+    * reports the outcome on stdout.
+    *
+    * @param clazz the class whose bytes are wanted
+    * @return the stored bytes, or {@code null} if nothing is stored for that name
+    */
+   static public byte[] getBytes(Class<?> clazz){
+      byte[] bytes = map.get(clazz.getName());
+      if (bytes == null){
+         System.out.println("can't get bytes for ="+clazz);
+      }else{
+         System.out.println("getting bytes for ="+clazz+" size="+bytes.length);
+      }
+      return(bytes);
+   }
+
+}
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/BlockWriter.java b/com.amd.aparapi/src/java/com/amd/aparapi/BlockWriter.java
index e2336381..05913f53 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/BlockWriter.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/BlockWriter.java
@@ -120,6 +120,8 @@ protected void newLine() {
write(" ");
}
}
+
+ // Name of the lambda iteration int argument that filterLambdaIterationVariable() compares
+ // local variable names against; this implementation reports none (null).
+ // NOTE(review): presumably overridden by writers that emit lambda kernels - confirm.
+ String getLambdaIterationIntArgName() { return null; }
protected void writeConditionalBranch16(ConditionalBranch16 _branch16, boolean _invert) throws CodeGenException {
@@ -372,7 +374,22 @@ protected String convertCast(String _cast) {
return ("(" + raw + ")");
}
- void writeInstruction(Instruction _instruction) throws CodeGenException {
+
+ String filterLambdaIterationVariable(Instruction _instruction) {
+ assert _instruction instanceof AccessLocalVariable : "Must be AccessLocalVariable";
+ AccessLocalVariable localVariableLoadInstruction = (AccessLocalVariable) _instruction;
+ LocalVariableInfo localVariable = localVariableLoadInstruction.getLocalVariableInfo();
+ String local = localVariable.getVariableName();
+ if (local.equals(this.getLambdaIterationIntArgName())) {
+ //System.out.println("## Inserting get_global_id for lambdaIterationIntArgName = " + this.getLambdaIterationIntArgName());
+ // this is the lambda iteration variable, so substitute get_global_id
+ return "get_global_id(0)";
+ }
+ return local;
+ }
+
+
+ void writeInstruction(Instruction _instruction) throws CodeGenException {
if (_instruction instanceof CompositeIfElseInstruction) {
write("(");
Instruction lhs = writeConditional(((CompositeInstruction) _instruction).getBranchSet());
@@ -495,9 +512,9 @@ void writeInstruction(Instruction _instruction) throws CodeGenException {
}
} else if (_instruction instanceof AccessLocalVariable) {
- AccessLocalVariable localVariableLoadInstruction = (AccessLocalVariable) _instruction;
- LocalVariableInfo localVariable = localVariableLoadInstruction.getLocalVariableInfo();
- write(localVariable.getVariableName());
+
+ write(filterLambdaIterationVariable(_instruction));
+
} else if (_instruction instanceof I_IINC) {
I_IINC location = (I_IINC) _instruction;
LocalVariableInfo localVariable = location.getLocalVariableInfo();
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/ClassModel.java b/com.amd.aparapi/src/java/com/amd/aparapi/ClassModel.java
index e75266b6..3db0e5d0 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/ClassModel.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/ClassModel.java
@@ -140,7 +140,7 @@ interface LocalVariableTableEntry extends Iterable<
// Find better way to do this check
// The java.lang.Object test is for unit test framework to succeed - should
// not occur in normal use
- if ((mySuper != null) && (!mySuper.getName().equals(Kernel.class.getName()))
+ if ((mySuper != null) /* && (!mySuper.getName().equals(Kernel.class.getName())) */
&& (!mySuper.getName().equals("java.lang.Object"))) {
superClazz = new ClassModel(mySuper);
}
@@ -194,7 +194,7 @@ ClassModel getSuperClazz() {
return superClazz;
}
- @Annotations.DocMe void replaceSuperClazz(ClassModel c) {
+ void replaceSuperClazz(ClassModel c) {
if (this.superClazz != null) {
assert c.isSuperClass(this.getClassWeAreModelling()) == true : "not my super";
if (this.superClazz.getClassWeAreModelling().getName().equals(c.getClassWeAreModelling().getName())) {
@@ -2405,8 +2405,19 @@ ClassModel getClassModel() {
}
public String toString() {
- return getClassModel().getClassWeAreModelling().getName() + "." +
- getName() + " " + getDescriptor();
+ ClassModel cm = getClassModel();
+ assert cm != null : "ClassModel should not be null";
+
+ // ecaspole 121211 - I think this is an actual bug where clazz is not set when creating
+ // ClassModel from the stream
+ //Class theClass = cm.getClassWeAreModelling();
+ //assert theClass != null : "ClassModel should not be null";
+
+ //System.out.println(" Hi Eric: " + ClassModel.this.getClassEntry().getNameUTF8Entry().getUTF8());
+
+ //return getClassModel().getClassWeAreModelling().getName() + "." +
+ // getName() + " " + getDescriptor();
+ return getName() + " " + getDescriptor();
}
}
@@ -2453,6 +2464,10 @@ void parse(Class> _class) throws ClassParseException {
* @throws ClassParseException
*/
private void parse(ClassLoader _classLoader, String _className) throws ClassParseException {
+ // Debug aid only: report where the class file resolves to. The resource is looked up by
+ // URL rather than opened, so no extra stream is left unclosed (the call below opens the
+ // stream that is actually parsed).
+ String asDotClass = _className.replace('.', '/') + ".class";
+ System.out.println("stream is: " + _classLoader.getResource(asDotClass));
+
parse(_classLoader.getResourceAsStream(_className.replace('.', '/') + ".class"));
}
@@ -2545,6 +2560,9 @@ ClassModelField getField(String _name) {
}
ClassModelMethod getMethod(String _name, String _descriptor) {
+
+ //System.out.println("methods = " + methods);
+
for (ClassModelMethod entry : methods) {
if (entry.getName().equals(_name) && entry.getDescriptor().equals(_descriptor)) {
return (entry);
@@ -2602,6 +2620,10 @@ ClassModelMethod getMethod(MethodEntry _methodEntry, boolean _isSpecial) {
MethodModel getMethodModel(String _name, String _signature) throws AparapiException {
ClassModelMethod method = getMethod(_name, _signature);
+
+
+
+ assert method != null : "ClassModelMethod should not be null";
return new MethodModel(method);
}
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/ClassParseException.java b/com.amd.aparapi/src/java/com/amd/aparapi/ClassParseException.java
index c375baff..b4a07c75 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/ClassParseException.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/ClassParseException.java
@@ -80,7 +80,8 @@ public static enum TYPE {
ACCESSEDOBJECTSETTERARRAY("Passing array arguments to Intrinsics in expression form is not supported"), //
MULTIDIMENSIONARRAYASSIGN("Can't assign to two dimension array"), //
MULTIDIMENSIONARRAYACCESS("Can't access through a two dimensional array"), //
- MISSINGLOCALVARIABLETABLE("Method does not contain a local variable table (recompile with -g?)");
+ MISSINGLOCALVARIABLETABLE("Method does not contain a local variable table (recompile with -g?)"),
+ UNHANDLEDMAPPEDMETHOD("Unhandled JDK mapped method ");
private String description;
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/Entrypoint.java b/com.amd.aparapi/src/java/com/amd/aparapi/Entrypoint.java
index 34868697..e7ec473e 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/Entrypoint.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/Entrypoint.java
@@ -152,6 +152,17 @@ Object getKernelInstance() {
void setKernelInstance(Object _k) {
kernelInstance = _k;
}
+
+   /** Lambda actual-parameter count as supplied through the setter; starts at zero. */
+   int lambdaActualParamsCount = 0;
+
+   /**
+    * Stores the lambda actual-parameter count supplied by the caller.
+    * This could be done by parsing the signature instead.
+    */
+   void setLambdaActualParamsCount(int count) {
+      this.lambdaActualParamsCount = count;
+   }
+
+   /** Returns the count last stored by {@link #setLambdaActualParamsCount(int)}. */
+   int getLambdaActualParamsCount() {
+      return this.lambdaActualParamsCount;
+   }
Map getObjectArrayFieldsClasses() {
return objectArrayFieldsClasses;
@@ -199,8 +210,11 @@ static Field getFieldFromClassHierarchy(Class> _clazz, String _name) throws Ap
logger.fine("looking for " + _name + " in " + mySuper.getName());
}
- // Find better way to do this check
- while (!mySuper.getName().equals(Kernel.class.getName())) {
+ assert _clazz != null && mySuper != null : "Classes should not be null";
+
+ // For the jambi demo, we are not operating on Kernel subclass, keep looking
+ while ((!mySuper.getName().equals(Object.class.getName())) /* &&
+ (!mySuper.getName().equals(Kernel.class.getName())) */ ) {
try {
field = mySuper.getDeclaredField(_name);
int modifiers = field.getModifiers();
@@ -214,8 +228,6 @@ static Field getFieldFromClassHierarchy(Class> _clazz, String _name) throws Ap
}
throw new ClassParseException(ClassParseException.TYPE.OBJECTFIELDREFERENCE);
} else {
- // This should be looger fine...
- //System.out.println("field " + _name + " not suitable: " + java.lang.reflect.Modifier.toString(modifiers));
return null;
}
} catch (NoSuchFieldException nsfe) {
@@ -235,6 +247,8 @@ static Field getFieldFromClassHierarchy(Class> _clazz, String _name) throws Ap
*
* It is important to have only one ClassModel for each class used in the kernel
* and only one MethodModel per method, so comparison operations work properly.
+ *
+ * The className param is in dot form, not slashes
*/
ClassModel getOrUpdateAllClassAccesses(String className) throws AparapiException {
ClassModel memberClassModel = allFieldsClasses.get(className);
@@ -242,6 +256,11 @@ ClassModel getOrUpdateAllClassAccesses(String className) throws AparapiException
try {
Class> memberClass = Class.forName(className);
+ // Quick and dirty way to bail out from unhandled Math methods etc
+ if (className.startsWith("java.lang")) {
+ throw new ClassParseException(ClassParseException.TYPE.UNHANDLEDMAPPEDMETHOD);
+ }
+
// Immediately add this class and all its supers if necessary
memberClassModel = new ClassModel(memberClass);
if (logger.isLoggable(Level.FINEST)) {
@@ -403,7 +422,7 @@ void updateObjectMemberFieldAccesses(String className, FieldEntry field) throws
ClassModelMethod resolveCalledMethod(MethodCall methodCall, ClassModel classModel) throws AparapiException {
MethodEntry methodEntry = methodCall.getConstantPoolMethodEntry();
int thisClassIndex = classModel.getThisClassConstantPoolIndex();//arf
- boolean isMapped = (thisClassIndex != methodEntry.getClassIndex()) && Kernel.isMappedMethod(methodEntry);
+ boolean isMapped = (thisClassIndex != methodEntry.getClassIndex()) && KernelRunner.isMappedMethod(methodEntry);
if (logger.isLoggable(Level.FINE)) {
if (methodCall instanceof I_INVOKESPECIAL) {
logger.fine("Method call to super: " + methodEntry);
@@ -664,11 +683,12 @@ && getFieldFromClassHierarchy(getClassModel().getClassWeAreModelling(), assigned
} else if (instruction instanceof I_INVOKEVIRTUAL) {
I_INVOKEVIRTUAL invokeInstruction = (I_INVOKEVIRTUAL) instruction;
MethodEntry methodEntry = invokeInstruction.getConstantPoolMethodEntry();
- if (Kernel.isMappedMethod(methodEntry)) { //only do this for intrinsics
+ if (KernelRunner.isMappedMethod(methodEntry)) { //only do this for intrinsics
- if (Kernel.usesAtomic32(methodEntry)) {
- setRequiresAtomics32Pragma(true);
- }
+ // I forgot what atomics are for, check with Gary
+// if (Kernel.usesAtomic32(methodEntry)) {
+// setRequiresAtomics32Pragma(true);
+// }
Arg methodArgs[] = methodEntry.getArgs();
if (methodArgs.length > 0 && methodArgs[0].isArray()) { //currently array arg can only take slot 0
@@ -844,7 +864,7 @@ ClassModel getClassModel() {
*/
MethodModel getCallTarget(MethodEntry _methodEntry, boolean _isSpecial) {
ClassModelMethod target = getClassModel().getMethod(_methodEntry, _isSpecial);
- boolean isMapped = Kernel.isMappedMethod(_methodEntry);
+ boolean isMapped = KernelRunner.isMappedMethod(_methodEntry);
if (logger.isLoggable(Level.FINE) && target == null) {
logger.fine("Did not find call target: " + _methodEntry + " in " +
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/InstructionSet.java b/com.amd.aparapi/src/java/com/amd/aparapi/InstructionSet.java
index 755a45db..202a6583 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/InstructionSet.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/InstructionSet.java
@@ -2233,6 +2233,9 @@ static class I_GETFIELD extends Index16 implements AccessInstanceField{
return (1);
}
+ @Override public String toString() { // base instruction text, a space, then the accessed field's name read from the constant pool
+ return super.toString() + " " + getConstantPoolFieldEntry().getNameAndTypeEntry().getNameUTF8Entry().getUTF8();
+ }
}
static class I_GETSTATIC extends Index16 implements AccessField{
@@ -3024,6 +3027,10 @@ static class I_INVOKEVIRTUAL extends Index16 implements VirtualMethodCall{
}
return (child);
}
+
+ @Override public String toString() { // base instruction text, a space, then the constant-pool method entry this call refers to
+ return super.toString() + " " + method.getConstantPool().getMethodEntry(getConstantPoolMethodIndex());
+ }
@Override public Instruction getInstanceReference() {
return (getFirstChild());
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/Kernel.java b/com.amd.aparapi/src/java/com/amd/aparapi/Kernel.java
deleted file mode 100644
index 26721605..00000000
--- a/com.amd.aparapi/src/java/com/amd/aparapi/Kernel.java
+++ /dev/null
@@ -1,2217 +0,0 @@
-/*
-Copyright (c) 2010-2011, Advanced Micro Devices, Inc.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
-following conditions are met:
-
-Redistributions of source code must retain the above copyright notice, this list of conditions and the following
-disclaimer.
-
-Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
-disclaimer in the documentation and/or other materials provided with the distribution.
-
-Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products
-derived from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
-INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
-WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-If you use the software (in whole or in part), you shall adhere to all applicable U.S., European, and other export
-laws, including but not limited to the U.S. Export Administration Regulations ("EAR"), (15 C.F.R. Sections 730 through
-774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June 2000. Further, pursuant to Section 740.6 of the EAR,
-you hereby certify that, except pursuant to a license granted by the United States Department of Commerce Bureau of
-Industry and Security or as otherwise permitted pursuant to a License Exception under the U.S. Export Administration
-Regulations ("EAR"), you will not (1) export, re-export or release to a national of a country in Country Groups D:1,
-E:1 or E:2 any restricted technology, software, or source code you receive hereunder, or (2) export to Country Groups
-D:1, E:1 or E:2 the direct product of such technology or software, if such foreign produced direct product is subject
-to national security controls as identified on the Commerce Control List (currently found in Supplement 1 to Part 774
-of EAR). For the most current Country Group listings, or for additional information about the EAR or your obligations
-under those regulations, please refer to the U.S. Bureau of Industry and Security's website at http://www.bis.doc.gov/.
-
-*/
-package com.amd.aparapi;
-
-import java.lang.annotation.Retention;
-import java.lang.annotation.RetentionPolicy;
-import java.lang.reflect.Method;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.LinkedHashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.BrokenBarrierException;
-import java.util.concurrent.CyclicBarrier;
-import java.util.logging.Logger;
-
-import com.amd.aparapi.ClassModel.ConstantPool.MethodReferenceEntry;
-
-/**
- * A kernel encapsulates a data parallel algorithm that will execute either on a GPU
- * (through conversion to OpenCL) or on a CPU via a Java Thread Pool.
- *
- * To write a new kernel, a developer extends the Kernel class and overrides the Kernel.run() method.
- * To execute this kernel, the developer creates a new instance of it and calls Kernel.execute(int globalSize) with a suitable 'global size'. At runtime
- * Aparapi will attempt to convert the Kernel.run() method (and any method called directly or indirectly
- * by Kernel.run()) into OpenCL for execution on GPU devices made available via the OpenCL platform.
- *
- * Note that Kernel.run() is not called directly. Instead,
- * the Kernel.execute(int globalSize) method will cause the overridden Kernel.run()
- * method to be invoked once for each value in the range 0...globalSize.
- *
- * On the first call to Kernel.execute(int _globalSize), Aparapi will determine the EXECUTION_MODE of the kernel.
- * This decision is made dynamically based on two factors:
- *
- *
Whether OpenCL is available (appropriate drivers are installed and the OpenCL and Aparapi dynamic libraries are included on the system path).
- *
Whether the bytecode of the run() method (and every method that can be called directly or indirectly from the run() method)
- * can be converted into OpenCL.
- *
- *
- * Below is an example Kernel that calculates the square of a set of input values.
- *
- *
- * class SquareKernel extends Kernel{
- * private int values[];
- * private int squares[];
- * public SquareKernel(int values[]){
- * this.values = values;
- * squares = new int[values.length];
- * }
- * public void run() {
- * int gid = getGlobalID();
- * squares[gid] = values[gid]*values[gid];
- * }
- * public int[] getSquares(){
- * return(squares);
- * }
- * }
- *
- *
- * To execute this kernel, first create a new instance of it and then call execute(Range _range).
- *
- *
- * int[] values = new int[1024];
- * // fill values array
- * Range range = Range.create(values.length); // create a range 0..1024
- * SquareKernel kernel = new SquareKernel(values);
- * kernel.execute(range);
- *
- *
- * When execute(Range) returns, all the executions of Kernel.run() have completed and the results are available in the squares array.
- *
- * A different approach to creating kernels that avoids extending Kernel is to write an anonymous inner class:
- *
- *
- *
- * final int[] values = new int[1024];
- * // fill the values array
- * final int[] squares = new int[values.length];
- * final Range range = Range.create(values.length);
- *
- * Kernel kernel = new Kernel(){
- * public void run() {
- * int gid = getGlobalID();
- * squares[gid] = values[gid]*values[gid];
- * }
- * };
- * kernel.execute(range);
- * for (int i=0; i< values.length; i++){
- * System.out.printf("%4d %4d %8d\n", i, values[i], squares[i]);
- * }
- *
- *
- *
- *
- * @author gfrost AMD Javalabs
- * @version Alpha, 21/09/2010
- */
-
-public abstract class Kernel implements Cloneable{
- @Retention(RetentionPolicy.RUNTIME) @interface OpenCLMapping {
- String mapTo() default "";
-
- boolean atomic32() default false;
-
- boolean atomic64() default false;
- }
-
- @Retention(RetentionPolicy.RUNTIME) @interface OpenCLDelegate {
-
- }
-
- /**
- * We can use this Annotation to 'tag' intended local buffers.
- *
- * So we can either annotate the buffer
- *
- * @Local int[] buffer = new int[1024];
- *
- * Or use a special suffix
- *
- * int[] buffer_$local$ = new int[1024];
- *
- *
- * @see LOCAL_SUFFIX
- *
- *
- */
- public @Retention(RetentionPolicy.RUNTIME) @interface Local {
-
- }
-
- /**
- * We can use this Annotation to 'tag' intended constant buffers.
- *
- * So we can either annotate the buffer
- *
- * @Constant int[] buffer = new int[1024];
- *
- * Or use a special suffix
- *
- * int[] buffer_$constant$ = new int[1024];
- *
- *
- * @see LOCAL_SUFFIX
- *
- *
- */
- public @Retention(RetentionPolicy.RUNTIME) @interface Constant {
-
- }
-
- /**
- * We can use this suffix to 'tag' intended local buffers.
- *
- *
- * So either name the buffer
- *
- * int[] buffer_$local$ = new int[1024];
- *
- * Or use the Annotation form
- *
- * @Local int[] buffer = new int[1024];
- *
- */
-
- final static String LOCAL_SUFFIX = "_$local$";
-
- /**
- * We can use this suffix to 'tag' intended constant buffers.
- *
- *
- * So either name the buffer
- *
- * int[] buffer_$constant$ = new int[1024];
- *
- * Or use the Annotation form
- *
- * @Constant int[] buffer = new int[1024];
- *
- */
-
- final static String CONSTANT_SUFFIX = "_$constant$";
-
- private static Logger logger = Logger.getLogger(Config.getLoggerName());
-
- public abstract class Entry{
- public abstract void run();
-
- public Kernel execute(Range _range) {
- return (Kernel.this.execute("foo", _range, 1));
- }
- }
-
- /**
- * The execution mode ENUM enumerates the possible modes of executing a kernel.
- * One can request a mode of execution using the values below, and query a kernel after it first executes to
- * determine how it executed.
- *
- *
- * Aparapi supports 4 execution modes.
- *
- *
- *
Enum value
Execution
- *
GPU
Execute using OpenCL on first available GPU device
- *
CPU
Execute using OpenCL on first available CPU device
- *
JTP
Execute using a Java Thread Pool (one thread spawned per available core)
- *
SEQ
Execute using a single loop. This is useful for debugging but will be less
- * performant than the other modes
- *
- *
- *
- * To request that a kernel is executed in a specific mode, call Kernel.setExecutionMode(EXECUTION_MODE) before the
- * kernel first executes.
- *
- *
- * int[] values = new int[1024];
- * // fill values array
- * SquareKernel kernel = new SquareKernel(values);
- * kernel.setExecutionMode(Kernel.EXECUTION_MODE.JTP);
- * kernel.execute(values.length);
- *
- *
- * Alternatively, the property com.amd.aparapi.executionMode can be set to one of JTP,GPU,CPU,SEQ
- * when an application is launched.
- *
- * Generally setting the execution mode is not recommended (it is best to let Aparapi decide automatically) but the option
- * provides a way to compare a kernel's performance under multiple execution modes.
- *
- * @author gfrost AMD Javalabs
- * @version Alpha, 21/09/2010
- */
-
- public static enum EXECUTION_MODE {
- /**
- * A dummy value to indicate an unknown state.
- */
- NONE,
- /**
- * The value representing execution on a GPU device via OpenCL.
- */
- GPU,
- /**
- * The value representing execution on a CPU device via OpenCL.
- *
- * Note not all OpenCL implementations support OpenCL compute on the CPU.
- */
- CPU,
- /**
- * The value representing execution on a Java Thread Pool.
- *
- * By default one Java thread is started for each available core and each core will execute globalSize/cores work items.
- * This creates a total of globalSize%cores threads to complete the work.
- * Choose suitable values for globalSize to minimize the number of threads that are spawned.
- */
- JTP,
- /**
- * The value representing execution sequentially in a single loop.
- *
- * This is meant to be used for debugging a kernel.
- */
- SEQ;
- /* static boolean openCLAvailable;
-
- static {
- String arch = System.getProperty("os.arch");
- logger.fine("arch = " + arch);
-
- String libName = null;
- try {
-
- if (arch.equals("amd64") || arch.equals("x86_64")) {
-
- libName = "aparapi_x86_64";
- logger.fine("attempting to load shared lib " + libName);
- System.loadLibrary(libName);
- openCLAvailable = true;
- } else if (arch.equals("x86") || arch.equals("i386")) {
- libName = "aparapi_x86";
- logger.fine("attempting to load shared lib " + libName);
- System.loadLibrary(libName);
- openCLAvailable = true;
- } else {
- logger.warning("Expected property os.arch to contain amd64 or x86 but found " + arch
- + " don't know which library to load.");
-
- }
- } catch (UnsatisfiedLinkError e) {
- logger.warning("Check your environment. Failed to load aparapi native library "
- + libName
- + " or possibly failed to locate opencl native library (opencl.dll/opencl.so). Ensure that both are in your PATH (windows) or in LD_LIBRARY_PATH (linux).");
-
- openCLAvailable = false;
- }
- }
- */
-
- static EXECUTION_MODE getDefaultExecutionMode() {
- EXECUTION_MODE defaultExecutionMode = OpenCLJNI.getJNI().isOpenCLAvailable() ? GPU : JTP;
- String executionMode = Config.executionMode;
- if (executionMode != null) {
- try {
- EXECUTION_MODE requestedExecutionMode;
- requestedExecutionMode = getExecutionModeFromString(executionMode).iterator().next();
- logger.fine("requested execution mode =");
- if ((OpenCLJNI.getJNI().isOpenCLAvailable() && requestedExecutionMode.isOpenCL())
- || !requestedExecutionMode.isOpenCL()) {
- defaultExecutionMode = requestedExecutionMode;
- }
- } catch (Throwable t) {
- // we will take the default
- }
- }
-
- logger.fine("default execution modes = " + defaultExecutionMode);
-
- return (defaultExecutionMode);
- }
-
- static LinkedHashSet getDefaultExecutionModes() {
- LinkedHashSet defaultExecutionModes = new LinkedHashSet();
- if (OpenCLJNI.getJNI().isOpenCLAvailable()) {
- defaultExecutionModes.add(GPU);
- defaultExecutionModes.add(JTP);
- } else {
- defaultExecutionModes.add(JTP);
- }
- String executionMode = Config.executionMode;
- if (executionMode != null) {
- try {
- LinkedHashSet requestedExecutionModes;
- requestedExecutionModes = EXECUTION_MODE.getExecutionModeFromString(executionMode);
- logger.fine("requested execution mode =");
- for (EXECUTION_MODE mode : requestedExecutionModes) {
- logger.fine(" " + mode);
- }
- if ((OpenCLJNI.getJNI().isOpenCLAvailable() && EXECUTION_MODE.anyOpenCL(requestedExecutionModes))
- || !EXECUTION_MODE.anyOpenCL(requestedExecutionModes)) {
- defaultExecutionModes = requestedExecutionModes;
- }
- } catch (Throwable t) {
- // we will take the default
- }
- }
-
- logger.info("default execution modes = " + defaultExecutionModes);
-
- for (EXECUTION_MODE e : defaultExecutionModes) {
- logger.info("SETTING DEFAULT MODE: " + e.toString());
- }
-
- return (defaultExecutionModes);
- }
-
- static LinkedHashSet getExecutionModeFromString(String executionMode) {
- LinkedHashSet executionModes = new LinkedHashSet();
- for (String mode : executionMode.split(",")) {
- executionModes.add(valueOf(mode.toUpperCase()));
- }
- return executionModes;
- }
-
- static EXECUTION_MODE getFallbackExecutionMode() {
- EXECUTION_MODE defaultFallbackExecutionMode = JTP;
- logger.info("fallback execution mode = " + defaultFallbackExecutionMode);
- return (defaultFallbackExecutionMode);
- }
-
- static boolean anyOpenCL(LinkedHashSet _executionModes) {
- for (EXECUTION_MODE mode : _executionModes) {
- if (mode == GPU || mode == CPU) {
- return true;
- }
- }
- return false;
- }
-
- boolean isOpenCL() {
- return this == GPU || this == CPU;
- }
-
- };
-
- int[] globalId = new int[] {
- 0,
- 0,
- 0
- };
-
- int[] localId = new int[] {
- 0,
- 0,
- 0
- };
-
- int[] groupId = new int[] {
- 0,
- 0,
- 0
- };
-
- Range range;
-
- int passId;
-
- volatile CyclicBarrier localBarrier;
-
- /**
- * Determine the globalId of an executing kernel.
- *
- * The kernel implementation uses the globalId to determine which of the executing kernels (in the global domain space) this invocation is expected to deal with.
- *
- * For example in a SquareKernel implementation:
- *
- *
- * class SquareKernel extends Kernel{
- * private int values[];
- * private int squares[];
- * public SquareKernel(int values[]){
- * this.values = values;
- * squares = new int[values.length];
- * }
- * public void run() {
- * int gid = getGlobalID();
- * squares[gid] = values[gid]*values[gid];
- * }
- * public int[] getSquares(){
- * return(squares);
- * }
- * }
- *
- *
- * Each invocation of SquareKernel.run() retrieves it's globalId by calling getGlobalId(), and then computes the value of square[gid] for a given value of value[gid].
- *
- * @return The globalId for the Kernel being executed
- *
- * @see #getLocalId()
- * @see #getGroupId()
- * @see #getGlobalSize()
- * @see #getNumGroups()
- * @see #getLocalSize()
- */
-
- @OpenCLDelegate protected final int getGlobalId() {
- return (getGlobalId(0));
- }
-
- @OpenCLDelegate protected final int getGlobalId(int _dim) {
- return (globalId[_dim]);
- }
-
- /*
- @OpenCLDelegate protected final int getGlobalX() {
- return (getGlobalId(0));
- }
-
- @OpenCLDelegate protected final int getGlobalY() {
- return (getGlobalId(1));
- }
-
- @OpenCLDelegate protected final int getGlobalZ() {
- return (getGlobalId(2));
- }
- */
- /**
- * Determine the groupId of an executing kernel.
- *
- * When a Kernel.execute(int globalSize) is invoked for a particular kernel, the runtime will break the work into various 'groups'.
- *
- * A kernel can use getGroupId() to determine which group a kernel is currently
- * dispatched to
- *
- * The following code would capture the groupId for each kernel and map it against globalId.
- *
- * final int[] groupIds = new int[1024];
- * Kernel kernel = new Kernel(){
- * public void run() {
- * int gid = getGlobalId();
- * groupIds[gid] = getGroupId();
- * }
- * };
- * kernel.execute(groupIds.length);
- * for (int i=0; i< values.length; i++){
- * System.out.printf("%4d %4d\n", i, groupIds[i]);
- * }
- *
- *
- * @see #getLocalId()
- * @see #getGlobalId()
- * @see #getGlobalSize()
- * @see #getNumGroups()
- * @see #getLocalSize()
- *
- * @return The groupId for this Kernel being executed
- */
- @OpenCLDelegate protected final int getGroupId() {
- return (getGroupId(0));
- }
-
- @OpenCLDelegate protected final int getGroupId(int _dim) {
- return (groupId[_dim]);
- }
-
- /*
- @OpenCLDelegate protected final int getGroupX() {
- return (getGroupId(0));
- }
-
- @OpenCLDelegate protected final int getGroupY() {
- return (getGroupId(1));
- }
-
- @OpenCLDelegate protected final int getGroupZ() {
- return (getGroupId(2));
- }
- */
- /**
- * Determine the passId of an executing kernel.
- *
- * When a Kernel.execute(int globalSize, int passes) is invoked for a particular kernel, the runtime will break the work into various 'groups'.
- *
- * A kernel can use getPassId() to determine which pass we are in. This is ideal for 'reduce' type phases
- *
- * @see #getLocalId()
- * @see #getGlobalId()
- * @see #getGlobalSize()
- * @see #getNumGroups()
- * @see #getLocalSize()
- *
- * @return The groupId for this Kernel being executed
- */
- @OpenCLDelegate protected final int getPassId() {
- return (passId);
- }
-
- /**
- * Determine the local id of an executing kernel.
- *
- * When a Kernel.execute(int globalSize) is invoked for a particular kernel, the runtime will break the work into
- * various 'groups'.
- * getLocalId() can be used to determine the relative id of the current kernel within a specific group.
- *
- * The following code would capture the groupId for each kernel and map it against globalId.
- *
- * final int[] localIds = new int[1024];
- * Kernel kernel = new Kernel(){
- * public void run() {
- * int gid = getGlobalId();
- * localIds[gid] = getLocalId();
- * }
- * };
- * kernel.execute(localIds.length);
- * for (int i=0; i< values.length; i++){
- * System.out.printf("%4d %4d\n", i, localIds[i]);
- * }
- *
- *
- * @see #getGroupId()
- * @see #getGlobalId()
- * @see #getGlobalSize()
- * @see #getNumGroups()
- * @see #getLocalSize()
- *
- * @return The local id for this Kernel being executed
- */
- @OpenCLDelegate protected final int getLocalId() {
- return (getLocalId(0));
- }
-
- @OpenCLDelegate protected final int getLocalId(int _dim) {
- return (localId[_dim]);
- }
-
- /*
- @OpenCLDelegate protected final int getLocalX() {
- return (getLocalId(0));
- }
-
- @OpenCLDelegate protected final int getLocalY() {
- return (getLocalId(1));
- }
-
- @OpenCLDelegate protected final int getLocalZ() {
- return (getLocalId(2));
- }
- */
- /**
- * Determine the size of the group that an executing kernel is a member of.
- *
- * When a Kernel.execute(int globalSize) is invoked for a particular kernel, the runtime will break the work into
- * various 'groups'. getLocalSize() allows a kernel to determine the size of the current group.
- *
- * Note groups may not all be the same size. In particular, if (global size)%(# of compute devices)!=0, the runtime can choose to dispatch kernels to
- * groups with differing sizes.
- *
- * @see #getGroupId()
- * @see #getGlobalId()
- * @see #getGlobalSize()
- * @see #getNumGroups()
- * @see #getLocalSize()
- *
- * @return The size of the currently executing group.
- */
- @OpenCLDelegate protected final int getLocalSize() {
- return (range.getLocalSize(0));
- }
-
- @OpenCLDelegate protected final int getLocalSize(int _dim) {
- return (range.getLocalSize(_dim));
- }
-
- /*
- @OpenCLDelegate protected final int getLocalWidth() {
- return (range.getLocalSize(0));
- }
-
- @OpenCLDelegate protected final int getLocalHeight() {
- return (range.getLocalSize(1));
- }
-
- @OpenCLDelegate protected final int getLocalDepth() {
- return (range.getLocalSize(2));
- }
- */
- /**
- * Determine the value that was passed to Kernel.execute(int globalSize) method.
- *
- * @see #getGroupId()
- * @see #getGlobalId()
- * @see #getNumGroups()
- * @see #getLocalSize()
- *
- * @return The value passed to Kernel.execute(int globalSize) causing the current execution.
- */
- @OpenCLDelegate protected final int getGlobalSize() {
- return (range.getGlobalSize(0));
- }
-
- @OpenCLDelegate protected final int getGlobalSize(int _dim) {
- return (range.getGlobalSize(_dim));
- }
-
- /*
- @OpenCLDelegate protected final int getGlobalWidth() {
- return (range.getGlobalSize(0));
- }
-
- @OpenCLDelegate protected final int getGlobalHeight() {
- return (range.getGlobalSize(1));
- }
-
- @OpenCLDelegate protected final int getGlobalDepth() {
- return (range.getGlobalSize(2));
- }
- */
- /**
- * Determine the number of groups that will be used to execute a kernel
- *
- * When Kernel.execute(int globalSize) is invoked, the runtime will split the work into
- * multiple 'groups'. getNumGroups() returns the total number of groups that will be used.
- *
- * @see #getGroupId()
- * @see #getGlobalId()
- * @see #getGlobalSize()
- * @see #getNumGroups()
- * @see #getLocalSize()
- *
- * @return The number of groups that kernels will be dispatched into.
- */
- @OpenCLDelegate protected final int getNumGroups() {
- return (range.getNumGroups(0));
- }
-
- @OpenCLDelegate protected final int getNumGroups(int _dim) {
- return (range.getNumGroups(_dim));
- }
-
- /*
- @OpenCLDelegate protected final int getNumGroupsWidth() {
- return (range.getGroups(0));
- }
-
- @OpenCLDelegate protected final int getNumGroupsHeight() {
- return (range.getGroups(1));
- }
-
- @OpenCLDelegate protected final int getNumGroupsDepth() {
- return (range.getGroups(2));
- }
- */
- /**
- * The entry point of a kernel.
- *
- *
- * Every kernel must override this method.
- */
- public abstract void run();
-
- /**
- * When using a Java Thread Pool Aparapi uses clone to copy the initial instance to each thread.
- *
- *
- * If you choose to override clone() you are responsible for delegating to super.clone();
- */
- @Override protected Object clone() {
- try {
- Kernel worker = (Kernel) super.clone();
- worker.groupId = new int[] {
- 0,
- 0,
- 0
- };
- worker.localId = new int[] {
- 0,
- 0,
- 0
- };
- worker.globalId = new int[] {
- 0,
- 0,
- 0
- };
- return worker;
- } catch (CloneNotSupportedException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- return (null);
- }
- }
-
- /**
- * Delegates to either {@link java.lang.Math#acos(double)} (Java) or acos(float) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param a value to delegate to {@link java.lang.Math#acos(double)}/acos(float)
- * @return {@link java.lang.Math#acos(double)} casted to float/acos(float)
- *
- * @see java.lang.Math#acos(double)
- * @see acos(float)
- */
- @OpenCLMapping(mapTo = "acos") protected float acos(float a) {
- return (float) Math.acos(a);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#acos(double)} (Java) or acos(double) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param a value to delegate to {@link java.lang.Math#acos(double)}/acos(double)
- * @return {@link java.lang.Math#acos(double)}/acos(double)
- *
- * @see java.lang.Math#acos(double)
- * @see acos(double)
- */
- @OpenCLMapping(mapTo = "acos") protected double acos(double a) {
- return Math.acos(a);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#asin(double)} (Java) or asin(float) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param _f value to delegate to {@link java.lang.Math#asin(double)}/asin(float)
- * @return {@link java.lang.Math#asin(double)} casted to float/asin(float)
- *
- * @see java.lang.Math#asin(double)
- * @see asin(float)
- */
- @OpenCLMapping(mapTo = "asin") protected float asin(float _f) {
- return (float) Math.asin(_f);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#asin(double)} (Java) or asin(double) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param _d value to delegate to {@link java.lang.Math#asin(double)}/asin(double)
- * @return {@link java.lang.Math#asin(double)}/asin(double)
- *
- * @see java.lang.Math#asin(double)
- * @see asin(double)
- */
- @OpenCLMapping(mapTo = "asin") protected double asin(double _d) {
- return Math.asin(_d);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#atan(double)} (Java) or atan(float) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param _f value to delegate to {@link java.lang.Math#atan(double)}/atan(float)
- * @return {@link java.lang.Math#atan(double)} casted to float/atan(float)
- *
- * @see java.lang.Math#atan(double)
- * @see atan(float)
- */
- @OpenCLMapping(mapTo = "atan") protected float atan(float _f) {
- return (float) Math.atan(_f);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#atan(double)} (Java) or atan(double) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param _d value to delegate to {@link java.lang.Math#atan(double)}/atan(double)
- * @return {@link java.lang.Math#atan(double)}/atan(double)
- *
- * @see java.lang.Math#atan(double)
- * @see atan(double)
- */
- @OpenCLMapping(mapTo = "atan") protected double atan(double _d) {
- return Math.atan(_d);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#atan2(double, double)} (Java) or atan2(float, float) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param _f1 value to delegate to first argument of {@link java.lang.Math#atan2(double, double)}/atan2(float, float)
- * @param _f2 value to delegate to second argument of {@link java.lang.Math#atan2(double, double)}/atan2(float, float)
- * @return {@link java.lang.Math#atan2(double, double)} casted to float/atan2(float, float)
- *
- * @see java.lang.Math#atan2(double, double)
- * @see atan2(float, float)
- */
- @OpenCLMapping(mapTo = "atan2") protected float atan2(float _f1, float _f2) {
- return (float) Math.atan2(_f1, _f2);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#atan2(double, double)} (Java) or atan2(double, double) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param _d1 value to delegate to first argument of {@link java.lang.Math#atan2(double, double)}/atan2(double, double)
- * @param _d2 value to delegate to second argument of {@link java.lang.Math#atan2(double, double)}/atan2(double, double)
- * @return {@link java.lang.Math#atan2(double, double)}/atan2(double, double)
- *
- * @see java.lang.Math#atan2(double, double)
- * @see atan2(double, double)
- */
- @OpenCLMapping(mapTo = "atan2") protected double atan2(double _d1, double _d2) {
- return Math.atan2(_d1, _d2);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#ceil(double)} (Java) or ceil(float) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param _f value to delegate to {@link java.lang.Math#ceil(double)}/ceil(float)
- * @return {@link java.lang.Math#ceil(double)} casted to float/ceil(float)
- *
- * @see java.lang.Math#ceil(double)
- * @see ceil(float)
- */
- @OpenCLMapping(mapTo = "ceil") protected float ceil(float _f) {
- return (float) Math.ceil(_f);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#ceil(double)} (Java) or ceil(double) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param _d value to delegate to {@link java.lang.Math#ceil(double)}/ceil(double)
- * @return {@link java.lang.Math#ceil(double)}/ceil(double)
- *
- * @see java.lang.Math#ceil(double)
- * @see ceil(double)
- */
- @OpenCLMapping(mapTo = "ceil") protected double ceil(double _d) {
- return Math.ceil(_d);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#cos(double)} (Java) or cos(float) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param _f value to delegate to {@link java.lang.Math#cos(double)}/cos(float)
- * @return {@link java.lang.Math#cos(double)} casted to float/cos(float)
- *
- * @see java.lang.Math#cos(double)
- * @see cos(float)
- */
- @OpenCLMapping(mapTo = "cos") protected float cos(float _f) {
- return (float) Math.cos(_f);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#cos(double)} (Java) or cos(double) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param _d value to delegate to {@link java.lang.Math#cos(double)}/cos(double)
- * @return {@link java.lang.Math#cos(double)}/cos(double)
- *
- * @see java.lang.Math#cos(double)
- * @see cos(double)
- */
- @OpenCLMapping(mapTo = "cos") protected double cos(double _d) {
- return Math.cos(_d);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#exp(double)} (Java) or exp(float) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param _f value to delegate to {@link java.lang.Math#exp(double)}/exp(float)
- * @return {@link java.lang.Math#exp(double)} casted to float/exp(float)
- *
- * @see java.lang.Math#exp(double)
- * @see exp(float)
- */
- @OpenCLMapping(mapTo = "exp") protected float exp(float _f) {
- return (float) Math.exp(_f);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#exp(double)} (Java) or exp(double) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param _d value to delegate to {@link java.lang.Math#exp(double)}/exp(double)
- * @return {@link java.lang.Math#exp(double)}/exp(double)
- *
- * @see java.lang.Math#exp(double)
- * @see exp(double)
- */
- @OpenCLMapping(mapTo = "exp") protected double exp(double _d) {
- return Math.exp(_d);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#abs(float)} (Java) or fabs(float) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param _f value to delegate to {@link java.lang.Math#abs(float)}/fabs(float)
- * @return {@link java.lang.Math#abs(float)}/fabs(float)
- *
- * @see java.lang.Math#abs(float)
- * @see fabs(float)
- */
- @OpenCLMapping(mapTo = "fabs") protected float abs(float _f) {
- return Math.abs(_f);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#abs(double)} (Java) or fabs(double) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param _d value to delegate to {@link java.lang.Math#abs(double)}/fabs(double)
- * @return {@link java.lang.Math#abs(double)}/fabs(double)
- *
- * @see java.lang.Math#abs(double)
- * @see fabs(double)
- */
- @OpenCLMapping(mapTo = "fabs") protected double abs(double _d) {
- return Math.abs(_d);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#abs(int)} (Java) or abs(int) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param n value to delegate to {@link java.lang.Math#abs(int)}/abs(int)
- * @return {@link java.lang.Math#abs(int)}/abs(int)
- *
- * @see java.lang.Math#abs(int)
- * @see abs(int)
- */
- @OpenCLMapping(mapTo = "abs") protected int abs(int n) {
- return Math.abs(n);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#abs(long)} (Java) or abs(long) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param n value to delegate to {@link java.lang.Math#abs(long)}/abs(long)
- * @return {@link java.lang.Math#abs(long)}/abs(long)
- *
- * @see java.lang.Math#abs(long)
- * @see abs(long)
- */
- @OpenCLMapping(mapTo = "abs") protected long abs(long n) {
- return Math.abs(n);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#floor(double)} (Java) or floor(float) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param _f value to delegate to {@link java.lang.Math#floor(double)}/floor(float)
- * @return {@link java.lang.Math#floor(double)} casted to float/floor(float)
- *
- * @see java.lang.Math#floor(double)
- * @see floor(float)
- */
- @OpenCLMapping(mapTo = "floor") protected float floor(float _f) {
- return (float) Math.floor(_f);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#floor(double)} (Java) or floor(double) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param _d value to delegate to {@link java.lang.Math#floor(double)}/floor(double)
- * @return {@link java.lang.Math#floor(double)}/floor(double)
- *
- * @see java.lang.Math#floor(double)
- * @see floor(double)
- */
- @OpenCLMapping(mapTo = "floor") protected double floor(double _d) {
- return Math.floor(_d);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#max(float, float)} (Java) or fmax(float, float) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param _f1 value to delegate to first argument of {@link java.lang.Math#max(float, float)}/fmax(float, float)
- * @param _f2 value to delegate to second argument of {@link java.lang.Math#max(float, float)}/fmax(float, float)
- * @return {@link java.lang.Math#max(float, float)}/fmax(float, float)
- *
- * @see java.lang.Math#max(float, float)
- * @see fmax(float, float)
- */
- @OpenCLMapping(mapTo = "fmax") protected float max(float _f1, float _f2) {
- return Math.max(_f1, _f2);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#max(double, double)} (Java) or fmax(double, double) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param _d1 value to delegate to first argument of {@link java.lang.Math#max(double, double)}/fmax(double, double)
- * @param _d2 value to delegate to second argument of {@link java.lang.Math#max(double, double)}/fmax(double, double)
- * @return {@link java.lang.Math#max(double, double)}/fmax(double, double)
- *
- * @see java.lang.Math#max(double, double)
- * @see fmax(double, double)
- */
- @OpenCLMapping(mapTo = "fmax") protected double max(double _d1, double _d2) {
- return Math.max(_d1, _d2);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#max(int, int)} (Java) or max(int, int) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param n1 value to delegate to {@link java.lang.Math#max(int, int)}/max(int, int)
- * @param n2 value to delegate to {@link java.lang.Math#max(int, int)}/max(int, int)
- * @return {@link java.lang.Math#max(int, int)}/max(int, int)
- *
- * @see java.lang.Math#max(int, int)
- * @see max(int, int)
- */
- @OpenCLMapping(mapTo = "max") protected int max(int n1, int n2) {
- return Math.max(n1, n2);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#max(long, long)} (Java) or max(long, long) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param n1 value to delegate to first argument of {@link java.lang.Math#max(long, long)}/max(long, long)
- * @param n2 value to delegate to second argument of {@link java.lang.Math#max(long, long)}/max(long, long)
- * @return {@link java.lang.Math#max(long, long)}/max(long, long)
- *
- * @see java.lang.Math#max(long, long)
- * @see max(long, long)
- */
- @OpenCLMapping(mapTo = "max") protected long max(long n1, long n2) {
- return Math.max(n1, n2);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#min(float, float)} (Java) or fmin(float, float) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param _f1 value to delegate to first argument of {@link java.lang.Math#min(float, float)}/fmin(float, float)
- * @param _f2 value to delegate to second argument of {@link java.lang.Math#min(float, float)}/fmin(float, float)
- * @return {@link java.lang.Math#min(float, float)}/fmin(float, float)
- *
- * @see java.lang.Math#min(float, float)
- * @see fmin(float, float)
- */
- @OpenCLMapping(mapTo = "fmin") protected float min(float _f1, float _f2) {
- return Math.min(_f1, _f2);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#min(double, double)} (Java) or fmin(double, double) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param _d1 value to delegate to first argument of {@link java.lang.Math#min(double, double)}/fmin(double, double)
- * @param _d2 value to delegate to second argument of {@link java.lang.Math#min(double, double)}/fmin(double, double)
- * @return {@link java.lang.Math#min(double, double)}/fmin(double, double)
- *
- * @see java.lang.Math#min(double, double)
- * @see fmin(double, double)
- */
- @OpenCLMapping(mapTo = "fmin") protected double min(double _d1, double _d2) {
- return Math.min(_d1, _d2);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#min(int, int)} (Java) or min(int, int) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param n1 value to delegate to first argument of {@link java.lang.Math#min(int, int)}/min(int, int)
- * @param n2 value to delegate to second argument of {@link java.lang.Math#min(int, int)}/min(int, int)
- * @return {@link java.lang.Math#min(int, int)}/min(int, int)
- *
- * @see java.lang.Math#min(int, int)
- * @see min(int, int)
- */
- @OpenCLMapping(mapTo = "min") protected int min(int n1, int n2) {
- return Math.min(n1, n2);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#min(long, long)} (Java) or min(long, long) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param n1 value to delegate to first argument of {@link java.lang.Math#min(long, long)}/min(long, long)
- * @param n2 value to delegate to second argument of {@link java.lang.Math#min(long, long)}/min(long, long)
- * @return {@link java.lang.Math#min(long, long)}/min(long, long)
- *
- * @see java.lang.Math#min(long, long)
- * @see min(long, long)
- */
- @OpenCLMapping(mapTo = "min") protected long min(long n1, long n2) {
- return Math.min(n1, n2);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#log(double)} (Java) or log(float) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param _f value to delegate to {@link java.lang.Math#log(double)}/log(float)
- * @return {@link java.lang.Math#log(double)} casted to float/log(float)
- *
- * @see java.lang.Math#log(double)
- * @see log(float)
- */
- @OpenCLMapping(mapTo = "log") protected float log(float _f) {
- return (float) Math.log(_f);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#log(double)} (Java) or log(double) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param _d value to delegate to {@link java.lang.Math#log(double)}/log(double)
- * @return {@link java.lang.Math#log(double)}/log(double)
- *
- * @see java.lang.Math#log(double)
- * @see log(double)
- */
- @OpenCLMapping(mapTo = "log") protected double log(double _d) {
- return Math.log(_d);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#pow(double, double)} (Java) or pow(float, float) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param _f1 value to delegate to first argument of {@link java.lang.Math#pow(double, double)}/pow(float, float)
- * @param _f2 value to delegate to second argument of {@link java.lang.Math#pow(double, double)}/pow(float, float)
- * @return {@link java.lang.Math#pow(double, double)} casted to float/pow(float, float)
- *
- * @see java.lang.Math#pow(double, double)
- * @see pow(float, float)
- */
- @OpenCLMapping(mapTo = "pow") protected float pow(float _f1, float _f2) {
- return (float) Math.pow(_f1, _f2);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#pow(double, double)} (Java) or pow(double, double) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param _d1 value to delegate to first argument of {@link java.lang.Math#pow(double, double)}/pow(double, double)
- * @param _d2 value to delegate to second argument of {@link java.lang.Math#pow(double, double)}/pow(double, double)
- * @return {@link java.lang.Math#pow(double, double)}/pow(double, double)
- *
- * @see java.lang.Math#pow(double, double)
- * @see pow(double, double)
- */
- @OpenCLMapping(mapTo = "pow") protected double pow(double _d1, double _d2) {
- return Math.pow(_d1, _d2);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#IEEEremainder(double, double)} (Java) or remainder(float, float) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param _f1 value to delegate to first argument of {@link java.lang.Math#IEEEremainder(double, double)}/remainder(float, float)
- * @param _f2 value to delegate to second argument of {@link java.lang.Math#IEEEremainder(double, double)}/remainder(float, float)
- * @return {@link java.lang.Math#IEEEremainder(double, double)} casted to float/remainder(float, float)
- *
- * @see java.lang.Math#IEEEremainder(double, double)
- * @see remainder(float, float)
- */
- @OpenCLMapping(mapTo = "remainder") protected float IEEEremainder(float _f1, float _f2) {
- return (float) Math.IEEEremainder(_f1, _f2);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#IEEEremainder(double, double)} (Java) or remainder(double, double) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param _d1 value to delegate to first argument of {@link java.lang.Math#IEEEremainder(double, double)}/remainder(double, double)
- * @param _d2 value to delegate to second argument of {@link java.lang.Math#IEEEremainder(double, double)}/remainder(double, double)
- * @return {@link java.lang.Math#IEEEremainder(double, double)}/remainder(double, double)
- *
- * @see java.lang.Math#IEEEremainder(double, double)
- * @see remainder(double, double)
- */
- @OpenCLMapping(mapTo = "remainder") protected double IEEEremainder(double _d1, double _d2) {
- return Math.IEEEremainder(_d1, _d2);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#toRadians(double)} (Java) or radians(float) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param _f value to delegate to {@link java.lang.Math#toRadians(double)}/radians(float)
- * @return {@link java.lang.Math#toRadians(double)} casted to float/radians(float)
- *
- * @see java.lang.Math#toRadians(double)
- * @see radians(float)
- */
- @OpenCLMapping(mapTo = "radians") protected float toRadians(float _f) {
- return (float) Math.toRadians(_f);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#toRadians(double)} (Java) or radians(double) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param _d value to delegate to {@link java.lang.Math#toRadians(double)}/radians(double)
- * @return {@link java.lang.Math#toRadians(double)}/radians(double)
- *
- * @see java.lang.Math#toRadians(double)
- * @see radians(double)
- */
- @OpenCLMapping(mapTo = "radians") protected double toRadians(double _d) {
- return Math.toRadians(_d);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#toDegrees(double)} (Java) or degrees(float) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param _f value to delegate to {@link java.lang.Math#toDegrees(double)}/degrees(float)
- * @return {@link java.lang.Math#toDegrees(double)} casted to float/degrees(float)
- *
- * @see java.lang.Math#toDegrees(double)
- * @see degrees(float)
- */
- @OpenCLMapping(mapTo = "degrees") protected float toDegrees(float _f) {
- return (float) Math.toDegrees(_f);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#toDegrees(double)} (Java) or degrees(double) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param _d value to delegate to {@link java.lang.Math#toDegrees(double)}/degrees(double)
- * @return {@link java.lang.Math#toDegrees(double)}/degrees(double)
- *
- * @see java.lang.Math#toDegrees(double)
- * @see degrees(double)
- */
- @OpenCLMapping(mapTo = "degrees") protected double toDegrees(double _d) {
- return Math.toDegrees(_d);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#rint(double)} (Java) or rint(float) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param _f value to delegate to {@link java.lang.Math#rint(double)}/rint(float)
- * @return {@link java.lang.Math#rint(double)} casted to float/rint(float)
- *
- * @see java.lang.Math#rint(double)
- * @see rint(float)
- */
- @OpenCLMapping(mapTo = "rint") protected float rint(float _f) {
- return (float) Math.rint(_f);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#rint(double)} (Java) or rint(double) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param _d value to delegate to {@link java.lang.Math#rint(double)}/rint(double)
- * @return {@link java.lang.Math#rint(double)}/rint(double)
- *
- * @see java.lang.Math#rint(double)
- * @see rint(double)
- */
- @OpenCLMapping(mapTo = "rint") protected double rint(double _d) {
- return Math.rint(_d);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#round(float)} (Java) or round(float) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param _f value to delegate to {@link java.lang.Math#round(float)}/round(float)
- * @return {@link java.lang.Math#round(float)}/round(float)
- *
- * @see java.lang.Math#round(float)
- * @see round(float)
- */
- @OpenCLMapping(mapTo = "round") protected int round(float _f) {
- return Math.round(_f);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#round(double)} (Java) or round(double) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param _d value to delegate to {@link java.lang.Math#round(double)}/round(double)
- * @return {@link java.lang.Math#round(double)}/round(double)
- *
- * @see java.lang.Math#round(double)
- * @see round(double)
- */
- @OpenCLMapping(mapTo = "round") protected long round(double _d) {
- return Math.round(_d);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#sin(double)} (Java) or sin(float) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param _f value to delegate to {@link java.lang.Math#sin(double)}/sin(float)
- * @return {@link java.lang.Math#sin(double)} casted to float/sin(float)
- *
- * @see java.lang.Math#sin(double)
- * @see sin(float)
- */
- @OpenCLMapping(mapTo = "sin") protected float sin(float _f) {
- return (float) Math.sin(_f);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#sin(double)} (Java) or sin(double) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param _d value to delegate to {@link java.lang.Math#sin(double)}/sin(double)
- * @return {@link java.lang.Math#sin(double)}/sin(double)
- *
- * @see java.lang.Math#sin(double)
- * @see sin(double)
- */
- @OpenCLMapping(mapTo = "sin") protected double sin(double _d) {
- return Math.sin(_d);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#sqrt(double)} (Java) or sqrt(float) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param _f value to delegate to {@link java.lang.Math#sqrt(double)}/sqrt(float)
- * @return {@link java.lang.Math#sqrt(double)} casted to float/sqrt(float)
- *
- * @see java.lang.Math#sqrt(double)
- * @see sqrt(float)
- */
- @OpenCLMapping(mapTo = "sqrt") protected float sqrt(float _f) {
- return (float) Math.sqrt(_f);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#sqrt(double)} (Java) or sqrt(double) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param _d value to delegate to {@link java.lang.Math#sqrt(double)}/sqrt(double)
- * @return {@link java.lang.Math#sqrt(double)}/sqrt(double)
- *
- * @see java.lang.Math#sqrt(double)
- * @see sqrt(double)
- */
- @OpenCLMapping(mapTo = "sqrt") protected double sqrt(double _d) {
- return Math.sqrt(_d);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#tan(double)} (Java) or tan(float) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param _f value to delegate to {@link java.lang.Math#tan(double)}/tan(float)
- * @return {@link java.lang.Math#tan(double)} casted to float/tan(float)
- *
- * @see java.lang.Math#tan(double)
- * @see tan(float)
- */
- @OpenCLMapping(mapTo = "tan") protected float tan(float _f) {
- return (float) Math.tan(_f);
- }
-
- /**
- * Delegates to either {@link java.lang.Math#tan(double)} (Java) or tan(double) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param _d value to delegate to {@link java.lang.Math#tan(double)}/tan(double)
- * @return {@link java.lang.Math#tan(double)}/tan(double)
- *
- * @see java.lang.Math#tan(double)
- * @see tan(double)
- */
- @OpenCLMapping(mapTo = "tan") protected double tan(double _d) {
- return Math.tan(_d);
- }
-
- // the following rsqrt and native_sqrt and native_rsqrt don't exist in java Math
- // but added them here for nbody testing, not sure if we want to expose them
- /**
- * Computes inverse square root using {@link java.lang.Math#sqrt(double)} (Java) or delegates to rsqrt(double) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param _f value to delegate to {@link java.lang.Math#sqrt(double)}/rsqrt(double)
- * @return ( 1.0f / {@link java.lang.Math#sqrt(double)} casted to float )/rsqrt(double)
- *
- * @see java.lang.Math#sqrt(double)
- * @see rsqrt(double)
- */
- @OpenCLMapping(mapTo = "rsqrt") protected float rsqrt(float _f) {
- return (1.0f / (float) Math.sqrt(_f));
- }
-
- /**
- * Computes inverse square root using {@link java.lang.Math#sqrt(double)} (Java) or delegates to rsqrt(double) (OpenCL).
- *
- * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable.
- *
- * @param _d value to delegate to {@link java.lang.Math#sqrt(double)}/rsqrt(double)
- * @return ( 1.0f / {@link java.lang.Math#sqrt(double)} ) /rsqrt(double)
- *
- * @see java.lang.Math#sqrt(double)
- * @see rsqrt(double)
- */
- @OpenCLMapping(mapTo = "rsqrt") protected double rsqrt(double _d) {
- return (1.0 / Math.sqrt(_d));
- }
-
- @SuppressWarnings("unused") @OpenCLMapping(mapTo = "native_sqrt") private float native_sqrt(float _f) {
- int j = Float.floatToIntBits(_f);
- j = (1 << 29) + (j >> 1) - (1 << 22) - 0x4c00;
- return (Float.intBitsToFloat(j));
- // could add more precision using one iteration of newton's method, use the following
- }
-
- @SuppressWarnings("unused") @OpenCLMapping(mapTo = "native_rsqrt") private float native_rsqrt(float _f) {
- int j = Float.floatToIntBits(_f);
- j = 0x5f3759df - (j >> 1);
- float x = (Float.intBitsToFloat(j));
- return x;
- // if want more precision via one iteration of newton's method, use the following
- // float fhalf = 0.5f*_f;
- // return (x *(1.5f - fhalf * x * x));
- }
-
- // Hacked from AtomicIntegerArray.getAndAdd(i, delta)
- /**
- * Atomically adds _delta value to _index element of array _arr (Java) or delegates to atomic_add(volatile int*, int) (OpenCL).
- *
- *
- * @param _arr array for which an element value needs to be atomically incremented by _delta
- * @param _index index of the _arr array that needs to be atomically incremented by _delta
- * @param _delta value by which _index element of _arr array needs to be atomically incremented
- * @return previous value of _index element of _arr array
- *
- * @see atomic_add(volatile int*, int)
- */
- @OpenCLMapping(atomic32 = true) protected int atomicAdd(int[] _arr, int _index, int _delta) {
-
- if (!Config.disableUnsafe) {
- return UnsafeWrapper.atomicAdd(_arr, _index, _delta);
- } else {
- synchronized (_arr) {
- int previous = _arr[_index];
- _arr[_index] += _delta;
- return previous;
- }
- }
- }
-
- /**
- * Wait for all kernels in the current group to rendezvous at this call before continuing execution.
- *
- * @annotion Experimental
- */
-
- @OpenCLDelegate @Annotations.Experimental protected final void localBarrier() {
- try {
- localBarrier.await();
- } catch (InterruptedException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- } catch (BrokenBarrierException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- }
- }
-
- /**
- * Wait for all kernels in the current group to rendezvous at this call before continuing execution.
- *
- *
- * Java version is identical to localBarrier()
- *
- * @annotion Experimental
- * @deprecated
- */
-
- @OpenCLDelegate @Annotations.Experimental @Deprecated() protected final void globalBarrier() throws DeprecatedException {
- throw new DeprecatedException(
- "Kernel.globalBarrier() has been deprecated. It was based an incorrect understanding of OpenCL functionality.");
-
- }
-
- private KernelRunner kernelRunner = null;
-
- KernelRunner getKernelRunner() {
- return kernelRunner;
- }
-
- /**
- * Determine the execution time of the previous Kernel.execute(range) call.
- *
- * Note that for the first call this will include the conversion time.
- *
- * @return The time spent executing the kernel (ms)
- *
- * @see getConversionTime();
- * @see getAccumulatedExectutionTime();
- *
- */
- public synchronized long getExecutionTime() {
- return (kernelRunner.getExecutionTime());
- }
-
- /**
- * Determine the total execution time of all previous Kernel.execute(range) calls.
- *
- * Note that this will include the initial conversion time.
- *
- * @return The total time spent executing the kernel (ms)
- *
- * @see getExecutionTime();
- * @see getConversionTime();
- *
- */
- public synchronized long getAccumulatedExecutionTime() {
- return (kernelRunner.getAccumulatedExecutionTime());
- }
-
- /**
- * Determine the time taken to convert bytecode to OpenCL for first Kernel.execute(range) call.
- * @return The time spent preparing the kernel for execution using GPU
- *
- * @see getExecutionTime();
- * @see getAccumulatedExectutionTime();
- */
- public synchronized long getConversionTime() {
- return (kernelRunner.getConversionTime());
- }
-
- /**
- * Start execution of _range kernels.
- *
- * When kernel.execute(globalSize) is invoked, Aparapi will schedule the execution of globalSize kernels. If the execution mode is GPU then
- * the kernels will execute as OpenCL code on the GPU device. Otherwise, if the mode is JTP, the kernels will execute as a pool of Java threads on the CPU.
- *
- * @param range The number of Kernels that we would like to initiate.
- * @returnThe Kernel instance (this) so we can chain calls to put(arr).execute(range).get(arr)
- *
- */
- public synchronized Kernel execute(Range _range) {
- return (execute(_range, 1));
- }
-
- /**
- * Start execution of _range kernels.
- *
- * When kernel.execute(_range) is invoked, Aparapi will schedule the execution of _range kernels. If the execution mode is GPU then
- * the kernels will execute as OpenCL code on the GPU device. Otherwise, if the mode is JTP, the kernels will execute as a pool of Java threads on the CPU.
- *
- * Since adding the new Range class this method offers backward compatibility and merely defers to return (execute(Range.create(_range), 1));.
- * @param _range The number of Kernels that we would like to initiate.
- * @returnThe Kernel instance (this) so we can chain calls to put(arr).execute(range).get(arr)
- *
- */
- public synchronized Kernel execute(int _range) {
- return (execute(Range.create(_range), 1));
- }
-
- /**
- * Start execution of _passes iterations of _range kernels.
- *
- * When kernel.execute(_range, _passes) is invoked, Aparapi will schedule the execution of _reange kernels. If the execution mode is GPU then
- * the kernels will execute as OpenCL code on the GPU device. Otherwise, if the mode is JTP, the kernels will execute as a pool of Java threads on the CPU.
- *
- * @param _globalSize The number of Kernels that we would like to initiate.
- * @param _passes The number of passes to make
- * @return The Kernel instance (this) so we can chain calls to put(arr).execute(range).get(arr)
- *
- */
- public synchronized Kernel execute(Range _range, int _passes) {
- return (execute("run", _range, _passes));
- }
-
- /**
- * Start execution of _passes iterations over the _range of kernels.
- *
- * When kernel.execute(_range) is invoked, Aparapi will schedule the execution of _range kernels. If the execution mode is GPU then
- * the kernels will execute as OpenCL code on the GPU device. Otherwise, if the mode is JTP, the kernels will execute as a pool of Java threads on the CPU.
- *
- * Since adding the new Range class this method offers backward compatibility and merely defers to return (execute(Range.create(_range), 1));.
- * @param _range The number of Kernels that we would like to initiate.
- * @returnThe Kernel instance (this) so we can chain calls to put(arr).execute(range).get(arr)
- *
- */
- public synchronized Kernel execute(int _range, int _passes) {
- return (execute(Range.create(_range), _passes));
- }
-
- /**
- * Start execution of globalSize kernels for the given entrypoint.
- *
- * When kernel.execute("entrypoint", globalSize) is invoked, Aparapi will schedule the execution of globalSize kernels. If the execution mode is GPU then
- * the kernels will execute as OpenCL code on the GPU device. Otherwise, if the mode is JTP, the kernels will execute as a pool of Java threads on the CPU.
- *
- * @param _entrypoint is the name of the method we wish to use as the entrypoint to the kernel
- * @param _globalSize The number of Kernels that we would like to initiate.
- * @return The Kernel instance (this) so we can chain calls to put(arr).execute(range).get(arr)
- *
- */
- public synchronized Kernel execute(Entry _entry, Range _range) {
- if (kernelRunner == null) {
- kernelRunner = new KernelRunner(this);
-
- }
- return (kernelRunner.execute(_entry, _range, 1));
- }
-
- /**
- * Start execution of globalSize kernels for the given entrypoint.
- *
- * When kernel.execute("entrypoint", globalSize) is invoked, Aparapi will schedule the execution of globalSize kernels. If the execution mode is GPU then
- * the kernels will execute as OpenCL code on the GPU device. Otherwise, if the mode is JTP, the kernels will execute as a pool of Java threads on the CPU.
- *
- * @param _entrypoint is the name of the method we wish to use as the entrypoint to the kernel
- * @param _globalSize The number of Kernels that we would like to initiate.
- * @return The Kernel instance (this) so we can chain calls to put(arr).execute(range).get(arr)
- *
- */
- public synchronized Kernel execute(String _entrypoint, Range _range) {
- return (execute(_entrypoint, _range, 1));
-
- }
-
- /**
- * Start execution of globalSize kernels for the given entrypoint.
- *
- * When kernel.execute("entrypoint", globalSize) is invoked, Aparapi will schedule the execution of globalSize kernels. If the execution mode is GPU then
- * the kernels will execute as OpenCL code on the GPU device. Otherwise, if the mode is JTP, the kernels will execute as a pool of Java threads on the CPU.
- *
- * @param _entrypoint is the name of the method we wish to use as the entrypoint to the kernel
- * @param _globalSize The number of Kernels that we would like to initiate.
- * @return The Kernel instance (this) so we can chain calls to put(arr).execute(range).get(arr)
- *
- */
- public synchronized Kernel execute(String _entrypoint, Range _range, int _passes) {
- if (kernelRunner == null) {
- kernelRunner = new KernelRunner(this);
-
- }
- return (kernelRunner.execute(_entrypoint, _range, _passes));
-
- }
-
- /**
- * Release any resources associated with this Kernel.
- *
- * When the execution mode is CPU or GPU, Aparapi stores some OpenCL resources in a data structure associated with the kernel instance. The
- * dispose() method must be called to release these resources.
- *
- * If execute(int _globalSize) is called after dispose() is called the results are undefined.
- */
- public synchronized void dispose() {
- if (kernelRunner != null) {
- kernelRunner.dispose();
- kernelRunner = null;
- }
-
- }
-
- /**
- * Return the current execution mode.
- *
- * Before a Kernel executes, this return value will be the execution mode as determined by the setting of
- * the EXECUTION_MODE enumeration. By default, this setting is either GPU
- * if OpenCL is available on the target system, or JTP otherwise. This default setting can be
- * changed by calling setExecutionMode().
- *
- *
- * After a Kernel executes, the return value will be the mode in which the Kernel actually executed.
- *
- * @return The current execution mode.
- *
- * @see #setExecutionMode(EXECUTION_MODE)
- */
- public EXECUTION_MODE getExecutionMode() {
- return (executionMode);
- }
-
- /**
- * Set the execution mode.
- *
- * This should be regarded as a request. The real mode will be determined at runtime based on the availability of OpenCL and the characteristics of the workload.
- *
- * @param _executionMode the requested execution mode.
- *
- * @see #getExecutionMode()
- */
- public void setExecutionMode(EXECUTION_MODE _executionMode) {
- executionMode = _executionMode;
- }
-
- void setFallbackExecutionMode() {
- executionMode = EXECUTION_MODE.getFallbackExecutionMode();
-
- }
-
- final static Map typeToLetterMap = new HashMap();
-
- static {
- // only primitive types for now
- typeToLetterMap.put("double", "D");
- typeToLetterMap.put("float", "F");
- typeToLetterMap.put("int", "I");
- typeToLetterMap.put("long", "J");
- typeToLetterMap.put("boolean", "Z");
- typeToLetterMap.put("byte", "B");
- typeToLetterMap.put("char", "C");
- typeToLetterMap.put("short", "S");
- typeToLetterMap.put("void", "V");
- }
-
- private static String descriptorToReturnTypeLetter(String desc) {
- // find the letter after the closed parenthesis
- return desc.substring(desc.lastIndexOf(')') + 1);
- }
-
- private static String getReturnTypeLetter(Method meth) {
- Class> retClass = meth.getReturnType();
- String strRetClass = retClass.toString();
- String mapping = typeToLetterMap.get(strRetClass);
- // System.out.println("strRetClass = <" + strRetClass + ">, mapping = " + mapping);
- return mapping;
- }
-
- static String getMappedMethodName(MethodReferenceEntry _methodReferenceEntry) {
- String mappedName = null;
- String name = _methodReferenceEntry.getNameAndTypeEntry().getNameUTF8Entry().getUTF8();
- for (Method kernelMethod : Kernel.class.getDeclaredMethods()) {
- if (kernelMethod.isAnnotationPresent(OpenCLMapping.class)) {
- // ultimately, need a way to constrain this based upon signature (to disambiguate abs(float) from abs(int);
- // for Alpha, we will just disambiguate based on the return type
- if (false) {
- System.out.println("kernelMethod is ... " + kernelMethod.toGenericString());
- System.out.println("returnType = " + kernelMethod.getReturnType());
- System.out.println("returnTypeLetter = " + getReturnTypeLetter(kernelMethod));
- System.out.println("kernelMethod getName = " + kernelMethod.getName());
- System.out.println("methRefName = " + name + " descriptor = "
- + _methodReferenceEntry.getNameAndTypeEntry().getDescriptorUTF8Entry().getUTF8());
- System.out
- .println("descToReturnTypeLetter = "
- + descriptorToReturnTypeLetter(_methodReferenceEntry.getNameAndTypeEntry().getDescriptorUTF8Entry()
- .getUTF8()));
- }
- if (_methodReferenceEntry.getNameAndTypeEntry().getNameUTF8Entry().getUTF8().equals(kernelMethod.getName())
- && descriptorToReturnTypeLetter(_methodReferenceEntry.getNameAndTypeEntry().getDescriptorUTF8Entry().getUTF8())
- .equals(getReturnTypeLetter(kernelMethod))) {
- OpenCLMapping annotation = kernelMethod.getAnnotation(OpenCLMapping.class);
- String mapTo = annotation.mapTo();
- if (!mapTo.equals("")) {
- mappedName = mapTo;
- // System.out.println("mapTo = " + mapTo);
- }
- }
- }
- }
- // System.out.println("... in getMappedMethodName, returning = " + mappedName);
- return (mappedName);
- }
-
- static boolean isMappedMethod(MethodReferenceEntry methodReferenceEntry) {
- boolean isMapped = false;
- for (Method kernelMethod : Kernel.class.getDeclaredMethods()) {
- if (kernelMethod.isAnnotationPresent(OpenCLMapping.class)) {
- if (methodReferenceEntry.getNameAndTypeEntry().getNameUTF8Entry().getUTF8().equals(kernelMethod.getName())) {
-
- // well they have the same name ;)
- isMapped = true;
- }
- }
- }
- return (isMapped);
- }
-
- static boolean isOpenCLDelegateMethod(MethodReferenceEntry methodReferenceEntry) {
- boolean isMapped = false;
- for (Method kernelMethod : Kernel.class.getDeclaredMethods()) {
- if (kernelMethod.isAnnotationPresent(OpenCLDelegate.class)) {
- if (methodReferenceEntry.getNameAndTypeEntry().getNameUTF8Entry().getUTF8().equals(kernelMethod.getName())) {
-
- // well they have the same name ;)
- isMapped = true;
- }
- }
- }
- return (isMapped);
- }
-
- static boolean usesAtomic32(MethodReferenceEntry methodReferenceEntry) {
- for (Method kernelMethod : Kernel.class.getDeclaredMethods()) {
- if (kernelMethod.isAnnotationPresent(OpenCLMapping.class)) {
- if (methodReferenceEntry.getNameAndTypeEntry().getNameUTF8Entry().getUTF8().equals(kernelMethod.getName())) {
- OpenCLMapping annotation = kernelMethod.getAnnotation(OpenCLMapping.class);
- return annotation.atomic32();
- }
- }
- }
- return (false);
- }
-
- // For alpha release atomic64 is not supported
- static boolean usesAtomic64(MethodReferenceEntry methodReferenceEntry) {
- //for (java.lang.reflect.Method kernelMethod : Kernel.class.getDeclaredMethods()) {
- // if (kernelMethod.isAnnotationPresent(Kernel.OpenCLMapping.class)) {
- // if (methodReferenceEntry.getNameAndTypeEntry().getNameUTF8Entry().getUTF8().equals(kernelMethod.getName())) {
- // OpenCLMapping annotation = kernelMethod.getAnnotation(Kernel.OpenCLMapping.class);
- // return annotation.atomic64();
- // }
- // }
- //}
- return (false);
- }
-
- // the flag useNullForLocalSize is useful for testing that what we compute for localSize is what OpenCL
- // would also compute if we passed in null. In non-testing mode, we just call execute with the
- // same localSize that we computed in getLocalSizeJNI. We don't want do publicize these of course.
- // GRF we can't access this from test classes without exposing in in javadoc so I left the flag but made the test/set of the flag reflectively
- boolean useNullForLocalSize = false;
-
- // Explicit memory management API's follow
-
- /**
- * For dev purposes (we should remove this for production) allow us to define that this Kernel uses explicit memory management
- * @param _explicit (true if we want explicit memory management)
- */
- public void setExplicit(boolean _explicit) {
- if (kernelRunner == null) {
- kernelRunner = new KernelRunner(this);
-
- }
- kernelRunner.setExplicit(_explicit);
- }
-
- /**
- * For dev purposes (we should remove this for production) determine whether this Kernel uses explicit memory management
- * @return (true if we kernel is using explicit memory management)
- */
- public boolean isExplicit() {
- if (kernelRunner == null) {
- kernelRunner = new KernelRunner(this);
-
- }
- return (kernelRunner.isExplicit());
- }
-
- /**
- * Tag this array so that it is explicitly enqueued before the kernel is executed
- * @param array
- * @return This kernel so that we can use the 'fluent' style API
- */
- public Kernel put(long[] array) {
- if (kernelRunner == null) {
- kernelRunner = new KernelRunner(this);
-
- }
- kernelRunner.put(array);
- return (this);
- }
-
- /**
- * Tag this array so that it is explicitly enqueued before the kernel is executed
- * @param array
- * @return This kernel so that we can use the 'fluent' style API
- */
- public Kernel put(double[] array) {
- if (kernelRunner == null) {
- kernelRunner = new KernelRunner(this);
-
- }
- kernelRunner.put(array);
- return (this);
- }
-
- /**
- * Tag this array so that it is explicitly enqueued before the kernel is executed
- * @param array
- * @return This kernel so that we can use the 'fluent' style API
- */
- public Kernel put(float[] array) {
- if (kernelRunner == null) {
- kernelRunner = new KernelRunner(this);
-
- }
- kernelRunner.put(array);
- return (this);
- }
-
- /**
- * Tag this array so that it is explicitly enqueued before the kernel is executed
- * @param array
- * @return This kernel so that we can use the 'fluent' style API
- */
- public Kernel put(int[] array) {
- if (kernelRunner == null) {
- kernelRunner = new KernelRunner(this);
-
- }
- kernelRunner.put(array);
- return (this);
- }
-
- /**
- * Tag this array so that it is explicitly enqueued before the kernel is executed
- * @param array
- * @return This kernel so that we can use the 'fluent' style API
- */
- public Kernel put(byte[] array) {
- if (kernelRunner == null) {
- kernelRunner = new KernelRunner(this);
-
- }
- kernelRunner.put(array);
- return (this);
- }
-
- /**
- * Tag this array so that it is explicitly enqueued before the kernel is executed
- * @param array
- * @return This kernel so that we can use the 'fluent' style API
- */
- public Kernel put(char[] array) {
- if (kernelRunner == null) {
- kernelRunner = new KernelRunner(this);
- }
- kernelRunner.put(array);
- return (this);
- }
-
- /**
- * Tag this array so that it is explicitly enqueued before the kernel is executed
- * @param array
- * @return This kernel so that we can use the 'fluent' style API
- */
- public Kernel put(boolean[] array) {
- if (kernelRunner == null) {
- kernelRunner = new KernelRunner(this);
- }
- kernelRunner.put(array);
- return (this);
- }
-
- /**
- * Enqueue a request to return this buffer from the GPU. This method blocks until the array is available.
- * @param array
- * @return This kernel so that we can use the 'fluent' style API
- */
- public Kernel get(long[] array) {
- if (kernelRunner == null) {
- kernelRunner = new KernelRunner(this);
-
- }
- kernelRunner.get(array);
- return (this);
- }
-
- /**
- * Enqueue a request to return this buffer from the GPU. This method blocks until the array is available.
- * @param array
- * @return This kernel so that we can use the 'fluent' style API
- */
- public Kernel get(double[] array) {
- if (kernelRunner == null) {
- kernelRunner = new KernelRunner(this);
-
- }
- kernelRunner.get(array);
- return (this);
- }
-
- /**
- * Enqueue a request to return this buffer from the GPU. This method blocks until the array is available.
- * @param array
- * @return This kernel so that we can use the 'fluent' style API
- */
- public Kernel get(float[] array) {
- if (kernelRunner == null) {
- kernelRunner = new KernelRunner(this);
-
- }
- kernelRunner.get(array);
- return (this);
- }
-
- /**
- * Enqueue a request to return this buffer from the GPU. This method blocks until the array is available.
- * @param array
- * @return This kernel so that we can use the 'fluent' style API
- */
- public Kernel get(int[] array) {
- if (kernelRunner == null) {
- kernelRunner = new KernelRunner(this);
-
- }
- kernelRunner.get(array);
- return (this);
- }
-
- /**
- * Enqueue a request to return this buffer from the GPU. This method blocks until the array is available.
- * @param array
- * @return This kernel so that we can use the 'fluent' style API
- */
- public Kernel get(byte[] array) {
- if (kernelRunner == null) {
- kernelRunner = new KernelRunner(this);
-
- }
- kernelRunner.get(array);
- return (this);
- }
-
- /**
- * Enqueue a request to return this buffer from the GPU. This method blocks until the array is available.
- * @param array
- * @return This kernel so that we can use the 'fluent' style API
- */
- public Kernel get(char[] array) {
- if (kernelRunner == null) {
- kernelRunner = new KernelRunner(this);
-
- }
- kernelRunner.get(array);
- return (this);
- }
-
- /**
- * Enqueue a request to return this buffer from the GPU. This method blocks until the array is available.
- * @param array
- * @return This kernel so that we can use the 'fluent' style API
- */
- public Kernel get(boolean[] array) {
- if (kernelRunner == null) {
- kernelRunner = new KernelRunner(this);
-
- }
- kernelRunner.get(array);
- return (this);
- }
-
- /**
- * Get the profiling information from the last successful call to Kernel.execute().
- * @return A list of ProfileInfo records
- */
- public List getProfileInfo() {
- if (kernelRunner == null) {
- kernelRunner = new KernelRunner(this);
- }
- return (kernelRunner.getProfileInfo());
- }
-
- private LinkedHashSet executionModes = EXECUTION_MODE.getDefaultExecutionModes();
-
- private Iterator currentMode = executionModes.iterator();
-
- private EXECUTION_MODE executionMode = currentMode.next();
-
- /**
- * set possible fallback path for execution modes.
- * for example setExecutionFallbackPath(GPU,CPU,JTP) will try to use the GPU
- * if it fails it will fall back to OpenCL CPU and finally it will try JTP.
- */
- public void addExecutionModes(EXECUTION_MODE... platforms) {
- executionModes.addAll(Arrays.asList(platforms));
- currentMode = executionModes.iterator();
- executionMode = currentMode.next();
- }
-
- /**
- * @return is there another execution path we can try
- */
- public boolean hasNextExecutionMode() {
- return currentMode.hasNext();
- }
-
- /**
- * try the next execution path in the list if there aren't any more than give up
- */
- public void tryNextExecutionMode() {
- if (currentMode.hasNext()) {
- executionMode = currentMode.next();
- }
- }
-
-}
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/KernelRunner.java b/com.amd.aparapi/src/java/com/amd/aparapi/KernelRunner.java
index 8091cf52..a719853c 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/KernelRunner.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/KernelRunner.java
@@ -37,22 +37,38 @@ to national security controls as identified on the Commerce Control List (curren
*/
package com.amd.aparapi;
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+//import java.lang.invoke.InnerClassLambdaMetafactory;
+import java.lang.reflect.Field;
+import java.lang.reflect.Method;
import java.lang.reflect.Array;
import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
+import java.util.Arrays;
+import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.concurrent.BrokenBarrierException;
import java.util.concurrent.CyclicBarrier;
+import java.util.function.IntBlock;
import java.util.logging.Level;
import java.util.logging.Logger;
+import java.util.concurrent.CyclicBarrier;
+import java.util.concurrent.BrokenBarrierException;
+
+import com.amd.aparapi.ClassModel.ConstantPool.MethodEntry;
+import com.amd.aparapi.ClassModel.ConstantPool.MethodReferenceEntry;
+import com.amd.aparapi.InstructionSet.AccessField;
+import com.amd.aparapi.InstructionSet.MethodCall;
+import com.amd.aparapi.InstructionSet.VirtualMethodCall;
import com.amd.aparapi.InstructionSet.TypeSpec;
-import com.amd.aparapi.Kernel.EXECUTION_MODE;
+//import com.amd.aparapi.Kernel.EXECUTION_MODE;
/**
* The class is responsible for executing Kernel implementations.
@@ -214,7 +230,7 @@ class KernelRunner{
*
* @author gfrost
*/
- @Annotations.Experimental @UsedByJNICode public static final int ARG_LOCAL = 1 << 11;
+ @UsedByJNICode public static final int ARG_LOCAL = 1 << 11;
/**
* This 'bit' indicates that a particular KernelArg resides in global memory in the generated OpenCL code.
@@ -226,7 +242,7 @@ class KernelRunner{
*
* @author gfrost
*/
- @Annotations.Experimental @UsedByJNICode public static final int ARG_GLOBAL = 1 << 12;
+ @UsedByJNICode public static final int ARG_GLOBAL = 1 << 12;
/**
* This 'bit' indicates that a particular KernelArg resides in constant memory in the generated OpenCL code.
@@ -238,7 +254,7 @@ class KernelRunner{
*
* @author gfrost
*/
- @Annotations.Experimental @UsedByJNICode public static final int ARG_CONSTANT = 1 << 13;
+ @UsedByJNICode public static final int ARG_CONSTANT = 1 << 13;
/**
* This 'bit' indicates that a particular KernelArg has it's length reference, in which case a synthetic arg is passed (name mangled) to the OpenCL kernel.
@@ -484,9 +500,17 @@ static class KernelArg{
@UsedByJNICode public Object array;
/**
- * Field in Kernel class corresponding to this arg
+ * Field in fieldHolder object corresponding to this arg
*/
@UsedByJNICode public Field field;
+
+ /**
+ * Object that holds the field corresponding to this arg.
+ * For lambda use, args come from Block, KernelRunner and the
+ * lambda's own object
+ */
+ @UsedByJNICode public Object fieldHolder;
+
/**
* The byte array for obj conversion passed to opencl
@@ -515,10 +539,176 @@ static class KernelArg{
*/
int primitiveSize;
}
+
+
+ enum MappedMethod {
+ ACOS("acos", "D"),
+ COS("cos", "D"),
+ MAX_I("max", "I"),
+ MAX_J("max", "J"),
+ MAX_F("max", "F"),
+ MAX_D("max", "D"),
+ MIN_I("min", "I"),
+ SQRT("sqrt", "D");
+
+ private String mapping;
+ private String returnType;
+
+ private MappedMethod(String name, String ret) {
+ mapping = name;
+ returnType = ret;
+ }
+ public String getName() { return mapping; }
+ public String getReturnType() { return returnType; }
+ };
+
+ static boolean isMappedMethod(MethodReferenceEntry methodReferenceEntry) {
+ boolean isMapped = false;
+ for (MappedMethod mappedMethod : MappedMethod.values()) {
+ if (methodReferenceEntry.getNameAndTypeEntry().getNameUTF8Entry().getUTF8().equals(mappedMethod.getName())) {
+
+ // well they have the same name ;)
+ isMapped = true;
+ }
+ }
+
+ return (isMapped);
+ }
+
+ private static String descriptorToReturnTypeLetter(String desc) {
+ // take everything after the last closing parenthesis (the return type part of the descriptor)
+ return desc.substring(desc.lastIndexOf(')') + 1);
+ }
+
+ static String getMappedMethodName(MethodReferenceEntry _methodReferenceEntry) {
+ String mappedName = null;
+ String name = _methodReferenceEntry.getNameAndTypeEntry().getNameUTF8Entry().getUTF8();
+ for (MappedMethod mappedMethod : MappedMethod.values()) {
+ if (_methodReferenceEntry.getNameAndTypeEntry().getNameUTF8Entry().getUTF8().equals(mappedMethod.getName())
+ && descriptorToReturnTypeLetter(_methodReferenceEntry.getNameAndTypeEntry().getDescriptorUTF8Entry().getUTF8())
+ .equals(mappedMethod.getReturnType())) {
+ String mapTo = mappedMethod.getName();
+ if (!mapTo.equals("")) {
+ mappedName = mapTo;
+ }
+ }
+ }
+ if (logger.isLoggable(Level.FINE)) {
+ logger.fine("Selected mapped method " + mappedName);
+ }
+ return (mappedName);
+ }
+
+
+ class LambdaKernelCall {
+ IntBlock block;
+ String lambdaKernelSource;
+ String lambdaMethodName;
+ Field[] lambdaCapturedFields;
+ Object[] lambdaCapturedArgs;
+ String lambdaMethodSignature;
+
+ public String getLambdaKernelSource() { return lambdaKernelSource; }
+ public Object getLambdaKernelThis() { return lambdaCapturedArgs[0]; }
+ public String getLambdaMethodName() { return lambdaMethodName; }
+ public String getLambdaMethodSignature() { return lambdaMethodSignature; }
+ //public Object[] getLambdaCapturedArgs() { return lambdaCapturedArgs; }
+ //public Object[] getLambdaReferenceArgs() { return lambdaReferencedFields; }
+
+ public String toString() { return getLambdaKernelThis().getClass().getName() + " " +
+ getLambdaMethodName() + " " + getLambdaMethodSignature() + " from block: " +
+ block;
+ }
+
+ public Field[] getLambdaCapturedFields() { return lambdaCapturedFields; }
+
+ public LambdaKernelCall(IntBlock _block) throws AparapiException {
+ block = _block;
+
+ // Try to do reflection on the block
+ Class bc = block.getClass();
+ System.out.println("# block class:" + bc);
+
+ // The first field is "this" for the lambda call if the lambda
+ // is not static, the later fields are captured values which will
+ // become lambda call parameters
+ Field[] bcf = bc.getDeclaredFields();
+ lambdaCapturedArgs = new Object[bcf.length];
+
+ Field[] allBlockClassFields = block.getClass().getDeclaredFields();
+
+ Field[] capturedFieldsWithoutThis = new Field[ allBlockClassFields.length - 1 ];
+ for(int i=1; i acceptCallSites = acceptModel.getMethodCalls();
+ assert acceptCallSites.size() == 1 : "Should only have one call site in this method";
+
+
+ //VirtualMethodCall vCall = (VirtualMethodCall) acceptCallSites.get(0);
+ MethodCall vCall = acceptCallSites.get(0);
+ MethodEntry lambdaCallTarget = vCall.getConstantPoolMethodEntry();
+ lambdaMethodName = lambdaCallTarget.getNameAndTypeEntry().getNameUTF8Entry().getUTF8();
+ lambdaMethodSignature = lambdaCallTarget.getNameAndTypeEntry().getDescriptorUTF8Entry().getUTF8();
+
+ System.out.println("call target = " +
+ lambdaCallTarget.getClassEntry().getNameUTF8Entry().getUTF8() +
+ " " + lambdaMethodName + " " + lambdaMethodSignature);
+
+ String lcNameWithSlashes = lc.getName().replace('.', '/');
+ assert lcNameWithSlashes.equals(lambdaCallTarget.getClassEntry().getNameUTF8Entry().getUTF8()) :
+ "lambda target class name does not match arg in block object";
+
+ }
+ }
+
+ private LambdaKernelCall lambdaKernelCall;
private long jniContextHandle = 0;
- private Kernel kernel;
+// private Kernel kernel;
private Entrypoint entryPoint;
@@ -529,20 +719,35 @@ static class KernelArg{
*
* @param _kernel
*/
- KernelRunner(Kernel _kernel) {
- kernel = _kernel;
-
+// KernelRunner(Kernel _kernel) {
+// kernel = _kernel;
+//
+// }
+
+// KernelRunner() {
+// kernel = null;
+// }
+
+ KernelRunner(IntBlock block) throws AparapiException {
+ //kernel = null;
+ lambdaKernelCall = new LambdaKernelCall(block);
+ if (logger.isLoggable(Level.INFO)) {
+ logger.info("New lambda call is = " + lambdaKernelCall);
+ }
}
-
+
/**
* Kernel.dispose() delegates to KernelRunner.dispose() which delegates to disposeJNI() to actually close JNI data structures.
*
* @see KernelRunner#disposeJNI()
*/
void dispose() {
- if (kernel.getExecutionMode().isOpenCL()) {
- disposeJNI(jniContextHandle);
- }
+
+ // Might need to revisit this for Superbowl
+
+// if (kernel.getExecutionMode().isOpenCL()) {
+// disposeJNI(jniContextHandle);
+// }
}
/**
@@ -556,12 +761,14 @@ void dispose() {
* @param maxJTPLocalSize
* @return
*/
- @Annotations.DocMe private native static synchronized long initJNI(Kernel _kernel, OpenCLDevice device, int _flags);
+ private native static synchronized long initJNI(Object _kernel, OpenCLDevice device, int _flags);
private native long buildProgramJNI(long _jniContextHandle, String _source);
private native int setArgsJNI(long _jniContextHandle, KernelArg[] _args, int argc);
+ private native int updateLambdaBlockJNI(long _jniContextHandle, Object newHolder, int argc);
+
private native int runKernelJNI(long _jniContextHandle, Range _range, boolean _needSync, int _passes);
private native int disposeJNI(long _jniContextHandle);
@@ -670,270 +877,6 @@ boolean hasGLSharingSupport() {
return capabilitiesSet.contains(CL_KHR_GL_SHARING);
}
- /**
- * Execute using a Java thread pool. Either because we were explicitly asked to do so, or because we 'fall back' after discovering an OpenCL issue.
- *
- * @param _globalSize
- * The globalSize requested by the user (via Kernel.execute(globalSize))
- * @param _passes
- * The # of passes requested by the user (via Kernel.execute(globalSize, passes)). Note this is usually defaulted to 1 via Kernel.execute(globalSize).
- * @return
- */
- private long executeJava(final Range _range, final int _passes) {
- if (logger.isLoggable(Level.FINE)) {
- logger.fine("executeJava: range = " + _range);
- }
-
- if (kernel.getExecutionMode().equals(EXECUTION_MODE.SEQ)) {
-
- /**
- * SEQ mode is useful for testing trivial logic, but kernels which use SEQ mode cannot be used if the
- * product of localSize(0..3) is >1. So we can use multi-dim ranges but only if the local size is 1 in all dimensions.
- *
- * As a result of this barrier is only ever 1 work item wide and probably should be turned into a no-op.
- *
- * So we need to check if the range is valid here. If not we have no choice but to punt.
- */
- if (_range.getLocalSize(0) * _range.getLocalSize(1) * _range.getLocalSize(2) > 1) {
- throw new IllegalStateException("Can't run range with group size >1 sequentially. Barriers would deadlock!");
- }
-
- Kernel kernelClone = (Kernel) kernel.clone();
- kernelClone.range = _range;
- kernelClone.groupId[0] = 0;
- kernelClone.groupId[1] = 0;
- kernelClone.groupId[2] = 0;
- kernelClone.localId[0] = 0;
- kernelClone.localId[1] = 0;
- kernelClone.localId[2] = 0;
- kernelClone.localBarrier = new CyclicBarrier(1);
- for (kernelClone.passId = 0; kernelClone.passId < _passes; kernelClone.passId++) {
-
- if (_range.getDims() == 1) {
- for (int id = 0; id < _range.getGlobalSize(0); id++) {
- kernelClone.globalId[0] = id;
- kernelClone.run();
- }
- } else if (_range.getDims() == 2) {
- for (int x = 0; x < _range.getGlobalSize(0); x++) {
- kernelClone.globalId[0] = x;
- for (int y = 0; y < _range.getGlobalSize(1); y++) {
- kernelClone.globalId[1] = y;
- kernelClone.run();
- }
- }
- } else if (_range.getDims() == 3) {
- for (int x = 0; x < _range.getGlobalSize(0); x++) {
- kernelClone.globalId[0] = x;
- for (int y = 0; y < _range.getGlobalSize(1); y++) {
- kernelClone.globalId[1] = y;
- for (int z = 0; z < _range.getGlobalSize(2); z++) {
- kernelClone.globalId[2] = z;
- kernelClone.run();
- }
- kernelClone.run();
- }
- }
- }
- }
-
- } else {
-
- final int threads = _range.getLocalSize(0) * _range.getLocalSize(1) * _range.getLocalSize(2);
- final int globalGroups = _range.getNumGroups(0) * _range.getNumGroups(1) * _range.getNumGroups(2);
- final Thread threadArray[] = new Thread[threads];
- /**
- * This joinBarrier is the barrier that we provide for the kernel threads to rendezvous with the current dispatch thread.
- * So this barrier is threadCount+1 wide (the +1 is for the dispatch thread)
- */
- final CyclicBarrier joinBarrier = new CyclicBarrier(threads + 1);
-
- /**
- * This localBarrier is only ever used by the kernels. If the kernel does not use the barrier the threads
- * can get out of sync, we promised nothing in JTP mode.
- *
- * As with OpenCL all threads within a group must wait at the barrier or none. It is a user error (possible deadlock!)
- * if the barrier is in a conditional that is only executed by some of the threads within a group.
- *
- * Kernel developer must understand this.
- *
- * This barrier is threadCount wide. We never hit the barrier from the dispatch thread.
- */
- final CyclicBarrier localBarrier = new CyclicBarrier(threads);
- for (int passId = 0; passId < _passes; passId++) {
-
- /**
- * Note that we emulate OpenCL by creating one thread per localId (across the group).
- *
- * So threadCount == range.getLocalSize(0)*range.getLocalSize(1)*range.getLocalSize(2);
- *
- * For a 1D range of 12 groups of 4 we create 4 threads. One per localId(0).
- *
- * We also clone the kernel 4 times. One per thread.
- *
- * We create local barrier which has a width of 4
- *
- * Thread-0 handles localId(0) (global 0,4,8)
- * Thread-1 handles localId(1) (global 1,5,7)
- * Thread-2 handles localId(2) (global 2,6,10)
- * Thread-3 handles localId(3) (global 3,7,11)
- *
- * This allows all threads to synchronize using the local barrier.
- *
- * Initially the use of local buffers seems broken as the buffers appears to be per Kernel.
- * Thankfully Kernel.clone() performs a shallow clone of all buffers (local and global)
- * So each of the cloned kernels actually still reference the same underlying local/global buffers.
- *
- * If the kernel uses local buffers but does not use barriers then it is possible for different groups
- * to see mutations from each other (unlike OpenCL), however if the kernel does not us barriers then it
- * cannot assume any coherence in OpenCL mode either (the failure mode will be different but still wrong)
- *
- * So even JTP mode use of local buffers will need to use barriers. Not for the same reason as OpenCL but to keep groups in lockstep.
- *
- **/
-
- for (int id = 0; id < threads; id++) {
- final int threadId = id;
-
- /**
- * We clone one kernel for each thread.
- *
- * They will all share references to the same range, localBarrier and global/local buffers because the clone is shallow.
- * We need clones so that each thread can assign 'state' (localId/globalId/groupId) without worrying
- * about other threads.
- */
- final Kernel kernelClone = (Kernel) kernel.clone();
- kernelClone.range = _range;
- kernelClone.localBarrier = localBarrier;
- kernelClone.passId = passId;
-
- threadArray[threadId] = new Thread(new Runnable(){
- @Override public void run() {
- for (int globalGroupId = 0; globalGroupId < globalGroups; globalGroupId++) {
-
- if (_range.getDims() == 1) {
- kernelClone.localId[0] = threadId % _range.getLocalSize(0);
- kernelClone.globalId[0] = threadId + globalGroupId * threads;
- kernelClone.groupId[0] = globalGroupId;
- } else if (_range.getDims() == 2) {
-
- /**
- * Consider a 12x4 grid of 4*2 local groups
- *