diff --git a/com.amd.aparapi.jni/build.xml b/com.amd.aparapi.jni/build.xml
index 1e4912b8..44516cc9 100644
--- a/com.amd.aparapi.jni/build.xml
+++ b/com.amd.aparapi.jni/build.xml
@@ -479,6 +479,8 @@ First consider editing the properties in build.properties
+
+
@@ -540,6 +542,7 @@ First consider editing the properties in build.properties
+
@@ -564,6 +567,7 @@ First consider editing the properties in build.properties
+
@@ -575,6 +579,7 @@ First consider editing the properties in build.properties
+
@@ -601,6 +606,7 @@ First consider editing the properties in build.properties
+
@@ -614,6 +620,7 @@ First consider editing the properties in build.properties
+
@@ -657,6 +664,7 @@ First consider editing the properties in build.properties
+
diff --git a/com.amd.aparapi.jni/src/cpp/runKernel/Aparapi.cpp b/com.amd.aparapi.jni/src/cpp/runKernel/Aparapi.cpp
index 142fb19c..277cbe94 100644
--- a/com.amd.aparapi.jni/src/cpp/runKernel/Aparapi.cpp
+++ b/com.amd.aparapi.jni/src/cpp/runKernel/Aparapi.cpp
@@ -45,6 +45,7 @@
#include "Config.h"
#include "ProfileInfo.h"
#include "ArrayBuffer.h"
+#include "AparapiBuffer.h"
#include "CLHelper.h"
#include "List.h"
#include
@@ -52,7 +53,8 @@
//compiler dependant code
/**
- * calls either clEnqueueMarker or clEnqueueMarkerWithWaitList depending on the version of OpenCL installed.
+ * calls either clEnqueueMarker or clEnqueueMarkerWithWaitList
+ * depending on the version of OpenCL installed.
* conveiniece function so we don't have to have #ifdefs all over the code
*/
int enqueueMarker(cl_command_queue commandQueue, cl_event* firstEvent) {
@@ -240,7 +242,10 @@ jint updateNonPrimitiveReferences(JNIEnv *jenv, jobject jobj, JNIContext* jniCon
if (config->isVerbose()){
fprintf(stderr, "got type for %s: %08x\n", arg->name, arg->type);
}
- if (!arg->isPrimitive()) {
+
+ //this won't be a problem with the aparapi buffers because
+ //we need to copy them every time anyway
+ if (!arg->isPrimitive() && !arg->isAparapiBuffer()) {
// Following used for all primitive arrays, object arrays and nio Buffers
jarray newRef = (jarray)jenv->GetObjectField(arg->javaArg, KernelArg::javaArrayFieldID);
if (config->isVerbose()){
@@ -326,19 +331,8 @@ void profileFirstRun(JNIContext* jniContext) {
}
}
-/**
- * create a new variable in OpenCL for the object, and sets the kernel arguement.
- * currently the only objects supported are arrays.
- *
- * @param jenv the java environment
- * @param jniContext the context we got from java
- * @param arg the argument we're passing to opencl
- * @param argPos out: the position of arg in the opencl argument list
- * @param argIdx the position of arg in the argument array
- *
- * @throws CLException
- */
-void updateObject(JNIEnv* jenv, JNIContext* jniContext, KernelArg* arg, int& argPos, int argIdx) {
+
+void updateArray(JNIEnv* jenv, JNIContext* jniContext, KernelArg* arg, int& argPos, int argIdx) {
cl_int status = CL_SUCCESS;
// if either this is the first run or user changed input array
@@ -373,6 +367,7 @@ void updateObject(JNIEnv* jenv, JNIContext* jniContext, KernelArg* arg, int& arg
// Add the array length if needed
if (arg->usesArrayLength()) {
+ argPos++;
arg->syncJavaArrayLength(jenv);
status = clSetKernelArg(jniContext->kernel, argPos, sizeof(jint), &(arg->arrayBuffer->length));
@@ -381,10 +376,52 @@ void updateObject(JNIEnv* jenv, JNIContext* jniContext, KernelArg* arg, int& arg
if (config->isVerbose()){
fprintf(stderr, "runKernel arg %d %s, length = %d\n", argIdx, arg->name, arg->arrayBuffer->length);
}
- argPos++;
}
}
+void updateBuffer(JNIEnv* jenv, JNIContext* jniContext, KernelArg* arg, int& argPos, int argIdx) {
+
+ AparapiBuffer* buffer = arg->aparapiBuffer;
+ cl_int status = CL_SUCCESS;
+ cl_uint mask = CL_MEM_USE_HOST_PTR;
+ if (arg->isReadByKernel() && arg->isMutableByKernel()) mask |= CL_MEM_READ_WRITE;
+ else if (arg->isReadByKernel() && !arg->isMutableByKernel()) mask |= CL_MEM_READ_ONLY;
+ else if (arg->isMutableByKernel()) mask |= CL_MEM_WRITE_ONLY;
+ buffer->memMask = mask;
+
+ buffer->mem = clCreateBuffer(jniContext->context, buffer->memMask,
+ buffer->lengthInBytes, buffer->data, &status);
+
+ if(status != CL_SUCCESS) throw CLException(status,"clCreateBuffer");
+
+ if (config->isTrackingOpenCLResources()){
+ memList.add(buffer->mem, __LINE__, __FILE__);
+ }
+
+ status = clSetKernelArg(jniContext->kernel, argPos, sizeof(cl_mem), (void *)&(buffer->mem));
+ if(status != CL_SUCCESS) throw CLException(status,"clSetKernelArg (buffer)");
+
+ // Add the array length if needed
+ if (arg->usesArrayLength()) {
+
+ for(int i = 0; i < buffer->numDims; i++) {
+ argPos++;
+ status = clSetKernelArg(jniContext->kernel, argPos, sizeof(cl_uint), &(buffer->lens[i]));
+ if(status != CL_SUCCESS) throw CLException(status,"clSetKernelArg (buffer length)");
+ if (config->isVerbose()){
+ fprintf(stderr, "runKernel arg %d %s, length = %d\n", argIdx, arg->name, buffer->lens[i]);
+ }
+ argPos++;
+ status = clSetKernelArg(jniContext->kernel, argPos, sizeof(cl_uint), &(buffer->dims[i]));
+ if(status != CL_SUCCESS) throw CLException(status,"clSetKernelArg (buffer dimension)");
+ if (config->isVerbose()){
+ fprintf(stderr, "runKernel arg %d %s, dim = %d\n", argIdx, arg->name, buffer->dims[i]);
+ }
+ }
+ }
+}
+
+
/**
* manages the memory of KernelArgs that are object. i.e. handels pinning, and moved objects.
* currently the only objects supported are arrays.
@@ -398,6 +435,14 @@ void updateObject(JNIEnv* jenv, JNIContext* jniContext, KernelArg* arg, int& arg
* @throws CLException
*/
void processObject(JNIEnv* jenv, JNIContext* jniContext, KernelArg* arg, int& argPos, int argIdx) {
+ if(arg->isArray()) {
+ processArray(jenv, jniContext, arg, argPos, argIdx);
+ } else if(arg->isAparapiBuffer()) {
+ processBuffer(jenv, jniContext, arg, argPos, argIdx);
+ }
+}
+
+void processArray(JNIEnv* jenv, JNIContext* jniContext, KernelArg* arg, int& argPos, int argIdx) {
cl_int status = CL_SUCCESS;
@@ -452,7 +497,7 @@ void processObject(JNIEnv* jenv, JNIContext* jniContext, KernelArg* arg, int& ar
arg->arrayBuffer->mem = (cl_mem)0;
}
- updateObject(jenv, jniContext, arg, argPos, argIdx);
+ updateArray(jenv, jniContext, arg, argPos, argIdx);
} else {
// Keep the arg position in sync if no updates were required
@@ -463,6 +508,50 @@ void processObject(JNIEnv* jenv, JNIContext* jniContext, KernelArg* arg, int& ar
}
+void processBuffer(JNIEnv* jenv, JNIContext* jniContext, KernelArg* arg, int& argPos, int argIdx) {
+
+ cl_int status = CL_SUCCESS;
+
+ if (config->isProfilingEnabled()){
+ arg->aparapiBuffer->read.valid = false;
+ arg->aparapiBuffer->write.valid = false;
+ }
+
+ if (config->isVerbose()) {
+ fprintf(stderr, "runKernel: arrayOrBuf addr=%p, ref.mem=%p\n",
+ arg->aparapiBuffer->data,
+ arg->aparapiBuffer->mem);
+ fprintf(stderr, "at memory addr %p, contents: ", arg->aparapiBuffer->data);
+ unsigned char *pb = (unsigned char *) arg->aparapiBuffer->data;
+ for (int k=0; k<8; k++) {
+ fprintf(stderr, "%02x ", pb[k]);
+ }
+ fprintf(stderr, "\n" );
+ }
+
+ if (config->isVerbose()){
+ if (arg->isExplicit() && arg->isExplicitWrite()){
+ fprintf(stderr, "explicit write of %s\n", arg->name);
+ }
+ }
+
+ if (arg->aparapiBuffer->mem != 0) {
+ if (config->isTrackingOpenCLResources()) {
+ memList.remove((cl_mem)arg->aparapiBuffer->mem, __LINE__, __FILE__);
+ }
+ status = clReleaseMemObject((cl_mem)arg->aparapiBuffer->mem);
+ //fprintf(stdout, "dispose arg %d %0lx\n", i, arg->aparapiBuffer->mem);
+
+ //this needs to be reported, but we can still keep going
+ CLException::checkCLError(status, "clReleaseMemObject()");
+
+ arg->aparapiBuffer->mem = (cl_mem)0;
+ }
+
+ updateBuffer(jenv, jniContext, arg, argPos, argIdx);
+
+}
+
/**
* keeps track of write events for KernelArgs.
@@ -487,8 +576,13 @@ void updateWriteEvents(JNIEnv* jenv, JNIContext* jniContext, KernelArg* arg, int
jniContext->writeEventArgs[writeEventCount] = argIdx;
}
- status = clEnqueueWriteBuffer(jniContext->commandQueue, arg->arrayBuffer->mem, CL_FALSE, 0,
+ if(arg->isArray()) {
+ status = clEnqueueWriteBuffer(jniContext->commandQueue, arg->arrayBuffer->mem, CL_FALSE, 0,
arg->arrayBuffer->lengthInBytes, arg->arrayBuffer->addr, 0, NULL, &(jniContext->writeEvents[writeEventCount]));
+ } else if(arg->isAparapiBuffer()) {
+ status = clEnqueueWriteBuffer(jniContext->commandQueue, arg->aparapiBuffer->mem, CL_FALSE, 0,
+ arg->aparapiBuffer->lengthInBytes, arg->aparapiBuffer->data, 0, NULL, &(jniContext->writeEvents[writeEventCount]));
+ }
if(status != CL_SUCCESS) throw CLException(status,"clEnqueueWriteBuffer");
if (config->isTrackingOpenCLResources()){
@@ -503,6 +597,7 @@ void updateWriteEvents(JNIEnv* jenv, JNIContext* jniContext, KernelArg* arg, int
}
}
+
/**
* sets the opencl kernel arguement for local args.
*
@@ -514,7 +609,7 @@ void updateWriteEvents(JNIEnv* jenv, JNIContext* jniContext, KernelArg* arg, int
*
* @throws CLException
*/
-void processLocal(JNIEnv* jenv, JNIContext* jniContext, KernelArg* arg, int& argPos, int argIdx) {
+void processLocalArray(JNIEnv* jenv, JNIContext* jniContext, KernelArg* arg, int& argPos, int argIdx) {
cl_int status = CL_SUCCESS;
// what if local buffer size has changed? We need a check for resize here.
@@ -534,7 +629,6 @@ void processLocal(JNIEnv* jenv, JNIContext* jniContext, KernelArg* arg, int& arg
if(status != CL_SUCCESS) throw CLException(status,"clSetKernelArg (array length)");
- argPos++;
}
} else {
// Keep the arg position in sync if no updates were required
@@ -544,6 +638,53 @@ void processLocal(JNIEnv* jenv, JNIContext* jniContext, KernelArg* arg, int& arg
}
}
+/**
+ * sets the opencl kernel arguement for local args.
+ *
+ * @param jenv the java envrionment
+ * @param jniContext the context we got from java
+ * @param arg the KernelArg to create a write event for
+ * @param argPos out: the position of arg in the opencl argument list
+ * @param argIdx the position of arg in the argument array
+ *
+ * @throws CLException
+ */
+void processLocalBuffer(JNIEnv* jenv, JNIContext* jniContext, KernelArg* arg, int& argPos, int argIdx) {
+
+ cl_int status = CL_SUCCESS;
+ // what if local buffer size has changed? We need a check for resize here.
+ if (jniContext->firstRun) {
+ status = arg->setLocalBufferArg(jenv, argIdx, argPos, config->isVerbose());
+ if(status != CL_SUCCESS) throw CLException(status,"clSetKernelArg() (local)");
+
+ // Add the array length if needed
+ if (arg->usesArrayLength()) {
+ arg->syncJavaArrayLength(jenv);
+
+ for(int i = 0; i < arg->aparapiBuffer->numDims; i++)
+ {
+ int length = arg->aparapiBuffer->lens[i];
+ status = clSetKernelArg(jniContext->kernel, argPos, sizeof(jint), &length);
+ if (config->isVerbose()){
+ fprintf(stderr, "runKernel arg %d %s, javaArrayLength = %d\n", argIdx, arg->name, length);
+ }
+ if(status != CL_SUCCESS) throw CLException(status,"clSetKernelArg (array length)");
+ }
+ }
+ } else {
+ // Keep the arg position in sync if no updates were required
+ if (arg->usesArrayLength()) {
+ argPos += arg->aparapiBuffer->numDims;
+ }
+ }
+}
+
+void processLocal(JNIEnv* jenv, JNIContext* jniContext, KernelArg* arg, int& argPos, int argIdx) {
+ if(arg->isArray()) processLocalArray(jenv,jniContext,arg,argPos,argIdx);
+ if(arg->isAparapiBuffer()) processLocalBuffer(jenv,jniContext,arg,argPos,argIdx);
+}
+
+
/**
* processes all of the arguments for the OpenCL Kernel that we got from the JNIContext
*
@@ -755,7 +896,7 @@ void enqueueKernel(JNIContext* jniContext, Range& range, int passes, int argPos,
*
* @throws CLException
*/
-int getReadEvents(JNIContext* jniContext) {
+int getReadEvents(JNIEnv* jenv, JNIContext* jniContext) {
int readEventCount = 0;
@@ -774,8 +915,17 @@ int getReadEvents(JNIContext* jniContext) {
fprintf(stderr, "reading buffer %d %s\n", i, arg->name);
}
- status = clEnqueueReadBuffer(jniContext->commandQueue, arg->arrayBuffer->mem, CL_FALSE, 0,
- arg->arrayBuffer->lengthInBytes,arg->arrayBuffer->addr , 1, jniContext->executeEvents, &(jniContext->readEvents[readEventCount]));
+ if(arg->isArray()) {
+ status = clEnqueueReadBuffer(jniContext->commandQueue, arg->arrayBuffer->mem,
+ CL_FALSE, 0, arg->arrayBuffer->lengthInBytes, arg->arrayBuffer->addr, 1,
+ jniContext->executeEvents, &(jniContext->readEvents[readEventCount]));
+ } else if(arg->isAparapiBuffer()) {
+ status = clEnqueueReadBuffer(jniContext->commandQueue, arg->aparapiBuffer->mem,
+ CL_TRUE, 0, arg->aparapiBuffer->lengthInBytes, arg->aparapiBuffer->data, 1,
+ jniContext->executeEvents, &(jniContext->readEvents[readEventCount]));
+ arg->aparapiBuffer->inflate(jenv, arg);
+ }
+
if (status != CL_SUCCESS) throw CLException(status, "clEnqueueReadBuffer()");
if (config->isTrackingOpenCLResources()){
@@ -932,7 +1082,7 @@ JNI_JAVA(jint, KernelRunnerJNI, runKernelJNI)
int writeEventCount = 0;
processArgs(jenv, jniContext, argPos, writeEventCount);
enqueueKernel(jniContext, range, passes, argPos, writeEventCount);
- int readEventCount = getReadEvents(jniContext);
+ int readEventCount = getReadEvents(jenv, jniContext);
waitForReadEvents(jniContext, readEventCount, passes);
checkEvents(jenv, jniContext, writeEventCount);
}
@@ -1144,7 +1294,7 @@ KernelArg* getArgForBuffer(JNIEnv* jenv, JNIContext* jniContext, jobject buffer)
if (jniContext != NULL){
for (jint i = 0; returnArg == NULL && i < jniContext->argc; i++){
KernelArg *arg = jniContext->args[i];
- if (arg->isArray()){
+ if (arg->isArray()) {
jboolean isSame = jenv->IsSameObject(buffer, arg->arrayBuffer->javaArray);
if (isSame){
if (config->isVerbose()){
@@ -1156,6 +1306,18 @@ KernelArg* getArgForBuffer(JNIEnv* jenv, JNIContext* jniContext, jobject buffer)
fprintf(stderr, "unmatched arg '%s'\n", arg->name);
}
}
+ } else if(arg->isAparapiBuffer()) {
+ jboolean isSame = jenv->IsSameObject(buffer, arg->aparapiBuffer->getJavaObject(jenv,arg));
+ if (isSame) {
+ if (config->isVerbose()) {
+ fprintf(stderr, "matched arg '%s'\n", arg->name);
+ }
+ returnArg = arg;
+ } else {
+ if (config->isVerbose()) {
+ fprintf(stderr, "unmatched arg '%s'\n", arg->name);
+ }
+ }
}
}
if (returnArg == NULL){
@@ -1181,44 +1343,79 @@ JNI_JAVA(jint, KernelRunnerJNI, getJNI)
if (config->isVerbose()){
fprintf(stderr, "explicitly reading buffer %s\n", arg->name);
}
- arg->pin(jenv);
-
- try {
- status = clEnqueueReadBuffer(jniContext->commandQueue, arg->arrayBuffer->mem,
- CL_FALSE, 0,
- arg->arrayBuffer->lengthInBytes,
- arg->arrayBuffer->addr , 0, NULL,
- &jniContext->readEvents[0]);
- if (config->isVerbose()){
- fprintf(stderr, "explicitly read %s ptr=%lx len=%d\n",
- arg->name, (unsigned long)arg->arrayBuffer->addr,
- arg->arrayBuffer->lengthInBytes );
- }
- if (status != CL_SUCCESS) throw CLException(status, "clEnqueueReadBuffer()");
+ if(arg->isArray()) {
+ arg->pin(jenv);
+
+ try {
+ status = clEnqueueReadBuffer(jniContext->commandQueue, arg->arrayBuffer->mem,
+ CL_FALSE, 0,
+ arg->arrayBuffer->lengthInBytes,
+ arg->arrayBuffer->addr , 0, NULL,
+ &jniContext->readEvents[0]);
+ if (config->isVerbose()){
+ fprintf(stderr, "explicitly read %s ptr=%lx len=%d\n",
+ arg->name, (unsigned long)arg->arrayBuffer->addr,
+ arg->arrayBuffer->lengthInBytes );
+ }
+ if (status != CL_SUCCESS) throw CLException(status, "clEnqueueReadBuffer()");
+
+ status = clWaitForEvents(1, jniContext->readEvents);
+ if (status != CL_SUCCESS) throw CLException(status, "clWaitForEvents");
+
+ if (config->isProfilingEnabled()) {
+ status = profile(&arg->arrayBuffer->read, &jniContext->readEvents[0], 0,
+ arg->name, jniContext->profileBaseTime);
+ if (status != CL_SUCCESS) throw CLException(status, "profile ");
+ }
- status = clWaitForEvents(1, jniContext->readEvents);
- if (status != CL_SUCCESS) throw CLException(status, "clWaitForEvents");
+ status = clReleaseEvent(jniContext->readEvents[0]);
+ if (status != CL_SUCCESS) throw CLException(status, "clReleaseEvent() read event");
- if (config->isProfilingEnabled()) {
- status = profile(&arg->arrayBuffer->read, &jniContext->readEvents[0], 0,
- arg->name, jniContext->profileBaseTime);
- if (status != CL_SUCCESS) throw CLException(status, "profile ");
+ // since this is an explicit buffer get,
+ // we expect the buffer to have changed so we commit
+ arg->unpin(jenv); // was unpinCommit
+
+ //something went wrong print the error and exit
+ } catch(CLException& cle) {
+ cle.printError();
+ return status;
}
+ } else if(arg->isAparapiBuffer()) {
+
+ try {
+ status = clEnqueueReadBuffer(jniContext->commandQueue, arg->aparapiBuffer->mem,
+ CL_FALSE, 0,
+ arg->aparapiBuffer->lengthInBytes,
+ arg->aparapiBuffer->data, 0, NULL,
+ &jniContext->readEvents[0]);
+ if (config->isVerbose()){
+ fprintf(stderr, "explicitly read %s ptr=%lx len=%d\n",
+ arg->name, (unsigned long)arg->aparapiBuffer->data,
+ arg->aparapiBuffer->lengthInBytes );
+ }
+ if (status != CL_SUCCESS) throw CLException(status, "clEnqueueReadBuffer()");
+
+ status = clWaitForEvents(1, jniContext->readEvents);
+ if (status != CL_SUCCESS) throw CLException(status, "clWaitForEvents");
- status = clReleaseEvent(jniContext->readEvents[0]);
- if (status != CL_SUCCESS) throw CLException(status, "clReleaseEvent() read event");
+ if (config->isProfilingEnabled()) {
+ status = profile(&arg->aparapiBuffer->read, &jniContext->readEvents[0], 0,
+ arg->name, jniContext->profileBaseTime);
+ if (status != CL_SUCCESS) throw CLException(status, "profile ");
+ }
- // since this is an explicit buffer get,
- // we expect the buffer to have changed so we commit
- arg->unpin(jenv); // was unpinCommit
+ status = clReleaseEvent(jniContext->readEvents[0]);
+ if (status != CL_SUCCESS) throw CLException(status, "clReleaseEvent() read event");
- //something went wrong print the error and exit
- } catch(CLException& cle) {
- cle.printError();
- return status;
- }
+ arg->aparapiBuffer->inflate(jenv,arg);
- }else{
+ //something went wrong print the error and exit
+ } catch(CLException& cle) {
+ cle.printError();
+ return status;
+ }
+ }
+ } else {
if (config->isVerbose()){
fprintf(stderr, "attempt to request to get a buffer that does not appear to be referenced from kernel\n");
}
diff --git a/com.amd.aparapi.jni/src/cpp/runKernel/Aparapi.h b/com.amd.aparapi.jni/src/cpp/runKernel/Aparapi.h
index 3b4d40e0..dc174134 100644
--- a/com.amd.aparapi.jni/src/cpp/runKernel/Aparapi.h
+++ b/com.amd.aparapi.jni/src/cpp/runKernel/Aparapi.h
@@ -65,13 +65,18 @@ jint updateNonPrimitiveReferences(JNIEnv *jenv, jobject jobj, JNIContext* jniCon
void profileFirstRun(JNIContext* jniContext);
-void updateObject(JNIEnv* jenv, JNIContext* jniContext, KernelArg* arg, int& argPos, int argIdx);
+void updateArray(JNIEnv* jenv, JNIContext* jniContext, KernelArg* arg, int& argPos, int argIdx);
+void updateBuffer(JNIEnv* jenv, JNIContext* jniContext, KernelArg* arg, int& argPos, int argIdx);
void processObject(JNIEnv* jenv, JNIContext* jniContext, KernelArg* arg, int& argPos, int argIdx);
+void processArray(JNIEnv* jenv, JNIContext* jniContext, KernelArg* arg, int& argPos, int argIdx);
+void processBuffer(JNIEnv* jenv, JNIContext* jniContext, KernelArg* arg, int& argPos, int argIdx);
void updateWriteEvents(JNIEnv* jenv, JNIContext* jniContext, KernelArg* arg, int argIdx, int& writeEventCount);
void processLocal(JNIEnv* jenv, JNIContext* jniContext, KernelArg* arg, int& argPos, int argIdx);
+void processLocalArray(JNIEnv* jenv, JNIContext* jniContext, KernelArg* arg, int& argPos, int argIdx);
+void processLocalBuffer(JNIEnv* jenv, JNIContext* jniContext, KernelArg* arg, int& argPos, int argIdx);
int processArgs(JNIEnv* jenv, JNIContext* jniContext, int& argPos, int& writeEventCount);
diff --git a/com.amd.aparapi.jni/src/cpp/runKernel/AparapiBuffer.cpp b/com.amd.aparapi.jni/src/cpp/runKernel/AparapiBuffer.cpp
new file mode 100644
index 00000000..f5fcb19c
--- /dev/null
+++ b/com.amd.aparapi.jni/src/cpp/runKernel/AparapiBuffer.cpp
@@ -0,0 +1,1557 @@
+/*
+ Copyright (c) 2010-2011, Advanced Micro Devices, Inc.
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
+ following conditions are met:
+
+ Redistributions of source code must retain the above copyright notice, this list of conditions and the following
+ disclaimer.
+
+ Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
+ disclaimer in the documentation and/or other materials provided with the distribution.
+
+ Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ If you use the software (in whole or in part), you shall adhere to all applicable U.S., European, and other export
+ laws, including but not limited to the U.S. Export Administration Regulations ("EAR"), (15 C.F.R. Sections 730
+ through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June 2000. Further, pursuant to Section 740.6 of
+ the EAR, you hereby certify that, except pursuant to a license granted by the United States Department of Commerce
+ Bureau of Industry and Security or as otherwise permitted pursuant to a License Exception under the U.S. Export
+ Administration Regulations ("EAR"), you will not (1) export, re-export or release to a national of a country in
+ Country Groups D:1, E:1 or E:2 any restricted technology, software, or source code you receive hereunder, or (2)
+ export to Country Groups D:1, E:1 or E:2 the direct product of such technology or software, if such foreign produced
+ direct product is subject to national security controls as identified on the Commerce Control List (currently
+ found in Supplement 1 to Part 774 of EAR). For the most current Country Group listings, or for additional
+ information about the EAR or your obligations under those regulations, please refer to the U.S. Bureau of Industry
+ and Security?s website at http://www.bis.doc.gov/.
+ */
+#define APARAPIBUFFER_SOURCE
+#include "AparapiBuffer.h"
+#include "KernelArg.h"
+
+AparapiBuffer::AparapiBuffer():
+ javaObject((jobject) 0),
+ numDims(0),
+ dims(NULL),
+ lengthInBytes(0),
+ mem((cl_mem) 0),
+ data(NULL),
+ memMask((cl_uint)0) {
+ }
+
+AparapiBuffer::AparapiBuffer(void* _data, cl_uint* _lens, cl_uint _numDims, long _lengthInBytes, jobject _javaObject) :
+ data(_data),
+ lens(_lens),
+ numDims(_numDims),
+ lengthInBytes(_lengthInBytes),
+ javaObject(_javaObject),
+ mem((cl_mem) 0),
+ memMask((cl_uint)0)
+{
+ dims = new cl_uint[_numDims];
+ for(int i = 0; i < _numDims; i++) {
+ dims[i] = 1;
+ for(int j = i+1; j < _numDims; j++) {
+ dims[i] *= lens[j];
+ }
+ }
+}
+
+jobject AparapiBuffer::getJavaObject(JNIEnv* env, KernelArg* arg) {
+ return JNIHelper::getInstanceField(env, arg->javaArg, "javaBuffer", ObjectClassArg);
+}
+
+
+AparapiBuffer* AparapiBuffer::flatten(JNIEnv* env, jobject arg, int type) {
+ int numDims = JNIHelper::getInstanceField(env, arg, "numDims", IntArg);
+ if(numDims == 2 && isBoolean(type)) {
+ return AparapiBuffer::flattenBoolean2D(env,arg);
+ } else if(numDims == 2 && isByte(type)) {
+ return AparapiBuffer::flattenByte2D(env,arg);
+ } else if(numDims == 2 && isShort(type)) {
+ return AparapiBuffer::flattenShort2D(env,arg);
+ } else if(numDims == 2 && isInt(type)) {
+ return AparapiBuffer::flattenInt2D(env,arg);
+ } else if(numDims == 2 && isLong(type)) {
+ return AparapiBuffer::flattenLong2D(env,arg);
+ } else if(numDims == 2 && isFloat(type)) {
+ return AparapiBuffer::flattenFloat2D(env,arg);
+ } else if(numDims == 2 && isDouble(type)) {
+ return AparapiBuffer::flattenDouble2D(env,arg);
+ } else if(numDims == 3 && isBoolean(type)) {
+ return AparapiBuffer::flattenBoolean3D(env,arg);
+ } else if(numDims == 3 && isByte(type)) {
+ return AparapiBuffer::flattenByte3D(env,arg);
+ } else if(numDims == 3 && isShort(type)) {
+ return AparapiBuffer::flattenShort3D(env,arg);
+ } else if(numDims == 3 && isInt(type)) {
+ return AparapiBuffer::flattenInt3D(env,arg);
+ } else if(numDims == 3 && isLong(type)) {
+ return AparapiBuffer::flattenLong3D(env,arg);
+ } else if(numDims == 3 && isFloat(type)) {
+ return AparapiBuffer::flattenFloat3D(env,arg);
+ } else if(numDims == 3 && isDouble(type)) {
+ return AparapiBuffer::flattenDouble3D(env,arg);
+ }
+ return new AparapiBuffer();
+}
+
+
+AparapiBuffer* AparapiBuffer::flattenBoolean2D(JNIEnv* env, jobject arg) {
+
+ jobject javaBuffer = JNIHelper::getInstanceField(env, arg, "javaBuffer", ObjectClassArg);
+ cl_uint* dims = new cl_uint[2];
+ dims[0] = env->GetArrayLength((jobjectArray)javaBuffer);
+ dims[1] = env->GetArrayLength((jbooleanArray)env->GetObjectArrayElement((jobjectArray)javaBuffer, 0));
+ int totalSize = dims[0] * dims[1];
+ long bitSize = totalSize * sizeof(jboolean);
+
+ jboolean* array = new jboolean[totalSize];
+ /*
+ jbooleanArray* jArray = new jbooleanArray[dims[0]];
+ jboolean** elems = new jboolean*[dims[0]];
+
+ for(int i = 0; i < (int)dims[0]; i++) {
+ jArray[i] = (jbooleanArray)env->GetObjectArrayElement((jobjectArray)javaBuffer, i);
+ elems[i] = env->GetBooleanArrayElements(jArray[i],0);
+ }
+
+ #pragma omp parallel for
+ for(int i = 0; i < (int)dims[0]; i++) {
+ for(int j = 0; j < (int)dims[1]; j++) {
+ array[i*dims[1] + j] = elems[i][j];
+ }
+ }
+
+ for(int i = 0; i < (int)dims[0]; i++) {
+ env->ReleaseBooleanArrayElements(jArray[i], elems[i], 0);
+ }
+ delete[] jArray;
+ delete[] elems;
+ */
+
+ for(int i = 0; i < (int)dims[0]; i++) {
+ jbooleanArray jArray = (jbooleanArray)env->GetObjectArrayElement((jobjectArray)javaBuffer, i);
+ jboolean* elems = env->GetBooleanArrayElements(jArray,0);
+
+ for(int j = 0; j < (int)dims[1]; j++) {
+ array[i*dims[1] + j] = elems[j];
+ }
+ env->ReleaseBooleanArrayElements(jArray, elems, 0);
+ }
+
+ return new AparapiBuffer((void*)array, (cl_uint*)dims, 2, bitSize, javaBuffer);
+}
+
+AparapiBuffer* AparapiBuffer::flattenByte2D(JNIEnv* env, jobject arg) {
+
+ jobject javaBuffer = JNIHelper::getInstanceField(env, arg, "javaBuffer", ObjectClassArg);
+ cl_uint* dims = new cl_uint[2];
+ dims[0] = env->GetArrayLength((jobjectArray)javaBuffer);
+ dims[1] = env->GetArrayLength((jbyteArray)env->GetObjectArrayElement((jobjectArray)javaBuffer, 0));
+ int totalSize = dims[0] * dims[1];
+ long bitSize = totalSize * sizeof(jbyte);
+
+ jbyte* array = new jbyte[totalSize];
+ /*
+ jbyteArray* jArray = new jbyteArray[dims[0]];
+ jbyte** elems = new jbyte*[dims[0]];
+
+ for(int i = 0; i < (int)dims[0]; i++) {
+ jArray[i] = (jbyteArray)env->GetObjectArrayElement((jobjectArray)javaBuffer, i);
+ elems[i] = env->GetByteArrayElements(jArray[i],0);
+ }
+
+ #pragma omp parallel for
+ for(int i = 0; i < (int)dims[0]; i++) {
+ for(int j = 0; j < (int)dims[1]; j++) {
+ array[i*dims[1] + j] = elems[i][j];
+ }
+ }
+
+ for(int i = 0; i < (int)dims[0]; i++) {
+ env->ReleaseByteArrayElements(jArray[i], elems[i], 0);
+ }
+ delete[] jArray;
+ delete[] elems;
+ */
+
+ for(int i = 0; i < (int)dims[0]; i++) {
+ jbyteArray jArray = (jbyteArray)env->GetObjectArrayElement((jobjectArray)javaBuffer, i);
+ jbyte* elems = env->GetByteArrayElements(jArray,0);
+
+ for(int j = 0; j < (int)dims[1]; j++) {
+ array[i*dims[1] + j] = elems[j];
+ }
+ env->ReleaseByteArrayElements(jArray, elems, 0);
+ }
+
+ return new AparapiBuffer((void*)array, (cl_uint*)dims, 2, bitSize, javaBuffer);
+}
+
+AparapiBuffer* AparapiBuffer::flattenShort2D(JNIEnv* env, jobject arg) {
+
+ jobject javaBuffer = JNIHelper::getInstanceField(env, arg, "javaBuffer", ObjectClassArg);
+ cl_uint* dims = new cl_uint[2];
+ dims[0] = env->GetArrayLength((jobjectArray)javaBuffer);
+ dims[1] = env->GetArrayLength((jshortArray)env->GetObjectArrayElement((jobjectArray)javaBuffer, 0));
+ int totalSize = dims[0] * dims[1];
+ long bitSize = totalSize * sizeof(jshort);
+
+ jshort* array = new jshort[totalSize];
+ /*
+ jshortArray* jArray = new jshortArray[dims[0]];
+ jshort** elems = new jshort*[dims[0]];
+
+ for(int i = 0; i < (int)dims[0]; i++) {
+ jArray[i] = (jshortArray)env->GetObjectArrayElement((jobjectArray)javaBuffer, i);
+ elems[i] = env->GetShortArrayElements(jArray[i],0);
+ }
+
+ #pragma omp parallel for
+ for(int i = 0; i < (int)dims[0]; i++) {
+ for(int j = 0; j < (int)dims[1]; j++) {
+ array[i*dims[1] + j] = elems[i][j];
+ }
+ }
+
+ for(int i = 0; i < (int)dims[0]; i++) {
+ env->ReleaseShortArrayElements(jArray[i], elems[i], 0);
+ }
+ delete[] jArray;
+ delete[] elems;
+ */
+
+ for(int i = 0; i < (int)dims[0]; i++) {
+ jshortArray jArray = (jshortArray)env->GetObjectArrayElement((jobjectArray)javaBuffer, i);
+ jshort* elems = env->GetShortArrayElements(jArray,0);
+
+ for(int j = 0; j < (int)dims[1]; j++) {
+ array[i*dims[1] + j] = elems[j];
+ }
+ env->ReleaseShortArrayElements(jArray, elems, 0);
+ }
+
+ return new AparapiBuffer((void*)array, (cl_uint*)dims, 2, bitSize, javaBuffer);
+}
+
+AparapiBuffer* AparapiBuffer::flattenInt2D(JNIEnv* env, jobject arg) {
+
+ jobject javaBuffer = JNIHelper::getInstanceField(env, arg, "javaBuffer", ObjectClassArg);
+ cl_uint* dims = new cl_uint[2];
+ dims[0] = env->GetArrayLength((jobjectArray)javaBuffer);
+ dims[1] = env->GetArrayLength((jintArray)env->GetObjectArrayElement((jobjectArray)javaBuffer, 0));
+ int totalSize = dims[0] * dims[1];
+ long bitSize = totalSize * sizeof(jint);
+
+ jint* array = new jint[totalSize];
+ /*
+ jintArray* jArray = new jintArray[dims[0]];
+ jint** elems = new jint*[dims[0]];
+
+ for(int i = 0; i < (int)dims[0]; i++) {
+ jArray[i] = (jintArray)env->GetObjectArrayElement((jobjectArray)javaBuffer, i);
+ elems[i] = env->GetIntArrayElements(jArray[i],0);
+ }
+
+ #pragma omp parallel for
+ for(int i = 0; i < (int)dims[0]; i++) {
+ for(int j = 0; j < (int)dims[1]; j++) {
+ array[i*dims[1] + j] = elems[i][j];
+ }
+ }
+
+ for(int i = 0; i < (int)dims[0]; i++) {
+ env->ReleaseIntArrayElements(jArray[i], elems[i], 0);
+ }
+ delete[] jArray;
+ delete[] elems;
+ */
+
+ for(int i = 0; i < (int)dims[0]; i++) {
+ jintArray jArray = (jintArray)env->GetObjectArrayElement((jobjectArray)javaBuffer, i);
+ jint* elems = env->GetIntArrayElements(jArray,0);
+
+ for(int j = 0; j < (int)dims[1]; j++) {
+ array[i*dims[1] + j] = elems[j];
+ }
+ env->ReleaseIntArrayElements(jArray, elems, 0);
+ }
+
+ return new AparapiBuffer((void*)array, (cl_uint*)dims, 2, bitSize, javaBuffer);
+}
+
+AparapiBuffer* AparapiBuffer::flattenLong2D(JNIEnv* env, jobject arg) {
+
+ jobject javaBuffer = JNIHelper::getInstanceField(env, arg, "javaBuffer", ObjectClassArg);
+ cl_uint* dims = new cl_uint[2];
+ dims[0] = env->GetArrayLength((jobjectArray)javaBuffer);
+ dims[1] = env->GetArrayLength((jlongArray)env->GetObjectArrayElement((jobjectArray)javaBuffer, 0));
+ int totalSize = dims[0] * dims[1];
+ long bitSize = totalSize * sizeof(jlong);
+
+ jlong* array = new jlong[totalSize];
+ /*
+ jlongArray* jArray = new jlongArray[dims[0]];
+ jlong** elems = new jlong*[dims[0]];
+
+ for(int i = 0; i < (int)dims[0]; i++) {
+ jArray[i] = (jlongArray)env->GetObjectArrayElement((jobjectArray)javaBuffer, i);
+ elems[i] = env->GetLongArrayElements(jArray[i],0);
+ }
+
+ #pragma omp parallel for
+ for(int i = 0; i < (int)dims[0]; i++) {
+ for(int j = 0; j < (int)dims[1]; j++) {
+ array[i*dims[1] + j] = elems[i][j];
+ }
+ }
+
+ for(int i = 0; i < (int)dims[0]; i++) {
+ env->ReleaseLongArrayElements(jArray[i], elems[i], 0);
+ }
+ delete[] jArray;
+ delete[] elems;
+ */
+
+ for(int i = 0; i < (int)dims[0]; i++) {
+ jlongArray jArray = (jlongArray)env->GetObjectArrayElement((jobjectArray)javaBuffer, i);
+ jlong* elems = env->GetLongArrayElements(jArray,0);
+
+ for(int j = 0; j < (int)dims[1]; j++) {
+ array[i*dims[1] + j] = elems[j];
+ }
+ env->ReleaseLongArrayElements(jArray, elems, 0);
+ }
+
+ return new AparapiBuffer((void*)array, (cl_uint*)dims, 2, bitSize, javaBuffer);
+}
+
+AparapiBuffer* AparapiBuffer::flattenFloat2D(JNIEnv* env, jobject arg) {
+
+ jobject javaBuffer = JNIHelper::getInstanceField(env, arg, "javaBuffer", ObjectClassArg);
+ cl_uint* dims = new cl_uint[2];
+ dims[0] = env->GetArrayLength((jobjectArray)javaBuffer);
+ dims[1] = env->GetArrayLength((jfloatArray)env->GetObjectArrayElement((jobjectArray)javaBuffer, 0));
+ int totalSize = dims[0] * dims[1];
+ long bitSize = totalSize * sizeof(jfloat);
+
+ jfloat* array = new jfloat[totalSize];
+ /*
+ jfloatArray* jArray = new jfloatArray[dims[0]];
+ jfloat** elems = new jfloat*[dims[0]];
+
+ for(int i = 0; i < (int)dims[0]; i++) {
+ jArray[i] = (jfloatArray)env->GetObjectArrayElement((jobjectArray)javaBuffer, i);
+ elems[i] = env->GetFloatArrayElements(jArray[i],0);
+ }
+
+ #pragma omp parallel for
+ for(int i = 0; i < (int)dims[0]; i++) {
+ for(int j = 0; j < (int)dims[1]; j++) {
+ array[i*dims[1] + j] = elems[i][j];
+ }
+ }
+
+ for(int i = 0; i < (int)dims[0]; i++) {
+ env->ReleaseFloatArrayElements(jArray[i], elems[i], 0);
+ }
+ delete[] jArray;
+ delete[] elems;
+ */
+
+ for(int i = 0; i < (int)dims[0]; i++) {
+ jfloatArray jArray = (jfloatArray)env->GetObjectArrayElement((jobjectArray)javaBuffer, i);
+ jfloat* elems = env->GetFloatArrayElements(jArray,0);
+
+ for(int j = 0; j < (int)dims[1]; j++) {
+ array[i*dims[1] + j] = elems[j];
+ }
+ env->ReleaseFloatArrayElements(jArray, elems, 0);
+ }
+
+ return new AparapiBuffer((void*)array, (cl_uint*)dims, 2, bitSize, javaBuffer);
+}
+
+AparapiBuffer* AparapiBuffer::flattenDouble2D(JNIEnv* env, jobject arg) {
+
+ jobject javaBuffer = JNIHelper::getInstanceField(env, arg, "javaBuffer", ObjectClassArg);
+ cl_uint* dims = new cl_uint[2];
+ dims[0] = env->GetArrayLength((jobjectArray)javaBuffer);
+ dims[1] = env->GetArrayLength((jdoubleArray)env->GetObjectArrayElement((jobjectArray)javaBuffer, 0));
+ int totalSize = dims[0] * dims[1];
+ long bitSize = totalSize * sizeof(jdouble);
+
+ jdouble* array = new jdouble[totalSize];
+ /*
+ jdoubleArray* jArray = new jdoubleArray[dims[0]];
+ jdouble** elems = new jdouble*[dims[0]];
+
+ for(int i = 0; i < (int)dims[0]; i++) {
+ jArray[i] = (jdoubleArray)env->GetObjectArrayElement((jobjectArray)javaBuffer, i);
+ elems[i] = env->GetDoubleArrayElements(jArray[i],0);
+ }
+
+ #pragma omp parallel for
+ for(int i = 0; i < (int)dims[0]; i++) {
+ for(int j = 0; j < (int)dims[1]; j++) {
+ array[i*dims[1] + j] = elems[i][j];
+ }
+ }
+
+ for(int i = 0; i < (int)dims[0]; i++) {
+ env->ReleaseDoubleArrayElements(jArray[i], elems[i], 0);
+ }
+ delete[] jArray;
+ delete[] elems;
+ */
+
+ for(int i = 0; i < (int)dims[0]; i++) {
+ jdoubleArray jArray = (jdoubleArray)env->GetObjectArrayElement((jobjectArray)javaBuffer, i);
+ jdouble* elems = env->GetDoubleArrayElements(jArray,0);
+
+ for(int j = 0; j < (int)dims[1]; j++) {
+ array[i*dims[1] + j] = elems[j];
+ }
+ env->ReleaseDoubleArrayElements(jArray, elems, 0);
+ }
+
+ return new AparapiBuffer((void*)array, (cl_uint*)dims, 2, bitSize, javaBuffer);
+}
+
+
+AparapiBuffer* AparapiBuffer::flattenBoolean3D(JNIEnv* env, jobject arg) {
+
+ jobject javaBuffer = JNIHelper::getInstanceField(env, arg, "javaBuffer", ObjectClassArg);
+ cl_uint* dims = new cl_uint[3];
+ jobjectArray j0 = (jobjectArray)javaBuffer;
+ jobjectArray j1 = (jobjectArray)env->GetObjectArrayElement(j0, 0);
+ jbooleanArray j2 = (jbooleanArray)env->GetObjectArrayElement(j1, 0);
+ dims[0] = env->GetArrayLength(j0);
+ dims[1] = env->GetArrayLength(j1);
+ dims[2] = env->GetArrayLength(j2);
+
+ int totalSize = dims[0] * dims[1] * dims[2];
+ long bitSize = totalSize * sizeof(jboolean);
+
+ jboolean* array = new jboolean[totalSize];
+ /*
+ jbooleanArray** jArray = new jbooleanArray*[dims[0]];
+ jboolean*** elems = new jboolean**[dims[0]];
+
+ for(int i = 0; i < (int)dims[0]; i++) {
+ jArray[i] = new jbooleanArray[dims[1]];
+ elems[i] = new jboolean*[dims[1]];
+ jobjectArray jrow = (jobjectArray)env->GetObjectArrayElement((jobjectArray)javaBuffer, i);
+ for(int j = 0; j < (int)dims[1]; j++) {
+ jArray[i][j] = (jbooleanArray)env->GetObjectArrayElement(jrow, j);
+ elems[i][j] = env->GetBooleanArrayElements(jArray[i][j],0);
+ }
+ }
+
+ #pragma omp parallel for
+ for(int i = 0; i < (int)dims[0]; i++) {
+ for(int j = 0; j < (int)dims[1]; j++) {
+ for(int k = 0; k < (int)dims[2]; k++) {
+ array[i*dims[1]*dims[2] + j*dims[2] + k] = elems[i][j][k];
+ }
+ }
+ }
+
+ for(int i = 0; i < (int)dims[0]; i++) {
+ for(int j = 0; j < (int)dims[1]; j++) {
+ env->ReleaseBooleanArrayElements(jArray[i][j], elems[i][j], 0);
+ }
+ delete[] jArray[i];
+ delete[] elems[i];
+ }
+ delete[] jArray;
+ delete[] elems;
+ */
+
+ for(int i = 0; i < (int)dims[0]; i++) {
+ jobjectArray jrow = (jobjectArray)env->GetObjectArrayElement((jobjectArray)javaBuffer, i);
+ for(int j = 0; j < (int)dims[1]; j++) {
+ jbooleanArray jArray = (jbooleanArray)env->GetObjectArrayElement(jrow, j);
+ jboolean* elems = env->GetBooleanArrayElements(jArray,0);
+ for(int k = 0; k < (int)dims[2]; k++) {
+ array[i*dims[1]*dims[2] + j*dims[2] + k] = elems[k];
+ }
+ env->ReleaseBooleanArrayElements(jArray, elems, 0);
+ }
+ }
+
+ return new AparapiBuffer((void*)array, (cl_uint*)dims, 3, bitSize, javaBuffer);
+}
+
+AparapiBuffer* AparapiBuffer::flattenByte3D(JNIEnv* env, jobject arg) {
+
+ jobject javaBuffer = JNIHelper::getInstanceField(env, arg, "javaBuffer", ObjectClassArg);
+ cl_uint* dims = new cl_uint[3];
+ jobjectArray j0 = (jobjectArray)javaBuffer;
+ jobjectArray j1 = (jobjectArray)env->GetObjectArrayElement(j0, 0);
+ jbyteArray j2 = (jbyteArray)env->GetObjectArrayElement(j1, 0);
+ dims[0] = env->GetArrayLength(j0);
+ dims[1] = env->GetArrayLength(j1);
+ dims[2] = env->GetArrayLength(j2);
+
+ int totalSize = dims[0] * dims[1] * dims[2];
+ long bitSize = totalSize * sizeof(jbyte);
+
+ jbyte* array = new jbyte[totalSize];
+ /*
+ jbyteArray** jArray = new jbyteArray*[dims[0]];
+ jbyte*** elems = new jbyte**[dims[0]];
+
+ for(int i = 0; i < (int)dims[0]; i++) {
+ jArray[i] = new jbyteArray[dims[1]];
+ elems[i] = new jbyte*[dims[1]];
+ jobjectArray jrow = (jobjectArray)env->GetObjectArrayElement((jobjectArray)javaBuffer, i);
+ for(int j = 0; j < (int)dims[1]; j++) {
+ jArray[i][j] = (jbyteArray)env->GetObjectArrayElement(jrow, j);
+ elems[i][j] = env->GetByteArrayElements(jArray[i][j],0);
+ }
+ }
+
+ #pragma omp parallel for
+ for(int i = 0; i < (int)dims[0]; i++) {
+ for(int j = 0; j < (int)dims[1]; j++) {
+ for(int k = 0; k < (int)dims[2]; k++) {
+ array[i*dims[1]*dims[2] + j*dims[2] + k] = elems[i][j][k];
+ }
+ }
+ }
+
+ for(int i = 0; i < (int)dims[0]; i++) {
+ for(int j = 0; j < (int)dims[1]; j++) {
+ env->ReleaseByteArrayElements(jArray[i][j], elems[i][j], 0);
+ }
+ delete[] jArray[i];
+ delete[] elems[i];
+ }
+ delete[] jArray;
+ delete[] elems;
+ */
+
+ for(int i = 0; i < (int)dims[0]; i++) {
+ jobjectArray jrow = (jobjectArray)env->GetObjectArrayElement((jobjectArray)javaBuffer, i);
+ for(int j = 0; j < (int)dims[1]; j++) {
+ jbyteArray jArray = (jbyteArray)env->GetObjectArrayElement(jrow, j);
+ jbyte* elems = env->GetByteArrayElements(jArray,0);
+ for(int k = 0; k < (int)dims[2]; k++) {
+ array[i*dims[1]*dims[2] + j*dims[2] + k] = elems[k];
+ }
+ env->ReleaseByteArrayElements(jArray, elems, 0);
+ }
+ }
+
+ return new AparapiBuffer((void*)array, (cl_uint*)dims, 3, bitSize, javaBuffer);
+}
+
+AparapiBuffer* AparapiBuffer::flattenShort3D(JNIEnv* env, jobject arg) {
+
+ jobject javaBuffer = JNIHelper::getInstanceField(env, arg, "javaBuffer", ObjectClassArg);
+ cl_uint* dims = new cl_uint[3];
+ jobjectArray j0 = (jobjectArray)javaBuffer;
+ jobjectArray j1 = (jobjectArray)env->GetObjectArrayElement(j0, 0);
+ jshortArray j2 = (jshortArray)env->GetObjectArrayElement(j1, 0);
+ dims[0] = env->GetArrayLength(j0);
+ dims[1] = env->GetArrayLength(j1);
+ dims[2] = env->GetArrayLength(j2);
+
+ int totalSize = dims[0] * dims[1] * dims[2];
+ long bitSize = totalSize * sizeof(jshort);
+
+ jshort* array = new jshort[totalSize];
+ /*
+ jshortArray** jArray = new jshortArray*[dims[0]];
+ jshort*** elems = new jshort**[dims[0]];
+
+ for(int i = 0; i < (int)dims[0]; i++) {
+ jArray[i] = new jshortArray[dims[1]];
+ elems[i] = new jshort*[dims[1]];
+ jobjectArray jrow = (jobjectArray)env->GetObjectArrayElement((jobjectArray)javaBuffer, i);
+ for(int j = 0; j < (int)dims[1]; j++) {
+ jArray[i][j] = (jshortArray)env->GetObjectArrayElement(jrow, j);
+ elems[i][j] = env->GetShortArrayElements(jArray[i][j],0);
+ }
+ }
+
+ #pragma omp parallel for
+ for(int i = 0; i < (int)dims[0]; i++) {
+ for(int j = 0; j < (int)dims[1]; j++) {
+ for(int k = 0; k < (int)dims[2]; k++) {
+ array[i*dims[1]*dims[2] + j*dims[2] + k] = elems[i][j][k];
+ }
+ }
+ }
+
+ for(int i = 0; i < (int)dims[0]; i++) {
+ for(int j = 0; j < (int)dims[1]; j++) {
+ env->ReleaseShortArrayElements(jArray[i][j], elems[i][j], 0);
+ }
+ delete[] jArray[i];
+ delete[] elems[i];
+ }
+ delete[] jArray;
+ delete[] elems;
+ */
+
+ for(int i = 0; i < (int)dims[0]; i++) {
+ jobjectArray jrow = (jobjectArray)env->GetObjectArrayElement((jobjectArray)javaBuffer, i);
+ for(int j = 0; j < (int)dims[1]; j++) {
+ jshortArray jArray = (jshortArray)env->GetObjectArrayElement(jrow, j);
+ jshort* elems = env->GetShortArrayElements(jArray,0);
+ for(int k = 0; k < (int)dims[2]; k++) {
+ array[i*dims[1]*dims[2] + j*dims[2] + k] = elems[k];
+ }
+ env->ReleaseShortArrayElements(jArray, elems, 0);
+ }
+ }
+
+ return new AparapiBuffer((void*)array, (cl_uint*)dims, 3, bitSize, javaBuffer);
+}
+
+AparapiBuffer* AparapiBuffer::flattenInt3D(JNIEnv* env, jobject arg) {
+
+ jobject javaBuffer = JNIHelper::getInstanceField(env, arg, "javaBuffer", ObjectClassArg);
+ cl_uint* dims = new cl_uint[3];
+ jobjectArray j0 = (jobjectArray)javaBuffer;
+ jobjectArray j1 = (jobjectArray)env->GetObjectArrayElement(j0, 0);
+ jintArray j2 = (jintArray)env->GetObjectArrayElement(j1, 0);
+ dims[0] = env->GetArrayLength(j0);
+ dims[1] = env->GetArrayLength(j1);
+ dims[2] = env->GetArrayLength(j2);
+
+ int totalSize = dims[0] * dims[1] * dims[2];
+ long bitSize = totalSize * sizeof(jint);
+
+ jint* array = new jint[totalSize];
+ /*
+ jintArray** jArray = new jintArray*[dims[0]];
+ jint*** elems = new jint**[dims[0]];
+
+ for(int i = 0; i < (int)dims[0]; i++) {
+ jArray[i] = new jintArray[dims[1]];
+ elems[i] = new jint*[dims[1]];
+ jobjectArray jrow = (jobjectArray)env->GetObjectArrayElement((jobjectArray)javaBuffer, i);
+ for(int j = 0; j < (int)dims[1]; j++) {
+ jArray[i][j] = (jintArray)env->GetObjectArrayElement(jrow, j);
+ elems[i][j] = env->GetIntArrayElements(jArray[i][j],0);
+ }
+ }
+
+ #pragma omp parallel for
+ for(int i = 0; i < (int)dims[0]; i++) {
+ for(int j = 0; j < (int)dims[1]; j++) {
+ for(int k = 0; k < (int)dims[2]; k++) {
+ array[i*dims[1]*dims[2] + j*dims[2] + k] = elems[i][j][k];
+ }
+ }
+ }
+
+ for(int i = 0; i < (int)dims[0]; i++) {
+ for(int j = 0; j < (int)dims[1]; j++) {
+ env->ReleaseIntArrayElements(jArray[i][j], elems[i][j], 0);
+ }
+ delete[] jArray[i];
+ delete[] elems[i];
+ }
+ delete[] jArray;
+ delete[] elems;
+ */
+
+ for(int i = 0; i < (int)dims[0]; i++) {
+ jobjectArray jrow = (jobjectArray)env->GetObjectArrayElement((jobjectArray)javaBuffer, i);
+ for(int j = 0; j < (int)dims[1]; j++) {
+ jintArray jArray = (jintArray)env->GetObjectArrayElement(jrow, j);
+ jint* elems = env->GetIntArrayElements(jArray,0);
+ for(int k = 0; k < (int)dims[2]; k++) {
+ array[i*dims[1]*dims[2] + j*dims[2] + k] = elems[k];
+ }
+ env->ReleaseIntArrayElements(jArray, elems, 0);
+ }
+ }
+
+ return new AparapiBuffer((void*)array, (cl_uint*)dims, 3, bitSize, javaBuffer);
+}
+
+AparapiBuffer* AparapiBuffer::flattenLong3D(JNIEnv* env, jobject arg) {
+
+ jobject javaBuffer = JNIHelper::getInstanceField(env, arg, "javaBuffer", ObjectClassArg);
+ cl_uint* dims = new cl_uint[3];
+ jobjectArray j0 = (jobjectArray)javaBuffer;
+ jobjectArray j1 = (jobjectArray)env->GetObjectArrayElement(j0, 0);
+ jlongArray j2 = (jlongArray)env->GetObjectArrayElement(j1, 0);
+ dims[0] = env->GetArrayLength(j0);
+ dims[1] = env->GetArrayLength(j1);
+ dims[2] = env->GetArrayLength(j2);
+
+ int totalSize = dims[0] * dims[1] * dims[2];
+ jlong bitSize = totalSize * sizeof(jlong);
+
+ jlong* array = new jlong[totalSize];
+ /*
+ jlongArray** jArray = new jlongArray*[dims[0]];
+ jlong*** elems = new jlong**[dims[0]];
+
+ for(int i = 0; i < (int)dims[0]; i++) {
+ jArray[i] = new jlongArray[dims[1]];
+ elems[i] = new jlong*[dims[1]];
+ jobjectArray jrow = (jobjectArray)env->GetObjectArrayElement((jobjectArray)javaBuffer, i);
+ for(int j = 0; j < (int)dims[1]; j++) {
+ jArray[i][j] = (jlongArray)env->GetObjectArrayElement(jrow, j);
+ elems[i][j] = env->GetLongArrayElements(jArray[i][j],0);
+ }
+ }
+
+ #pragma omp parallel for
+ for(int i = 0; i < (int)dims[0]; i++) {
+ for(int j = 0; j < (int)dims[1]; j++) {
+ for(int k = 0; k < (int)dims[2]; k++) {
+ array[i*dims[1]*dims[2] + j*dims[2] + k] = elems[i][j][k];
+ }
+ }
+ }
+
+ for(int i = 0; i < (int)dims[0]; i++) {
+ for(int j = 0; j < (int)dims[1]; j++) {
+ env->ReleaseLongArrayElements(jArray[i][j], elems[i][j], 0);
+ }
+ delete[] jArray[i];
+ delete[] elems[i];
+ }
+ delete[] jArray;
+ delete[] elems;
+ */
+
+ for(int i = 0; i < (int)dims[0]; i++) {
+ jobjectArray jrow = (jobjectArray)env->GetObjectArrayElement((jobjectArray)javaBuffer, i);
+ for(int j = 0; j < (int)dims[1]; j++) {
+ jlongArray jArray = (jlongArray)env->GetObjectArrayElement(jrow, j);
+ jlong* elems = env->GetLongArrayElements(jArray,0);
+ for(int k = 0; k < (int)dims[2]; k++) {
+ array[i*dims[1]*dims[2] + j*dims[2] + k] = elems[k];
+ }
+ env->ReleaseLongArrayElements(jArray, elems, 0);
+ }
+ }
+
+ return new AparapiBuffer((void*)array, (cl_uint*)dims, 3, bitSize, javaBuffer);
+}
+
+AparapiBuffer* AparapiBuffer::flattenFloat3D(JNIEnv* env, jobject arg) {
+
+ jobject javaBuffer = JNIHelper::getInstanceField(env, arg, "javaBuffer", ObjectClassArg);
+ cl_uint* dims = new cl_uint[3];
+ jobjectArray j0 = (jobjectArray)javaBuffer;
+ jobjectArray j1 = (jobjectArray)env->GetObjectArrayElement(j0, 0);
+ jfloatArray j2 = (jfloatArray)env->GetObjectArrayElement(j1, 0);
+ dims[0] = env->GetArrayLength(j0);
+ dims[1] = env->GetArrayLength(j1);
+ dims[2] = env->GetArrayLength(j2);
+
+ int totalSize = dims[0] * dims[1] * dims[2];
+ long bitSize = totalSize * sizeof(jfloat);
+
+ jfloat* array = new jfloat[totalSize];
+ /*
+ jfloatArray** jArray = new jfloatArray*[dims[0]];
+ jfloat*** elems = new jfloat**[dims[0]];
+
+ for(int i = 0; i < (int)dims[0]; i++) {
+ jArray[i] = new jfloatArray[dims[1]];
+ elems[i] = new jfloat*[dims[1]];
+ jobjectArray jrow = (jobjectArray)env->GetObjectArrayElement((jobjectArray)javaBuffer, i);
+ for(int j = 0; j < (int)dims[1]; j++) {
+ jArray[i][j] = (jfloatArray)env->GetObjectArrayElement(jrow, j);
+ elems[i][j] = env->GetFloatArrayElements(jArray[i][j],0);
+ }
+ }
+
+ #pragma omp parallel for
+ for(int i = 0; i < (int)dims[0]; i++) {
+ for(int j = 0; j < (int)dims[1]; j++) {
+ for(int k = 0; k < (int)dims[2]; k++) {
+ array[i*dims[1]*dims[2] + j*dims[2] + k] = elems[i][j][k];
+ }
+ }
+ }
+
+ for(int i = 0; i < (int)dims[0]; i++) {
+ for(int j = 0; j < (int)dims[1]; j++) {
+ env->ReleaseFloatArrayElements(jArray[i][j], elems[i][j], 0);
+ }
+ delete[] jArray[i];
+ delete[] elems[i];
+ }
+ delete[] jArray;
+ delete[] elems;
+ */
+
+ for(int i = 0; i < (int)dims[0]; i++) {
+ jobjectArray jrow = (jobjectArray)env->GetObjectArrayElement((jobjectArray)javaBuffer, i);
+ for(int j = 0; j < (int)dims[1]; j++) {
+ jfloatArray jArray = (jfloatArray)env->GetObjectArrayElement(jrow, j);
+ jfloat* elems = env->GetFloatArrayElements(jArray,0);
+ for(int k = 0; k < (int)dims[2]; k++) {
+ array[i*dims[1]*dims[2] + j*dims[2] + k] = elems[k];
+ }
+ env->ReleaseFloatArrayElements(jArray, elems, 0);
+ }
+ }
+
+ return new AparapiBuffer((void*)array, (cl_uint*)dims, 3, bitSize, javaBuffer);
+}
+
+AparapiBuffer* AparapiBuffer::flattenDouble3D(JNIEnv* env, jobject arg) {
+
+ jobject javaBuffer = JNIHelper::getInstanceField(env, arg, "javaBuffer", ObjectClassArg);
+ cl_uint* dims = new cl_uint[3];
+ jobjectArray j0 = (jobjectArray)javaBuffer;
+ jobjectArray j1 = (jobjectArray)env->GetObjectArrayElement(j0, 0);
+ jdoubleArray j2 = (jdoubleArray)env->GetObjectArrayElement(j1, 0);
+ dims[0] = env->GetArrayLength(j0);
+ dims[1] = env->GetArrayLength(j1);
+ dims[2] = env->GetArrayLength(j2);
+
+ int totalSize = dims[0] * dims[1] * dims[2];
+ long bitSize = totalSize * sizeof(jdouble);
+
+ jdouble* array = new jdouble[totalSize];
+ /*
+ jdoubleArray** jArray = new jdoubleArray*[dims[0]];
+ jdouble*** elems = new jdouble**[dims[0]];
+
+ for(int i = 0; i < (int)dims[0]; i++) {
+ jArray[i] = new jdoubleArray[dims[1]];
+ elems[i] = new jdouble*[dims[1]];
+ jobjectArray jrow = (jobjectArray)env->GetObjectArrayElement((jobjectArray)javaBuffer, i);
+ for(int j = 0; j < (int)dims[1]; j++) {
+ jArray[i][j] = (jdoubleArray)env->GetObjectArrayElement(jrow, j);
+ elems[i][j] = env->GetDoubleArrayElements(jArray[i][j],0);
+ }
+ }
+
+ #pragma omp parallel for
+ for(int i = 0; i < (int)dims[0]; i++) {
+ for(int j = 0; j < (int)dims[1]; j++) {
+ for(int k = 0; k < (int)dims[2]; k++) {
+ array[i*dims[1]*dims[2] + j*dims[2] + k] = elems[i][j][k];
+ }
+ }
+ }
+
+ for(int i = 0; i < (int)dims[0]; i++) {
+ for(int j = 0; j < (int)dims[1]; j++) {
+ env->ReleaseDoubleArrayElements(jArray[i][j], elems[i][j], 0);
+ }
+ delete[] jArray[i];
+ delete[] elems[i];
+ }
+ delete[] jArray;
+ delete[] elems;
+ */
+
+ for(int i = 0; i < (int)dims[0]; i++) {
+ jobjectArray jrow = (jobjectArray)env->GetObjectArrayElement((jobjectArray)javaBuffer, i);
+ for(int j = 0; j < (int)dims[1]; j++) {
+ jdoubleArray jArray = (jdoubleArray)env->GetObjectArrayElement(jrow, j);
+ jdouble* elems = env->GetDoubleArrayElements(jArray,0);
+ for(int k = 0; k < (int)dims[2]; k++) {
+ array[i*dims[1]*dims[2] + j*dims[1] + k] = elems[k];
+ }
+ env->ReleaseDoubleArrayElements(jArray, elems, 0);
+ }
+ }
+
+ return new AparapiBuffer((void*)array, (cl_uint*)dims, 3, bitSize, javaBuffer);
+}
+
+
+
+void AparapiBuffer::inflate(JNIEnv* env, KernelArg* arg) {
+ javaObject = JNIHelper::getInstanceField(env, arg->javaArg, "javaBuffer", ObjectClassArg);
+ if(numDims == 2 && arg->isBoolean()) {
+ AparapiBuffer::inflateBoolean2D(env, arg);
+ } else if(numDims == 2 && arg->isByte()) {
+ AparapiBuffer::inflateByte2D(env, arg);
+ } else if(numDims == 2 && arg->isShort()) {
+ AparapiBuffer::inflateShort2D(env, arg);
+ } else if(numDims == 2 && arg->isInt()) {
+ AparapiBuffer::inflateInt2D(env, arg);
+ } else if(numDims == 2 && arg->isLong()) {
+ AparapiBuffer::inflateLong2D(env, arg);
+ } else if(numDims == 2 && arg->isFloat()) {
+ AparapiBuffer::inflateFloat2D(env, arg);
+ } else if(numDims == 2 && arg->isDouble()) {
+ AparapiBuffer::inflateDouble2D(env, arg);
+ } else if(numDims == 3 && arg->isBoolean()) {
+ AparapiBuffer::inflateBoolean3D(env, arg);
+ } else if(numDims == 3 && arg->isByte()) {
+ AparapiBuffer::inflateByte3D(env, arg);
+ } else if(numDims == 3 && arg->isShort()) {
+ AparapiBuffer::inflateShort3D(env, arg);
+ } else if(numDims == 3 && arg->isInt()) {
+ AparapiBuffer::inflateInt3D(env, arg);
+ } else if(numDims == 3 && arg->isLong()) {
+ AparapiBuffer::inflateLong3D(env, arg);
+ } else if(numDims == 3 && arg->isFloat()) {
+ AparapiBuffer::inflateFloat3D(env, arg);
+ } else if(numDims == 3 && arg->isDouble()) {
+ AparapiBuffer::inflateDouble3D(env, arg);
+ } else {
+ return;
+ }
+
+ deleteBuffer(arg);
+}
+
+
+void AparapiBuffer::inflateBoolean2D(JNIEnv *env, KernelArg* arg) {
+
+ jobjectArray buffer = (jobjectArray)javaObject;
+ jboolean* array = (jboolean*)data;
+ /*
+ jbooleanArray* jArray = new jbooleanArray[lens[0]];
+ jboolean** body = new jboolean*[lens[0]];
+
+ for(int i = 0; i < lens[0]; i++) {
+ jArray[i] = (jbooleanArray)env->GetObjectArrayElement(buffer, i);
+ body[i] = env->GetBooleanArrayElements(jArray[i],0);
+ }
+
+ #pragma omp parallel for
+ for(int i = 0; i < lens[0]; i++) {
+ for(int j = 0; j < lens[1]; j++) {
+ body[i][j] = array[i*dims[0] + j];
+ }
+ }
+
+ for(int i = 0; i < lens[0]; i++) {
+ env->ReleaseBooleanArrayElements(jArray[i], body[i], 0);
+ }
+ delete[] jArray;
+ delete[] body;
+ */
+
+ for(int i = 0; i < lens[0]; i++) {
+ jbooleanArray jArray = (jbooleanArray)env->GetObjectArrayElement(buffer, i);
+ jboolean* body = env->GetBooleanArrayElements(jArray,0);
+ for(int j = 0; j < lens[1]; j++) {
+ body[j] = array[i*dims[0] + j];
+ }
+ env->ReleaseBooleanArrayElements(jArray, body, 0);
+ }
+}
+
+void AparapiBuffer::inflateByte2D(JNIEnv *env, KernelArg* arg) {
+
+ jobjectArray buffer = (jobjectArray)javaObject;
+ jbyte* array = (jbyte*)data;
+ /*
+ jbyteArray* jArray = new jbyteArray[lens[0]];
+ jbyte** body = new jbyte*[lens[0]];
+
+ for(int i = 0; i < lens[0]; i++) {
+ jArray[i] = (jbyteArray)env->GetObjectArrayElement(buffer, i);
+ body[i] = env->GetByteArrayElements(jArray[i],0);
+ }
+
+ #pragma omp parallel for
+ for(int i = 0; i < lens[0]; i++) {
+ for(int j = 0; j < lens[1]; j++) {
+ body[i][j] = array[i*dims[0] + j];
+ }
+ }
+
+ for(int i = 0; i < lens[0]; i++) {
+ env->ReleaseByteArrayElements(jArray[i], body[i], 0);
+ }
+ delete[] jArray;
+ delete[] body;
+ */
+
+ for(int i = 0; i < lens[0]; i++) {
+ jbyteArray jArray = (jbyteArray)env->GetObjectArrayElement(buffer, i);
+ jbyte* body = env->GetByteArrayElements(jArray,0);
+ for(int j = 0; j < lens[1]; j++) {
+ body[j] = array[i*dims[0] + j];
+ }
+ env->ReleaseByteArrayElements(jArray, body, 0);
+ }
+}
+
+void AparapiBuffer::inflateShort2D(JNIEnv *env, KernelArg* arg) {
+
+ jobjectArray buffer = (jobjectArray)javaObject;
+ jshort* array = (jshort*)data;
+ /*
+ jshortArray* jArray = new jshortArray[lens[0]];
+ jshort** body = new jshort*[lens[0]];
+
+ for(int i = 0; i < lens[0]; i++) {
+ jArray[i] = (jshortArray)env->GetObjectArrayElement(buffer, i);
+ body[i] = env->GetShortArrayElements(jArray[i],0);
+ }
+
+ #pragma omp parallel for
+ for(int i = 0; i < lens[0]; i++) {
+ for(int j = 0; j < lens[1]; j++) {
+ body[i][j] = array[i*dims[0] + j];
+ }
+ }
+
+ for(int i = 0; i < lens[0]; i++) {
+ env->ReleaseShortArrayElements(jArray[i], body[i], 0);
+ }
+ delete[] jArray;
+ delete[] body;
+ */
+
+ for(int i = 0; i < lens[0]; i++) {
+ jshortArray jArray = (jshortArray)env->GetObjectArrayElement(buffer, i);
+ jshort* body = env->GetShortArrayElements(jArray,0);
+ for(int j = 0; j < lens[1]; j++) {
+ body[j] = array[i*dims[0] + j];
+ }
+ env->ReleaseShortArrayElements(jArray, body, 0);
+ }
+}
+
+void AparapiBuffer::inflateInt2D(JNIEnv *env, KernelArg* arg) {
+
+ jobjectArray buffer = (jobjectArray)javaObject;
+ jint* array = (jint*)data;
+ /*
+ jintArray* jArray = new jintArray[lens[0]];
+ jint** body = new jint*[lens[0]];
+
+ for(int i = 0; i < lens[0]; i++) {
+ jArray[i] = (jintArray)env->GetObjectArrayElement(buffer, i);
+ body[i] = env->GetIntArrayElements(jArray[i],0);
+ }
+
+ #pragma omp parallel for
+ for(int i = 0; i < lens[0]; i++) {
+ for(int j = 0; j < lens[1]; j++) {
+ body[i][j] = array[i*dims[0] + j];
+ }
+ }
+
+ for(int i = 0; i < lens[0]; i++) {
+ env->ReleaseIntArrayElements(jArray[i], body[i], 0);
+ }
+ delete[] jArray;
+ delete[] body;
+ */
+
+ for(int i = 0; i < lens[0]; i++) {
+ jintArray jArray = (jintArray)env->GetObjectArrayElement(buffer, i);
+ jint* body = env->GetIntArrayElements(jArray,0);
+ for(int j = 0; j < lens[1]; j++) {
+ body[j] = array[i*dims[0] + j];
+ }
+ env->ReleaseIntArrayElements(jArray, body, 0);
+ }
+}
+
+void AparapiBuffer::inflateLong2D(JNIEnv *env, KernelArg* arg) {
+
+ jobjectArray buffer = (jobjectArray)javaObject;
+ jlong* array = (jlong*)data;
+ /*
+ jlongArray* jArray = new jlongArray[lens[0]];
+ jlong** body = new jlong*[lens[0]];
+
+ for(int i = 0; i < lens[0]; i++) {
+ jArray[i] = (jlongArray)env->GetObjectArrayElement(buffer, i);
+ body[i] = env->GetLongArrayElements(jArray[i],0);
+ }
+
+ #pragma omp parallel for
+ for(int i = 0; i < lens[0]; i++) {
+ for(int j = 0; j < lens[1]; j++) {
+ body[i][j] = array[i*dims[0] + j];
+ }
+ }
+
+ for(int i = 0; i < lens[0]; i++) {
+ env->ReleaseLongArrayElements(jArray[i], body[i], 0);
+ }
+ delete[] jArray;
+ delete[] body;
+ */
+
+ for(int i = 0; i < lens[0]; i++) {
+ jlongArray jArray = (jlongArray)env->GetObjectArrayElement(buffer, i);
+ jlong* body = env->GetLongArrayElements(jArray,0);
+ for(int j = 0; j < lens[1]; j++) {
+ body[j] = array[i*dims[0] + j];
+ }
+ env->ReleaseLongArrayElements(jArray, body, 0);
+ }
+}
+
+void AparapiBuffer::inflateFloat2D(JNIEnv *env, KernelArg* arg) {
+
+ jobjectArray buffer = (jobjectArray)javaObject;
+ jfloat* array = (jfloat*)data;
+ /*
+ jfloatArray* jArray = new jfloatArray[lens[0]];
+ jfloat** body = new jfloat*[lens[0]];
+
+ for(int i = 0; i < lens[0]; i++) {
+ jArray[i] = (jfloatArray)env->GetObjectArrayElement(buffer, i);
+ body[i] = env->GetFloatArrayElements(jArray[i],0);
+ }
+
+ #pragma omp parallel for
+ for(int i = 0; i < lens[0]; i++) {
+ for(int j = 0; j < lens[1]; j++) {
+ body[i][j] = array[i*dims[0] + j];
+ }
+ }
+
+ for(int i = 0; i < lens[0]; i++) {
+ env->ReleaseFloatArrayElements(jArray[i], body[i], 0);
+ }
+ delete[] jArray;
+ delete[] body;
+ */
+
+ for(int i = 0; i < lens[0]; i++) {
+ jfloatArray jArray = (jfloatArray)env->GetObjectArrayElement(buffer, i);
+ jfloat* body = env->GetFloatArrayElements(jArray,0);
+ for(int j = 0; j < lens[1]; j++) {
+ body[j] = array[i*dims[0] + j];
+ }
+ env->ReleaseFloatArrayElements(jArray, body, 0);
+ }
+}
+
+void AparapiBuffer::inflateDouble2D(JNIEnv *env, KernelArg* arg) {
+
+ jobjectArray buffer = (jobjectArray)javaObject;
+ jdouble* array = (jdouble*)data;
+ /*
+ jdoubleArray* jArray = new jdoubleArray[lens[0]];
+ jdouble** body = new jdouble*[lens[0]];
+
+ for(int i = 0; i < lens[0]; i++) {
+ jArray[i] = (jdoubleArray)env->GetObjectArrayElement(buffer, i);
+ body[i] = env->GetDoubleArrayElements(jArray[i],0);
+ }
+
+ #pragma omp parallel for
+ for(int i = 0; i < lens[0]; i++) {
+ for(int j = 0; j < lens[1]; j++) {
+ body[i][j] = array[i*dims[0] + j];
+ }
+ }
+
+ for(int i = 0; i < lens[0]; i++) {
+ env->ReleaseDoubleArrayElements(jArray[i], body[i], 0);
+ }
+ delete[] jArray;
+ delete[] body;
+ */
+
+ for(int i = 0; i < lens[0]; i++) {
+ jdoubleArray jArray = (jdoubleArray)env->GetObjectArrayElement(buffer, i);
+ jdouble* body = env->GetDoubleArrayElements(jArray,0);
+ for(int j = 0; j < lens[1]; j++) {
+ body[j] = array[i*dims[0] + j];
+ }
+ env->ReleaseDoubleArrayElements(jArray, body, 0);
+ }
+}
+
+
+void AparapiBuffer::inflateBoolean3D(JNIEnv *env, KernelArg* arg) {
+
+ jobjectArray buffer = (jobjectArray)javaObject;
+ jboolean* array = (jboolean*)data;
+ /*
+ jbooleanArray** jArray = new jbooleanArray*[lens[0]];
+ jboolean*** body = new jboolean**[lens[0]];
+
+ for(int i = 0; i < lens[0]; i++) {
+ jobjectArray jrow = (jobjectArray)env->GetObjectArrayElement(buffer, i);
+ jArray[i] = new jbooleanArray[lens[0]];
+ body[i] = new jboolean*[lens[0]];
+ for(int j = 0; j < lens[1]; j++) {
+ jArray[i][j] = (jbooleanArray)env->GetObjectArrayElement(jrow, j);
+ body[i][j] = env->GetBooleanArrayElements(jArray[i][j],0);
+ }
+ }
+
+ #pragma omp parallel for
+ for(int i = 0; i < lens[0]; i++) {
+ for(int j = 0; j < lens[1]; j++) {
+ for(int k = 0; k < lens[2]; k++) {
+ body[i][j][k] = array[i*dims[0] + j*dims[1] + k];
+ }
+ }
+ }
+
+ for(int i = 0; i < lens[0]; i++) {
+ for(int j = 0; j < lens[1]; j++) {
+ env->ReleaseBooleanArrayElements(jArray[i][j], body[i][j], 0);
+ }
+ delete[] jArray[i];
+ delete[] body[i];
+ }
+ delete[] jArray;
+ delete[] body;
+ */
+
+ for(int i = 0; i < lens[0]; i++) {
+ jobjectArray jrow = (jobjectArray)env->GetObjectArrayElement(buffer, i);
+ for(int j = 0; j < lens[1]; j++) {
+ jbooleanArray jArray = (jbooleanArray)env->GetObjectArrayElement(jrow, j);
+ jboolean* body = env->GetBooleanArrayElements(jArray,0);
+ for(int k = 0; k < lens[2]; k++) {
+ body[k] = array[i*dims[0] + j*dims[1] + k];
+ }
+ env->ReleaseBooleanArrayElements(jArray, body, 0);
+ }
+ }
+}
+
+void AparapiBuffer::inflateByte3D(JNIEnv *env, KernelArg* arg) {
+
+ jobjectArray buffer = (jobjectArray)javaObject;
+ jbyte* array = (jbyte*)data;
+ /*
+ jbyteArray** jArray = new jbyteArray*[lens[0]];
+ jbyte*** body = new jbyte**[lens[0]];
+
+ for(int i = 0; i < lens[0]; i++) {
+ jobjectArray jrow = (jobjectArray)env->GetObjectArrayElement(buffer, i);
+ jArray[i] = new jbyteArray[lens[0]];
+ body[i] = new jbyte*[lens[0]];
+ for(int j = 0; j < lens[1]; j++) {
+ jArray[i][j] = (jbyteArray)env->GetObjectArrayElement(jrow, j);
+ body[i][j] = env->GetByteArrayElements(jArray[i][j],0);
+ }
+ }
+
+ #pragma omp parallel for
+ for(int i = 0; i < lens[0]; i++) {
+ for(int j = 0; j < lens[1]; j++) {
+ for(int k = 0; k < lens[2]; k++) {
+ body[i][j][k] = array[i*dims[0] + j*dims[1] + k];
+ }
+ }
+ }
+
+ for(int i = 0; i < lens[0]; i++) {
+ for(int j = 0; j < lens[1]; j++) {
+ env->ReleaseByteArrayElements(jArray[i][j], body[i][j], 0);
+ }
+ delete[] jArray[i];
+ delete[] body[i];
+ }
+ delete[] jArray;
+ delete[] body;
+ */
+
+ for(int i = 0; i < lens[0]; i++) {
+ jobjectArray jrow = (jobjectArray)env->GetObjectArrayElement(buffer, i);
+ for(int j = 0; j < lens[1]; j++) {
+ jbyteArray jArray = (jbyteArray)env->GetObjectArrayElement(jrow, j);
+ jbyte* body = env->GetByteArrayElements(jArray,0);
+ for(int k = 0; k < lens[2]; k++) {
+ body[k] = array[i*dims[0] + j*dims[1] + k];
+ }
+ env->ReleaseByteArrayElements(jArray, body, 0);
+ }
+ }
+}
+
+void AparapiBuffer::inflateShort3D(JNIEnv *env, KernelArg* arg) {
+
+ jobjectArray buffer = (jobjectArray)javaObject;
+ jshort* array = (jshort*)data;
+ /*
+ jshortArray** jArray = new jshortArray*[lens[0]];
+ jshort*** body = new jshort**[lens[0]];
+
+ for(int i = 0; i < lens[0]; i++) {
+ jobjectArray jrow = (jobjectArray)env->GetObjectArrayElement(buffer, i);
+ jArray[i] = new jshortArray[lens[0]];
+ body[i] = new jshort*[lens[0]];
+ for(int j = 0; j < lens[1]; j++) {
+ jArray[i][j] = (jshortArray)env->GetObjectArrayElement(jrow, j);
+ body[i][j] = env->GetShortArrayElements(jArray[i][j],0);
+ }
+ }
+
+ #pragma omp parallel for
+ for(int i = 0; i < lens[0]; i++) {
+ for(int j = 0; j < lens[1]; j++) {
+ for(int k = 0; k < lens[2]; k++) {
+ body[i][j][k] = array[i*dims[0] + j*dims[1] + k];
+ }
+ }
+ }
+
+ for(int i = 0; i < lens[0]; i++) {
+ for(int j = 0; j < lens[1]; j++) {
+ env->ReleaseShortArrayElements(jArray[i][j], body[i][j], 0);
+ }
+ delete[] jArray[i];
+ delete[] body[i];
+ }
+ delete[] jArray;
+ delete[] body;
+ */
+
+ for(int i = 0; i < lens[0]; i++) {
+ jobjectArray jrow = (jobjectArray)env->GetObjectArrayElement(buffer, i);
+ for(int j = 0; j < lens[1]; j++) {
+ jshortArray jArray = (jshortArray)env->GetObjectArrayElement(jrow, j);
+ jshort* body = env->GetShortArrayElements(jArray,0);
+ for(int k = 0; k < lens[2]; k++) {
+ body[k] = array[i*dims[0] + j*dims[1] + k];
+ }
+ env->ReleaseShortArrayElements(jArray, body, 0);
+ }
+ }
+}
+
+void AparapiBuffer::inflateInt3D(JNIEnv *env, KernelArg* arg) {
+
+ jobjectArray buffer = (jobjectArray)javaObject;
+ jint* array = (jint*)data;
+ /*
+ jintArray** jArray = new jintArray*[lens[0]];
+ jint*** body = new jint**[lens[0]];
+
+ for(int i = 0; i < lens[0]; i++) {
+ jobjectArray jrow = (jobjectArray)env->GetObjectArrayElement(buffer, i);
+ jArray[i] = new jintArray[lens[0]];
+ body[i] = new jint*[lens[0]];
+ for(int j = 0; j < lens[1]; j++) {
+ jArray[i][j] = (jintArray)env->GetObjectArrayElement(jrow, j);
+ body[i][j] = env->GetIntArrayElements(jArray[i][j],0);
+ }
+ }
+
+ #pragma omp parallel for
+ for(int i = 0; i < lens[0]; i++) {
+ for(int j = 0; j < lens[1]; j++) {
+ for(int k = 0; k < lens[2]; k++) {
+ body[i][j][k] = array[i*dims[0] + j*dims[1] + k];
+ }
+ }
+ }
+
+ for(int i = 0; i < lens[0]; i++) {
+ for(int j = 0; j < lens[1]; j++) {
+ env->ReleaseIntArrayElements(jArray[i][j], body[i][j], 0);
+ }
+ delete[] jArray[i];
+ delete[] body[i];
+ }
+ delete[] jArray;
+ delete[] body;
+ */
+
+ for(int i = 0; i < lens[0]; i++) {
+ jobjectArray jrow = (jobjectArray)env->GetObjectArrayElement(buffer, i);
+ for(int j = 0; j < lens[1]; j++) {
+ jintArray jArray = (jintArray)env->GetObjectArrayElement(jrow, j);
+ jint* body = env->GetIntArrayElements(jArray,0);
+ for(int k = 0; k < lens[2]; k++) {
+ body[k] = array[i*dims[0] + j*dims[1] + k];
+ }
+ env->ReleaseIntArrayElements(jArray, body, 0);
+ }
+ }
+}
+
+void AparapiBuffer::inflateLong3D(JNIEnv *env, KernelArg* arg) {
+
+ jobjectArray buffer = (jobjectArray)javaObject;
+ jlong* array = (jlong*)data;
+ /*
+ jlongArray** jArray = new jlongArray*[lens[0]];
+ jlong*** body = new jlong**[lens[0]];
+
+ for(int i = 0; i < lens[0]; i++) {
+ jobjectArray jrow = (jobjectArray)env->GetObjectArrayElement(buffer, i);
+ jArray[i] = new jlongArray[lens[0]];
+ body[i] = new jlong*[lens[0]];
+ for(int j = 0; j < lens[1]; j++) {
+ jArray[i][j] = (jlongArray)env->GetObjectArrayElement(jrow, j);
+ body[i][j] = env->GetLongArrayElements(jArray[i][j],0);
+ }
+ }
+
+ #pragma omp parallel for
+ for(int i = 0; i < lens[0]; i++) {
+ for(int j = 0; j < lens[1]; j++) {
+ for(int k = 0; k < lens[2]; k++) {
+ body[i][j][k] = array[i*dims[0] + j*dims[1] + k];
+ }
+ }
+ }
+
+ for(int i = 0; i < lens[0]; i++) {
+ for(int j = 0; j < lens[1]; j++) {
+ env->ReleaseLongArrayElements(jArray[i][j], body[i][j], 0);
+ }
+ delete[] jArray[i];
+ delete[] body[i];
+ }
+ delete[] jArray;
+ delete[] body;
+ */
+
+ for(int i = 0; i < lens[0]; i++) {
+ jobjectArray jrow = (jobjectArray)env->GetObjectArrayElement(buffer, i);
+ for(int j = 0; j < lens[1]; j++) {
+ jlongArray jArray = (jlongArray)env->GetObjectArrayElement(jrow, j);
+ jlong* body = env->GetLongArrayElements(jArray,0);
+ for(int k = 0; k < lens[2]; k++) {
+ body[k] = array[i*dims[0] + j*dims[1] + k];
+ }
+ env->ReleaseLongArrayElements(jArray, body, 0);
+ }
+ }
+}
+
+void AparapiBuffer::inflateFloat3D(JNIEnv *env, KernelArg* arg) {
+
+ jobjectArray buffer = (jobjectArray)javaObject;
+ jfloat* array = (jfloat*)data;
+ /*
+ jfloatArray** jArray = new jfloatArray*[lens[0]];
+ jfloat*** body = new jfloat**[lens[0]];
+
+ for(int i = 0; i < lens[0]; i++) {
+ jobjectArray jrow = (jobjectArray)env->GetObjectArrayElement(buffer, i);
+ jArray[i] = new jfloatArray[lens[0]];
+ body[i] = new jfloat*[lens[0]];
+ for(int j = 0; j < lens[1]; j++) {
+ jArray[i][j] = (jfloatArray)env->GetObjectArrayElement(jrow, j);
+ body[i][j] = env->GetFloatArrayElements(jArray[i][j],0);
+ }
+ }
+
+ #pragma omp parallel for
+ for(int i = 0; i < lens[0]; i++) {
+ for(int j = 0; j < lens[1]; j++) {
+ for(int k = 0; k < lens[2]; k++) {
+ body[i][j][k] = array[i*dims[0] + j*dims[1] + k];
+ }
+ }
+ }
+
+ for(int i = 0; i < lens[0]; i++) {
+ for(int j = 0; j < lens[1]; j++) {
+ env->ReleaseFloatArrayElements(jArray[i][j], body[i][j], 0);
+ }
+ delete[] jArray[i];
+ delete[] body[i];
+ }
+ delete[] jArray;
+ delete[] body;
+ */
+
+ for(int i = 0; i < lens[0]; i++) {
+ jobjectArray jrow = (jobjectArray)env->GetObjectArrayElement(buffer, i);
+ for(int j = 0; j < lens[1]; j++) {
+ jfloatArray jArray = (jfloatArray)env->GetObjectArrayElement(jrow, j);
+ jfloat* body = env->GetFloatArrayElements(jArray,0);
+ for(int k = 0; k < lens[2]; k++) {
+ body[k] = array[i*dims[0] + j*dims[1] + k];
+ }
+ env->ReleaseFloatArrayElements(jArray, body, 0);
+ }
+ }
+}
+
+void AparapiBuffer::inflateDouble3D(JNIEnv *env, KernelArg* arg) {
+
+ jobjectArray buffer = (jobjectArray)javaObject;
+ jdouble* array = (jdouble*)data;
+ /*
+ jdoubleArray** jArray = new jdoubleArray*[lens[0]];
+ jdouble*** body = new jdouble**[lens[0]];
+
+ for(int i = 0; i < lens[0]; i++) {
+ jobjectArray jrow = (jobjectArray)env->GetObjectArrayElement(buffer, i);
+ jArray[i] = new jdoubleArray[lens[0]];
+ body[i] = new jdouble*[lens[0]];
+ for(int j = 0; j < lens[1]; j++) {
+ jArray[i][j] = (jdoubleArray)env->GetObjectArrayElement(jrow, j);
+ body[i][j] = env->GetDoubleArrayElements(jArray[i][j],0);
+ }
+ }
+
+ #pragma omp parallel for
+ for(int i = 0; i < lens[0]; i++) {
+ for(int j = 0; j < lens[1]; j++) {
+ for(int k = 0; k < lens[2]; k++) {
+ body[i][j][k] = array[i*dims[0] + j*dims[1] + k];
+ }
+ }
+ }
+
+ for(int i = 0; i < lens[0]; i++) {
+ for(int j = 0; j < lens[1]; j++) {
+ env->ReleaseDoubleArrayElements(jArray[i][j], body[i][j], 0);
+ }
+ delete[] jArray[i];
+ delete[] body[i];
+ }
+ delete[] jArray;
+ delete[] body;
+ */
+
+ for(int i = 0; i < lens[0]; i++) {
+ jobjectArray jrow = (jobjectArray)env->GetObjectArrayElement(buffer, i);
+ for(int j = 0; j < lens[1]; j++) {
+ jdoubleArray jArray = (jdoubleArray)env->GetObjectArrayElement(jrow, j);
+ jdouble* body = env->GetDoubleArrayElements(jArray,0);
+ for(int k = 0; k < lens[2]; k++) {
+ body[k] = array[i*dims[0] + j*dims[1] + k];
+ }
+ env->ReleaseDoubleArrayElements(jArray, body, 0);
+ }
+ }
+}
+
+void AparapiBuffer::deleteBuffer(KernelArg* arg)
+{
+ delete[] dims;
+ delete[] lens;
+ if(arg->isBoolean()) {
+ delete[] (jboolean*)data;
+ } else if(arg->isByte()) {
+ delete[] (jbyte*)data;
+ } else if(arg->isShort()) {
+ delete[] (jshort*)data;
+ } else if(arg->isInt()) {
+ delete[] (jint*)data;
+ } else if(arg->isLong()) {
+ delete[] (jlong*)data;
+ } else if(arg->isFloat()) {
+ delete[] (jfloat*)data;
+ } else if(arg->isDouble()) {
+ delete[] (jdouble*)data;
+ }
+}
diff --git a/com.amd.aparapi.jni/src/cpp/runKernel/AparapiBuffer.h b/com.amd.aparapi.jni/src/cpp/runKernel/AparapiBuffer.h
new file mode 100644
index 00000000..b5a05d82
--- /dev/null
+++ b/com.amd.aparapi.jni/src/cpp/runKernel/AparapiBuffer.h
@@ -0,0 +1,133 @@
+/*
+ Copyright (c) 2010-2011, Advanced Micro Devices, Inc.
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
+ following conditions are met:
+
+ Redistributions of source code must retain the above copyright notice, this list of conditions and the following
+ disclaimer.
+
+ Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
+ disclaimer in the documentation and/or other materials provided with the distribution.
+
+ Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ If you use the software (in whole or in part), you shall adhere to all applicable U.S., European, and other export
+ laws, including but not limited to the U.S. Export Administration Regulations ("EAR"), (15 C.F.R. Sections 730
+ through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June 2000. Further, pursuant to Section 740.6 of
+ the EAR, you hereby certify that, except pursuant to a license granted by the United States Department of Commerce
+ Bureau of Industry and Security or as otherwise permitted pursuant to a License Exception under the U.S. Export
+ Administration Regulations ("EAR"), you will not (1) export, re-export or release to a national of a country in
+ Country Groups D:1, E:1 or E:2 any restricted technology, software, or source code you receive hereunder, or (2)
+ export to Country Groups D:1, E:1 or E:2 the direct product of such technology or software, if such foreign produced
+ direct product is subject to national security controls as identified on the Commerce Control List (currently
+ found in Supplement 1 to Part 774 of EAR). For the most current Country Group listings, or for additional
+ information about the EAR or your obligations under those regulations, please refer to the U.S. Bureau of Industry
+ and Security?s website at http://www.bis.doc.gov/.
+ */
+
+#ifndef APARAPIBUFFER_H
+#define APARAPIBUFFER_H
+#include "Common.h"
+#include "ProfileInfo.h"
+#include "com_amd_aparapi_internal_jni_KernelRunnerJNI.h"
+
+class KernelArg;
+
+class AparapiBuffer{
+
+private:
+
+ static int isFloat(int type){
+ return(type&com_amd_aparapi_internal_jni_KernelRunnerJNI_ARG_FLOAT);
+ }
+ static int isLong(int type){
+ return (type&com_amd_aparapi_internal_jni_KernelRunnerJNI_ARG_LONG);
+ }
+ static int isInt(int type){
+ return (type&com_amd_aparapi_internal_jni_KernelRunnerJNI_ARG_INT);
+ }
+ static int isDouble(int type){
+ return (type&com_amd_aparapi_internal_jni_KernelRunnerJNI_ARG_DOUBLE);
+ }
+ static int isBoolean(int type){
+ return (type&com_amd_aparapi_internal_jni_KernelRunnerJNI_ARG_BOOLEAN);
+ }
+ static int isByte(int type){
+ return (type&com_amd_aparapi_internal_jni_KernelRunnerJNI_ARG_BYTE);
+ }
+ static int isShort(int type){
+ return (type&com_amd_aparapi_internal_jni_KernelRunnerJNI_ARG_SHORT);
+ }
+
+public:
+ jobject javaObject; // The java array that this arg is mapped to
+ cl_uint numDims; // sizes of dimensions of the object (array lengths for ND arrays)
+ cl_uint* dims; // sizes of offsets of the object (first element offset in ND arrays)
+ cl_uint* lens; // sizes of dimensions of the object (array lengths for ND arrays)
+ jint lengthInBytes; // bytes in the array or directBuf
+ cl_mem mem; // the opencl buffer
+ void *data; // a copy of the object itself (this is what we pass to OpenCL)
+ cl_uint memMask; // the mask used for createBuffer
+ ProfileInfo read;
+ ProfileInfo write;
+
+ AparapiBuffer();
+ AparapiBuffer(void* _data, cl_uint* _dims, cl_uint _numDims, long _lengthInBytes, jobject _javaObject);
+
+ void deleteBuffer(KernelArg* arg);
+
+ static AparapiBuffer* flatten(JNIEnv *env, jobject arg, int type);
+
+ static AparapiBuffer* flattenBoolean2D(JNIEnv *env, jobject arg);
+ static AparapiBuffer* flattenChar2D(JNIEnv *env, jobject arg);
+ static AparapiBuffer* flattenByte2D(JNIEnv *env, jobject arg);
+ static AparapiBuffer* flattenShort2D(JNIEnv *env, jobject arg);
+ static AparapiBuffer* flattenInt2D(JNIEnv *env, jobject arg);
+ static AparapiBuffer* flattenLong2D(JNIEnv *env, jobject arg);
+ static AparapiBuffer* flattenFloat2D(JNIEnv *env, jobject arg);
+ static AparapiBuffer* flattenDouble2D(JNIEnv *env, jobject arg);
+
+ static AparapiBuffer* flattenBoolean3D(JNIEnv *env, jobject arg);
+ static AparapiBuffer* flattenChar3D(JNIEnv *env, jobject arg);
+ static AparapiBuffer* flattenByte3D(JNIEnv *env, jobject arg);
+ static AparapiBuffer* flattenShort3D(JNIEnv *env, jobject arg);
+ static AparapiBuffer* flattenInt3D(JNIEnv *env, jobject arg);
+ static AparapiBuffer* flattenLong3D(JNIEnv *env, jobject arg);
+ static AparapiBuffer* flattenFloat3D(JNIEnv *env, jobject arg);
+ static AparapiBuffer* flattenDouble3D(JNIEnv *env, jobject arg);
+
+ void inflate(JNIEnv *env, KernelArg* arg);
+
+ void inflateBoolean2D(JNIEnv *env, KernelArg* arg);
+ void inflateChar2D(JNIEnv *env, KernelArg* arg);
+ void inflateByte2D(JNIEnv *env, KernelArg* arg);
+ void inflateShort2D(JNIEnv *env, KernelArg* arg);
+ void inflateInt2D(JNIEnv *env, KernelArg* arg);
+ void inflateLong2D(JNIEnv *env, KernelArg* arg);
+ void inflateFloat2D(JNIEnv *env, KernelArg* arg);
+ void inflateDouble2D(JNIEnv *env, KernelArg* arg);
+
+ void inflateBoolean3D(JNIEnv *env, KernelArg* arg);
+ void inflateChar3D(JNIEnv *env, KernelArg* arg);
+ void inflateByte3D(JNIEnv *env, KernelArg* arg);
+ void inflateShort3D(JNIEnv *env, KernelArg* arg);
+ void inflateInt3D(JNIEnv *env, KernelArg* arg);
+ void inflateLong3D(JNIEnv *env, KernelArg* arg);
+ void inflateFloat3D(JNIEnv *env, KernelArg* arg);
+ void inflateDouble3D(JNIEnv *env, KernelArg* arg);
+
+ jobject getJavaObject(JNIEnv* env, KernelArg* arg);
+};
+
+#endif // ARRAYBUFFER_H
diff --git a/com.amd.aparapi.jni/src/cpp/runKernel/KernelArg.cpp b/com.amd.aparapi.jni/src/cpp/runKernel/KernelArg.cpp
index c7e918b9..0f401cea 100644
--- a/com.amd.aparapi.jni/src/cpp/runKernel/KernelArg.cpp
+++ b/com.amd.aparapi.jni/src/cpp/runKernel/KernelArg.cpp
@@ -34,6 +34,8 @@ KernelArg::KernelArg(JNIEnv *jenv, JNIContext *jniContext, jobject argObj):
jenv->ReleaseStringUTFChars(nameString, nameChars);
if (isArray()){
arrayBuffer = new ArrayBuffer();
+ } else if(isAparapiBuffer()) {
+ aparapiBuffer = AparapiBuffer::flatten(jenv, argObj, type);
}
}
@@ -44,6 +46,13 @@ cl_int KernelArg::setLocalBufferArg(JNIEnv *jenv, int argIdx, int argPos, bool v
return(clSetKernelArg(jniContext->kernel, argPos, (int)arrayBuffer->lengthInBytes, NULL));
}
+cl_int KernelArg::setLocalAparapiBufferArg(JNIEnv *jenv, int argIdx, int argPos, bool verbose) {
+ if (verbose){
+ fprintf(stderr, "ISLOCAL, clSetKernelArg(jniContext->kernel, %d, %d, NULL);\n", argIdx, (int) aparapiBuffer->lengthInBytes);
+ }
+ return(clSetKernelArg(jniContext->kernel, argPos, (int)aparapiBuffer->lengthInBytes, NULL));
+}
+
const char* KernelArg::getTypeName() {
string s = "";
if(isStatic()) {
diff --git a/com.amd.aparapi.jni/src/cpp/runKernel/KernelArg.h b/com.amd.aparapi.jni/src/cpp/runKernel/KernelArg.h
index 75c7a5ca..a54e411e 100644
--- a/com.amd.aparapi.jni/src/cpp/runKernel/KernelArg.h
+++ b/com.amd.aparapi.jni/src/cpp/runKernel/KernelArg.h
@@ -5,6 +5,7 @@
#include "Common.h"
#include "JNIHelper.h"
#include "ArrayBuffer.h"
+#include "AparapiBuffer.h"
#include "com_amd_aparapi_internal_jni_KernelRunnerJNI.h"
#include "Config.h"
#include
@@ -69,6 +70,7 @@ class KernelArg{
jint type; // a bit mask determining the type of this arg
ArrayBuffer *arrayBuffer;
+ AparapiBuffer *aparapiBuffer;
// Uses JNIContext so cant inline here see below
KernelArg(JNIEnv *jenv, JNIContext *jniContext, jobject argObj);
@@ -157,14 +159,14 @@ class KernelArg{
int isConstant(){
return (type&com_amd_aparapi_internal_jni_KernelRunnerJNI_ARG_CONSTANT);
}
- int isAparapiBuf(){
- return (type&com_amd_aparapi_internal_jni_KernelRunnerJNI_ARG_APARAPI_BUF);
+ int isAparapiBuffer(){
+ return (type&com_amd_aparapi_internal_jni_KernelRunnerJNI_ARG_APARAPI_BUFFER);
}
int isBackedByArray(){
return ( (isArray() && (isGlobal() || isConstant())));
}
int needToEnqueueRead(){
- return(((isArray() && isGlobal()) || ((isAparapiBuf()&&isGlobal()))) && (isImplicit()&&isMutableByKernel()));
+ return(((isArray() && isGlobal()) || ((isAparapiBuffer()&&isGlobal()))) && (isImplicit()&&isMutableByKernel()));
}
int needToEnqueueWrite(){
return ((isImplicit()&&isReadByKernel())||(isExplicit()&&isExplicitWrite()));
@@ -188,6 +190,7 @@ class KernelArg{
// Uses JNIContext so can't inline here we below.
cl_int setLocalBufferArg(JNIEnv *jenv, int argIdx, int argPos, bool verbose);
+ cl_int setLocalAparapiBufferArg(JNIEnv *jenv, int argIdx, int argPos, bool verbose);
// Uses JNIContext so can't inline here we below.
cl_int setPrimitiveArg(JNIEnv *jenv, int argIdx, int argPos, bool verbose);
};
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/internal/jni/KernelArgJNI.java b/com.amd.aparapi/src/java/com/amd/aparapi/internal/jni/KernelArgJNI.java
index 19d297f2..270321ff 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/internal/jni/KernelArgJNI.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/internal/jni/KernelArgJNI.java
@@ -47,6 +47,11 @@ public abstract class KernelArgJNI{
*/
@UsedByJNICode protected Object javaArray;
+ /**
+ * If this field represents an aparapi buffer then the instance will be captured here
+ */
+ @UsedByJNICode protected Object javaBuffer;
+
/**
* If this is an array or a buffer then the size (in bytes) is held here
*/
@@ -57,6 +62,18 @@ public abstract class KernelArgJNI{
*/
@UsedByJNICode protected int numElements;
+
+ /**
+ * If this is an multidimensional array then the number of dimensions is stored here
+ */
+ @UsedByJNICode protected int numDims;
+
+
+ /**
+ * If this is an multidimensional array then the dimensions are stored here
+ */
+ @UsedByJNICode protected int[] dims;
+
/**
* If this is an array buffer then the number of elements is stored here.
*
@@ -69,6 +86,8 @@ public abstract class KernelArgJNI{
*/
@UsedByJNICode protected Object array;
+ @UsedByJNICode protected Object buffer;
+
/**
* Field in Kernel class corresponding to this arg
*/
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/internal/jni/KernelRunnerJNI.java b/com.amd.aparapi/src/java/com/amd/aparapi/internal/jni/KernelRunnerJNI.java
index 76aae813..a672093f 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/internal/jni/KernelRunnerJNI.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/internal/jni/KernelRunnerJNI.java
@@ -178,7 +178,7 @@ public abstract class KernelRunnerJNI{
*
* @author gfrost
*/
- @UsedByJNICode protected static final int ARG_APARAPI_BUF = 1 << 15;
+ @UsedByJNICode protected static final int ARG_APARAPI_BUFFER = 1 << 15;
/**
* This 'bit' indicates that the arg has been explicitly marked for reading
@@ -207,23 +207,6 @@ public abstract class KernelRunnerJNI{
*/
@UsedByJNICode protected static final int ARG_OBJ_ARRAY_STRUCT = 1 << 18;
- /**
- * TODO:
- *
- * @see com.amd.aparapi.annotations.UsedByJNICode
- *
- * @author gfrost
- */
- // @UsedByJNICode protected static final int ARG_APARAPI_BUF_HAS_ARRAY = 1 << 19;
-
- /**
- * TODO:
- *
- * @see com.amd.aparapi.annotations.UsedByJNICode
- *
- * @author gfrost
- */
- // @UsedByJNICode protected static final int ARG_APARAPI_BUF_IS_DIRECT = 1 << 20;
/**
* This 'bit' indicates that a particular KernelArg represents a char type (array or primitive).
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelArg.java b/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelArg.java
index 29fe3fc8..9599bf04 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelArg.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelArg.java
@@ -218,4 +218,46 @@ protected Field getField() {
protected void setField(Field field) {
this.field = field;
}
+
+ /**
+ * @return the buffer
+ */
+ protected Object getJavaBuffer() {
+ return javaBuffer;
+ }
+
+ /**
+ * @param buffer the buffer to set
+ */
+ protected void setJavaBuffer(Object buffer) {
+ this.javaBuffer = buffer;
+ }
+
+ /**
+ * @return the number of dimensions to buffer
+ */
+ protected int getNumDims() {
+ return numDims;
+ }
+
+ /**
+ * @param numDims the number of dimensions for the buffer
+ */
+ protected void setNumDims(int numDims) {
+ this.numDims = numDims;
+ }
+
+ /**
+ * @return the dimensions for the buffer
+ */
+ protected int[] getDims() {
+ return dims;
+ }
+
+ /**
+ * @param dims the dimsensions for the buffer
+ */
+ protected void setDims(int[] dims) {
+ this.dims = dims;
+ }
}
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelRunner.java b/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelRunner.java
index 75d37b2a..b036829c 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelRunner.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelRunner.java
@@ -1056,14 +1056,9 @@ && hasGlobalInt32ExtendedAtomicsSupport() && hasLocalInt32BaseAtomicsSupport()
} else {
args[i].setType(args[i].getType() | ARG_GLOBAL);
}
-
- args[i].setArray(null); // will get updated in updateKernelArrayRefs
- args[i].setType(args[i].getType() | ARG_ARRAY);
-
if (isExplicit()) {
args[i].setType(args[i].getType() | ARG_EXPLICIT);
}
-
// for now, treat all write arrays as read-write, see bugzilla issue 4859
// we might come up with a better solution later
args[i].setType(args[i].getType()
@@ -1071,26 +1066,51 @@ && hasGlobalInt32ExtendedAtomicsSupport() && hasLocalInt32BaseAtomicsSupport()
args[i].setType(args[i].getType()
| (entryPoint.getArrayFieldAccesses().contains(field.getName()) ? ARG_READ : 0));
// args[i].type |= ARG_GLOBAL;
- args[i].setType(args[i].getType() | (type.isAssignableFrom(float[].class) ? ARG_FLOAT : 0));
- args[i].setType(args[i].getType() | (type.isAssignableFrom(int[].class) ? ARG_INT : 0));
- args[i].setType(args[i].getType() | (type.isAssignableFrom(boolean[].class) ? ARG_BOOLEAN : 0));
- args[i].setType(args[i].getType() | (type.isAssignableFrom(byte[].class) ? ARG_BYTE : 0));
- args[i].setType(args[i].getType() | (type.isAssignableFrom(char[].class) ? ARG_CHAR : 0));
- args[i].setType(args[i].getType() | (type.isAssignableFrom(double[].class) ? ARG_DOUBLE : 0));
- args[i].setType(args[i].getType() | (type.isAssignableFrom(long[].class) ? ARG_LONG : 0));
- args[i].setType(args[i].getType() | (type.isAssignableFrom(short[].class) ? ARG_SHORT : 0));
-
- // arrays whose length is used will have an int arg holding
- // the length as a kernel param
- if (entryPoint.getArrayFieldArrayLengthUsed().contains(args[i].getName())) {
- args[i].setType(args[i].getType() | ARG_ARRAYLENGTH);
- }
+
if (type.getName().startsWith("[L")) {
- args[i].setType(args[i].getType() | (ARG_OBJ_ARRAY_STRUCT | ARG_WRITE | ARG_READ));
+ args[i].setType(args[i].getType()
+ | (ARG_OBJ_ARRAY_STRUCT |
+ ARG_WRITE |
+ ARG_READ |
+ ARG_APARAPI_BUFFER));
+
if (logger.isLoggable(Level.FINE)) {
logger.fine("tagging " + args[i].getName() + " as (ARG_OBJ_ARRAY_STRUCT | ARG_WRITE | ARG_READ)");
}
+ } else if (type.getName().startsWith("[[")) {
+
+ try {
+ setMultiArrayType(args[i], type);
+ } catch(AparapiException e) {
+ return warnFallBackAndExecute(_entrypointName, _range, _passes, "failed to set kernel arguement " + args[i].getName() + ". Aparapi only supports 2D and 3D arrays.");
+ }
+ } else {
+
+ args[i].setArray(null); // will get updated in updateKernelArrayRefs
+ args[i].setType(args[i].getType() | ARG_ARRAY);
+
+ args[i].setType(args[i].getType() | (type.isAssignableFrom(float[].class) ? ARG_FLOAT : 0));
+ args[i].setType(args[i].getType() | (type.isAssignableFrom(int[].class) ? ARG_INT : 0));
+ args[i].setType(args[i].getType() | (type.isAssignableFrom(boolean[].class) ? ARG_BOOLEAN : 0));
+ args[i].setType(args[i].getType() | (type.isAssignableFrom(byte[].class) ? ARG_BYTE : 0));
+ args[i].setType(args[i].getType() | (type.isAssignableFrom(char[].class) ? ARG_CHAR : 0));
+ args[i].setType(args[i].getType() | (type.isAssignableFrom(double[].class) ? ARG_DOUBLE : 0));
+ args[i].setType(args[i].getType() | (type.isAssignableFrom(long[].class) ? ARG_LONG : 0));
+ args[i].setType(args[i].getType() | (type.isAssignableFrom(short[].class) ? ARG_SHORT : 0));
+
+ // arrays whose length is used will have an int arg holding
+ // the length as a kernel param
+ if (entryPoint.getArrayFieldArrayLengthUsed().contains(args[i].getName())) {
+ args[i].setType(args[i].getType() | ARG_ARRAYLENGTH);
+ }
+
+ if (type.getName().startsWith("[L")) {
+ args[i].setType(args[i].getType() | (ARG_OBJ_ARRAY_STRUCT | ARG_WRITE | ARG_READ));
+ if (logger.isLoggable(Level.FINE)) {
+ logger.fine("tagging " + args[i].getName() + " as (ARG_OBJ_ARRAY_STRUCT | ARG_WRITE | ARG_READ)");
+ }
+ }
}
} else if (type.isAssignableFrom(float.class)) {
args[i].setType(args[i].getType() | ARG_PRIMITIVE);
@@ -1122,10 +1142,7 @@ && hasGlobalInt32ExtendedAtomicsSupport() && hasLocalInt32BaseAtomicsSupport()
e.printStackTrace();
}
- args[i].setPrimitiveSize(((args[i].getType() & ARG_FLOAT) != 0 ? 4 : (args[i].getType() & ARG_INT) != 0 ? 4
- : (args[i].getType() & ARG_BYTE) != 0 ? 1 : (args[i].getType() & ARG_CHAR) != 0 ? 2
- : (args[i].getType() & ARG_BOOLEAN) != 0 ? 1 : (args[i].getType() & ARG_SHORT) != 0 ? 2 : (args[i]
- .getType() & ARG_LONG) != 0 ? 8 : (args[i].getType() & ARG_DOUBLE) != 0 ? 8 : 0));
+ args[i].setPrimitiveSize(getPrimitiveSize(args[i].getType()));
if (logger.isLoggable(Level.FINE)) {
logger.fine("arg " + i + ", " + args[i].getName() + ", type=" + Integer.toHexString(args[i].getType())
@@ -1177,6 +1194,87 @@ && hasGlobalInt32ExtendedAtomicsSupport() && hasLocalInt32BaseAtomicsSupport()
return kernel;
}
+
+ private int getPrimitiveSize(int type) {
+ if ((type & ARG_FLOAT) != 0) {
+ return 4;
+ } else if ((type & ARG_INT) != 0) {
+ return 4;
+ } else if ((type & ARG_BYTE) != 0) {
+ return 1;
+ } else if ((type & ARG_CHAR) != 0) {
+ return 2;
+ } else if ((type & ARG_BOOLEAN) != 0) {
+ return 1;
+ } else if ((type & ARG_SHORT) != 0) {
+ return 2;
+ } else if ((type & ARG_LONG) != 0) {
+ return 8;
+ } else if ((type & ARG_DOUBLE) != 0) {
+ return 8;
+ }
+ return 0;
+ }
+
+ private void setMultiArrayType(KernelArg arg, Class> type) throws AparapiException {
+ arg.setType(arg.getType() | (ARG_WRITE | ARG_READ | ARG_APARAPI_BUFFER));
+ int numDims = 0;
+ while(type.getName().startsWith("[[[[")) {
+ throw new AparapiException("Aparapi only supports 2D and 3D arrays.");
+ }
+ arg.setType(arg.getType() | ARG_ARRAYLENGTH);
+ while(type.getName().charAt(numDims) == '[') {
+ numDims++;
+ }
+ Object buffer = new Object();
+ try {
+ buffer = arg.getField().get(kernel);
+ } catch(IllegalAccessException e) {
+ e.printStackTrace();
+ }
+ arg.setJavaBuffer(buffer);
+ arg.setNumDims(numDims);
+ Object subBuffer = buffer;
+ int[] dims = new int[numDims];
+ for(int i = 0; i < numDims-1; i++) {
+ dims[i] = Array.getLength(subBuffer);
+ subBuffer = Array.get(subBuffer, 0);
+ }
+ dims[numDims-1] = Array.getLength(subBuffer);
+ arg.setDims(dims);
+
+ if (subBuffer.getClass().isAssignableFrom(float[].class)) {
+ arg.setType(arg.getType() | ARG_FLOAT);
+ }
+ if (subBuffer.getClass().isAssignableFrom(int[].class)) {
+ arg.setType(arg.getType() | ARG_INT);
+ }
+ if (subBuffer.getClass().isAssignableFrom(boolean[].class)) {
+ arg.setType(arg.getType() | ARG_BOOLEAN);
+ }
+ if (subBuffer.getClass().isAssignableFrom(byte[].class)) {
+ arg.setType(arg.getType() | ARG_BYTE);
+ }
+ if (subBuffer.getClass().isAssignableFrom(char[].class)) {
+ arg.setType(arg.getType() | ARG_CHAR);
+ }
+ if (subBuffer.getClass().isAssignableFrom(double[].class)) {
+ arg.setType(arg.getType() | ARG_DOUBLE);
+ }
+ if (subBuffer.getClass().isAssignableFrom(long[].class)) {
+ arg.setType(arg.getType() | ARG_LONG);
+ }
+ if (subBuffer.getClass().isAssignableFrom(short[].class)) {
+ arg.setType(arg.getType() | ARG_SHORT);
+ }
+ int primitiveSize = getPrimitiveSize(arg.getType());
+ int totalElements = 1;
+ for(int i = 0; i < numDims; i++) {
+ totalElements *= dims[i];
+ }
+ arg.setSizeInBytes(totalElements * primitiveSize);
+ }
+
private final Set