package edu.stanford.rsl.conrad.opencl;
import java.nio.FloatBuffer;
import java.nio.IntBuffer;
import com.jogamp.opencl.CLBuffer;
import com.jogamp.opencl.CLCommandQueue;
import com.jogamp.opencl.CLDevice;
import com.jogamp.opencl.CLKernel;
import com.jogamp.opencl.CLMemory.Mem;
/**
* Performs rendering using an append buffer. This enables very fast simulation of volumetric rendering.
*
* @author akmaier
*
*/
public class OpenCLAppendBufferRenderer extends OpenCLRenderer {
CLBuffer<IntBuffer> appendBuffer;
CLBuffer<IntBuffer> appendBufferPointer;
CLBuffer<IntBuffer> pixelCount;
int appendBufferSize;
private boolean debug = false;
public OpenCLAppendBufferRenderer(CLDevice device) {
super(device);
OpenCLUtil.initTriangleAppendBufferRender(context);
//OpenCLUtil.initAppendBufferRender(context);
}
public void release(){
super.release();
appendBuffer.release();
appendBuffer = null;
appendBufferPointer.release();
appendBufferPointer = null;
pixelCount.release();
pixelCount = null;
}
public void init (int width, int height){
super.init(width, height);
// assumption: we have about 30 hits per pixel (on average)
appendBufferSize = (int) (width * height * 100);
appendBuffer = generateIntBuffer(appendBufferSize, 3, Mem.READ_WRITE);
appendBufferPointer = generateIntBuffer(1, 1, Mem.READ_WRITE);
pixelCount = generateIntBuffer(width, height, Mem.READ_WRITE);
}
public void resetBuffers(){
appendBufferPointer.getBuffer().put(0);
appendBufferPointer.getBuffer().rewind();
CLCommandQueue clc = device.createCommandQueue();
clc.putWriteBuffer(appendBuffer, false)
.putWriteBuffer(pixelCount, false)
.putWriteBuffer(appendBufferPointer, false).finish();
clc.release();
}
/* public int drawTriangles(CLBuffer<FloatBuffer> pointBuffer, CLBuffer<FloatBuffer> screenBuffer, int id){
int elementCount = pointBuffer.getBuffer().capacity()/3; // Length of arrays to process
int localWorkSize = Math.min(device.getMaxWorkGroupSize(), 3); // Local work size dimensions
int globalWorkSize = OpenCLUtil.roundUp(localWorkSize, elementCount); // rounded up to the nearest multiple of the localWorkSize
CLKernel kernel = OpenCLUtil.getAppendBufferRenderInstance().createCLKernel("drawTrianglesAppendBufferLocal");
kernel.putArgs(pointBuffer, appendBuffer, appendBufferPointer, pixelCount)
.putArg(width)
.putArg(id)
.putArg(elementCount);
// asynchronous write of data to GPU device,
// followed by blocking read to get the computed results back.
CLCommandQueue clc = device.createCommandQueue();
clc.put1DRangeKernel(kernel, 0, globalWorkSize, localWorkSize).finish();
clc.putReadBuffer(appendBufferPointer, true).finish();
int revan = appendBufferPointer.getBuffer().get();
if (debug ) System.out.println("Final append buffer index: " + revan + " local group size: " +globalWorkSize);
appendBufferPointer.getBuffer().rewind();
clc.release();
kernel.release();
return revan;
}*/
public int drawTrianglesGlobal(CLBuffer<FloatBuffer> pointBuffer, CLBuffer<FloatBuffer> screenBuffer, int id, int elementCountU, int elementCountV, int normalsign){
int localWorkSize = Math.min(device.getMaxWorkGroupSize(), 32); // Local work size dimensions
int globalWorkSize = OpenCLUtil.roundUp(localWorkSize, elementCountU*elementCountV); // rounded up to the nearest multiple of the localWorkSize
CLKernel kernel = OpenCLUtil.getAppendBufferRenderInstance().createCLKernel("drawTrianglesAppendBufferGlobal");
kernel.putArgs(pointBuffer, appendBuffer, appendBufferPointer, pixelCount)
.putArg(width)
.putArg(height)
.putArg(id)
.putArg(elementCountU)
.putArg(elementCountV)
.putArg(normalsign);
// asynchronous write of data to GPU device,
// followed by blocking read to get the computed results back.
//CLCommandQueue clc = device.createCommandQueue((1<<31)); // Sequential execution on intel, necessary for printf: (1<<31) == CL_QUEUE_THREAD_LOCAL_EXEC_ENABLE_INTEL
CLCommandQueue clc = device.createCommandQueue();
clc.put1DRangeKernel(kernel, 0, globalWorkSize, localWorkSize).finish();
int revan = 1;
clc.putReadBuffer(appendBufferPointer, true).finish();
clc.release();
revan = appendBufferPointer.getBuffer().get();
if (debug) {
System.out.println("Final append buffer index: " + revan);
}
appendBufferPointer.getBuffer().rewind();
kernel.release();
return revan;
}
public void drawScreen(CLBuffer<FloatBuffer> screen){
// draw to screen buffer:
int elementCount = width * height; // Length of arrays to process
int localWorkSize = Math.min(device.getMaxWorkGroupSize(), 256); // Local work size dimensions
int globalWorkSize = OpenCLUtil.roundUp(localWorkSize, elementCount); // rounded up to the nearest multiple of the localWorkSize
CLKernel kernel = OpenCLUtil.getAppendBufferRenderInstance().createCLKernel("drawAppendBufferScreen");
kernel.putArgs(screen, appendBuffer, pixelCount)
.putArg(width)
.putArg(elementCount);
CLCommandQueue clc = device.createCommandQueue();
clc.put1DRangeKernel(kernel, 0, globalWorkSize, localWorkSize).finish();
clc.release();
kernel.release();
}
public void drawScreenMonochromatic(CLBuffer<FloatBuffer> screen, CLBuffer<FloatBuffer> mu, CLBuffer<IntBuffer> priorities){
// draw to screen buffer:
int elementCount = width * height; // Length of arrays to process
int localWorkSize = Math.min(device.getMaxWorkGroupSize(), 512); // Local work size dimensions
int globalWorkSize = OpenCLUtil.roundUp(localWorkSize, elementCount); // rounded up to the nearest multiple of the localWorkSize
CLKernel kernel = OpenCLUtil.getAppendBufferRenderInstance().createCLKernel("drawAppendBufferScreenMonochromatic");
kernel.putArgs(screen, appendBuffer, pixelCount, mu, priorities)
.putArg(width)
.putArg(elementCount);
//System.out.println(width);
CLCommandQueue clc = device.createCommandQueue();
clc.put1DRangeKernel(kernel, 0, globalWorkSize, localWorkSize).finish();
clc.release();
kernel.release();
}
}
/*
* Copyright (C) 2010-2014 Andreas Maier
* CONRAD is developed as an Open Source project under the GNU General Public License (GPL).
*/