package edu.stanford.rsl.conrad.opencl;

import java.nio.FloatBuffer;
import java.nio.IntBuffer;

import com.jogamp.opencl.CLBuffer;
import com.jogamp.opencl.CLCommandQueue;
import com.jogamp.opencl.CLContext;
import com.jogamp.opencl.CLDevice;
import com.jogamp.opencl.CLKernel;
import com.jogamp.opencl.CLMemory;
import com.jogamp.opencl.CLMemory.Mem;

import edu.stanford.rsl.conrad.numerics.SimpleMatrix;
import edu.stanford.rsl.conrad.numerics.SimpleVector;

/**
 * Renders OpenCL Splines into a screen buffer.
 *
 * <p>Every drawing/projection method follows the same pattern: obtain a kernel by name from
 * {@code OpenCLUtil.getRenderInstance()}, bind its arguments, enqueue it as a 1D range on a
 * freshly created command queue, wait for completion, and release kernel and queue.
 *
 * <p>TODO: move the different renderers such as ray casters and the z-buffer renderer into
 * separate classes, i.e. create new subclasses derived from this one. This will reduce
 * duplicate code a lot.
 *
 * @author akmaier
 * @see OpenCLAppendBufferRenderer
 */
public class OpenCLRenderer {

	/** Upper bound applied to the device's maximum work-group size when choosing the local work size. */
	private static final int MAX_LOCAL_WORK_SIZE = 256;

	/** Number of rows of the projection matrix uploaded by {@link #setProjectionMatrix(SimpleMatrix)}. */
	private static final int P_MATRIX_ROWS = 3;

	/** Number of columns of the projection matrix uploaded by {@link #setProjectionMatrix(SimpleMatrix)}. */
	private static final int P_MATRIX_COLS = 4;

	protected CLContext context;
	protected CLDevice device;
	/** Device buffer holding the 3x4 projection matrix in row-major order; {@code null} until first set. */
	protected CLBuffer<FloatBuffer> pMatrix;
	protected int width;
	protected int height;

	/**
	 * Creates a renderer bound to the given device and its context and initializes the
	 * render program via {@code OpenCLUtil.initRender}.
	 *
	 * @param device the OpenCL device on which all command queues are created
	 */
	public OpenCLRenderer(CLDevice device) {
		this.context = device.getContext();
		this.device = device;
		OpenCLUtil.initRender(context);
	}

	/**
	 * Releases the projection matrix buffer, if one has been created.
	 * Safe to call repeatedly or before {@link #setProjectionMatrix(SimpleMatrix)}.
	 */
	public void release() {
		if (pMatrix != null) {
			pMatrix.release();
			pMatrix = null;
		}
	}

	/**
	 * Allocates a float buffer of {@code width * height} elements, fills it with zeros and
	 * writes it to the device (blocking).
	 *
	 * @param width  number of columns
	 * @param height number of rows
	 * @param flags  memory flags forwarded to {@code CLContext.createFloatBuffer}
	 * @return the newly created buffer; the caller is responsible for releasing it
	 */
	public CLBuffer<FloatBuffer> generateFloatBuffer(int width, int height, CLMemory.Mem... flags) {
		int pixelCount = width * height;
		CLBuffer<FloatBuffer> screenBuffer = context.createFloatBuffer(pixelCount, flags);
		FloatBuffer host = screenBuffer.getBuffer();
		for (int i = 0; i < pixelCount; i++) {
			host.put(0.f);
		}
		host.rewind();
		// The queue is only needed for this single upload; release it so it does not leak.
		CLCommandQueue queue = device.createCommandQueue();
		try {
			queue.putWriteBuffer(screenBuffer, true).finish();
		} finally {
			queue.release();
		}
		return screenBuffer;
	}

	/**
	 * Allocates an int buffer of {@code width * height} elements, fills it with zeros and
	 * writes it to the device (blocking).
	 *
	 * @param width  number of columns
	 * @param height number of rows
	 * @param flags  memory flags forwarded to {@code CLContext.createIntBuffer}
	 * @return the newly created buffer; the caller is responsible for releasing it
	 */
	public CLBuffer<IntBuffer> generateIntBuffer(int width, int height, CLMemory.Mem... flags) {
		int pixelCount = width * height;
		CLBuffer<IntBuffer> screenBuffer = context.createIntBuffer(pixelCount, flags);
		IntBuffer host = screenBuffer.getBuffer();
		for (int i = 0; i < pixelCount; i++) {
			host.put(0);
		}
		host.rewind();
		// The queue is only needed for this single upload; release it so it does not leak.
		CLCommandQueue queue = device.createCommandQueue();
		try {
			queue.putWriteBuffer(screenBuffer, true).finish();
		} finally {
			queue.release();
		}
		return screenBuffer;
	}

	/**
	 * Uploads the upper-left 3x4 part of {@code m} to the device as the projection matrix.
	 * The elements are stored row by row. The device buffer is created lazily on first use
	 * and reused afterwards.
	 *
	 * @param m matrix providing at least elements (0,0) through (2,3)
	 */
	public void setProjectionMatrix(SimpleMatrix m) {
		if (pMatrix == null) {
			pMatrix = context.createFloatBuffer(P_MATRIX_ROWS * P_MATRIX_COLS, Mem.READ_ONLY);
		}
		FloatBuffer host = pMatrix.getBuffer();
		host.clear();
		for (int row = 0; row < P_MATRIX_ROWS; row++) {
			for (int col = 0; col < P_MATRIX_COLS; col++) {
				host.put((float) m.getElement(row, col));
			}
		}
		host.rewind();
		CLCommandQueue clc = device.createCommandQueue();
		try {
			// Non-blocking write; finish() waits until the transfer has completed.
			clc.putWriteBuffer(pMatrix, false).finish();
		} finally {
			clc.release();
		}
	}

	/**
	 * Stores the screen dimensions used by the drawing methods.
	 *
	 * @param width  screen width; passed to the draw kernels and used for the ray-cast work size
	 * @param height screen height; used together with {@code width} for the ray-cast work size
	 */
	public void init(int width, int height) {
		this.width = width;
		this.height = height;
	}

	/**
	 * Reads the buffer back from the device and prints every element to standard output,
	 * one value per line. The host buffer is rewound afterwards.
	 *
	 * @param pointBuffer the buffer to dump
	 */
	public void debugOut(CLBuffer<FloatBuffer> pointBuffer) {
		CLCommandQueue clc = device.createCommandQueue();
		try {
			clc.putReadBuffer(pointBuffer, true).finish();
		} finally {
			clc.release();
		}
		FloatBuffer host = pointBuffer.getBuffer();
		int count = host.capacity();
		for (int j = 0; j < count; j++) {
			System.out.println(host.get());
		}
		host.rewind();
	}

	/**
	 * Runs the {@code "project"} kernel on the given points using the current projection
	 * matrix. One work item is requested per three floats of the buffer.
	 *
	 * @param pointBuffer device buffer handed to the kernel; presumably holds xyz triples that
	 *                    are projected in place (kernel source is not part of this file)
	 */
	public void project(CLBuffer<FloatBuffer> pointBuffer) {
		int elementCount = pointBuffer.getBuffer().capacity() / 3;
		CLKernel kernel = OpenCLUtil.getRenderInstance().createCLKernel("project");
		try {
			kernel.putArgs(pMatrix, pointBuffer)
				.putArg(elementCount);
			runKernel(kernel, elementCount);
		} finally {
			kernel.release();
		}
	}

	/**
	 * Runs the {@code "projectTranslate"} kernel on the given points using the current
	 * projection matrix and the first three elements of {@code translation}.
	 *
	 * @param pointBuffer device buffer handed to the kernel; one work item is requested per
	 *                    three floats of the buffer
	 * @param translation vector whose elements 0..2 are passed to the kernel as floats
	 */
	public void project(CLBuffer<FloatBuffer> pointBuffer, SimpleVector translation) {
		int elementCount = pointBuffer.getBuffer().capacity() / 3;
		CLKernel kernel = OpenCLUtil.getRenderInstance().createCLKernel("projectTranslate");
		try {
			kernel.putArg(pMatrix)
				.putArg((float) translation.getElement(0))
				.putArg((float) translation.getElement(1))
				.putArg((float) translation.getElement(2))
				.putArg(pointBuffer)
				.putArg(elementCount);
			runKernel(kernel, elementCount);
		} finally {
			kernel.release();
		}
	}

	/**
	 * Runs the {@code "drawTriangles"} kernel.
	 *
	 * @param pointBuffer  device buffer with the input points; one work item is requested per
	 *                     three floats of the buffer
	 * @param screenBuffer device buffer passed to the kernel as render target
	 * @param id           identifier passed through to the kernel
	 * @return always 0
	 */
	public int drawTriangles(CLBuffer<FloatBuffer> pointBuffer, CLBuffer<FloatBuffer> screenBuffer, int id) {
		int elementCount = pointBuffer.getBuffer().capacity() / 3;
		CLKernel kernel = OpenCLUtil.getRenderInstance().createCLKernel("drawTriangles");
		try {
			kernel.putArgs(pointBuffer, screenBuffer)
				.putArg(width)
				.putArg(id)
				.putArg(elementCount);
			runKernel(kernel, elementCount);
		} finally {
			kernel.release();
		}
		return 0;
	}

	/**
	 * Runs the {@code "drawTrianglesZBuffer"} kernel.
	 *
	 * @param pointBuffer  device buffer with the input points; one work item is requested per
	 *                     three floats of the buffer
	 * @param screenBuffer device buffer passed to the kernel as render target
	 * @param zBuffer      device int buffer passed to the kernel as z-buffer
	 * @param id           identifier passed through to the kernel
	 */
	public void drawTrianglesZBuffer(CLBuffer<FloatBuffer> pointBuffer, CLBuffer<FloatBuffer> screenBuffer, CLBuffer<IntBuffer> zBuffer, int id) {
		int elementCount = pointBuffer.getBuffer().capacity() / 3;
		CLKernel kernel = OpenCLUtil.getRenderInstance().createCLKernel("drawTrianglesZBuffer");
		try {
			kernel.putArgs(pointBuffer, screenBuffer, zBuffer)
				.putArg(width)
				.putArg(id)
				.putArg(elementCount);
			runKernel(kernel, elementCount);
		} finally {
			kernel.release();
		}
	}

	/**
	 * Runs the {@code "fillMaxMinValues"} kernel. One work item is requested per four floats
	 * of {@code ranges}.
	 *
	 * @param pointBuffer device buffer with the input points
	 * @param ranges      device buffer passed to the kernel for the computed ranges
	 */
	public void computeMinMaxValues(CLBuffer<FloatBuffer> pointBuffer, CLBuffer<FloatBuffer> ranges) {
		int elementCount = ranges.getBuffer().capacity() / 4;
		CLKernel kernel = OpenCLUtil.getRenderInstance().createCLKernel("fillMaxMinValues");
		try {
			kernel.putArgs(pointBuffer, ranges)
				.putArg(elementCount);
			runKernel(kernel, elementCount);
		} finally {
			kernel.release();
		}
	}

	/**
	 * Runs the {@code "drawTrianglesRayCast"} kernel with one work item requested per screen
	 * pixel ({@code width * height}). The kernel is invoked once over the full range
	 * {@code [0, controlPoints)}.
	 *
	 * @param pointBuffer   device buffer with the input points
	 * @param screenBuffer  device buffer passed to the kernel as render target
	 * @param controlPoints value passed to the kernel both as the control point count and as
	 *                      the stop index
	 * @param id            identifier passed through to the kernel
	 */
	public void drawTrianglesRayCast(CLBuffer<FloatBuffer> pointBuffer, CLBuffer<FloatBuffer> screenBuffer, int controlPoints, int id) {
		int elementCount = width * height;
		final int start = 0;
		final int stop = controlPoints;
		CLKernel kernel = OpenCLUtil.getRenderInstance().createCLKernel("drawTrianglesRayCast");
		try {
			kernel.putArgs(pointBuffer, screenBuffer)
				.putArg(width)
				.putArg(controlPoints)
				.putArg(start)
				.putArg(stop)
				.putArg(id)
				.putArg(elementCount);
			runKernel(kernel, elementCount);
		} finally {
			kernel.release();
		}
	}

	/**
	 * Runs the {@code "drawTrianglesRayCastRanges"} kernel with one work item requested per
	 * screen pixel ({@code width * height}).
	 *
	 * @param pointBuffer   device buffer with the input points
	 * @param ranges        device buffer with ranges, e.g. as filled by
	 *                      {@link #computeMinMaxValues(CLBuffer, CLBuffer)} (presumably; confirm with callers)
	 * @param screenBuffer  device buffer passed to the kernel as render target
	 * @param controlPoints control point count passed to the kernel
	 * @param id            identifier passed through to the kernel
	 */
	public void drawTrianglesRayCastRanges(CLBuffer<FloatBuffer> pointBuffer, CLBuffer<FloatBuffer> ranges, CLBuffer<FloatBuffer> screenBuffer, int controlPoints, int id) {
		int elementCount = width * height;
		CLKernel kernel = OpenCLUtil.getRenderInstance().createCLKernel("drawTrianglesRayCastRanges");
		try {
			kernel.putArgs(pointBuffer, ranges, screenBuffer)
				.putArg(width)
				.putArg(controlPoints)
				.putArg(id)
				.putArg(elementCount);
			runKernel(kernel, elementCount);
		} finally {
			kernel.release();
		}
	}

	/**
	 * Enqueues {@code kernel} as a 1D range on a new command queue and blocks until it has
	 * finished. The queue is released even if the launch fails; the kernel itself is left
	 * for the caller to release.
	 *
	 * @param kernel       kernel with all arguments already bound
	 * @param elementCount number of elements to process; the global work size is this value
	 *                     rounded up by {@code OpenCLUtil.roundUp} using the local work size
	 */
	private void runKernel(CLKernel kernel, int elementCount) {
		int localWorkSize = Math.min(device.getMaxWorkGroupSize(), MAX_LOCAL_WORK_SIZE);
		int globalWorkSize = OpenCLUtil.roundUp(localWorkSize, elementCount);
		CLCommandQueue clc = device.createCommandQueue();
		try {
			clc.put1DRangeKernel(kernel, 0, globalWorkSize, localWorkSize).finish();
		} finally {
			clc.release();
		}
	}
}
/*
 * Copyright (C) 2010-2014 Andreas Maier
 * CONRAD is developed as an Open Source project under the GNU General Public License (GPL).
 */