/* * Copyright (C) 2014 Zijia Guo, Andreas Maier * CONRAD is developed as an Open Source project under the GNU General Public License (GPL). */ package edu.stanford.rsl.conrad.opencl.shapes; import java.nio.FloatBuffer; import java.util.ArrayList; import com.jogamp.opencl.CLBuffer; import com.jogamp.opencl.CLCommandQueue; import com.jogamp.opencl.CLContext; import com.jogamp.opencl.CLDevice; import com.jogamp.opencl.CLKernel; import com.jogamp.opencl.CLMemory.Mem; import edu.stanford.rsl.apps.gui.opengl.PointCloudViewer; import edu.stanford.rsl.conrad.geometry.shapes.simple.Cone; import edu.stanford.rsl.conrad.geometry.shapes.simple.PointND; import edu.stanford.rsl.conrad.numerics.SimpleMatrix; import edu.stanford.rsl.conrad.numerics.SimpleOperators; import edu.stanford.rsl.conrad.numerics.SimpleVector; import edu.stanford.rsl.conrad.opencl.OpenCLEvaluatable; import edu.stanford.rsl.conrad.opencl.OpenCLUtil; public class OpenCLCone extends Cone implements OpenCLEvaluatable { private static final long serialVersionUID = -2146326261229488589L; protected CLContext context; protected CLDevice device; protected CLBuffer<FloatBuffer> parameter; public OpenCLCone(CLDevice device) { // TODO Auto-generated constructor stub } /** * @param dx * @param dy * @param dz */ public OpenCLCone(double dx, double dy, double dz, CLDevice device) { super(dx, dy, dz); // TODO Auto-generated constructor stub this.context = device.getContext(); this.device = device; OpenCLUtil.initSimpleObjectEvaluator(context); // ?? handleParameter(dx, dy, dz); } /** * @param c */ public OpenCLCone(Cone c, CLDevice device) { //TODO: The CPU cone still has a different radius then the GPU cone! We need to clarify which one is correct this(SimpleOperators.multiplyElementWise( SimpleOperators.subtract(c.getMax().getAbstractVector(),c.getMin().getAbstractVector()), new SimpleVector(0.5,0.5,1)) , device); this.transform = c.getTransform(); } /** * @param c */ public OpenCLCone(SimpleVector paras, CLDevice device) { this(paras.getElement(0), paras.getElement(1), paras.getElement(2), device); } protected void handleParameter(double dx, double dy, double dz){ double a = (dx)/(dz); double b = (dy)/(dz); this.parameter = context.createFloatBuffer(5, Mem.READ_ONLY); /* this.parameter = context.createFloatBuffer(5, Mem.READ_ONLY); this.parameter.getBuffer().put(-(float)min.get(0)); //parameter[0] --> dx this.parameter.getBuffer().put(-(float)min.get(1)); //parameter[1] --> dy this.parameter.getBuffer().put(-(float)min.get(2)); //parameter[2] --> dz */ this.parameter.getBuffer().put((float)dx); this.parameter.getBuffer().put((float)dy); this.parameter.getBuffer().put((float)dz); this.parameter.getBuffer().put((float)a); //parameter[3] this.parameter.getBuffer().put((float)b); //parameter[4] this.parameter.getBuffer().rewind(); device.createCommandQueue().putWriteBuffer(this.parameter, true); } @Override public boolean isClockwise() { // TODO Auto-generated method stub return true; } @Override public boolean isTimeVariant() { // TODO Auto-generated method stub return false; } @Override public void evaluate(CLBuffer<FloatBuffer> samplingPoints, CLBuffer<FloatBuffer> outputBuffer) { // TODO Auto-generated method stub int elementCount = samplingPoints.getBuffer().capacity()/2; // capacity? 2 or 3? evaluate(samplingPoints, outputBuffer, (int)Math.sqrt(elementCount), (int)Math.sqrt(elementCount)); } @Override public void evaluate(CLBuffer<FloatBuffer> samplingPoints, CLBuffer<FloatBuffer> outputBuffer, int elementCountU, int elementCountV) { // TODO Auto-generated method stub int elementCount = samplingPoints.getBuffer().capacity()/2; int localWorkSize = Math.min(device.getMaxWorkGroupSize(), 256); int globalWorkSize = OpenCLUtil.roundUp(localWorkSize, elementCount); // to guaranty that global size is interger multiple of group size (local size) CLKernel kernel = OpenCLUtil.simpleObjects.createCLKernel("evaluateCone"); kernel.putArgs(parameter, samplingPoints, outputBuffer).putArg(elementCountU).putArg(elementCountV); CLCommandQueue clc = device.createCommandQueue(); clc.put1DRangeKernel(kernel, 0, globalWorkSize, localWorkSize).finish(); kernel.release(); clc.release(); // SimpleMatrix transform SimpleMatrix transform = SimpleMatrix.I_4.clone(); transform.setSubMatrixValue(0, 0, this.transform.getRotation(3)); transform.setSubColValue(0, 3, this.transform.getTranslation(3)); OpenCLUtil.transformPoints(outputBuffer, transform, context, device); } public static void main(String [] args){ CLContext context = OpenCLUtil.getStaticContext(); CLDevice device = context.getMaxFlopsDevice(); int u = 3; int v = 10; Cone cone = new Cone(12,12,12); ArrayList<PointND> cpu = cone.getPointCloud(u,v); int numPoints = u*v; OpenCLCone clcone = new OpenCLCone(cone, device); CLBuffer<FloatBuffer> samplingPoints = OpenCLUtil.generateSamplingPoints(u, v, context, device); CLBuffer<FloatBuffer> outputBuffer = context.createFloatBuffer(3*numPoints, Mem.READ_WRITE); clcone.evaluate(samplingPoints, outputBuffer, u, v); CLCommandQueue queue = device.createCommandQueue(); queue.putReadBuffer(outputBuffer, true); queue.release(); ArrayList<PointND> gpu = new ArrayList<PointND>(); double error =0; for (int i=0; i< numPoints; i++){ PointND point = new PointND(outputBuffer.getBuffer().get(), outputBuffer.getBuffer().get(), outputBuffer.getBuffer().get()); gpu.add(point); error += point.euclideanDistance(cpu.get(i)); } samplingPoints.release(); outputBuffer.release(); PointCloudViewer pcv = new PointCloudViewer("gpu points with error " + error/ numPoints, gpu); pcv.setVisible(true); } }