package edu.stanford.rsl.tutorial.motion.compensation; import ij.IJ; import ij.plugin.Memory; import java.io.IOException; import java.nio.FloatBuffer; import java.nio.IntBuffer; import java.util.ArrayList; import java.util.Arrays; import com.jogamp.opencl.CLBuffer; import com.jogamp.opencl.CLCommandQueue; import com.jogamp.opencl.CLContext; import com.jogamp.opencl.CLDevice; import com.jogamp.opencl.CLImage2d; import com.jogamp.opencl.CLImageFormat; import com.jogamp.opencl.CLKernel; import com.jogamp.opencl.CLProgram; import com.jogamp.opencl.CLImageFormat.ChannelOrder; import com.jogamp.opencl.CLImageFormat.ChannelType; import com.jogamp.opencl.CLMemory.Mem; import edu.stanford.rsl.apps.gui.Citeable; import edu.stanford.rsl.conrad.data.numeric.Grid2D; import edu.stanford.rsl.conrad.data.numeric.Grid3D; import edu.stanford.rsl.conrad.data.numeric.opencl.OpenCLGrid3D; import edu.stanford.rsl.conrad.numerics.SimpleMatrix; import edu.stanford.rsl.conrad.opencl.OpenCLUtil; import edu.stanford.rsl.conrad.reconstruction.VOIBasedReconstructionFilter; import edu.stanford.rsl.conrad.utils.CONRAD; import edu.stanford.rsl.conrad.utils.Configuration; import edu.stanford.rsl.conrad.utils.ImageGridBuffer; /** * DO NOT USE IN LARGEVOLUMEMODE * @author Marco * */ public class OpenCLCompensatedBackProjectorTPS extends VOIBasedReconstructionFilter implements Runnable, Citeable{ /** * */ private static final long serialVersionUID = -8615490043940236889L; private ImageGridBuffer inputQueue = new ImageGridBuffer(); static int bpBlockSize[] = {32, 16}; private static boolean debug = false; /** * The OpenCL context */ private CLContext context; /** * The OpenCL program */ private CLProgram program; /** * The OpenCL device */ private CLDevice device; /** * The OpenCL kernel function binding */ private CLKernel kernelFunction; /** * The OpenCL command queue */ private CLCommandQueue commandQueue; /** * The 2D projection texture reference */ private CLImage2d<FloatBuffer> projectionTex = null; /** * The volume data that is to be reconstructed */ protected float h_volume[]; /** * An initial rigid transform applied before the non-linear motion * If null this is given to the GPU as identity matrix * The linear memory holds a 4x4 matrix in column first order */ private CLBuffer<FloatBuffer> d_initialRigidTransform = null; private float[] initialRigidTransform = null; /** * The global variable of the module which stores the * view matrix. */ private CLBuffer<FloatBuffer> projectionMatrix = null; private CLBuffer<FloatBuffer> volumePointer = null; private CLBuffer<FloatBuffer> projectionArray = null; protected ImageGridBuffer projections = new ImageGridBuffer(); protected ArrayList<Integer> projectionsAvailable; protected ArrayList<Integer> projectionsDone; private boolean largeVolumeMode = false; private int nSteps = 1; private int subVolumeZ = 0; private boolean initialized = false; private float[][] coeff; private float[][] pts; private float[][] A; private float[][] b; public OpenCLCompensatedBackProjectorTPS(float[][] coeff, float[][] pts, float[][] A, float[][] b) { super(); this.coeff = coeff; this.A = A; this.b = b; this.pts = pts; } @Override public void prepareForSerialization(){ super.prepareForSerialization(); projectionMatrix = null; volumePointer = null; projectionArray = null; projections = null; projectionsAvailable =null; projectionsDone = null; h_volume = null; initialized = false; // JOCL members program = null; device = null; kernelFunction = null; commandQueue = null; projectionTex = null; context = null; } @Override public void configure() throws Exception{ configured = true; } protected void init(){ if (!initialized) { largeVolumeMode = false; int reconDimensionX = getGeometry().getReconDimensionX(); int reconDimensionY = getGeometry().getReconDimensionY(); int reconDimensionZ = getGeometry().getReconDimensionZ(); projectionsAvailable = new ArrayList<Integer>(); projectionsDone = new ArrayList<Integer>(); // Initialize JOCL. context = OpenCLUtil.getStaticContext(); try { // get the fastest device //device = context.getMaxFlopsDevice(); // create the command queue commandQueue = OpenCLUtil.getStaticCommandQueue(); device = commandQueue.getDevice(); // initialize the program if (program==null || !program.getContext().equals(this.context)){ program = context.createProgram(OpenCLCompensatedBackProjector.class.getResourceAsStream("compensatedBackprojectCLTPS.cl")).build(); } } catch (Exception e) { if (commandQueue!=null) commandQueue.release(); if (kernelFunction != null) kernelFunction.release(); if (program != null) program.release(); e.printStackTrace(); } d_initialRigidTransform = context.createFloatBuffer(12, Mem.READ_ONLY); if (initialRigidTransform==null){ while(d_initialRigidTransform.getBuffer().hasRemaining()) d_initialRigidTransform.getBuffer().put(0); d_initialRigidTransform.getBuffer().put(0, 1); d_initialRigidTransform.getBuffer().put(4, 1); d_initialRigidTransform.getBuffer().put(8, 1); } else{ for (int i = 0; i < initialRigidTransform.length; i++) { d_initialRigidTransform.getBuffer().put(initialRigidTransform[i]); } } d_initialRigidTransform.getBuffer().rewind(); // check space on device: long memory = device.getMaxMemAllocSize(); long availableMemory = (memory); long requiredMemory = (long)((( ((double) reconDimensionX) * reconDimensionY * ((double) reconDimensionZ) * 4) + (((double)Configuration.getGlobalConfiguration().getGeometry().getDetectorHeight()) * Configuration.getGlobalConfiguration().getGeometry().getDetectorWidth() * 4))); if (debug) { System.out.println("Total available Memory on OpenCL card:" + availableMemory); System.out.println("Required Memory on OpenCL card:" + requiredMemory); } if (requiredMemory > availableMemory){ nSteps = (int)OpenCLUtil.iDivUp (requiredMemory, availableMemory); if (debug) System.out.println("Switching to large volume mode with nSteps = " + nSteps); largeVolumeMode = true; } // create the computing kernel if(debug) kernelFunction = program.createCLKernel("backprojectKernel_returnMotion"); else kernelFunction = program.createCLKernel("backprojectKernel"); // create the reconstruction volume; int memorysize = reconDimensionX * reconDimensionY * reconDimensionZ * 4; if (largeVolumeMode){ subVolumeZ = OpenCLUtil.iDivUp(reconDimensionZ, nSteps); if(debug) System.out.println("SubVolumeZ: " + subVolumeZ); h_volume = new float[reconDimensionX * reconDimensionY * subVolumeZ]; memorysize = reconDimensionX * reconDimensionY * subVolumeZ * 4; if(debug)System.out.println("Memory: " + memorysize); } else { h_volume = new float[reconDimensionX * reconDimensionY * reconDimensionZ]; } // copy volume to device volumePointer = context.createFloatBuffer(h_volume.length, Mem.WRITE_ONLY); volumePointer.getBuffer().put(h_volume); volumePointer.getBuffer().rewind(); commandQueue. putWriteBuffer(volumePointer, true). putWriteBuffer(d_initialRigidTransform,true). finish(); initialized = true; } } private synchronized void unload(){ if (initialized) { if ((projectionVolume != null) && (!largeVolumeMode)) { commandQueue.putReadBuffer(volumePointer, true).finish(); volumePointer.getBuffer().rewind(); volumePointer.getBuffer().get(h_volume); volumePointer.getBuffer().rewind(); int width = projectionVolume.getSize()[0]; int height = projectionVolume.getSize()[1]; if (this.useVOImap) { for (int k = 0; k < projectionVolume.getSize()[2]; k++){ for (int j = 0; j < height; j++){ for (int i = 0; i < width; i++){ float value = h_volume[(((height * k) + j) * width) + i]; if (voiMap[i][j][k]) { projectionVolume.setAtIndex(i, j, k, value); } else { projectionVolume.setAtIndex(i, j, k, 0); } } } } } else { for (int k = 0; k < projectionVolume.getSize()[2]; k++){ for (int j = 0; j < height; j++){ for (int i = 0; i < width; i++){ float value = h_volume[(((height * k) + j) * width) + i]; projectionVolume.setAtIndex(i, j, k, value); } } } } } else { System.out.println("Check ProjectionVolume. It seems null."); } h_volume = null; // free memory on device commandQueue.release(); if (projectionTex != null) projectionTex.release(); if (projectionMatrix != null) projectionMatrix.release(); if (projectionArray != null) projectionArray.release(); if (volumePointer != null) volumePointer.release(); if (d_initialRigidTransform != null) d_initialRigidTransform.release(); kernelFunction.release(); program.release(); commandQueue = null; projectionArray = null; projectionMatrix = null; projectionTex = null; volumePointer = null; kernelFunction = null; program = null; context = null; initialized = false; } } private synchronized void initProjectionMatrix(int projectionNumber){ // load projection Matrix for current Projection. SimpleMatrix pMat = getGeometry().getProjectionMatrix(projectionNumber).computeP(); float [] pMatFloat = new float[pMat.getCols() * pMat.getRows()]; for (int j = 0; j< pMat.getRows(); j++) { for (int i = 0; i< pMat.getCols(); i++) { pMatFloat[(j * pMat.getCols()) + i] = (float) pMat.getElement(j, i); } } // Obtain the global pointer to the view matrix from // the module if (projectionMatrix == null) projectionMatrix = context.createFloatBuffer(pMatFloat.length, Mem.READ_ONLY); projectionMatrix.getBuffer().put(pMatFloat); projectionMatrix.getBuffer().rewind(); commandQueue.putWriteBuffer(projectionMatrix, true).finish(); } private synchronized void initProjectionData(Grid2D projection){ initialize(projection); if (projection != null){ float [] proj= new float[projection.getWidth() * projection.getHeight()]; for(int i = 0; i< projection.getWidth(); i++){ for (int j =0; j < projection.getHeight(); j++){ proj[(j*projection.getWidth()) + i] = projection.getPixelValue(i, j); } } if (projectionArray == null) { // Create the array that will contain the // projection data. projectionArray = context.createFloatBuffer(projection.getWidth()*projection.getHeight(), Mem.READ_ONLY); } // Copy the projection data to the array projectionArray.getBuffer().put(proj); projectionArray.getBuffer().rewind(); if(projectionTex != null && !projectionTex.isReleased()){ projectionTex.release(); } // set the texture CLImageFormat format = new CLImageFormat(ChannelOrder.INTENSITY, ChannelType.FLOAT); projectionTex = context.createImage2d(projectionArray.getBuffer(), projection.getWidth(), projection.getHeight(), format, Mem.READ_ONLY); //projectionArray.release(); } else { System.out.println("Projection was null!!"); } } @Override public String getBibtexCitation() { String bibtex = "@inproceedings{Rohkohl08-CCR,\n" + " author = {{Scherl}, H. and {Keck}, B. and {Kowarschik}, M. and {Hornegger}, J.},\n" + " title = {{Fast GPU-Based CT Reconstruction using the Common Unified Device Architecture (CUDA)}},\n" + " booktitle = {{Nuclear Science Symposium, Medical Imaging Conference 2007}},\n" + " publisher = {IEEE},\n" + " volume={6},\n" + " address = {Honolulu, HI, United States},\n" + " year = {2007}\n" + " pages= {4464--4466},\n" + "}"; return bibtex; } @Override public String getMedlineCitation() { return "Scherl H, Keck B, Kowarschik M, Hornegger J. Fast GPU-Based CT Reconstruction using the Common Unified Device Architecture (CUDA). In Nuclear Science Symposium, Medical Imaging Conference Record, IEEE, Honolulu, HI, United States, 2008 6:4464-6."; } public void waitForResult() { OpenCLRun(); } @Override public void backproject(Grid2D projection, int projectionNumber) { appendProjection(projection, projectionNumber); } private void appendProjection(Grid2D projection, int projectionNumber){ projections.add(projection, projectionNumber); projectionsAvailable.add(new Integer(projectionNumber)); } private synchronized void projectSingleProjection(int projectionNumber, int dimz){ // load projection matrix initProjectionMatrix(projectionNumber); // load projection Grid2D projection = projections.get(projectionNumber); initProjectionData(projection); // Correct for constant part of distance weighting + For angular sampling double D = getGeometry().getSourceToDetectorDistance(); float projectionMultiplier = (float)(10 * D*D * 2* Math.PI * getGeometry().getPixelDimensionX() / getGeometry().getNumProjectionMatrices()); if (!largeVolumeMode) { //projections.remove(projectionNumber); } // backproject for each slice // OpenCL Grids are only two dimensional! int reconDimensionZ = dimz; double voxelSpacingX = getGeometry().getVoxelSpacingX(); double voxelSpacingY = getGeometry().getVoxelSpacingY(); double voxelSpacingZ = getGeometry().getVoxelSpacingZ(); OpenCLGrid3D xdeform = null; OpenCLGrid3D ydeform = null; OpenCLGrid3D zdeform = null; if (debug){ xdeform = new OpenCLGrid3D(new Grid3D(getGeometry().getReconDimensionX(),getGeometry().getReconDimensionY(),getGeometry().getReconDimensionZ())); ydeform = new OpenCLGrid3D(xdeform); zdeform = new OpenCLGrid3D(xdeform); xdeform.getDelegate().prepareForDeviceOperation(); ydeform.getDelegate().prepareForDeviceOperation(); zdeform.getDelegate().prepareForDeviceOperation(); } CLBuffer<FloatBuffer> coeffPtr = context.createFloatBuffer(coeff[projectionNumber].length, Mem.READ_ONLY); coeffPtr.getBuffer().put(coeff[projectionNumber]); coeffPtr.getBuffer().rewind(); commandQueue.putWriteBuffer(coeffPtr, true); CLBuffer<FloatBuffer> ptsGlobalPtr = context.createFloatBuffer(pts[projectionNumber].length, Mem.READ_ONLY); ptsGlobalPtr.getBuffer().put(pts[projectionNumber]); ptsGlobalPtr.getBuffer().rewind(); commandQueue.putWriteBuffer(ptsGlobalPtr, true); CLBuffer<FloatBuffer> ptsLocalPtr = context.createFloatBuffer(pts[projectionNumber].length, Mem.READ_WRITE); CLBuffer<FloatBuffer> APtr = context.createFloatBuffer(A[projectionNumber].length, Mem.READ_ONLY); APtr.getBuffer().put(A[projectionNumber]); APtr.getBuffer().rewind(); commandQueue.putWriteBuffer(APtr, true); CLBuffer<FloatBuffer> bPtr = context.createFloatBuffer(b[projectionNumber].length, Mem.READ_ONLY); bPtr.getBuffer().put(b[projectionNumber]); bPtr.getBuffer().rewind(); commandQueue.putWriteBuffer(bPtr, true); // write kernel parameters kernelFunction.rewind(); kernelFunction .putArg(volumePointer) .putArg(coeffPtr) .putNullArg((int)coeffPtr.getCLSize()) .putArg(ptsGlobalPtr) .putNullArg((int)ptsGlobalPtr.getCLSize()) .putArg(APtr) .putArg(bPtr) .putArg((int)pts[projectionNumber].length/3) .putArg(getGeometry().getReconDimensionX()) .putArg(getGeometry().getReconDimensionY()) .putArg(reconDimensionZ) .putArg((int) lineOffset) .putArg((float) voxelSpacingX) .putArg((float) voxelSpacingY) .putArg((float) voxelSpacingZ) .putArg((float) offsetX) .putArg((float) offsetY) .putArg((float) offsetZ) .putArg(projectionTex) .putArg(projectionMatrix) .putArg(d_initialRigidTransform) .putArg(projectionMultiplier); if (debug){ kernelFunction.putArg(xdeform.getDelegate().getCLBuffer()) .putArg(ydeform.getDelegate().getCLBuffer()) .putArg(zdeform.getDelegate().getCLBuffer()); } int[] realLocalSize = new int[2]; realLocalSize[0] = Math.min(device.getMaxWorkGroupSize(),bpBlockSize[0]); realLocalSize[1] = Math.max(1, Math.min(device.getMaxWorkGroupSize()/realLocalSize[0], bpBlockSize[1])); // rounded up to the nearest multiple of localWorkSize int[] globalWorkSize = {getGeometry().getReconDimensionX(), getGeometry().getReconDimensionY()}; if ((globalWorkSize[0] % realLocalSize[0] ) != 0){ globalWorkSize[0] = ((globalWorkSize[0] / realLocalSize[0]) + 1) * realLocalSize[0]; } if ((globalWorkSize[1] % realLocalSize[1] ) != 0){ globalWorkSize[1] = ((globalWorkSize[1] / realLocalSize[1]) + 1) * realLocalSize[1]; } // Call the OpenCL kernel, writing the results into the volume which is pointed at commandQueue .putWriteImage(projectionTex, true) .finish() .put2DRangeKernel(kernelFunction, 0, 0, globalWorkSize[0], globalWorkSize[1], realLocalSize[0], realLocalSize[1]) .finish(); if (debug){ xdeform.getDelegate().notifyDeviceChange(); ydeform.getDelegate().notifyDeviceChange(); zdeform.getDelegate().notifyDeviceChange(); xdeform.show("Xdeform"); ydeform.show("Ydeform"); zdeform.show("Zdeform"); xdeform.release(); ydeform.release(); zdeform.release(); } coeffPtr.release(); ptsGlobalPtr.release(); ptsLocalPtr.release(); APtr.release(); bPtr.release(); if (showStatus) IJ.showProgress(projectionNumber, Configuration.getGlobalConfiguration().getGeometry().getNumProjectionMatrices()); } public void OpenCLRun() { try { while (projectionsAvailable.size() > 0) { Thread.sleep(CONRAD.INVERSE_SPEEDUP); if (showStatus) { float status = (float) (1.0 / projections.size()); if (largeVolumeMode) { IJ.showStatus("Streaming Projections to OpenCL Buffer"); } else { IJ.showStatus("Backprojecting with OpenCL"); } IJ.showProgress(status); } if (!largeVolumeMode) { workOnProjectionData(); } else { checkProjectionData(); } } // System.out.println("large Volume " + largeVolumeMode); if (largeVolumeMode){ // we have collected all projections. // now we can reconstruct subvolumes and stich them together. int reconDimensionZ = getGeometry().getReconDimensionZ(); double voxelSpacingX = getGeometry().getVoxelSpacingX(); double voxelSpacingY = getGeometry().getVoxelSpacingY(); double voxelSpacingZ = getGeometry().getVoxelSpacingZ(); useVOImap = false; initialize(projections.get(0)); double originalOffsetZ = offsetZ; double originalReconDimZ = reconDimensionZ; reconDimensionZ = subVolumeZ; int maxProjectionNumber = projections.size(); float all = nSteps * maxProjectionNumber*2; for (int n =0; n < nSteps; n++){ // For each subvolume // set all to 0; Arrays.fill(h_volume, 0); volumePointer.getBuffer().rewind(); volumePointer.getBuffer().put(h_volume); volumePointer.getBuffer().rewind(); commandQueue.putWriteBuffer(volumePointer, true).finish(); offsetZ = originalOffsetZ - (reconDimensionZ*voxelSpacingZ*n); for (int p = 0; p < maxProjectionNumber; p ++){ // For all projections float currentStep = (n*maxProjectionNumber*2) + p; if (showStatus) { IJ.showStatus("Backprojecting with OpenCL"); IJ.showProgress(currentStep/all); } //System.out.println("Current: " + p); try { projectSingleProjection(p, reconDimensionZ); } catch (Exception e){ System.out.println("Backprojection of projection " + p + " was not successful."); e.printStackTrace(); } } // Gather volume commandQueue.putReadBuffer(volumePointer, true).finish(); volumePointer.getBuffer().rewind(); volumePointer.getBuffer().get(h_volume); volumePointer.getBuffer().rewind(); // move data to ImagePlus; if (projectionVolume != null) { for (int k = 0; k < reconDimensionZ; k++){ int index = (n*subVolumeZ) + k; if (showStatus) { float currentStep = (n*maxProjectionNumber*2) + maxProjectionNumber + k; IJ.showStatus("Fetching Volume from OpenCL"); IJ.showProgress(currentStep/all); } if (index < originalReconDimZ) { for (int j = 0; j < projectionVolume.getSize()[1]; j++){ for (int i = 0; i < projectionVolume.getSize()[0]; i++){ float value = h_volume[(((projectionVolume.getSize()[1] * k) + j) * projectionVolume.getSize()[0]) + i]; double[][] voxel = new double [4][1]; voxel[0][0] = (voxelSpacingX * i) - offsetX; voxel[1][0] = (voxelSpacingY * j) - offsetY; voxel[2][0] = (voxelSpacingZ * index) - originalOffsetZ; // exception for the case "interestedInVolume == null" and largeVolume is enabled if (interestedInVolume == null) { projectionVolume.setAtIndex(i, j, index, value); } else { if (interestedInVolume.contains(voxel[0][0], voxel[1][0], voxel[2][0])) { projectionVolume.setAtIndex(i, j, index, value); } else { projectionVolume.setAtIndex(i, j, index, 0); } } } } } } } } } } catch (InterruptedException e) { e.printStackTrace(); } if (showStatus) IJ.showProgress(1.0); unload(); if (debug) System.out.println("Unloaded"); } private synchronized void workOnProjectionData(){ if (projectionsAvailable.size() > 0){ Integer current = projectionsAvailable.get(0); projectionsAvailable.remove(0); int p = current.intValue(); projectSingleProjection(p, getGeometry().getReconDimensionZ()); projectionsDone.add(current); } } private synchronized void checkProjectionData(){ if (projectionsAvailable.size() > 0){ Integer current = projectionsAvailable.get(0); projectionsAvailable.remove(current); projectionsDone.add(current); } } public Grid3D reconstructCL(){ init(); int n = inputQueue.size(); if (showStatus) IJ.showStatus(this.getToolName()); for (int i = 0; i < n; i++){ backproject(inputQueue.get(i), i); } waitForResult(); if (Configuration.getGlobalConfiguration().getUseHounsfieldScaling()) applyHounsfieldScaling(); //projectionVolume.show(); return projectionVolume; } public void loadInputQueue(ImageGridBuffer inp) throws IOException { inputQueue = inp; projections = inp; } public float[][] getCoeff() { return coeff; } public void setCoeff(float[][] coeff) { this.coeff = coeff; } public float[][] getPts() { return pts; } public void setPts(float[][] pts) { this.pts = pts; } public float[][] getA() { return A; } public void setA(float[][] a) { A = a; } public float[][] getB() { return b; } public void setB(float[][] b) { this.b = b; } public SimpleMatrix getInitialRigidTransform() { if (initialRigidTransform != null){ SimpleMatrix out = SimpleMatrix.I_4.clone(); for (int j = 0; j < 4; j++) { for (int i = 0; i < 3; i++) { out.setElementValue(i, j, initialRigidTransform[j*3+i]); } } return out; }else return null; } public void setInitialRigidTransform(SimpleMatrix initTform) { if (initialRigidTransform == null) initialRigidTransform = new float[12]; // no scaling is taken into account!! 6 parameter rigid only for (int j = 0; j < 4; j++) { for (int i = 0; i < 3; i++) { initialRigidTransform[j*3+i] = (float)initTform.getElement(i, j); } } } } /* * Copyright (C) 2010-2014 Marco B�gel * CONRAD is developed as an Open Source project under the GNU General Public License (GPL). */