package edu.stanford.rsl.conrad.cuda;
import edu.stanford.rsl.conrad.volume3d.Volume3D;
import jcuda.Pointer;
import jcuda.Sizeof;
import jcuda.driver.CUdevice;
import jcuda.driver.CUdeviceptr;
import jcuda.driver.CUdevprop;
import jcuda.driver.CUmodule;
import jcuda.driver.JCudaDriver;
import jcuda.runtime.JCuda;
import jcuda.runtime.cudaMemcpyKind;
public abstract class CUDAUtil {
// Pre-determined kernel block size
public static int gridBlockSize[] = {32, 16};
/**
* Returns the given (address) value, adjusted to have
* the given alignment. In newer versions of JCuda, this
* function is also available as JCudaDriver#align
*
* @param value The address value
* @param alignment The desired alignment
* @return The aligned address value
*/
public static int align(int value, int alignment)
{
return (((value) + (alignment) - 1) & ~((alignment) - 1));
}
/**
* copies an int array to the device and returns a pointer to the memory.
* @param data the int array
* @return the pointer to the device memory
*/
public static CUdeviceptr copyToDeviceMemory(int [] data){
int memorySize = data.length * Sizeof.INT;
CUdeviceptr deviceX = new CUdeviceptr();
JCuda.cudaMalloc(deviceX, memorySize);
JCuda.cudaMemcpy(deviceX, Pointer.to(data), memorySize,
cudaMemcpyKind.cudaMemcpyHostToDevice);
return deviceX;
}
/**
* copies a float array to the device and returns a pointer to the memory.
* @param data the float array
* @return the pointer to the device memory
*/
public static CUdeviceptr copyToDeviceMemory(float [] data){
int memorySize = data.length * Sizeof.FLOAT;
CUdeviceptr deviceX = new CUdeviceptr();
JCuda.cudaMalloc(deviceX, memorySize);
JCuda.cudaMemcpy(deviceX, Pointer.to(data), memorySize,
cudaMemcpyKind.cudaMemcpyHostToDevice);
return deviceX;
}
/**
* fetches a float data array from the device and frees the memory on the device.
* @param data the float array to write to
* @param deviceX the pointer to the device memory
*/
public static void fetchFromDeviceMemory(float [] data, CUdeviceptr deviceX){
int memorySize = data.length * Sizeof.FLOAT;
JCuda.cudaMemcpy(Pointer.to(data), deviceX, memorySize,
cudaMemcpyKind.cudaMemcpyDeviceToHost);
JCuda.cudaFree(deviceX);
}
/**
* Allocates space on the CUDA device for a Volume3D
* @param vol the volume
* @return the pointer to the memory
*/
public static CUdeviceptr allocateSpace(Volume3D vol){
// We allocate too much memory as we parallelize along x and y direction and the memory must be a multiple along this direction internally.
int adaptedWidth = iDivUp(vol.size[2], gridBlockSize[0]) * gridBlockSize[0];
int adaptedHeight = iDivUp(vol.size[1], gridBlockSize[1]) * gridBlockSize[1];
int memorySize = adaptedWidth*adaptedHeight*vol.size[0]* vol.getInternalDimension() * Sizeof.FLOAT;
CUdeviceptr deviceX = new CUdeviceptr();
JCuda.cudaMalloc(deviceX, memorySize);
return deviceX;
}
/**
* Moves the volume to the device.
* @param vol the volume
* @param deviceX the memory pointer
*/
public static void moveToDevice(Volume3D vol, CUdeviceptr deviceX){
// Allocate memory on the device using JCuda
int memorySize = vol.size[2]* vol.getInternalDimension() * Sizeof.FLOAT;
// Copy memory from host to device using JCuda
for (int i = 0; i < vol.size[0]; i++){
for(int j = 0; j < vol.size[1]; j++){
AdjustablePointer offset = new AdjustablePointer(deviceX, ((vol.size[1]*i) + j) * memorySize);
JCuda.cudaMemcpy(offset, Pointer.to(vol.data[i][j]), memorySize,
cudaMemcpyKind.cudaMemcpyHostToDevice);
}
}
}
/**
* Fetches the volume from the device
* @param vol the volume object
* @param deviceX the pointer to the memory on the deivce.
*/
public static void fetchFromDevice(Volume3D vol, CUdeviceptr deviceX){
// Allocate memory on the device using JCuda
int memorySize = vol.size[2]* vol.getInternalDimension() * Sizeof.FLOAT;
// Copy memory from host to device using JCuda
for (int i = 0; i < vol.size[0]; i++){
for(int j = 0; j < vol.size[1]; j++){
AdjustablePointer offset = new AdjustablePointer(deviceX, (((vol.size[1]*i) + j) * memorySize));
JCuda.cudaMemcpy(Pointer.to(vol.data[i][j]), offset, memorySize,
cudaMemcpyKind.cudaMemcpyDeviceToHost);
}
}
}
/**
* Integral division, rounding the result to the next highest integer.
*
* @param a Dividend
* @param b Divisor
* @return a/b rounded to the next highest integer.
*/
public static int iDivUp(int a, int b)
{
return (a % b != 0) ? (a / b + 1) : (a / b);
}
public static CUdeviceptr copyFloatArrayToDevice(float [] array, CUmodule module, String nameInCode) {
CUdeviceptr devptr = new CUdeviceptr();
JCudaDriver.cuModuleGetGlobal(devptr, new int[1], module, nameInCode);
JCudaDriver.cuMemcpyHtoD(devptr, Pointer.to(array), Sizeof.FLOAT * array.length);
return devptr;
}
public static void updateFloatArrayOnDevice(CUdeviceptr devptr, float [] array, CUmodule module) {
//JCudaDriver.cuModuleGetGlobal(devptr, new int[1], module, nameInCode);
JCudaDriver.cuMemcpyHtoD(devptr, Pointer.to(array), Sizeof.FLOAT * array.length);
}
public static long correctMemoryValue(int memory){
long mem = memory;
if (mem < 0) {
mem -= Integer.MIN_VALUE;
mem += Integer.MAX_VALUE;
}
return mem;
}
public static CUdevice getBestDevice() {
CUdevice best = null;
long lastmem = Long.MIN_VALUE;
int [] count = new int[1];
JCudaDriver.cuDeviceGetCount(count);
for (int i = 0; i < count[0]; i++) {
CUdevice dev = new CUdevice();
JCudaDriver.cuDeviceGet(dev, i);
CUdevprop prop = new CUdevprop();
JCudaDriver.cuDeviceGetProperties(prop, dev);
//System.out.println(prop);
int [] memory = new int [1];
JCudaDriver.cuDeviceTotalMem(memory, dev);
long mem = correctMemoryValue(memory[0]);
//System.out.println("Memory " + mem);
if (mem > lastmem){
best = dev;
lastmem = mem;
}
}
return best;
}
public static CUdevice getSmallestDevice() {
CUdevice best = null;
long lastmem = Long.MAX_VALUE;
int [] count = new int[1];
JCudaDriver.cuDeviceGetCount(count);
for (int i = 0; i < count[0]; i++) {
CUdevice dev = new CUdevice();
JCudaDriver.cuDeviceGet(dev, i);
CUdevprop prop = new CUdevprop();
JCudaDriver.cuDeviceGetProperties(prop, dev);
//System.out.println(prop);
int [] memory = new int [1];
JCudaDriver.cuDeviceTotalMem(memory, dev);
long mem = correctMemoryValue(memory[0]);
//System.out.println("Memory " + mem);
if (mem < lastmem){
best = dev;
lastmem = mem;
}
}
return best;
}
}
/*
* Copyright (C) 2010-2014 - Andreas Maier
* CONRAD is developed as an Open Source project under the GNU General Public License (GPL).
*/