package edu.stanford.rsl.conrad.cuda;
/*
* JCuda - Java bindings for NVIDIA CUDA driver and runtime API
* http://www.jcuda.org
*
* DISCLAIMER: THIS SOFTWARE IS PROVIDED WITHOUT WARRANTY OF ANY KIND
* If you find any bugs or errors, contact me at http://www.jcuda.org
*
* LICENSE: THIS SOFTWARE IS FREE FOR NON-COMMERCIAL USE ONLY
* For non-commercial applications, you may use this software without
* any restrictions. If you wish to use it for commercial purposes,
* contact me at http://www.jcuda.org
*/
import java.util.*;
import jcuda.*;
import jcuda.jcublas.*;
import jcuda.jcudpp.*;
import jcuda.jcufft.*;
import jcuda.runtime.*;
/**
* This is a class that demonstrates the interoperability among
* JCuda, JCufft, JCublas and JCudpp. It performs several
* computations using each library, using always the same
* "shared" device memory.
*/
public class JCudaRuntimeSample
{
public static void main(String args[])
{
System.out.println("Creating input data");
// the classpath
System.out.println( System.getProperty( "java.class.path" ) );
// extension directories whose jars are included on the classpath
System.out.println( System.getProperty( "java.ext.dirs" ) );
// low level classpath, includes system jars
System.out.println( System.getProperty( "java.library.path" ) );
// character to separate (not terminate!) entries on the classpath, ; for Windows : for unix.
System.out.println( System.getProperty( "path.separator" ) );
// Create some input data
int complexElements = 100;
int floatElements = complexElements * 2;
int memorySize = floatElements * Sizeof.FLOAT;
float hostX[] = createRandomFloatData(floatElements);
float hostY[] = createRandomFloatData(floatElements);
System.out.println("Initializing device data using JCuda");
// Allocate memory on the device using JCuda
Pointer deviceX = new Pointer();
Pointer deviceY = new Pointer();
JCuda.cudaMalloc(deviceX, memorySize);
JCuda.cudaMalloc(deviceY, memorySize);
// Copy memory from host to device using JCuda
JCuda.cudaMemcpy(deviceX, Pointer.to(hostX), memorySize,
cudaMemcpyKind.cudaMemcpyHostToDevice);
JCuda.cudaMemcpy(deviceY, Pointer.to(hostY), memorySize,
cudaMemcpyKind.cudaMemcpyHostToDevice);
System.out.println("Performing FFT using JCufft");
// Perform in-place complex-to-complex 1D transforms using JCufft
cufftHandle plan = new cufftHandle();
JCufft.cufftPlan1d(plan, complexElements, cufftType.CUFFT_C2C, 1);
JCufft.cufftExecC2C(plan, deviceX, deviceX, JCufft.CUFFT_FORWARD);
JCufft.cufftExecC2C(plan, deviceY, deviceY, JCufft.CUFFT_FORWARD);
System.out.println("Performing caxpy using JCublas");
// Perform a complex y=a*x+y operation (caxpy) using JCublas
cuComplex alpha = cuComplex.cuCmplx(0.3f, 0.7f);
JCublas.cublasInit();
JCublas.cublasCaxpy(complexElements, alpha, deviceX, 1, deviceY, 1);
// This is a sample application, so perform a scan of one
// of the complex vectors using JCudpp, although this does
// not make any sense...
System.out.println("Performing scan using JCudpp");
// Create a configuration that describes a scan
CUDPPConfiguration config = new CUDPPConfiguration();
config.op = CUDPPOperator.CUDPP_ADD;
config.datatype = CUDPPDatatype.CUDPP_FLOAT;
config.algorithm = CUDPPAlgorithm.CUDPP_SCAN;
config.options = CUDPPOption.CUDPP_OPTION_FORWARD;
// Create a CUDPPHandle for the scan operation
CUDPPHandle handle = new CUDPPHandle();
JCudpp.cudppPlan(handle, config, complexElements, 1, 0);
// Run the scan
JCudpp.cudppScan(handle, deviceX, deviceY, floatElements);
// Copy the result from the device to the host
JCuda.cudaMemcpy(Pointer.to(hostX), deviceX, memorySize,
cudaMemcpyKind.cudaMemcpyDeviceToHost);
System.out.println("Result: "+hostX[hostX.length-1]);
// Clean up
JCuda.cudaFree(deviceX);
JCuda.cudaFree(deviceY);
JCublas.cublasShutdown();
JCufft.cufftDestroy(plan);
JCudpp.cudppDestroyPlan(handle);
}
/**
* Creates an array of the specified size, containing some random data
*/
private static float[] createRandomFloatData(int x)
{
Random random = new Random(0);
float a[] = new float[x];
for (int i=0; i<x; i++)
{
a[i] = random.nextFloat();
}
return a;
}
}