Pages

Thursday, 27 December 2012

CUDA Test Code

Basic Programming Module, You are at Level - 1

CUDA Test Code

This program is a simple check that your CUDA device works and that you can write a basic kernel correctly.
Program statement: This program stores the value (1000 * i + j) in each element of an array and then checks that every value was stored correctly. The main point of this code is that the value (1000 * i + j) is written in the kernel (device) code, not in the host code,
where 'i' is the block index and 'j' is the thread index within the block.



// includes, system
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

// Simple utility function to check for CUDA runtime errors
void checkCUDAError(const char *msg);

// Part 3 of 5: implement the kernel
//
// Each thread writes one element of d_a: the thread at index `thread` of
// block `block` stores (1000 * block + thread) at position
// (blockDim.x * block + thread).
//
// Expects a 1D grid of 1D blocks. There is no bounds check, so d_a must hold
// at least gridDim.x * blockDim.x ints.
__global__ void myFirstKernel( int *d_a )
{
    const int block  = blockIdx.x;
    const int thread = threadIdx.x;

    // Flat position of this thread's element in the output array.
    const int slot = blockDim.x * block + thread;

    d_a[slot] = 1000 * block + thread;
}

////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
// Host driver: allocates a host and a device buffer of
// numBlocks * numThreadsPerBlock ints, launches myFirstKernel to fill the
// device buffer, copies it back, and verifies that element
// (i * numThreadsPerBlock + j) equals (1000 * i + j).
//
// Returns 0 and prints "Correct!" on success; returns 1 if a host allocation
// fails or a value does not match. CUDA errors terminate the program through
// checkCUDAError.
int main( int argc, char** argv)
{
    // Command-line arguments are not used.
    (void)argc;
    (void)argv;

    // pointer for host memory
    int *h_a = NULL;

    // pointer for device memory
    int *d_a = NULL;

    // define grid and block size
    const int numBlocks = 8;
    const int numThreadsPerBlock = 8;

    // Part 1 of 5: allocate host and device memory
    const size_t memSize = (size_t)numBlocks * numThreadsPerBlock * sizeof(int);
    h_a = (int *) malloc(memSize);
    if (h_a == NULL)
    {
        fprintf(stderr, "Host allocation of %lu bytes failed.\n",
                (unsigned long)memSize);
        return 1;
    }

    cudaMalloc((void **)&d_a, memSize);
    checkCUDAError("cudaMalloc");

    // No host-to-device copy is needed: the kernel overwrites every element.

    // Part 2 of 5: configure and launch kernel
    dim3 dimGrid(numBlocks, 1, 1);
    dim3 dimBlock(numThreadsPerBlock, 1, 1);
    myFirstKernel<<< dimGrid, dimBlock >>>(d_a);

    // A launch returns no status itself; configuration errors are reported
    // through cudaGetLastError (called inside checkCUDAError).
    checkCUDAError("kernel launch");

    // block until the device has completed
    // (cudaDeviceSynchronize replaces the deprecated cudaThreadSynchronize)
    cudaDeviceSynchronize();

    // check if kernel execution generated an error
    checkCUDAError("kernel execution");

    // Part 4 of 5: device to host copy
    cudaMemcpy(h_a, d_a, memSize, cudaMemcpyDeviceToHost);

    // Check for any CUDA errors
    checkCUDAError("cudaMemcpy");

    // Part 5 of 5: verify the data returned to the host is correct.
    // The loop bounds follow the launch configuration, and the check is an
    // explicit comparison so it still runs when NDEBUG disables assert().
    int mismatches = 0;
    for (int i = 0; i < numBlocks; i++)
    {
        for (int j = 0; j < numThreadsPerBlock; j++)
        {
            const int idx = i * numThreadsPerBlock + j;
            const int expected = 1000 * i + j;
            if (h_a[idx] != expected)
            {
                // Report only the first mismatch to keep the output short.
                if (mismatches == 0)
                {
                    fprintf(stderr,
                            "Mismatch at block %d, thread %d: got %d, expected %d.\n",
                            i, j, h_a[idx], expected);
                }
                mismatches++;
            }
        }
    }

    // free device memory
    cudaFree(d_a);
    checkCUDAError("cudaFree");

    // free host memory
    free(h_a);

    if (mismatches != 0)
    {
        fprintf(stderr, "Incorrect: %d mismatching element(s).\n", mismatches);
        return 1;
    }

    // If the program makes it this far, then the results are correct and
    // there are no run-time errors.  Good work!
    printf("Correct!\n");

    return 0;
}

// Reads the most recent CUDA runtime error with cudaGetLastError(). If it is
// anything other than cudaSuccess, prints `msg` together with the CUDA error
// string to stderr and terminates the process with exit(-1). Otherwise
// returns normally.
//
// msg: caller-supplied label naming the operation that was just attempted.
void checkCUDAError(const char *msg)
{
    const cudaError_t status = cudaGetLastError();

    // Nothing to report: return to the caller.
    if (status == cudaSuccess)
    {
        return;
    }

    fprintf(stderr, "Cuda error: %s: %s.\n", msg, cudaGetErrorString(status));
    exit(-1);
}



Feel free to comment...


References
CUDA C Programming Guide
Programming Massively Parallel Processors By David B. Kirk and Wen-mei W.Hwu

No comments:

Post a Comment

Help us to improve our quality and become contributor to our blog