Implementation Sobel operator in CUDA C on YUV video File
Implementation Sobel operator in CUDA C on YUV video File
Today, we discuss Sobel
operator and how to apply on YUV video file with step by step
discussion. If you are not familiar with the sobel operator or don’t know in
detail, don’t worry, we first discuss what is sobel operator followed by its C
code.
Lets start our
discussion.
First question is what
is Sobel filter/operator?
The algorithm we will
look at in this tutorial is an edge detection algorithm, specifically an edge
detection algorithm based on the Sobel operator. This algorithm works by
calculating the gradient of the intensity of the frame at each point, finding the
direction of the change from light to dark and the magnitude of the change.
This magnitude corresponds to how sharp the edge is.
If you are not aware how
to apply sobel operator on image, please refer this link for that.
The Sobel Operator
To calculate the gradient
of each point in the frame, the frame is convolved with the Sobel Kernel.
Convolution is done by moving the kernel across the frame, one pixel at a time.
At each pixel, the pixel and its neighbours are weighted by the corresponding
value in the kernel, and summed to produce a new value. This operation is shown
in the following diagram.
we treat it as though the
kernel has been overlaid onto the frame, with the center pixel of the kernel
aligning we the first pixel in the frame. Then we multiple each entry in the
kernel by the value beneath it, and sum them to produce a single single output
value from that pixel.
For the pixels on the
boundary, we just ignore any entries in the kernel that fall outside.
Edge detection using the
Sobel Operator applies two separate kernels to calculate the x and y gradients
in the frame. The length of this gradient is then calculated and normalized to
produce a single intensity approximately equal to the sharpness of the edge at
that position.
The kernels used for
Sobel Edge Detection are shown below.
The Algorithm
Now the algorithm can be
broken down into its constituent steps
1.
Load YUV File.
2.
Load Frame of YUV video file.
3.
Load Y , U and V Plane separately
4.
Iterate over every pixel in the Plane for
each Plane for each Frame
5.
Apply the x gradient kernel
6.
Apply the y gradient kernel
7.
Find the length of the gradient using
pythagoras' theorem
8.
Normalize the gradient length to the range
0-255
9.
Set the pixels to the new values
10. Combined
all three Plane and make a frame
11. Write
this frame to video file yuv format
12. Save
the YUV file.
This illustrates the main steps, though we miss out some
specifics such as what we do when we meet the boundary of the frame.
Formula for calculating
gradient in X and Y direction ;
Let say (i,j) is the middle pixel of 3*3 grid then we have the
following formula
Gx = - ( - Z1 + Z3 - 2*Z4 + 2*Z6 –
Z7 + Z9 ) / 8
&
Gy = -(-Z1 – 2*Z2 – Z3 + Z7 + 2*Z8 + Z9 )
/8
|
Now time to move ahead,
now we are looking for CUDA C code, right?
Here is the complete
Code;
Code
have 3 procedure.
Save
below procedure in new M file with name Sobel_Prototype.h
/*************************** Error Handling Header
*********************/
#ifndef SOBEL_PROTOTYPE_H
#define SOBEL_PROTOTYPE_H
#include "HEADER.h"
#include "ERROR.h"
#define HEIGHT 480
#define WIDTH 640
#define NUM_FRAMES 20
#define THRESH_HOLD_Y
20
#define THRESH_HOLD_U 1
#define THRESH_HOLD_V
1
//CUDA Utility
#define THREADSPERBLOCK 16
#define BLOCK_SIZE_X (
ISDIV (WIDTH , THREADSPERBLOCK ) )
#define BLOCK_SIZE_Y (
ISDIV (HEIGHT , THREADSPERBLOCK ) )
//calculating the next division
#define ISDIV(a,b) (
( ( a ) % ( b ) != 0 ) ? ( ( a ) / ( b ) + 1 ) : ( ( a ) / ( b ) ) )
typedef unsigned char BYTE ;
char *Input_Filename
= "front_camera_640x480_600_frames.yuv";
char *Output_Filename = "Output_front_CUDA.yuv";
//All in one
void Sobel_Filter_Wrapper () ;
/********************************* Intialization and Release
*******************************/
//Intialize
//For host
bool
Sobel_Host_Init ( unsigned char
**Input_Image_ptr, unsigned char
**Output_Image_ptr) ;
//For device
bool Sobel_Device_Init (
unsigned char
**Input_Image_ptr, unsigned char
**Output_Image_ptr ) ;
/***************************Function for handling
Frames***********************************/
// For Loading Video File Frames
void Load_Frame ( unsigned char
*Input_Image_ptr, FILE *File_reader, const int Frame_num ) ;
//Saving each frame to output file
void Save_Frame_In_Video ( const
unsigned char
*Output_Image_ptr , FILE * File_writer ) ;
/*****************************Function for Sobel
operation************************************/
//Sobel operation; Applying Sobel operator
__global__ void
Sobel_Operation ( unsigned char
*Input , unsigned char
*Output , const int
Width , const int
Height , const size_t Thresh ) ;
//Sobel Threshold
__device__
char Sobel_Threshhold (const char
Pixel_Value , const size_t Thresh ) ;
#endif
|
And save this file as Sobel_Utility.h
/************************************ Sobel Utility
****************************/
#ifndef SOBEL_UTILITY_H
#define SOBEL_UTILITY_H
#include "Sobel_Prototype.h"
/******************************Function
Definations******************************************/
//Intialize
bool
Sobel_Host_Init ( unsigned char
**Input_Image_ptr, unsigned char
**Output_Image_ptr )
{
//Assign
memory to image pointers for storing FRAMES in terms of bytes
//For input
image; SINCE EACH FRAM IS OF SIZE 1.5*WIDTH * HEIGHT
if ( !(
*Input_Image_ptr = (unsigned char *) malloc ( size_t( 1.5f * WIDTH * HEIGHT ) * sizeof ( unsigned char ) ) ) )
{
printf ("\nMemory
can not be allocated; :(" );
system("pause");
exit( 0 );
}
//for output
image
if ( !(
*Output_Image_ptr = (unsigned char *) malloc ( size_t(1.5f * WIDTH * HEIGHT) * sizeof ( unsigned char ) ) ) )
{
printf ("\nMemory
can not be allocated; :(" );
system("pause");
exit( 0 );
}
//set all bytes
value in output image to 0
memset ( *Output_Image_ptr, 0 , WIDTH *
HEIGHT ) ;
return true ;
}
//Intialize
bool Sobel_Device_Init ( unsigned
char **Input_Image_ptr, unsigned char **Output_Image_ptr )
{
//Assign
memory to image pointers for storing FRAMES in terms of bytes
//For input
image; SINCE EACH FRAMW IS OF SIZE 1.5*WIDTH * HEIGHT
CUDA_CALL ( cudaMalloc ( (void **)&(*Input_Image_ptr) , size_t(1.5f * WIDTH * HEIGHT) * sizeof ( unsigned char ) ) )
;
CUDA_CALL ( cudaMalloc ( (void **)&(*Output_Image_ptr) , size_t(1.5f *
WIDTH * HEIGHT) * sizeof ( unsigned char ) ) ) ;
CUDA_CALL ( cudaMemset (
*Output_Image_ptr , 0 ,
size_t(1.5f * WIDTH * HEIGHT) * sizeof ( unsigned char ) ) ) ;
return true ;
}
// For Loading Video File Frames
void Load_Frame ( unsigned char
*Input_Image_ptr, FILE *File_reader, const int Frame_num
)
{
//size of file
in term of bytes
size_t read_size;
//read the
image and store bytes in Input_Imgae_ptr along with get its size in terms of
bytes in read_size
if (
( ( read_size = fread ( Input_Image_ptr, sizeof(unsigned char),
size_t ( 1.5f * WIDTH * HEIGHT), File_reader ) ) != ( 1.5*HEIGHT * WIDTH ) )
)
{
printf ("\nRead
operation; Error occured\n" );
printf("reached
end of video file at frame number %d\n", Frame_num);
printf("read
size is %d\n", read_size);
system("pause");
exit( 0 );
}
}
//Sobel operation; Applying Sobel operator
__global__ void
Sobel_Operation ( unsigned char *Input , unsigned
char *Output
, const int
Width , const int
Height ,const size_t Thresh )
{
//Variable
for Gradient in X and Y direction and Final one
float
Gradient_h, Gradient_v;
//Pixel
value's
char
Pixel_Value = 0 ;
//Calculating
index id
const unsigned
int Col_Index =
blockDim.x * blockIdx.x + threadIdx.x
;
const
unsigned int
Row_Index = blockDim.y * blockIdx.y + threadIdx.y ;
if
( ( Row_Index < Height ) && ( Col_Index < Width ) )
{
if
( ( Row_Index != 0 ) && ( Col_Index != 0 ) && ( Row_Index !=
HEIGHT - 1 ) && ( Col_Index !=
WIDTH - 1 ) )
{
Gradient_v = - (
- Input [ ( Row_Index - 1 )* Width + ( Col_Index
- 1 ) ] +
Input [ ( Row_Index-1) * Width + ( Col_Index + 1 ) ] - 2 * Input [ Row_Index * Width
+
(Col_Index-1)] + 2*Input [ Row_Index*Width + (Col_Index+1) ] - Input[
(Row_Index+1)*Width
+
(Col_Index-1)] + Input [ (Row_Index+1)*Width + (Col_Index+1)] ) /8 ;
Gradient_h = - ( - Input
[(Row_Index-1)*Width + (Col_Index-1)]
- 2*Input
[ (Row_Index-1)*Width + Col_Index ] - Input [(Row_Index-1)*Width +
(Col_Index+1)]
+ Input
[(Row_Index+1)*Width + (Col_Index-1)] + 2*Input [(Row_Index+1)*Width +
Col_Index ]
+ Input [(Row_Index+1)*Width +
(Col_Index+1)] ) /8 ;
//Assign to image
Pixel_Value = ( char ) sqrtf ( Gradient_h * Gradient_h + Gradient_v
* Gradient_v ) ;
Output[ (
Row_Index - 1 ) * Width + ( Col_Index - 1 ) ] =
Sobel_Threshhold ( Pixel_Value , Thresh );
}
}
}
//Sobel Threshold
__device__
char Sobel_Threshhold ( const char
Pixel_Value, const size_t Thresh )
{
if ( Pixel_Value >= Thresh )
return
char (255) ;
else
return
char (0) ;
}
//Saving each frame to output file
void Save_Frame_In_Video ( const
unsigned char
*Output_Image_ptr , FILE * File_writer )
{
//Wrtie each frame
to output video
fwrite( Output_Image_ptr, sizeof (unsigned char), (size_t)(1.5*WIDTH * HEIGHT), File_writer);
}
#endif
|
// Sobel.cu
#include "SOBEL_UTILITY.h"
int main (char *argv , char *argc[] )
{
time_t start_time , end_time ;
/***********************************
Variable Declaration ******************************/
int
Frame_num = 0 ; // loop variable
//File
pointer for reading and writting
FILE *File_reader, *File_writer;
//Host
Memory
//Image
FRAMES pointer's
unsigned
char *Host_Input_Image_ptr;
unsigned
char *Host_Output_Image_ptr;
//Device
Memory
//Image
FRAMES pointer's
unsigned
char *Device_Input_Image_ptr;
unsigned
char *Device_Output_Image_ptr;
/*******************************************************************************************/
//Calculating
Number of blocks and Number of Threads need for solution
//Y
plane
//
LUNCHING THREADS IN TWO DIMENSIONS X and Y
dim3
dimBlock_Y ( THREADSPERBLOCK , THREADSPERBLOCK ) ;
//
LUNCHING Blocks IN TWO DIMENSIONS X and Y
dim3
dimGrid_Y (BLOCK_SIZE_X ,
BLOCK_SIZE_Y) ;
//U
plane
//
LUNCHING THREADS IN TWO DIMENSIONS X and Y
dim3
dimBlock_U ( THREADSPERBLOCK , THREADSPERBLOCK ) ;
//
LUNCHING Blocks IN TWO DIMENSIONS X and Y
dim3
dimGrid_U ( BLOCK_SIZE_X/2 ,
BLOCK_SIZE_Y/2 ) ;
//V
plane
//
LUNCHING THREADS IN TWO DIMENSIONS X and Y
dim3
dimBlock_V ( THREADSPERBLOCK , THREADSPERBLOCK ) ;
//
LUNCHING Blocks IN TWO DIMENSIONS X and Y
dim3
dimGrid_V (BLOCK_SIZE_X/2 ,
BLOCK_SIZE_Y/2) ;
/*******************************************************************************************/
/***********************************
Opena and close file operation
******************************/
//Read Input
imgae file in binary mode
if ( ! (
File_reader = fopen(Input_Filename,"rb")
) )
{
printf ( "\nError
in opening input file" );
system("pause");
exit( 0 );
}
//Open Output
imgae file in binary mode
if ( ! (
File_writer = fopen(Output_Filename,"wb")
) )
{
printf ( "\nError
in opening output file" );
system("pause");
exit( 0 );
}
/*******************************************************************************************/
//Host memory
allocation
if ( !Sobel_Host_Init ( &Host_Input_Image_ptr, &Host_Output_Image_ptr ) )
{
printf ("Memory
Error occured\n Host memory allocation failed\nProgram exiting")
;
system("pause")
;
exit(0) ;
}
//Device
memroy allocation
if (
!Sobel_Device_Init (
&Device_Input_Image_ptr,
&Device_Output_Image_ptr )
)
{
printf ("Memory
Error occured\n Device memory allocation failed\nProgram exiting")
;
system("pause")
;
exit(0) ;
}
//start timer
time(&start_time) ;
/*********************************** ITERATING OVER EACH
FRAM ONE BY ONE
******************************/
for
(Frame_num = 0 ; Frame_num < NUM_FRAMES; Frame_num++ )
{
//Load
Frame
Load_Frame ( Host_Input_Image_ptr, File_reader ,
Frame_num ) ;
//Copy
data from Host to Device
CUDA_CALL ( cudaMemcpy
(Device_Input_Image_ptr ,Host_Input_Image_ptr, size_t(1.5 * WIDTH * HEIGHT)*sizeof(unsigned char) , cudaMemcpyHostToDevice ) ) ;
//CUDA_CALL
( cudaMemcpy (Device_Output_Image_ptr ,Device_Input_Image_ptr, size_t(1.5 *
WIDTH * HEIGHT)*sizeof(unsigned char) , cudaMemcpyDeviceToDevice ) ) ;
//Apply
Sobel filter; Lunching kernel with required number of blocks and fixed number
of threads
// Apply
for Y plane
Sobel_Operation<<<
dimGrid_Y , dimBlock_Y >>> (
Device_Input_Image_ptr,
Device_Output_Image_ptr , WIDTH , HEIGHT , THRESH_HOLD_Y ) ;
//Apply
for U Plane
Sobel_Operation<<<
dimGrid_U , dimBlock_U >>> (
Device_Input_Image_ptr + HEIGHT*WIDTH , Device_Output_Image_ptr +
HEIGHT*WIDTH , WIDTH / 2 ,
HEIGHT/2, THRESH_HOLD_U ) ;
// Apply
for V plane
Sobel_Operation<<<
dimGrid_V , dimBlock_V >>> (
Device_Input_Image_ptr + size_t (1.25
* HEIGHT * WIDTH), Device_Output_Image_ptr + size_t
(1.25 * HEIGHT * WIDTH) , WIDTH/2 , HEIGHT/2, THRESH_HOLD_V ) ;
//Check
for the error in Kernel
checkCUDAError ("Kernel: Soble Operation") ;
//For
synchronizing the Kernel ( GPU ) with Host ( CPU )
cudaThreadSynchronize ( ) ;
//Copy
data from Device to Host
CUDA_CALL ( cudaMemcpy (
Host_Output_Image_ptr ,Device_Output_Image_ptr, size_t (1.5 * WIDTH *
HEIGHT)*sizeof(unsigned
char) , cudaMemcpyDeviceToHost ) ) ;
//Save
frame in video
Save_Frame_In_Video (
Host_Output_Image_ptr , File_writer )
;
}
//stop timer
time(&end_time) ;
//Host memory
free (Host_Input_Image_ptr ) ;
free (Host_Output_Image_ptr) ;
//Device
memory
CUDA_CALL ( cudaFree (
Device_Input_Image_ptr ) ) ;
CUDA_CALL ( cudaFree (
Device_Output_Image_ptr ) ) ;
//Close files
fclose (File_reader);
fclose (File_writer) ;
printf ("The
total duration of program excution is %.3f seconds ", difftime
(end_time,start_time) ) ;
system( Output_Filename );
system("pause");
return 0
; }
|
Got Questions?
Feel free to ask me any
question because I'd be happy to walk you through step by step!
References and External Links
For Contact us….. Click on Contact us Tab
This is great, That's a great website
ReplyDeleteOxford Solutions Intermediate Podręcznik 2015
C++ Programming Code Examples
ReplyDeleteNormalize the gradient length to the range 0-255, need find the max gradient
ReplyDeleteFantastic blog you got here, I enjoyed reading some of your posts.
ReplyDeleteProfessional Courses