CUDA C program for matrix multiplication using shared and non-shared memory
// Matrix multiplication using shared and non-shared memory kernels
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#define TILE_WIDTH 2
/*matrix multiplication kernels*/
//non shared
/*
 * Square matrix multiply using global memory only: Pd = Md * Nd.
 *
 * Md, Nd, Pd : device pointers to WIDTH x WIDTH row-major float matrices.
 * WIDTH      : number of rows/columns of each matrix.
 *
 * Launch layout: a 2D grid of 2D blocks; each thread computes one output
 * element at (row, col) derived from its block and thread indices. Threads
 * that fall outside the matrix exit without touching memory, so the grid may
 * over-cover the matrix.
 *
 * Pd is overwritten (not accumulated into), so it does not need to be
 * zero-initialized by the caller.
 */
__global__ void
MatrixMul( float *Md , float *Nd , float *Pd , const int WIDTH )
{
    // Global coordinates of the output element owned by this thread.
    const unsigned int col = blockDim.x * blockIdx.x + threadIdx.x ;
    const unsigned int row = blockDim.y * blockIdx.y + threadIdx.y ;

    // Guard against grids that extend past the matrix edge.
    if ( WIDTH <= 0 || row >= (unsigned int) WIDTH || col >= (unsigned int) WIDTH )
    {
        return ;
    }

    // Accumulate in a register: avoids reading uninitialized Pd and avoids a
    // global-memory read-modify-write on every iteration.
    float sum = 0.0f ;
    for ( int k = 0 ; k < WIDTH ; k++ )
    {
        sum += Md[ (size_t) row * WIDTH + k ] * Nd[ (size_t) k * WIDTH + col ] ;
    }

    Pd[ (size_t) row * WIDTH + col ] = sum ;
}
// shared
/*
 * Tiled square matrix multiply using shared memory: Pd = Md * Nd.
 *
 * Md, Nd, Pd : device pointers to WIDTH x WIDTH row-major float matrices.
 * WIDTH      : number of rows/columns of each matrix.
 *
 * Launch layout: blocks must be TILE_WIDTH x TILE_WIDTH threads, because the
 * shared tiles are indexed directly by threadIdx. Each block computes one
 * TILE_WIDTH x TILE_WIDTH tile of Pd. Shared memory use is two static
 * TILE_WIDTH x TILE_WIDTH float tiles per block.
 *
 * WIDTH does not have to be a multiple of TILE_WIDTH: out-of-range tile
 * entries are loaded as 0 and out-of-range threads do not write Pd. Every
 * thread still executes every loop iteration so that all threads of the block
 * reach each __syncthreads().
 *
 * Pd is overwritten (not accumulated into), so it does not need to be
 * zero-initialized by the caller.
 */
__global__ void
MatrixMulSh( float *Md , float *Nd , float *Pd , const int WIDTH )
{
    // Per-block staging tiles for the current phase of Md and Nd.
    __shared__ float Mds [TILE_WIDTH][TILE_WIDTH] ;
    __shared__ float Nds [TILE_WIDTH][TILE_WIDTH] ;

    const unsigned int tx = threadIdx.x ;
    const unsigned int ty = threadIdx.y ;

    // Global coordinates of the output element owned by this thread.
    const int col = TILE_WIDTH * blockIdx.x + tx ;
    const int row = TILE_WIDTH * blockIdx.y + ty ;

    // Number of tile phases needed to cover the shared dimension (ceil-div).
    const int numPhases = ( WIDTH + TILE_WIDTH - 1 ) / TILE_WIDTH ;

    float sum = 0.0f ;
    for ( int m = 0 ; m < numPhases ; m++ )
    {
        // Each thread loads one element of each tile; pad with 0 past the edge.
        const int mCol = m * TILE_WIDTH + tx ;   // column of Md read in this phase
        const int nRow = m * TILE_WIDTH + ty ;   // row of Nd read in this phase

        Mds[ty][tx] = ( row < WIDTH && mCol < WIDTH )
                      ? Md[ (size_t) row * WIDTH + mCol ] : 0.0f ;
        Nds[ty][tx] = ( nRow < WIDTH && col < WIDTH )
                      ? Nd[ (size_t) nRow * WIDTH + col ] : 0.0f ;

        // Wait until both tiles are fully loaded before any thread reads them.
        __syncthreads() ;

        // Partial dot product: row `ty` of the Md tile with column `tx` of
        // the Nd tile.
        for ( int k = 0 ; k < TILE_WIDTH ; k++ )
        {
            sum += Mds[ty][k] * Nds[k][tx] ;
        }

        // Wait until every thread is done with the tiles before the next
        // phase overwrites them.
        __syncthreads() ;
    }

    if ( row < WIDTH && col < WIDTH )
    {
        Pd[ (size_t) row * WIDTH + col ] = sum ;
    }
}
// main routine
/* Abort with a readable message if a CUDA runtime call failed. */
static void checkCuda( cudaError_t status , const char *what )
{
    if ( status != cudaSuccess )
    {
        fprintf ( stderr , "CUDA error during %s: %s\n" ,
                  what , cudaGetErrorString ( status ) ) ;
        exit ( EXIT_FAILURE ) ;
    }
}

/*
 * Host driver: fills two WIDTH x WIDTH matrices with constants (1 and 2),
 * multiplies them on the GPU with one of the two kernels, and prints the
 * product. Returns 0 on success; exits with EXIT_FAILURE on any CUDA error.
 */
int main ()
{
    const int WIDTH = 6 ;
    // The matrices hold floats, so every size must be computed from
    // sizeof(float).
    const size_t bytes = (size_t) WIDTH * WIDTH * sizeof (float) ;

    float array1_h[WIDTH][WIDTH] , array2_h[WIDTH][WIDTH] ,
          M_result_array_h[WIDTH][WIDTH] ;                 // host arrays
    float *array1_d = NULL , *array2_d = NULL ,
          *M_result_array_d = NULL ;                       // device arrays
    int i , j ;

    // The grid below uses WIDTH / TILE_WIDTH blocks per dimension, which only
    // covers the whole matrix when the division is exact.
    if ( WIDTH % TILE_WIDTH != 0 )
    {
        fprintf ( stderr , "WIDTH (%d) must be a multiple of TILE_WIDTH (%d)\n" ,
                  WIDTH , TILE_WIDTH ) ;
        return EXIT_FAILURE ;
    }

    // input in host array
    for ( i = 0 ; i < WIDTH ; i++ )
    {
        for ( j = 0 ; j < WIDTH ; j++ )
        {
            array1_h[i][j] = 1 ;
            array2_h[i][j] = 2 ;
        }
    }

    // create device arrays: cudaMalloc ( (void **)&array_name , size_in_bytes )
    checkCuda ( cudaMalloc ( (void **) &array1_d , bytes ) , "cudaMalloc(array1_d)" ) ;
    checkCuda ( cudaMalloc ( (void **) &array2_d , bytes ) , "cudaMalloc(array2_d)" ) ;
    checkCuda ( cudaMalloc ( (void **) &M_result_array_d , bytes ) ,
                "cudaMalloc(M_result_array_d)" ) ;

    // copy host arrays to device: cudaMemcpy ( dest , source , bytes , direction )
    checkCuda ( cudaMemcpy ( array1_d , array1_h , bytes , cudaMemcpyHostToDevice ) ,
                "cudaMemcpy(array1_d)" ) ;
    checkCuda ( cudaMemcpy ( array2_d , array2_h , bytes , cudaMemcpyHostToDevice ) ,
                "cudaMemcpy(array2_d)" ) ;

    // cudaMalloc does not initialize memory. Zero the output so the result is
    // well-defined even if a kernel accumulates into it (all-zero bytes is
    // 0.0f).
    checkCuda ( cudaMemset ( M_result_array_d , 0 , bytes ) ,
                "cudaMemset(M_result_array_d)" ) ;

    // launch configuration: one thread per output element,
    // TILE_WIDTH x TILE_WIDTH threads per block
    dim3 dimGrid ( WIDTH / TILE_WIDTH , WIDTH / TILE_WIDTH , 1 ) ;
    dim3 dimBlock ( TILE_WIDTH , TILE_WIDTH , 1 ) ;

    // Change "#if 0" to "#if 1" (and the "#if 1" below to "#if 0") to run the
    // non-shared kernel instead of the shared-memory kernel.
#if 0
    MatrixMul <<<dimGrid,dimBlock>>> ( array1_d , array2_d , M_result_array_d , WIDTH ) ;
#endif
#if 1
    MatrixMulSh <<<dimGrid,dimBlock>>> ( array1_d , array2_d , M_result_array_d , WIDTH ) ;
#endif

    // Launch-configuration errors are reported by cudaGetLastError();
    // errors raised while the kernel runs surface at the next synchronization.
    checkCuda ( cudaGetLastError () , "kernel launch" ) ;
    checkCuda ( cudaDeviceSynchronize () , "kernel execution" ) ;

    // copy the result back from M_result_array_d to M_result_array_h
    checkCuda ( cudaMemcpy ( M_result_array_h , M_result_array_d , bytes ,
                             cudaMemcpyDeviceToHost ) ,
                "cudaMemcpy(M_result_array_h)" ) ;

    // print the result array
    for ( i = 0 ; i < WIDTH ; i++ )
    {
        for ( j = 0 ; j < WIDTH ; j++ )
        {
            printf ("%f ",M_result_array_h[i][j] ) ;
        }
        printf ("\n") ;
    }

    // release device memory
    checkCuda ( cudaFree ( array1_d ) , "cudaFree(array1_d)" ) ;
    checkCuda ( cudaFree ( array2_d ) , "cudaFree(array2_d)" ) ;
    checkCuda ( cudaFree ( M_result_array_d ) , "cudaFree(M_result_array_d)" ) ;

#ifdef _WIN32
    // Keep the console window open when started from the Windows GUI;
    // "pause" is not a command on other platforms.
    system("pause") ;
#endif
    return 0 ;
}
|
Got questions?
Feel free to ask me anything; I'd be happy to walk you through the code step by step!
thank u sir..
ReplyDeleteit's really helpful...
have you guys done matrix inverse??
ReplyDeleteI've not tried this problem yet....
DeleteThanks @Sanjay
ReplyDeleteThanks for sharing this code.
ReplyDeleteMay I ask how would you implement non-square matrix multiplication? For instance multiply a 4x3 by a 3x1
Thanks.
I need to write a basic CUDA code for multiplying matrix...How would I write it?
ReplyDeleteWhen the WIDTH value is 2000, the code hangs. Why?
ReplyDeleteThis comment has been removed by the author.
ReplyDeleteHi, I'm trying to use your code, but why is the result always changing?
ReplyDeletethanks :)
I am getting "Segmentation fault (core dumped)"
ReplyDeletewhen I try it for dynamic square arrays how do I solve this?
Hi,
ReplyDeleteI am trying to change the cuda code, into a .so file and have to call it from python, initializing matrixes and converting into ctypes are done in python, any one have any idea of doing that.
Great idea for an article! Looking forward to the next part.
ReplyDeletewhat is the diffference between shared and non-shared technique
ReplyDeleteWhy are you using 'sizeof(int)' in cudaMemcpy ( array1_d , array1_h , WIDTH*WIDTH*sizeof (int) , cudaMemcpyHostToDevice ) when you have declared floating-point variables? Shouldn't it be sizeof(float)?
ReplyDeleteSir, how can I do the matrix multiplication in cuda without using shared memory? Can u share the code for same?
ReplyDeleteHi, thanks for the code. Could you explain it step by step as you offered in last line?
ReplyDeleteThanks
By the way my e-mail address is chinmay.toekker@gmail.com
DeleteI would like to thank you for the great text.
ReplyDeletenice article for beginners.thank you.
ReplyDeletec++ tutorial
java tutorial
Can someone please paste the output screenshot
ReplyDeletenice
ReplyDeletethe connection to the server localhost:8080 was refused - did you specify the right host or port?
ReplyDeleteAWS VPC
kubernetes dashboard
aws inspector
arm template
azure bastion
Aswindow is the Top Organization upvc entryways makers in delhi and Supply UPVC, Top of the line Entryways and Windows to Clients All around The Noida, Delhi Gurgaon and NCR. AS Window is a trailblazer in the creation of plasticized upvc window suppliers in gurugram and entryways. UPVC is a superb option in contrast to wood and metal. AS Window offers an extensive variety of wonderful and perfect home window plans that won't just change your home yet will likewise safeguard it from the rest of the world. UPVC is an all-climate, harmless to the ecosystem, intensity and commotion safe material for entryways and windows that add solace, accommodation, and style to current homes.
ReplyDeleteGümüşçay
ReplyDeletePaşaköy
Saraykent
Çarşı
Demirtaş
K05S
adıyaman
ReplyDeletesakarya
yalova
tekirdağ
amasya
F333
goruntulu show
ReplyDeleteücretli
WU6O
https://titandijital.com.tr/
ReplyDeletebingöl parça eşya taşıma
kırşehir parça eşya taşıma
gümüşhane parça eşya taşıma
rize parça eşya taşıma
VBUN
FB9F2
ReplyDeleteProbit Güvenilir mi
Bayburt Şehir İçi Nakliyat
Kocaeli Evden Eve Nakliyat
Antalya Lojistik
Bybit Güvenilir mi
Sincan Boya Ustası
Muş Lojistik
Çerkezköy Bulaşık Makinesi Tamircisi
Muş Parça Eşya Taşıma
5D6E6
ReplyDeleteKırıkkale Evden Eve Nakliyat
Binance Güvenilir mi
testosterone propionat
buy dianabol methandienone
order sarms
Bitlis Evden Eve Nakliyat
primobolan for sale
Kocaeli Evden Eve Nakliyat
Aksaray Evden Eve Nakliyat
6C75F
ReplyDeleteŞırnak Evden Eve Nakliyat
Adıyaman Evden Eve Nakliyat
Bayburt Lojistik
Tokat Lojistik
Afyon Lojistik
Malatya Evden Eve Nakliyat
Adıyaman Parça Eşya Taşıma
Burdur Lojistik
Muğla Lojistik
A105C
ReplyDeleteKeçiören Fayans Ustası
Bursa Evden Eve Nakliyat
Pancakeswap Güvenilir mi
Etlik Boya Ustası
Kars Lojistik
Konya Lojistik
Çerkezköy Bulaşık Makinesi Tamircisi
Osmaniye Evden Eve Nakliyat
Artvin Şehir İçi Nakliyat
F0C6C
ReplyDeletebuy winstrol stanozolol
Çankırı Evden Eve Nakliyat
boldenone
buy parabolan
buy steroid cycles
order pharmacy steroids
Amasya Evden Eve Nakliyat
oxandrolone anavar
Kripto Para Borsaları
17935
ReplyDeleteBig Wolf Coin Hangi Borsada
Binance Sahibi Kim
Threads İzlenme Hilesi
Görüntülü Sohbet
Binance Kimin
Likee App Beğeni Hilesi
Linkedin Takipçi Satın Al
Bitcoin Nasıl Üretilir
Coin Para Kazanma
57975
ReplyDeleteChat Gpt Coin Hangi Borsada
Likee App Beğeni Hilesi
Twitter Trend Topic Hilesi
Fuckelon Coin Hangi Borsada
Arg Coin Hangi Borsada
Coin Kazma
Binance Hesap Açma
Bitcoin Madenciliği Nedir
Binance Borsası Güvenilir mi
A4C90
ReplyDeleteshapeshift
pudgy penguins
yearn finance
layerzero
sushiswap
dappradar
zkswap
DefiLlama
quickswap
XZDFGVFHB
ReplyDeleteشركة كشف تسربات المياه بالاحساء
شركة عزل اسطح بالمزاحمية ZSeNvbFRiU
ReplyDeleteشركة عزل اسطح بالجبيل 5jAADcIJDI
ReplyDeleteشركة صيانة افران tM9b4Of7iS
ReplyDelete