CUDA C program for matrix multiplication using shared / non-shared memory



 // Matrix multiplication using shared and non-shared memory kernels



#include <stdio.h>
#include <stdlib.h>   // for system()
#include <math.h>

#define TILE_WIDTH 2
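
// NOTE: both kernels below assume WIDTH is an exact multiple of TILE_WIDTH
// (there are no boundary checks), as is the case for the WIDTH = 6 example in main().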

/* matrix multiplication kernels */

// non-shared: every operand is read directly from global memory
__global__ void
MatrixMul( float *Md , float *Nd , float *Pd , const int WIDTH )
{
    // calculate the row and column this thread is responsible for
    unsigned int col = TILE_WIDTH * blockIdx.x + threadIdx.x ;
    unsigned int row = TILE_WIDTH * blockIdx.y + threadIdx.y ;

    // accumulate the dot product of a row of Md and a column of Nd in a register,
    // then write it out once (Pd is not zero-initialised by cudaMalloc)
    float Pvalue = 0 ;
    for ( int k = 0 ; k < WIDTH ; k++ )
    {
        Pvalue += Md[row * WIDTH + k] * Nd[k * WIDTH + col] ;
    }
    Pd[row * WIDTH + col] = Pvalue ;
}
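
// Compared with the kernel above, the tiled kernel below stages TILE_WIDTH x TILE_WIDTH
// sub-blocks of Md and Nd in shared memory so that each global-memory element is reused
// TILE_WIDTH times, cutting global-memory traffic by roughly a factor of TILE_WIDTH.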

// shared-memory (tiled) version
__global__ void
MatrixMulSh( float *Md , float *Nd , float *Pd , const int WIDTH )
{
    // shared tiles: each block stages one TILE_WIDTH x TILE_WIDTH sub-matrix
    // of Md and of Nd at a time, one element per thread
    __shared__ float Mds [TILE_WIDTH][TILE_WIDTH] ;
    __shared__ float Nds [TILE_WIDTH][TILE_WIDTH] ;

    // calculate the row and column this thread is responsible for
    unsigned int col = TILE_WIDTH * blockIdx.x + threadIdx.x ;
    unsigned int row = TILE_WIDTH * blockIdx.y + threadIdx.y ;

    float Pvalue = 0 ;
    for ( int m = 0 ; m < WIDTH/TILE_WIDTH ; m++ ) // m is the phase (tile) index
    {
        // load one tile of Md and one tile of Nd into shared memory
        Mds[threadIdx.y][threadIdx.x] = Md[row * WIDTH + (m * TILE_WIDTH + threadIdx.x)] ;
        Nds[threadIdx.y][threadIdx.x] = Nd[(m * TILE_WIDTH + threadIdx.y) * WIDTH + col] ;
        __syncthreads() ; // wait until the whole tile has been loaded

        // partial dot product over this tile
        for ( int k = 0 ; k < TILE_WIDTH ; k++ )
            Pvalue += Mds[threadIdx.y][k] * Nds[k][threadIdx.x] ;
        __syncthreads() ; // wait before the tiles are overwritten in the next phase
    }
    Pd[row * WIDTH + col] = Pvalue ;
}

// main routine
int main ()
{
   const int WIDTH = 6 ;
   float array1_h[WIDTH][WIDTH] ,array2_h[WIDTH][WIDTH],
                     result_array_h[WIDTH][WIDTH] ,M_result_array_h[WIDTH][WIDTH]  ;
  float *array1_d , *array2_d , *result_array_d , *M_result_array_d ; // device pointers
  int i , j ;
  // fill the host input arrays
  for ( i = 0 ; i<WIDTH ; i++ )
  {
     for (j = 0 ; j<WIDTH ; j++ )
     {
        array1_h[i][j] = 1 ;
        array2_h[i][j] = 2 ;
     }
  }

  // create device arrays: cudaMalloc( (void **)&array_name , size_of_matrix_in_bytes )
  cudaMalloc((void **) &array1_d , WIDTH*WIDTH*sizeof (float) ) ;
  cudaMalloc((void **) &array2_d , WIDTH*WIDTH*sizeof (float) ) ;

  // copy host arrays to device arrays: cudaMemcpy( dest , source , size_in_bytes , direction )
  cudaMemcpy ( array1_d , array1_h , WIDTH*WIDTH*sizeof (float) , cudaMemcpyHostToDevice ) ;
  cudaMemcpy ( array2_d , array2_h , WIDTH*WIDTH*sizeof (float) , cudaMemcpyHostToDevice ) ;

  // allocate memory for the resultant device arrays
  cudaMalloc((void **) &result_array_d , WIDTH*WIDTH*sizeof (float) ) ;
  cudaMalloc((void **) &M_result_array_d , WIDTH*WIDTH*sizeof (float) ) ;

 

  // launch the kernel
  dim3 dimGrid ( WIDTH/TILE_WIDTH , WIDTH/TILE_WIDTH , 1 ) ;
  dim3 dimBlock( TILE_WIDTH , TILE_WIDTH , 1 ) ;

// Change the first block to "#if 1" to run the non-shared kernel,
// and set the second block to "#if 0" to disable the shared-memory kernel.
#if 0
  MatrixMul <<<dimGrid,dimBlock>>> ( array1_d , array2_d , M_result_array_d , WIDTH ) ;
#endif

#if 1
  MatrixMulSh<<<dimGrid,dimBlock>>> ( array1_d , array2_d , M_result_array_d , WIDTH ) ;
#endif
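
  // Optional sanity check (sketch): query the CUDA runtime for a launch error.
  // cudaGetLastError() and cudaGetErrorString() are standard CUDA runtime calls.
  cudaError_t err = cudaGetLastError() ;
  if ( err != cudaSuccess )
     printf ("kernel launch failed: %s\n", cudaGetErrorString(err) ) ;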

  // the cudaMemcpy below implicitly waits for the kernel to finish
  // copy M_result_array_d back into M_result_array_h
  cudaMemcpy ( M_result_array_h , M_result_array_d , WIDTH*WIDTH*sizeof (float) ,
                                    cudaMemcpyDeviceToHost ) ;

  // print the result array
  for ( i = 0 ; i<WIDTH ; i++ )
  {
      for ( j = 0 ; j < WIDTH ; j++ )
     {
        printf ("%f   ",M_result_array_h[i][j] ) ;
     }
 printf ("\n") ;
}
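
  // Optional host-side check (sketch): recompute the product on the CPU and
  // compare it with the GPU result element by element.
  for ( i = 0 ; i < WIDTH ; i++ )
     for ( j = 0 ; j < WIDTH ; j++ )
     {
        float ref = 0 ;
        for ( int k = 0 ; k < WIDTH ; k++ )
           ref += array1_h[i][k] * array2_h[k][j] ;
        if ( fabs ( ref - M_result_array_h[i][j] ) > 1e-3 )
           printf ("mismatch at (%d,%d): GPU %f vs CPU %f\n", i , j , M_result_array_h[i][j] , ref ) ;
     }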
 system("pause") ;
}
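
With WIDTH set to 6, every element of the first matrix equal to 1 and every element of the second equal to 2, each entry of the product is 6 × 1 × 2 = 12, so either kernel should print a 6 × 6 grid of 12.000000.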



Got questions?
Feel free to ask; I'd be happy to walk you through it step by step!

35 comments:

  1. thank u sir..
    it's really helpful...

  2. have you guys done matrix inverse??

  3. Thanks for sharing this code.
    May I ask how would you implement non-square matrix multiplication? For instance multiply a 4x3 by a 3x1
    Thanks.

  4. I need to write a basic CUDA code for multiplying matrix...How would I write it?

  5. when the WIDTH value is 2000 ,the code gets hanged why ???

  6. This comment has been removed by the author.

  7. hi, im trying to use your code, but why the result is always changing ?

    thanks :)

  8. I am getting "Segmentation fault (core dumped)"
    when I try it for dynamic square arrays how do I solve this?

  9. Hi,
    I am trying to change the cuda code, into a .so file and have to call it from python, initializing matrixes and converting into ctypes are done in python, any one have any idea of doing that.

  10. Great idea for an article! Looking forward to the next part.

  11. what is the diffference between shared and non-shared technique

  12. why are you using 'sizeof(int)' in cudaMemcpy ( array1_d , array1_h , WIDTH*WIDTH*sizeof (int) , cudaMemcpyHostToDevice ) while you have declared floating point variables. should'nt it be sizeof(float)

  13. Sir, how can I do the matrix multiplication in cuda without using shared memory? Can u share the code for same?

  14. Hi, thanks for the code. Could you explain it step by step as you offered in last line?
    Thanks

    1. By the way my e-mail address is chinmay.toekker@gmail.com

  15. I would like to thank you for the great text.

  16. Can someone please paste the output screenshot

