peakBandwidth #
Description #
Code to obtain theoretical peak memory bandwidth of GPUs on the system.
Effective bandwidth can be obtained with
\[bw_e = (r_B + w_B)/(t\cdot10^9)\]where \(bw_e\) is the effective bandwidth in GB/s, \(r_B\) is the number of bytes read, \(w_B\) is the number of bytes written, and \(t\) is the elapsed wall time in seconds.
The wall time of the simple memory
kernel written in the
limitingFactor code can be used.
Another note: the three memory bandwidth values we care about are:
- Theoretical: calculated in the code below
- Effective: can be measured with the approach above
- Actual: the realised bandwidth which is affected by memory access patterns
Code (C++) #
#include <stdio.h>
// Print the theoretical peak memory bandwidth of every CUDA device that the
// runtime reports.
//
// For each device the program prints its index, name, memory clock rate and
// memory bus width, followed by the peak bandwidth derived from the last two.
//
// Returns 0 on success, 1 if the device count cannot be queried.
int main() {
  // Start from zero so a failed query can never leave an indeterminate
  // device count driving the loop below.
  int nDevices = 0;

  // CUDA runtime calls return 0 (cudaSuccess) when they succeed.
  if (cudaGetDeviceCount(&nDevices) != 0) {
    fprintf(stderr, "Failed to query the CUDA device count\n");
    return 1;
  }

  cudaDeviceProp prop;
  for (int i = 0; i < nDevices; ++i) {
    // Skip a device whose properties cannot be read rather than printing
    // stale or uninitialised values.
    if (cudaGetDeviceProperties(&prop, i) != 0) {
      fprintf(stderr, "Failed to query properties of device %d\n", i);
      continue;
    }

    printf(" Device Number: %d\n", i);
    printf(" Device name: %s\n", prop.name);
    printf(" Memory Clock Rate (kHz): %d\n", prop.memoryClockRate);
    printf(" Memory Bus Width (bits): %d\n", prop.memoryBusWidth);

    // clock [kHz] * bus width [bytes] gives kB/s; 1e-6 rescales to GB/s.
    // The factor 2.0 is presumably the double-data-rate multiplier
    // (two transfers per clock) -- confirm for the memory type in use.
    // Dividing by 8.0 (not 8) keeps the bits-to-bytes conversion in
    // floating point, so a width that is not a multiple of 8 is not truncated.
    printf(" Peak Memory Bandwidth (GB/s): %f\n\n",
           2.0 * prop.memoryClockRate * (prop.memoryBusWidth / 8.0) * 1.e-6);
  }

  return 0;
}
Code (Fortran) #
! Print the theoretical peak memory bandwidth of every CUDA device that the
! runtime reports.
!
! For each device the program writes its index, name, memory clock rate and
! memory bus width, followed by the peak bandwidth derived from the last two.
program peakBandwidth
  use cudafor
  implicit none

  ! nDevices starts at zero so a failed count query leaves the loop empty.
  integer :: i, istat, nDevices=0
  type(cudaDeviceProp) :: prop

  istat = cudaGetDeviceCount(nDevices)
  ! CUDA runtime calls return 0 (cudaSuccess) when they succeed.
  if (istat /= 0) then
     write(*,"(' Failed to query the CUDA device count, status ',i0)") istat
     stop 1
  end if

  do i = 0, nDevices-1
     istat = cudaGetDeviceProperties(prop, i)
     ! Skip a device whose properties cannot be read rather than printing
     ! stale or undefined values.
     if (istat /= 0) then
        write(*,"(' Failed to query properties of device ',i0)") i
        cycle
     end if

     write(*,"(' Device Number: ',i0)") i
     write(*,"(' Device name: ',a)") trim(prop%name)
     write(*,"(' Memory Clock Rate (KHz): ', i0)") prop%memoryClockRate
     write(*,"(' Memory Bus Width (bits): ', i0)") prop%memoryBusWidth

     ! clock [kHz] * bus width [bytes] gives kB/s; 1.e-6 rescales to GB/s.
     ! The factor 2. is presumably the double-data-rate multiplier
     ! (two transfers per clock) -- confirm for the memory type in use.
     ! f0.2 is used instead of f6.2 because a fixed width of six overflows
     ! to '******' for any value of 1000 GB/s or more.
     write(*,"(' Peak Memory Bandwidth (GB/s): ', f0.2)") &
          2. * prop%memoryClockRate * (prop%memoryBusWidth/8.) * 1.e-6
     write(*,*)
  end do
end program peakBandwidth