Add CUDA error checking

This commit is contained in:
Tom Deakin 2015-07-27 17:07:07 +01:00
parent ec9d73b8d5
commit 80955ff2dc

View File

@ -40,6 +40,20 @@ struct badntimes : public std::exception
} }
}; };
// Check the CUDA runtime for a pending error and stop the program if one exists.
//
// Queries the runtime with cudaGetLastError() (which reads and clears the
// last recorded error). When the result is cudaSuccess the function simply
// returns. Otherwise the human-readable description of the error is written
// to std::cerr, prefixed with "Error: ", and the process exits using the
// numeric error code as its exit status.
void check_cuda_error(void)
{
    const cudaError_t status = cudaGetLastError();

    // Nothing pending: hand control straight back to the caller.
    if (status == cudaSuccess)
        return;

    // Report the failure, then terminate with the CUDA error code.
    const char *message = cudaGetErrorString(status);
    std::cerr << "Error: " << message << std::endl;
    exit(status);
}
void check_solution(void* a, void* b, void* c) void check_solution(void* a, void* b, void* c)
{ {
// Generate correct solution // Generate correct solution
@ -169,8 +183,10 @@ int main(int argc, char *argv[])
// Check device index is in range // Check device index is in range
int count; int count;
cudaGetDeviceCount(&count); cudaGetDeviceCount(&count);
check_cuda_error();
if (deviceIndex >= count) throw invaliddevice(); if (deviceIndex >= count) throw invaliddevice();
cudaSetDevice(deviceIndex); cudaSetDevice(deviceIndex);
check_cuda_error();
// Print out device name // Print out device name
std::cout << "Using CUDA device " << getDeviceName(deviceIndex) << std::endl; std::cout << "Using CUDA device " << getDeviceName(deviceIndex) << std::endl;
@ -201,16 +217,23 @@ int main(int argc, char *argv[])
// Create device buffers // Create device buffers
void * d_a, * d_b, *d_c; void * d_a, * d_b, *d_c;
cudaMalloc(&d_a, ARRAY_SIZE*DATATYPE_SIZE); cudaMalloc(&d_a, ARRAY_SIZE*DATATYPE_SIZE);
check_cuda_error();
cudaMalloc(&d_b, ARRAY_SIZE*DATATYPE_SIZE); cudaMalloc(&d_b, ARRAY_SIZE*DATATYPE_SIZE);
check_cuda_error();
cudaMalloc(&d_c, ARRAY_SIZE*DATATYPE_SIZE); cudaMalloc(&d_c, ARRAY_SIZE*DATATYPE_SIZE);
check_cuda_error();
// Copy host memory to device // Copy host memory to device
cudaMemcpy(d_a, h_a, ARRAY_SIZE*DATATYPE_SIZE, cudaMemcpyHostToDevice); cudaMemcpy(d_a, h_a, ARRAY_SIZE*DATATYPE_SIZE, cudaMemcpyHostToDevice);
check_cuda_error();
cudaMemcpy(d_b, h_b, ARRAY_SIZE*DATATYPE_SIZE, cudaMemcpyHostToDevice); cudaMemcpy(d_b, h_b, ARRAY_SIZE*DATATYPE_SIZE, cudaMemcpyHostToDevice);
check_cuda_error();
cudaMemcpy(d_c, h_c, ARRAY_SIZE*DATATYPE_SIZE, cudaMemcpyHostToDevice); cudaMemcpy(d_c, h_c, ARRAY_SIZE*DATATYPE_SIZE, cudaMemcpyHostToDevice);
check_cuda_error();
// Make sure the copies are finished // Make sure the copies are finished
cudaDeviceSynchronize(); cudaDeviceSynchronize();
check_cuda_error();
// List of times // List of times
std::vector< std::vector<double> > timings; std::vector< std::vector<double> > timings;
@ -227,7 +250,9 @@ int main(int argc, char *argv[])
copy<<<ARRAY_SIZE/1024, 1024>>>((float*)d_a, (float*)d_c); copy<<<ARRAY_SIZE/1024, 1024>>>((float*)d_a, (float*)d_c);
else else
copy<<<ARRAY_SIZE/1024, 1024>>>((double*)d_a, (double*)d_c); copy<<<ARRAY_SIZE/1024, 1024>>>((double*)d_a, (double*)d_c);
check_cuda_error();
cudaDeviceSynchronize(); cudaDeviceSynchronize();
check_cuda_error();
t2 = std::chrono::high_resolution_clock::now(); t2 = std::chrono::high_resolution_clock::now();
times.push_back(std::chrono::duration_cast<std::chrono::duration<double> >(t2 - t1).count()); times.push_back(std::chrono::duration_cast<std::chrono::duration<double> >(t2 - t1).count());
@ -237,7 +262,9 @@ int main(int argc, char *argv[])
mul<<<ARRAY_SIZE/1024, 1024>>>((float*)d_b, (float*)d_c); mul<<<ARRAY_SIZE/1024, 1024>>>((float*)d_b, (float*)d_c);
else else
mul<<<ARRAY_SIZE/1024, 1024>>>((double*)d_b, (double*)d_c); mul<<<ARRAY_SIZE/1024, 1024>>>((double*)d_b, (double*)d_c);
check_cuda_error();
cudaDeviceSynchronize(); cudaDeviceSynchronize();
check_cuda_error();
t2 = std::chrono::high_resolution_clock::now(); t2 = std::chrono::high_resolution_clock::now();
times.push_back(std::chrono::duration_cast<std::chrono::duration<double> >(t2 - t1).count()); times.push_back(std::chrono::duration_cast<std::chrono::duration<double> >(t2 - t1).count());
@ -247,7 +274,9 @@ int main(int argc, char *argv[])
add<<<ARRAY_SIZE/1024, 1024>>>((float*)d_a, (float*)d_b, (float*)d_c); add<<<ARRAY_SIZE/1024, 1024>>>((float*)d_a, (float*)d_b, (float*)d_c);
else else
add<<<ARRAY_SIZE/1024, 1024>>>((double*)d_a, (double*)d_b, (double*)d_c); add<<<ARRAY_SIZE/1024, 1024>>>((double*)d_a, (double*)d_b, (double*)d_c);
check_cuda_error();
cudaDeviceSynchronize(); cudaDeviceSynchronize();
check_cuda_error();
t2 = std::chrono::high_resolution_clock::now(); t2 = std::chrono::high_resolution_clock::now();
times.push_back(std::chrono::duration_cast<std::chrono::duration<double> >(t2 - t1).count()); times.push_back(std::chrono::duration_cast<std::chrono::duration<double> >(t2 - t1).count());
@ -257,7 +286,9 @@ int main(int argc, char *argv[])
triad<<<ARRAY_SIZE/1024, 1024>>>((float*)d_a, (float*)d_b, (float*)d_c); triad<<<ARRAY_SIZE/1024, 1024>>>((float*)d_a, (float*)d_b, (float*)d_c);
else else
triad<<<ARRAY_SIZE/1024, 1024>>>((double*)d_a, (double*)d_b, (double*)d_c); triad<<<ARRAY_SIZE/1024, 1024>>>((double*)d_a, (double*)d_b, (double*)d_c);
check_cuda_error();
cudaDeviceSynchronize(); cudaDeviceSynchronize();
check_cuda_error();
t2 = std::chrono::high_resolution_clock::now(); t2 = std::chrono::high_resolution_clock::now();
times.push_back(std::chrono::duration_cast<std::chrono::duration<double> >(t2 - t1).count()); times.push_back(std::chrono::duration_cast<std::chrono::duration<double> >(t2 - t1).count());
@ -267,8 +298,11 @@ int main(int argc, char *argv[])
// Check solutions // Check solutions
cudaMemcpy(h_a, d_a, ARRAY_SIZE*DATATYPE_SIZE, cudaMemcpyDeviceToHost); cudaMemcpy(h_a, d_a, ARRAY_SIZE*DATATYPE_SIZE, cudaMemcpyDeviceToHost);
check_cuda_error();
cudaMemcpy(h_b, d_b, ARRAY_SIZE*DATATYPE_SIZE, cudaMemcpyDeviceToHost); cudaMemcpy(h_b, d_b, ARRAY_SIZE*DATATYPE_SIZE, cudaMemcpyDeviceToHost);
check_cuda_error();
cudaMemcpy(h_c, d_c, ARRAY_SIZE*DATATYPE_SIZE, cudaMemcpyDeviceToHost); cudaMemcpy(h_c, d_c, ARRAY_SIZE*DATATYPE_SIZE, cudaMemcpyDeviceToHost);
check_cuda_error();
check_solution(h_a, h_b, h_c); check_solution(h_a, h_b, h_c);
// Crunch results // Crunch results
@ -328,6 +362,7 @@ std::string getDeviceName(int device)
{ {
struct cudaDeviceProp prop; struct cudaDeviceProp prop;
cudaGetDeviceProperties(&prop, device); cudaGetDeviceProperties(&prop, device);
check_cuda_error();
return std::string(prop.name); return std::string(prop.name);
} }
@ -355,6 +390,7 @@ void parseArguments(int argc, char *argv[])
// Get number of devices // Get number of devices
int count; int count;
cudaGetDeviceCount(&count); cudaGetDeviceCount(&count);
check_cuda_error();
// Print device names // Print device names
if (count == 0) if (count == 0)
@ -368,6 +404,7 @@ void parseArguments(int argc, char *argv[])
for (int i = 0; i < count; i++) for (int i = 0; i < count; i++)
{ {
std::cout << i << ": " << getDeviceName(i) << std::endl; std::cout << i << ": " << getDeviceName(i) << std::endl;
check_cuda_error();
} }
std::cout << std::endl; std::cout << std::endl;
} }