// Patch summary (ocl-stream.cpp): choose the benchmark element type (double or float) at run time.
//  - Removes the compile-time DATATYPE macro and the badtype exception; adds the globals
//    DATATYPE_SIZE (initially sizeof(double)) and useFloat (initially false).
//  - check_solution now takes void* buffers, advances double and float gold values side by side,
//    and selects the pointer cast and the tolerance (1.0E-6 for float, 1.0E-13 for double) from useFloat.
//  - main passes "-DFLOAT" to program.build() when useFloat is set, allocates the host arrays with
//    malloc, and replaces cl::copy with enqueueWriteBuffer/enqueueReadBuffer calls (blocking argument
//    CL_FALSE) that are followed by queue.finish().
//  - parseArguments accepts --float, which sets useFloat and DATATYPE_SIZE = sizeof(float); the help
//    text lists the new option.
// NOTE(review): sizes[] appears to be a file-scope array initialised from DATATYPE_SIZE * ARRAY_SIZE;
//   unless it is recomputed in code outside these hunks it will not reflect --float - confirm.
// NOTE(review): no null check or free() for the malloc'd host arrays is visible in these hunks -
//   confirm they are handled elsewhere in main.
diff --git a/ocl-stream.cpp b/ocl-stream.cpp index 0ec8332..1a2d80a 100644 --- a/ocl-stream.cpp +++ b/ocl-stream.cpp @@ -10,10 +10,12 @@ #define __CL_ENABLE_EXCEPTIONS #include "cl.hpp" -#define DATATYPE double unsigned int ARRAY_SIZE = 50000000; unsigned int NTIMES = 10; +size_t DATATYPE_SIZE = sizeof(double); +bool useFloat = false; + #define MIN(a,b) ((a) < (b)) ? (a) : (b) #define MAX(a,b) ((a) > (b)) ? (a) : (b) @@ -23,7 +25,6 @@ void parseArguments(int argc, char *argv[]); std::string getDeviceName(const cl::Device& device); unsigned getDeviceList(std::vector& devices); - struct badfile : public std::exception { virtual const char * what () const throw () @@ -32,14 +33,6 @@ struct badfile : public std::exception } }; -struct badtype : public std::exception -{ - virtual const char * what () const throw () - { - return "Datatype is not 4 or 8"; - } -}; - struct invaliddevice : public std::exception { virtual const char * what () const throw () @@ -57,27 +50,37 @@ struct badntimes : public std::exception }; size_t sizes[4] = { - 2 * sizeof(DATATYPE) * ARRAY_SIZE, - 2 * sizeof(DATATYPE) * ARRAY_SIZE, - 3 * sizeof(DATATYPE) * ARRAY_SIZE, - 3 * sizeof(DATATYPE) * ARRAY_SIZE + 2 * DATATYPE_SIZE * ARRAY_SIZE, + 2 * DATATYPE_SIZE * ARRAY_SIZE, + 3 * DATATYPE_SIZE * ARRAY_SIZE, + 3 * DATATYPE_SIZE * ARRAY_SIZE }; -void check_solution(std::vector& a, std::vector& b, std::vector& c) +void check_solution(void* a, void* b, void* c) { // Generate correct solution - DATATYPE golda = 1.0; - DATATYPE goldb = 2.0; - DATATYPE goldc = 0.0; + double golda = 1.0; + double goldb = 2.0; + double goldc = 0.0; + float goldaf = 1.0; + float goldbf = 2.0; + float goldcf = 0.0; - const DATATYPE scalar = 3.0; + const double scalar = 3.0; + const float scalarf = 3.0; for (unsigned int i = 0; i < NTIMES; i++) { + // Double goldc = golda; goldb = scalar * goldc; goldc = golda + goldb; golda = goldb + scalar * goldc; + // Float + goldcf = goldaf; + goldbf = scalarf * goldcf; + goldcf = goldaf 
+ goldbf; + goldaf = goldbf + scalarf * goldcf; } // Calculate average error @@ -86,18 +89,26 @@ void check_solution(std::vector& a, std::vector& b, std::vec double errc = 0.0; for (unsigned int i = 0; i < ARRAY_SIZE; i++) { - erra += fabs(a[i] - golda); - errb += fabs(b[i] - goldb); - errc += fabs(c[i] - goldc); + if (useFloat) + { + erra += fabsf(((float*)a)[i] - goldaf); + errb += fabsf(((float*)b)[i] - goldbf); + errc += fabsf(((float*)c)[i] - goldcf); + } + else + { + erra += fabs(((double*)a)[i] - (double)golda); + errb += fabs(((double*)b)[i] - (double)goldb); + errc += fabs(((double*)c)[i] - (double)goldc); + } } erra /= (double)ARRAY_SIZE; errb /= (double)ARRAY_SIZE; errc /= (double)ARRAY_SIZE; double epsi; - if (sizeof(DATATYPE) == 4) epsi = 1.0E-6; - else if (sizeof(DATATYPE) == 8) epsi = 1.0E-13; - else throw badtype(); + if (useFloat) epsi = 1.0E-6; + else epsi = 1.0E-13; if (erra > epsi) std::cout @@ -156,7 +167,10 @@ int main(int argc, char *argv[]) try { - program.build(); + std::string options = ""; + if (useFloat) + options = "-DFLOAT"; + program.build(options.c_str()); } catch (cl::Error& e) { @@ -175,19 +189,36 @@ int main(int argc, char *argv[]) cl::make_kernel triad(program, "triad"); // Create host vectors - std::vector h_a(ARRAY_SIZE, 1.0); - std::vector h_b(ARRAY_SIZE, 2.0); - std::vector h_c(ARRAY_SIZE, 0.0); + void *h_a = malloc(ARRAY_SIZE * DATATYPE_SIZE); + void *h_b = malloc(ARRAY_SIZE * DATATYPE_SIZE); + void *h_c = malloc(ARRAY_SIZE * DATATYPE_SIZE); + + // Initialise arrays + for (unsigned int i = 0; i < ARRAY_SIZE; i++) + { + if (useFloat) + { + ((float*)h_a)[i] = 1.0; + ((float*)h_b)[i] = 2.0; + ((float*)h_c)[i] = 0.0; + } + else + { + ((double*)h_a)[i] = 1.0; + ((double*)h_b)[i] = 2.0; + ((double*)h_c)[i] = 0.0; + } + } // Create device buffers - cl::Buffer d_a(context, CL_MEM_READ_WRITE, sizeof(DATATYPE) * ARRAY_SIZE); - cl::Buffer d_b(context, CL_MEM_READ_WRITE, sizeof(DATATYPE) * ARRAY_SIZE); - cl::Buffer d_c(context, 
CL_MEM_READ_WRITE, sizeof(DATATYPE) * ARRAY_SIZE); + cl::Buffer d_a(context, CL_MEM_READ_WRITE, DATATYPE_SIZE * ARRAY_SIZE); + cl::Buffer d_b(context, CL_MEM_READ_WRITE, DATATYPE_SIZE * ARRAY_SIZE); + cl::Buffer d_c(context, CL_MEM_READ_WRITE, DATATYPE_SIZE * ARRAY_SIZE); // Copy host memory to device - cl::copy(queue, h_a.begin(), h_a.end(), d_a); - cl::copy(queue, h_b.begin(), h_b.end(), d_b); - cl::copy(queue, h_c.begin(), h_c.end(), d_c); + queue.enqueueWriteBuffer(d_a, CL_FALSE, 0, ARRAY_SIZE*DATATYPE_SIZE, h_a); + queue.enqueueWriteBuffer(d_b, CL_FALSE, 0, ARRAY_SIZE*DATATYPE_SIZE, h_b); + queue.enqueueWriteBuffer(d_c, CL_FALSE, 0, ARRAY_SIZE*DATATYPE_SIZE, h_c); // Make sure the copies are finished queue.finish(); @@ -250,9 +281,10 @@ int main(int argc, char *argv[]) } // Check solutions - cl::copy(queue, d_a, h_a.begin(), h_a.end()); - cl::copy(queue, d_b, h_b.begin(), h_b.end()); - cl::copy(queue, d_c, h_c.begin(), h_c.end()); + queue.enqueueReadBuffer(d_a, CL_FALSE, 0, ARRAY_SIZE*DATATYPE_SIZE, h_a); + queue.enqueueReadBuffer(d_b, CL_FALSE, 0, ARRAY_SIZE*DATATYPE_SIZE, h_b); + queue.enqueueReadBuffer(d_c, CL_FALSE, 0, ARRAY_SIZE*DATATYPE_SIZE, h_c); + queue.finish(); check_solution(h_a, h_b, h_c); // Crunch results @@ -403,6 +435,11 @@ void parseArguments(int argc, char *argv[]) exit(1); } } + else if (!strcmp(argv[i], "--float")) + { + useFloat = true; + DATATYPE_SIZE = sizeof(float); + } else if (!strcmp(argv[i], "--help") || !strcmp(argv[i], "-h")) { std::cout << std::endl; @@ -413,6 +450,7 @@ void parseArguments(int argc, char *argv[]) std::cout << " --device INDEX Select device at INDEX" << std::endl; std::cout << " -s --arraysize SIZE Use SIZE elements in the array" << std::endl; std::cout << " -n --numtimes NUM Run the test NUM times (NUM >= 2)" << std::endl; + std::cout << " --float Use floats (rather than doubles)" << std::endl; std::cout << std::endl; exit(0); }