From 847cdc9b5e2cafcd31689f5a9a3a9becdfdcb07c Mon Sep 17 00:00:00 2001 From: Tom Deakin Date: Thu, 16 Jul 2015 12:04:07 +0100 Subject: [PATCH] Time the runs and print out results --- ocl-stream.cpp | 115 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 111 insertions(+), 4 deletions(-) diff --git a/ocl-stream.cpp b/ocl-stream.cpp index 91021ca..9cdca91 100644 --- a/ocl-stream.cpp +++ b/ocl-stream.cpp @@ -2,12 +2,19 @@ #include #include #include +#include +#include +#include #define __CL_ENABLE_EXCEPTIONS #include "cl.hpp" #define DATATYPE double -#define ARRAY_SIZE 1000000 +#define ARRAY_SIZE 50000000 +#define NTIMES 2 + +#define MIN(a,b) ((a) < (b)) ? (a) : (b) +#define MAX(a,b) ((a) > (b)) ? (a) : (b) struct badfile : public std::exception { @@ -17,6 +24,12 @@ struct badfile : public std::exception } }; +size_t sizes[4] = { + 2 * sizeof(DATATYPE) * ARRAY_SIZE, + 2 * sizeof(DATATYPE) * ARRAY_SIZE, + 3 * sizeof(DATATYPE) * ARRAY_SIZE, + 3 * sizeof(DATATYPE) * ARRAY_SIZE +}; int main(void) @@ -29,7 +42,7 @@ int main(void) std::string kernels(std::istreambuf_iterator(in), (std::istreambuf_iterator())); // Setup OpenCL - cl::Context context(CL_DEVICE_TYPE_GPU); + cl::Context context(CL_DEVICE_TYPE_CPU); cl::CommandQueue queue(context); cl::Program program(context, kernels); @@ -50,8 +63,8 @@ int main(void) cl::make_kernel copy(program, "copy"); cl::make_kernel mul(program, "mul"); - cl::make_kernel add(program, "add"); - cl::make_kernel triad(program, "triad"); + cl::make_kernel add(program, "add"); + cl::make_kernel triad(program, "triad"); // Create host vectors std::vector h_a(ARRAY_SIZE, 1.0); @@ -71,6 +84,100 @@ int main(void) // Make sure the copies are finished queue.finish(); + // List of times + std::vector< std::vector > timings; + + // Declare timers + std::chrono::high_resolution_clock::time_point t1, t2; + + // Main loop + for (unsigned int k = 0; k < NTIMES; k++) + { + std::vector times; + t1 = std::chrono::high_resolution_clock::now(); + copy( + cl::EnqueueArgs( + queue, + cl::NDRange(ARRAY_SIZE)), + d_a, d_c); + queue.finish(); + t2 = std::chrono::high_resolution_clock::now(); + times.push_back(std::chrono::duration_cast >(t2 - t1).count()); + + + t1 = std::chrono::high_resolution_clock::now(); + mul( + cl::EnqueueArgs( + queue, + cl::NDRange(ARRAY_SIZE)), + d_b, d_c); + queue.finish(); + t2 = std::chrono::high_resolution_clock::now(); + times.push_back(std::chrono::duration_cast >(t2 - t1).count()); + + + t1 = std::chrono::high_resolution_clock::now(); + add( + cl::EnqueueArgs( + queue, + cl::NDRange(ARRAY_SIZE)), + d_a, d_b, d_c); + queue.finish(); + t2 = std::chrono::high_resolution_clock::now(); + times.push_back(std::chrono::duration_cast >(t2 - t1).count()); + + + t1 = std::chrono::high_resolution_clock::now(); + triad( + cl::EnqueueArgs( + queue, + cl::NDRange(ARRAY_SIZE)), + d_a, d_b, d_c); + queue.finish(); + t2 = std::chrono::high_resolution_clock::now(); + times.push_back(std::chrono::duration_cast >(t2 - t1).count()); + + timings.push_back(times); + + } + + // Crunch results + double min[4] = {DBL_MAX, DBL_MAX, DBL_MAX, DBL_MAX}; + double max[4] = {0.0, 0.0, 0.0, 0.0}; + double avg[4] = {0.0, 0.0, 0.0, 0.0}; + // Ignore first result + for (unsigned int i = 1; i < NTIMES; i++) + { + for (int j = 0; j < 4; j++) + { + avg[j] += timings[i][j]; + min[j] = MIN(min[j], timings[i][j]); + max[j] = MAX(max[j], timings[i][j]); + } + } + for (int j = 0; j < 4; j++) + avg[j] /= (double)NTIMES; + + // Display results + std::string labels[] = {"Copy", "Mul", "Add", "Triad"}; + std::cout + << std::left << std::setw(12) << "Function" + << std::left << std::setw(12) << "MBytes/sec" + << std::left << std::setw(12) << "Min (sec)" + << std::left << std::setw(12) << "Max" + << std::left << std::setw(12) << "Average" + << std::endl; + for (int j = 0; j < 4; j++) + { + std::cout + << std::left << std::setw(12) << labels[j] + << std::left << std::setw(12) << 1.0E-06 * sizes[j]/min[j] + << std::left << std::setw(12) << min[j] + << std::left << std::setw(12) << max[j] + << std::left << std::setw(12) << avg[j] + << std::endl; + } + } // Catch OpenCL Errors and display information catch (cl::Error& e)