Vortex 2.0 changes:

+ Microarchitecture optimizations
+ 64-bit support
+ Xilinx FPGA support
+ LLVM-16 support
+ Refactoring and quality control fixes

minor update

minor update

minor update

minor update

minor update

minor update

cleanup

cleanup

cache bindings and memory perf refactory

minor update

minor update

hw unit tests fixes

minor update

minor update

minor update

minor update

minor update

minor udpate

minor update

minor update

minor update

minor update

minor update

minor update

minor update

minor updates

minor updates

minor update

minor update

minor update

minor update

minor update

minor update

minor updates

minor updates

minor updates

minor updates

minor update

minor update
This commit is contained in:
Blaise Tine
2023-10-19 20:51:22 -07:00
parent d69a64c32c
commit c1e168fdbe
1309 changed files with 247412 additions and 311463 deletions

View File

@@ -0,0 +1,9 @@
PROJECT = blackscholes
SRCS = main.cpp oclUtils.cpp shrUtils.cpp cmd_arg_reader.cpp oclBlackScholes_launcher.cpp oclBlackScholes_gold.cpp
CXXFLAGS += -I.
OPTS ?=
include ../common.mk

View File

@@ -0,0 +1,152 @@
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
/* CUda UTility Library */
// includes, file
#include "cmd_arg_reader.h"
// includes, system
#include <vector>
// internal unnamed namespace
namespace
{
// types, internal (class, enum, struct, union, typedef)
// variables, internal
} // namespace {
// variables, exported
/*static*/ CmdArgReader* CmdArgReader::self;
/*static*/ char** CmdArgReader::rargv;
/*static*/ int CmdArgReader::rargc;
// functions, exported
////////////////////////////////////////////////////////////////////////////////
//! Public construction interface
//! @return a handle to the class instance
//! @param argc number of command line arguments (as given to main())
//! @param argv command line argument string (as given to main())
////////////////////////////////////////////////////////////////////////////////
/*static*/ void
CmdArgReader::init( const int argc, const char** argv)
{
if ( NULL != self)
{
return;
}
// command line arguments
if (( 0 == argc) || ( 0 == argv))
{
LOGIC_EXCEPTION( "No command line arguments given.");
}
self = new CmdArgReader();
self->createArgsMaps( argc, argv);
rargc = argc;
rargv = const_cast<char**>( argv);
}
////////////////////////////////////////////////////////////////////////////////
//! Constructor, default
////////////////////////////////////////////////////////////////////////////////
CmdArgReader::CmdArgReader() :
args(),
unprocessed(),
iter(),
iter_unprocessed()
{ }
////////////////////////////////////////////////////////////////////////////////
//! Destructor
////////////////////////////////////////////////////////////////////////////////
CmdArgReader::~CmdArgReader()
{
for( iter = args.begin(); iter != args.end(); ++iter)
{
if( *(iter->second.first) == typeid( int))
{
delete static_cast<int*>( iter->second.second);
break;
}
else if( *(iter->second.first) == typeid( bool))
{
delete static_cast<bool*>( iter->second.second);
break;
}
else if( *(iter->second.first) == typeid( std::string))
{
delete static_cast<std::string*>( iter->second.second);
break;
}
else if( *(iter->second.first) == typeid( std::vector< std::string>) )
{
delete static_cast< std::vector< std::string>* >( iter->second.second);
break;
}
else if( *(iter->second.first) == typeid( std::vector<int>) )
{
delete static_cast< std::vector<int>* >( iter->second.second);
break;
}
}
}
////////////////////////////////////////////////////////////////////////////////
//! Read args as token value pair into map for better processing (Even the
//! values remain strings until the parameter values is requested by the
//! program.)
//! @param argc the argument count (as given to 'main')
//! @param argv the char* array containing the command line arguments
////////////////////////////////////////////////////////////////////////////////
void
CmdArgReader::createArgsMaps( const int argc, const char** argv) {
std::string token;
std::string val_str;
std::map< std::string, std::string> args;
std::string::size_type pos;
std::string arg;
for( int i=1; i<argc; ++i)
{
arg = argv[i];
// check if valid command line argument: all arguments begin with - or --
if (arg[0] != '-')
{
RUNTIME_EXCEPTION("Invalid command line argument.");
}
int numDashes = (arg[1] == '-' ? 2 : 1);
// check if only flag or if a value is given
if ( (pos = arg.find( '=')) == std::string::npos)
{
unprocessed[ std::string( arg, numDashes, arg.length()-numDashes)] = "FLAG";
}
else
{
unprocessed[ std::string( arg, numDashes, pos-numDashes)] =
std::string( arg, pos+1, arg.length()-1);
}
}
}

View File

@@ -0,0 +1,488 @@
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
/* CUda UTility Library */
#ifndef _CMDARGREADER_H_
#define _CMDARGREADER_H_
// includes, system
#include <map>
#include <iostream>
#include <sstream>
#include <algorithm>
#include <typeinfo>
// includes, project
#include "exception.h"
//! Preprocessed command line arguments
//! @note Lazy evaluation: The arguments are converted from strings to
//! the correct data type upon request. Converted values are stored
//! in an additonal map so that no additional conversion is
//! necessary. Arrays of command line arguments are stored in
//! std::vectors
//! @note Usage:
//! const std::string* file =
//! CmdArgReader::getArg< std::string>( "model")
//! const std::vector< std::string>* files =
//! CmdArgReader::getArg< std::vector< std::string> >( "model")
//! @note All command line arguments begin with '--' followed by the token;
//! token and value are seperated by '='; example --samples=50
//! @note Arrays have the form --model=[one.obj,two.obj,three.obj]
//! (without whitespaces)
//! Command line argument parser
class CmdArgReader
{
template<class> friend class TestCmdArgReader;
protected:
//! @param self handle to the only instance of this class
static CmdArgReader* self;
public:
//! Public construction interface
//! @return a handle to the class instance
//! @param argc number of command line arguments (as given to main())
//! @param argv command line argument string (as given to main())
static void init( const int argc, const char** argv);
public:
//! Get the value of the command line argument with given name
//! @return A const handle to the requested argument.
//! If the argument does not exist or if it
//! is not from type T NULL is returned
//! @param name the name of the requested argument
//! @note T the type of the argument requested
template<class T>
static inline const T* getArg( const std::string& name);
//! Check if a command line argument with the given name exists
//! @return true if a command line argument with name \a name exists,
//! otherwise false
//! @param name name of the command line argument in question
static inline bool existArg( const std::string& name);
//! Get the original / raw argc program argument
static inline int& getRArgc();
//! Get the original / raw argv program argument
static inline char**& getRArgv();
public:
//! Destructor
~CmdArgReader();
protected:
//! Constructor, default
CmdArgReader();
private:
// private helper functions
//! Get the value of the command line argument with given name
//! @note Private helper function for 'getArg' to work on the members
//! @return A const handle to the requested argument. If the argument
//! does not exist or if it is not from type T a NULL pointer
//! is returned.
//! @param name the name of the requested argument
//! @note T the type of the argument requested
template<class T>
inline const T* getArgHelper( const std::string& name);
//! Check if a command line argument with name \a name exists
//! @return true if a command line argument of name \a name exists,
//! otherwise false
//! @param name the name of the requested argument
inline bool existArgHelper( const std::string& name) const;
//! Read args as token value pair into map for better processing
//! (Even the values remain strings until the parameter values is
//! requested by the program.)
//! @param argc the argument count (as given to 'main')
//! @param argv the char* array containing the command line arguments
void createArgsMaps( const int argc, const char** argv);
//! Helper for "casting" the strings from the map with the unprocessed
//! values to the correct
//! data type.
//! @return true if conversion succeeded, otherwise false
//! @param element the value as string
//! @param val the value as type T
template<class T>
static inline bool convertToT( const std::string& element, T& val);
public:
// typedefs internal
//! container for a processed command line argument
//! typeid is used to easily be able to decide if a re-requested token-value
//! pair match the type of the first conversion
typedef std::pair< const std::type_info*, void*> ValType;
//! map of already converted values
typedef std::map< std::string, ValType > ArgsMap;
//! iterator for the map of already converted values
typedef ArgsMap::iterator ArgsMapIter;
typedef ArgsMap::const_iterator ConstArgsMapIter;
//! map of unprocessed (means unconverted) token-value pairs
typedef std::map< std::string, std::string> UnpMap;
//! iterator for the map of unprocessed (means unconverted) token-value pairs
typedef std::map< std::string, std::string>::iterator UnpMapIter;
private:
#ifdef _WIN32
# pragma warning( disable: 4251)
#endif
//! rargc original value of argc
static int rargc;
//! rargv contains command line arguments in raw format
static char** rargv;
//! args Map containing the already converted token-value pairs
ArgsMap args;
//! args Map containing the unprocessed / unconverted token-value pairs
UnpMap unprocessed;
//! iter Iterator for the map with the already converted token-value
//! pairs (to avoid frequent reallocation)
ArgsMapIter iter;
//! iter Iterator for the map with the unconverted token-value
//! pairs (to avoid frequent reallocation)
UnpMapIter iter_unprocessed;
#ifdef _WIN32
# pragma warning( default: 4251)
#endif
private:
//! Constructor, copy (not implemented)
CmdArgReader( const CmdArgReader&);
//! Assignment operator (not implemented)
CmdArgReader& operator=( const CmdArgReader&);
};
// variables, exported (extern)
// functions, inlined (inline)
////////////////////////////////////////////////////////////////////////////////
//! Conversion function for command line argument arrays
//! @note This function is used each type for which no template specialization
//! exist (which will cause errors if the type does not fulfill the std::vector
//! interface).
////////////////////////////////////////////////////////////////////////////////
template<class T>
/*static*/ inline bool
CmdArgReader::convertToT( const std::string& element, T& val)
{
// preallocate storage
val.resize( std::count( element.begin(), element.end(), ',') + 1);
unsigned int i = 0;
std::string::size_type pos_start = 1; // leave array prefix '['
std::string::size_type pos_end = 0;
// do for all elements of the comma seperated list
while( std::string::npos != ( pos_end = element.find(',', pos_end+1)) )
{
// convert each element by the appropriate function
if ( ! convertToT< typename T::value_type >(
std::string( element, pos_start, pos_end - pos_start), val[i]))
{
return false;
}
pos_start = pos_end + 1;
++i;
}
std::string tmp1( element, pos_start, element.length() - pos_start - 1);
// process last element (leave array postfix ']')
if ( ! convertToT< typename T::value_type >( std::string( element,
pos_start,
element.length() - pos_start - 1),
val[i]))
{
return false;
}
// possible to process all elements?
return true;
}
////////////////////////////////////////////////////////////////////////////////
//! Conversion function for command line arguments of type int
////////////////////////////////////////////////////////////////////////////////
template<>
inline bool
CmdArgReader::convertToT<int>( const std::string& element, int& val)
{
std::istringstream ios( element);
ios >> val;
bool ret_val = false;
if ( ios.eof())
{
ret_val = true;
}
return ret_val;
}
////////////////////////////////////////////////////////////////////////////////
//! Conversion function for command line arguments of type float
////////////////////////////////////////////////////////////////////////////////
template<>
inline bool
CmdArgReader::convertToT<float>( const std::string& element, float& val)
{
std::istringstream ios( element);
ios >> val;
bool ret_val = false;
if ( ios.eof())
{
ret_val = true;
}
return ret_val;
}
////////////////////////////////////////////////////////////////////////////////
//! Conversion function for command line arguments of type double
////////////////////////////////////////////////////////////////////////////////
template<>
inline bool
CmdArgReader::convertToT<double>( const std::string& element, double& val)
{
std::istringstream ios( element);
ios >> val;
bool ret_val = false;
if ( ios.eof())
{
ret_val = true;
}
return ret_val;
}
////////////////////////////////////////////////////////////////////////////////
//! Conversion function for command line arguments of type string
////////////////////////////////////////////////////////////////////////////////
template<>
inline bool
CmdArgReader::convertToT<std::string>( const std::string& element,
std::string& val)
{
val = element;
return true;
}
////////////////////////////////////////////////////////////////////////////////
//! Conversion function for command line arguments of type bool
////////////////////////////////////////////////////////////////////////////////
template<>
inline bool
CmdArgReader::convertToT<bool>( const std::string& element, bool& val)
{
// check if value is given as string-type { true | false }
if ( "true" == element)
{
val = true;
return true;
}
else if ( "false" == element)
{
val = false;
return true;
}
// check if argument is given as integer { 0 | 1 }
else
{
int tmp;
if ( convertToT<int>( element, tmp))
{
if ( 1 == tmp)
{
val = true;
return true;
}
else if ( 0 == tmp)
{
val = false;
return true;
}
}
}
return false;
}
////////////////////////////////////////////////////////////////////////////////
//! Get the value of the command line argument with given name
//! @return A const handle to the requested argument. If the argument does
//! not exist or if it is not from type T NULL is returned
//! @param T the type of the argument requested
//! @param name the name of the requested argument
////////////////////////////////////////////////////////////////////////////////
template<class T>
/*static*/ const T*
CmdArgReader::getArg( const std::string& name)
{
if( ! self)
{
RUNTIME_EXCEPTION("CmdArgReader::getArg(): CmdArgReader not initialized.");
return NULL;
}
return self->getArgHelper<T>( name);
}
////////////////////////////////////////////////////////////////////////////////
//! Check if a command line argument with the given name exists
//! @return true if a command line argument with name \a name exists,
//! otherwise false
//! @param name name of the command line argument in question
////////////////////////////////////////////////////////////////////////////////
/*static*/ inline bool
CmdArgReader::existArg( const std::string& name)
{
if( ! self)
{
RUNTIME_EXCEPTION("CmdArgReader::getArg(): CmdArgReader not initialized.");
return false;
}
return self->existArgHelper( name);
}
////////////////////////////////////////////////////////////////////////////////
//! @brief Get the value of the command line argument with given name
//! @return A const handle to the requested argument. If the argument does
//! not exist or if it is not from type T NULL is returned
//! @param T the type of the argument requested
//! @param name the name of the requested argument
////////////////////////////////////////////////////////////////////////////////
template<class T>
const T*
CmdArgReader::getArgHelper( const std::string& name)
{
// check if argument already processed and stored in correct type
if ( args.end() != (iter = args.find( name)))
{
if ( (*(iter->second.first)) == typeid( T) )
{
return (T*) iter->second.second;
}
}
else
{
T* tmp = new T;
// check the array with unprocessed values
if ( unprocessed.end() != (iter_unprocessed = unprocessed.find( name)))
{
// try to "cast" the string to the type requested
if ( convertToT< T >( iter_unprocessed->second, *tmp))
{
// add the token element pair to map of already converted values
args[name] = std::make_pair( &(typeid( T)), (void*) tmp);
return tmp;
}
}
// not used while not inserted into the map -> cleanup
delete tmp;
}
// failed, argument not available
return NULL;
}
////////////////////////////////////////////////////////////////////////////////
//! Check if a command line argument with name \a name exists
//! @return true if a command line argument of name \a name exists,
//! otherwise false
//! @param name the name of the requested argument
////////////////////////////////////////////////////////////////////////////////
inline bool
CmdArgReader::existArgHelper( const std::string& name) const
{
bool ret_val = false;
// check if argument already processed and stored in correct type
if( args.end() != args.find( name))
{
ret_val = true;
}
else
{
// check the array with unprocessed values
if ( unprocessed.end() != unprocessed.find( name))
{
ret_val = true;
}
}
return ret_val;
}
////////////////////////////////////////////////////////////////////////////////
//! Get the original / raw argc program argument
////////////////////////////////////////////////////////////////////////////////
/*static*/ inline int&
CmdArgReader::getRArgc()
{
if( ! self)
{
RUNTIME_EXCEPTION("CmdArgReader::getRArgc(): CmdArgReader not initialized.");
}
return rargc;
}
////////////////////////////////////////////////////////////////////////////////
//! Get the original / raw argv program argument
////////////////////////////////////////////////////////////////////////////////
/*static*/ inline char**&
CmdArgReader::getRArgv()
{
if( ! self)
{
RUNTIME_EXCEPTION("CmdArgReader::getRArgc(): CmdArgReader not initialized.");
}
return rargv;
}
// functions, exported (extern)
#endif // #ifndef _CMDARGREADER_H_

View File

@@ -0,0 +1,151 @@
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
/* CUda UTility Library */
#ifndef _EXCEPTION_H_
#define _EXCEPTION_H_
// includes, system
#include <exception>
#include <stdexcept>
#include <iostream>
#include <stdlib.h>
//! Exception wrapper.
//! @param Std_Exception Exception out of namespace std for easy typing.
template<class Std_Exception>
class Exception : public Std_Exception
{
public:
//! @brief Static construction interface
//! @return Alwayss throws ( Located_Exception<Exception>)
//! @param file file in which the Exception occurs
//! @param line line in which the Exception occurs
//! @param detailed details on the code fragment causing the Exception
static void throw_it( const char* file,
const int line,
const char* detailed = "-" );
//! Static construction interface
//! @return Alwayss throws ( Located_Exception<Exception>)
//! @param file file in which the Exception occurs
//! @param line line in which the Exception occurs
//! @param detailed details on the code fragment causing the Exception
static void throw_it( const char* file,
const int line,
const std::string& detailed);
//! Destructor
virtual ~Exception() throw();
private:
//! Constructor, default (private)
Exception();
//! Constructor, standard
//! @param str string returned by what()
Exception( const std::string& str);
};
////////////////////////////////////////////////////////////////////////////////
//! Exception handler function for arbitrary exceptions
//! @param ex exception to handle
////////////////////////////////////////////////////////////////////////////////
template<class Exception_Typ>
inline void
handleException( const Exception_Typ& ex)
{
std::cerr << ex.what() << std::endl;
exit( EXIT_FAILURE);
}
//! Convenience macros
//! Exception caused by dynamic program behavior, e.g. file does not exist
#define RUNTIME_EXCEPTION( msg) \
Exception<std::runtime_error>::throw_it( __FILE__, __LINE__, msg)
//! Logic exception in program, e.g. an assert failed
#define LOGIC_EXCEPTION( msg) \
Exception<std::logic_error>::throw_it( __FILE__, __LINE__, msg)
//! Out of range exception
#define RANGE_EXCEPTION( msg) \
Exception<std::range_error>::throw_it( __FILE__, __LINE__, msg)
////////////////////////////////////////////////////////////////////////////////
//! Implementation
// includes, system
#include <sstream>
////////////////////////////////////////////////////////////////////////////////
//! Static construction interface.
//! @param Exception causing code fragment (file and line) and detailed infos.
////////////////////////////////////////////////////////////////////////////////
/*static*/ template<class Std_Exception>
void
Exception<Std_Exception>::
throw_it( const char* file, const int line, const char* detailed)
{
std::stringstream s;
// Quiet heavy-weight but exceptions are not for
// performance / release versions
s << "Exception in file '" << file << "' in line " << line << "\n"
<< "Detailed description: " << detailed << "\n";
throw Exception( s.str());
}
////////////////////////////////////////////////////////////////////////////////
//! Static construction interface.
//! @param Exception causing code fragment (file and line) and detailed infos.
////////////////////////////////////////////////////////////////////////////////
/*static*/ template<class Std_Exception>
void
Exception<Std_Exception>::
throw_it( const char* file, const int line, const std::string& msg)
{
throw_it( file, line, msg.c_str());
}
////////////////////////////////////////////////////////////////////////////////
//! Constructor, default (private).
////////////////////////////////////////////////////////////////////////////////
template<class Std_Exception>
Exception<Std_Exception>::Exception() :
Exception("Unknown Exception.\n")
{ }
////////////////////////////////////////////////////////////////////////////////
//! Constructor, standard (private).
//! String returned by what().
////////////////////////////////////////////////////////////////////////////////
template<class Std_Exception>
Exception<Std_Exception>::Exception( const std::string& s) :
Std_Exception( s)
{ }
////////////////////////////////////////////////////////////////////////////////
//! Destructor
////////////////////////////////////////////////////////////////////////////////
template<class Std_Exception>
Exception<Std_Exception>::~Exception() throw() { }
// functions, exported
#endif // #ifndef _EXCEPTION_H_

View File

@@ -0,0 +1,101 @@
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#if(0)
#define EXP(a) native_exp(a)
#define LOG(a) native_log(a)
#define SQRT(a) native_sqrt(a)
#else
#define EXP(a) exp(a)
#define LOG(a) log(a)
#define SQRT(a) sqrt(a)
#endif
///////////////////////////////////////////////////////////////////////////////
// Predefine functions to avoid bug in OpenCL compiler on Mac OSX 10.7 systems
///////////////////////////////////////////////////////////////////////////////
float CND(float d);
void BlackScholesBody(__global float *call, __global float *put, float S,
float X, float T, float R, float V);
///////////////////////////////////////////////////////////////////////////////
// Rational approximation of cumulative normal distribution function
///////////////////////////////////////////////////////////////////////////////
float CND(float d){
const float A1 = 0.31938153f;
const float A2 = -0.356563782f;
const float A3 = 1.781477937f;
const float A4 = -1.821255978f;
const float A5 = 1.330274429f;
const float RSQRT2PI = 0.39894228040143267793994605993438f;
float
K = 1.0f / (1.0f + 0.2316419f * fabs(d));
float
cnd = RSQRT2PI * EXP(- 0.5f * d * d) *
(K * (A1 + K * (A2 + K * (A3 + K * (A4 + K * A5)))));
if(d > 0)
cnd = 1.0f - cnd;
return cnd;
}
///////////////////////////////////////////////////////////////////////////////
// Black-Scholes formula for both call and put
///////////////////////////////////////////////////////////////////////////////
void BlackScholesBody(
__global float *call, //Call option price
__global float *put, //Put option price
float S, //Current stock price
float X, //Option strike price
float T, //Option years
float R, //Riskless rate of return
float V //Stock volatility
){
float sqrtT = SQRT(T);
float d1 = (LOG(S / X) + (R + 0.5f * V * V) * T) / (V * sqrtT);
float d2 = d1 - V * sqrtT;
float CNDD1 = CND(d1);
float CNDD2 = CND(d2);
//Calculate Call and Put simultaneously
float expRT = EXP(- R * T);
*call = (S * CNDD1 - X * expRT * CNDD2);
*put = (X * expRT * (1.0f - CNDD2) - S * (1.0f - CNDD1));
}
__kernel void BlackScholes(
__global float *d_Call, //Call option price
__global float *d_Put, //Put option price
__global float *d_S, //Current stock price
__global float *d_X, //Option strike price
__global float *d_T, //Option years
float R, //Riskless rate of return
float V, //Stock volatility
unsigned int optN
){
for(unsigned int opt = get_global_id(0); opt < optN; opt += get_global_size(0))
BlackScholesBody(
&d_Call[opt],
&d_Put[opt],
d_S[opt],
d_X[opt],
d_T[opt],
R,
V
);
}

View File

@@ -0,0 +1,248 @@
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
// standard utilities and systems includes
#include <oclUtils.h>
#include <shrQATest.h>
#include "oclBlackScholes_common.h"
////////////////////////////////////////////////////////////////////////////////
// Helper functions
////////////////////////////////////////////////////////////////////////////////
double executionTime(cl_event &event){
cl_ulong start, end;
clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, NULL);
clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, NULL);
return (double)1.0e-9 * (end - start); // convert nanoseconds to seconds on return
}
////////////////////////////////////////////////////////////////////////////////
// Random float helper
////////////////////////////////////////////////////////////////////////////////
float randFloat(float low, float high){
float t = (float)rand() / (float)RAND_MAX;
return (1.0f - t) * low + t * high;
}
////////////////////////////////////////////////////////////////////////////////
// Main program
////////////////////////////////////////////////////////////////////////////////
int main(int argc, char **argv)
{
cl_platform_id cpPlatform; //OpenCL platform
cl_device_id* cdDevices = NULL; //OpenCL devices list (array)
cl_context cxGPUContext; //OpenCL context
cl_command_queue cqCommandQueue; //OpenCL command que
cl_mem //OpenCL memory buffer objects
d_Call,
d_Put,
d_S,
d_X,
d_T;
cl_int ciErrNum;
float
*h_CallCPU,
*h_PutCPU,
*h_CallGPU,
*h_PutGPU,
*h_S,
*h_X,
*h_T;
const unsigned int optionCount = 64;
const float R = 0.02f;
const float V = 0.30f;
shrQAStart(argc, argv);
// Get the NVIDIA platform
ciErrNum = oclGetPlatformID(&cpPlatform);
oclCheckErrorEX(ciErrNum, CL_SUCCESS, NULL);
shrLog("clGetPlatformID...\n");
//Get all the devices
cl_uint uiNumDevices = 0; // Number of devices available
cl_uint uiTargetDevice = 0; // Default Device to compute on
cl_uint uiNumComputeUnits; // Number of compute units (SM's on NV GPU)
shrLog("Get the Device info and select Device...\n");
ciErrNum = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_DEFAULT, 0, NULL, &uiNumDevices);
oclCheckErrorEX(ciErrNum, CL_SUCCESS, NULL);
cdDevices = (cl_device_id *)malloc(uiNumDevices * sizeof(cl_device_id) );
ciErrNum = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_DEFAULT, uiNumDevices, cdDevices, NULL);
oclCheckErrorEX(ciErrNum, CL_SUCCESS, NULL);
// Get command line device options and config accordingly
shrLog(" # of Devices Available = %u\n", uiNumDevices);
if(shrGetCmdLineArgumentu(argc, (const char**)argv, "device", &uiTargetDevice)== shrTRUE)
{
uiTargetDevice = CLAMP(uiTargetDevice, 0, (uiNumDevices - 1));
}
shrLog(" Using Device %u: ", uiTargetDevice);
oclPrintDevName(LOGBOTH, cdDevices[uiTargetDevice]);
ciErrNum = clGetDeviceInfo(cdDevices[uiTargetDevice], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(uiNumComputeUnits), &uiNumComputeUnits, NULL);
oclCheckErrorEX(ciErrNum, CL_SUCCESS, NULL);
shrLog("\n # of Compute Units = %u\n", uiNumComputeUnits);
// set logfile name and start logs
shrSetLogFileName ("oclBlackScholes.txt");
shrLog("%s Starting...\n\n", argv[0]);
shrLog("Allocating and initializing host memory...\n");
h_CallCPU = (float *)malloc(optionCount * sizeof(float));
h_PutCPU = (float *)malloc(optionCount * sizeof(float));
h_CallGPU = (float *)malloc(optionCount * sizeof(float));
h_PutGPU = (float *)malloc(optionCount * sizeof(float));
h_S = (float *)malloc(optionCount * sizeof(float));
h_X = (float *)malloc(optionCount * sizeof(float));
h_T = (float *)malloc(optionCount * sizeof(float));
srand(2009);
for(unsigned int i = 0; i < optionCount; i++){
h_CallCPU[i] = -1.0f;
h_PutCPU[i] = -1.0f;
h_S[i] = randFloat(5.0f, 30.0f);
h_X[i] = randFloat(1.0f, 100.0f);
h_T[i] = randFloat(0.25f, 10.0f);
}
shrLog("Initializing OpenCL...\n");
// Get the NVIDIA platform
ciErrNum = oclGetPlatformID(&cpPlatform);
oclCheckError(ciErrNum, CL_SUCCESS);
// Get a GPU device
ciErrNum = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_DEFAULT, 1, &cdDevices[uiTargetDevice], NULL);
oclCheckError(ciErrNum, CL_SUCCESS);
// Create the context
cxGPUContext = clCreateContext(0, 1, &cdDevices[uiTargetDevice], NULL, NULL, &ciErrNum);
oclCheckError(ciErrNum, CL_SUCCESS);
//Create a command-queue
cqCommandQueue = clCreateCommandQueue(cxGPUContext, cdDevices[uiTargetDevice], CL_QUEUE_PROFILING_ENABLE, &ciErrNum);
oclCheckError(ciErrNum, CL_SUCCESS);
shrLog("Creating OpenCL memory objects...\n");
d_Call = clCreateBuffer(cxGPUContext, CL_MEM_READ_WRITE, optionCount * sizeof(float), NULL, &ciErrNum);
oclCheckError(ciErrNum, CL_SUCCESS);
d_Put = clCreateBuffer(cxGPUContext, CL_MEM_READ_WRITE, optionCount * sizeof(float), NULL, &ciErrNum);
oclCheckError(ciErrNum, CL_SUCCESS);
d_S = clCreateBuffer(cxGPUContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, optionCount * sizeof(float), h_S, &ciErrNum);
oclCheckError(ciErrNum, CL_SUCCESS);
d_X = clCreateBuffer(cxGPUContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, optionCount * sizeof(float), h_X, &ciErrNum);
oclCheckError(ciErrNum, CL_SUCCESS);
d_T = clCreateBuffer(cxGPUContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, optionCount * sizeof(float), h_T, &ciErrNum);
oclCheckError(ciErrNum, CL_SUCCESS);
shrLog("Starting up BlackScholes...\n");
initBlackScholes(cxGPUContext, cqCommandQueue, (const char **)argv);
shrLog("Running OpenCL BlackScholes...\n\n");
//Just a single run or a warmup iteration
BlackScholes(
NULL,
d_Call,
d_Put,
d_S,
d_X,
d_T,
R,
V,
optionCount
);
#ifdef GPU_PROFILING
const int numIterations = 16;
cl_event startMark, endMark;
ciErrNum = clEnqueueMarker(cqCommandQueue, &startMark);
ciErrNum |= clFinish(cqCommandQueue);
shrCheckError(ciErrNum, CL_SUCCESS);
shrDeltaT(0);
for(int i = 0; i < numIterations; i++){
BlackScholes(
cqCommandQueue,
d_Call,
d_Put,
d_S,
d_X,
d_T,
R,
V,
optionCount
);
}
ciErrNum = clEnqueueMarker(cqCommandQueue, &endMark);
ciErrNum |= clFinish(cqCommandQueue);
shrCheckError(ciErrNum, CL_SUCCESS);
//Calculate performance metrics by wallclock time
double gpuTime = shrDeltaT(0) / numIterations;
shrLogEx(LOGBOTH | MASTER, 0, "oclBlackScholes, Throughput = %.4f GOptions/s, Time = %.5f s, Size = %u options, NumDevsUsed = %i, Workgroup = %u\n",
(double)(2.0 * optionCount * 1.0e-9)/gpuTime, gpuTime, (2 * optionCount), 1, 0);
//Get profiling info
cl_ulong startTime = 0, endTime = 0;
ciErrNum = clGetEventProfilingInfo(startMark, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &startTime, NULL);
ciErrNum |= clGetEventProfilingInfo(endMark, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &endTime, NULL);
shrCheckError(ciErrNum, CL_SUCCESS);
shrLog("\nOpenCL time: %.5f s\n\n", 1.0e-9 * ((double)endTime - (double)startTime) / (double)numIterations);
#endif
shrLog("\nReading back OpenCL BlackScholes results...\n");
ciErrNum = clEnqueueReadBuffer(cqCommandQueue, d_Call, CL_TRUE, 0, optionCount * sizeof(float), h_CallGPU, 0, NULL, NULL);
oclCheckError(ciErrNum, CL_SUCCESS);
ciErrNum = clEnqueueReadBuffer(cqCommandQueue, d_Put, CL_TRUE, 0, optionCount * sizeof(float), h_PutGPU, 0, NULL, NULL);
oclCheckError(ciErrNum, CL_SUCCESS);
shrLog("Comparing against Host/C++ computation...\n");
BlackScholesCPU(h_CallCPU, h_PutCPU, h_S, h_X, h_T, R, V, optionCount);
double deltaCall = 0, deltaPut = 0, sumCall = 0, sumPut = 0;
double L1call, L1put;
for(unsigned int i = 0; i < optionCount; i++)
{
sumCall += fabs(h_CallCPU[i]);
sumPut += fabs(h_PutCPU[i]);
deltaCall += fabs(h_CallCPU[i] - h_CallGPU[i]);
deltaPut += fabs(h_PutCPU[i] - h_PutGPU[i]);
}
L1call = deltaCall / sumCall;
L1put = deltaPut / sumPut;
shrLog("Relative L1 (call, put) = (%.3e, %.3e)\n\n", L1call, L1put);
shrLog("Shutting down...\n");
closeBlackScholes();
ciErrNum = clReleaseMemObject(d_T);
ciErrNum |= clReleaseMemObject(d_X);
ciErrNum |= clReleaseMemObject(d_S);
ciErrNum |= clReleaseMemObject(d_Put);
ciErrNum |= clReleaseMemObject(d_Call);
ciErrNum |= clReleaseCommandQueue(cqCommandQueue);
ciErrNum |= clReleaseContext(cxGPUContext);
oclCheckError(ciErrNum, CL_SUCCESS);
free(h_T);
free(h_X);
free(h_S);
free(h_PutGPU);
free(h_CallGPU);
free(h_PutCPU);
free(h_CallCPU);
if(cdDevices)free(cdDevices);
shrQAFinishExit(argc, (const char **)argv, ((L1call < 1E-6) && (L1put < 1E-6)) ? QA_PASSED : QA_FAILED );
}

Binary file not shown.

View File

@@ -0,0 +1,50 @@
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#include <oclUtils.h>
////////////////////////////////////////////////////////////////////////////////
// Process an array of optN options on CPU
////////////////////////////////////////////////////////////////////////////////
extern "C" void BlackScholesCPU(
float *h_Call, //Call option price
float *h_Put, //Put option price
float *h_S, //Current stock price
float *h_X, //Option strike price
float *h_T, //Option years
float R, //Riskless rate of return
float V, //Stock volatility
unsigned int optionCount
);
////////////////////////////////////////////////////////////////////////////////
// OpenCL Black-Scholes kernel launcher
////////////////////////////////////////////////////////////////////////////////
extern "C" void initBlackScholes(cl_context cxGPUContext, cl_command_queue cqParamCommandQue, const char **argv);
extern "C" void closeBlackScholes(void);
extern "C" void BlackScholes(
cl_command_queue cqCommandQueue,
cl_mem d_Call, //Call option price
cl_mem d_Put, //Put option price
cl_mem d_S, //Current stock price
cl_mem d_X, //Option strike price
cl_mem d_T, //Option years
cl_float R, //Riskless rate of return
cl_float V, //Stock volatility
cl_uint optionCount
);

View File

@@ -0,0 +1,92 @@
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#include <math.h>
#include "oclBlackScholes_common.h"
///////////////////////////////////////////////////////////////////////////////
// Rational approximation of cumulative normal distribution function
///////////////////////////////////////////////////////////////////////////////
static double CND(double d){
const double A1 = 0.31938153;
const double A2 = -0.356563782;
const double A3 = 1.781477937;
const double A4 = -1.821255978;
const double A5 = 1.330274429;
const double RSQRT2PI = 0.39894228040143267793994605993438;
double
K = 1.0 / (1.0 + 0.2316419 * fabs(d));
double
cnd = RSQRT2PI * exp(- 0.5 * d * d) *
(K * (A1 + K * (A2 + K * (A3 + K * (A4 + K * A5)))));
if(d > 0)
cnd = 1.0 - cnd;
return cnd;
}
///////////////////////////////////////////////////////////////////////////////
// Black-Scholes formula for both call and put
///////////////////////////////////////////////////////////////////////////////
static void BlackScholesBodyCPU(
float& call, //Call option price
float& put, //Put option price
float Sf, //Current stock price
float Xf, //Option strike price
float Tf, //Option years
float Rf, //Riskless rate of return
float Vf //Stock volatility
){
double S = Sf, X = Xf, T = Tf, R = Rf, V = Vf;
double sqrtT = sqrt(T);
double d1 = (log(S / X) + (R + 0.5 * V * V) * T) / (V * sqrtT);
double d2 = d1 - V * sqrtT;
double CNDD1 = CND(d1);
double CNDD2 = CND(d2);
//Calculate Call and Put simultaneously
double expRT = exp(- R * T);
call = (float)(S * CNDD1 - X * expRT * CNDD2);
put = (float)(X * expRT * (1.0 - CNDD2) - S * (1.0 - CNDD1));
}
////////////////////////////////////////////////////////////////////////////////
// Process an array of optN options
////////////////////////////////////////////////////////////////////////////////
extern "C" void BlackScholesCPU(
float *h_Call, //Call option price
float *h_Put, //Put option price
float *h_S, //Current stock price
float *h_X, //Option strike price
float *h_T, //Option years
float R, //Riskless rate of return
float V, //Stock volatility
unsigned int optionCount
){
for(unsigned int i = 0; i < optionCount; i++)
BlackScholesBodyCPU(
h_Call[i],
h_Put[i],
h_S[i],
h_X[i],
h_T[i],
R,
V
);
}

View File

@@ -0,0 +1,151 @@
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#include <oclUtils.h>
#include "oclBlackScholes_common.h"
static cl_program cpBlackScholes; //OpenCL program
static cl_kernel ckBlackScholes; //OpenCL kernel
static cl_command_queue cqDefaultCommandQueue;
static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) {
if (NULL == filename || NULL == data || 0 == size)
return CL_INVALID_VALUE;
FILE* fp = fopen(filename, "r");
if (NULL == fp) {
fprintf(stderr, "Failed to load kernel.");
return CL_INVALID_VALUE;
}
fseek(fp , 0 , SEEK_END);
long fsize = ftell(fp);
rewind(fp);
*data = (uint8_t*)malloc(fsize);
*size = fread(*data, 1, fsize, fp);
fclose(fp);
return CL_SUCCESS;
}
extern "C" void initBlackScholes(cl_context cxGPUContext, cl_command_queue cqParamCommandQueue, const char **argv){
cl_int ciErrNum;
size_t kernelLength;
/*shrLog("...loading BlackScholes.cl\n");
char *cPathAndName = shrFindFilePath("BlackScholes.cl", argv[0]);
shrCheckError(cPathAndName != NULL, shrTRUE);
char *cBlackScholes = oclLoadProgSource(cPathAndName, "// My comment\n", &kernelLength);
shrCheckError(cBlackScholes != NULL, shrTRUE);*/
shrLog("...creating BlackScholes program\n");
//cpBlackScholes = clCreateProgramWithSource(cxGPUContext, 1, (const char **)&cBlackScholes, &kernelLength, &ciErrNum);
uint8_t *kernel_bin = NULL;
size_t kernel_size;
cl_int binary_status = 0;
ciErrNum = read_kernel_file("kernel.pocl", &kernel_bin, &kernel_size);
shrCheckError(ciErrNum, CL_SUCCESS);
cl_device_id device_id = oclGetFirstDev(cxGPUContext);
cpBlackScholes = clCreateProgramWithBinary(
cxGPUContext, 1, &device_id, &kernel_size, (const uint8_t**)&kernel_bin, &binary_status, &ciErrNum);
shrCheckError(ciErrNum, CL_SUCCESS);
shrLog("...building BlackScholes program\n");
ciErrNum = clBuildProgram(cpBlackScholes, 0, NULL, "-cl-fast-relaxed-math -Werror", NULL, NULL);
if(ciErrNum != CL_BUILD_SUCCESS){
shrLog("*** Compilation failure ***\n");
size_t deviceNum;
cl_device_id *cdDevices;
ciErrNum = clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, 0, NULL, &deviceNum);
shrCheckError(ciErrNum, CL_SUCCESS);
cdDevices = (cl_device_id *)malloc(deviceNum * sizeof(cl_device_id));
shrCheckError(cdDevices != NULL, shrTRUE);
ciErrNum = clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, deviceNum * sizeof(cl_device_id), cdDevices, NULL);
shrCheckError(ciErrNum, CL_SUCCESS);
size_t logSize;
char *logTxt;
ciErrNum = clGetProgramBuildInfo(cpBlackScholes, cdDevices[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);
shrCheckError(ciErrNum, CL_SUCCESS);
logTxt = (char *)malloc(logSize);
shrCheckError(logTxt != NULL, shrTRUE);
ciErrNum = clGetProgramBuildInfo(cpBlackScholes, cdDevices[0], CL_PROGRAM_BUILD_LOG, logSize, logTxt, NULL);
shrCheckError(ciErrNum, CL_SUCCESS);
shrLog("%s\n", logTxt);
shrLog("*** Exiting ***\n");
free(logTxt);
free(cdDevices);
exit(1);
}
//Save ptx code to separate file
oclLogPtx(cpBlackScholes, oclGetFirstDev(cxGPUContext), "BlackScholes.ptx");
shrLog("...creating BlackScholes kernels\n");
ckBlackScholes = clCreateKernel(cpBlackScholes, "BlackScholes", &ciErrNum);
shrCheckError(ciErrNum, CL_SUCCESS);
cqDefaultCommandQueue = cqParamCommandQueue;
//free(cBlackScholes);
//free(cPathAndName);
}
extern "C" void closeBlackScholes(void){
cl_int ciErrNum;
ciErrNum = clReleaseKernel(ckBlackScholes);
ciErrNum |= clReleaseProgram(cpBlackScholes);
shrCheckError(ciErrNum, CL_SUCCESS);
}
////////////////////////////////////////////////////////////////////////////////
// OpenCL Black-Scholes kernel launcher
////////////////////////////////////////////////////////////////////////////////
extern "C" void BlackScholes(
cl_command_queue cqCommandQueue,
cl_mem d_Call, //Call option price
cl_mem d_Put, //Put option price
cl_mem d_S, //Current stock price
cl_mem d_X, //Option strike price
cl_mem d_T, //Option years
cl_float R, //Riskless rate of return
cl_float V, //Stock volatility
cl_uint optionCount
){
cl_int ciErrNum;
if(!cqCommandQueue)
cqCommandQueue = cqDefaultCommandQueue;
ciErrNum = clSetKernelArg(ckBlackScholes, 0, sizeof(cl_mem), (void *)&d_Call);
ciErrNum |= clSetKernelArg(ckBlackScholes, 1, sizeof(cl_mem), (void *)&d_Put);
ciErrNum |= clSetKernelArg(ckBlackScholes, 2, sizeof(cl_mem), (void *)&d_S);
ciErrNum |= clSetKernelArg(ckBlackScholes, 3, sizeof(cl_mem), (void *)&d_X);
ciErrNum |= clSetKernelArg(ckBlackScholes, 4, sizeof(cl_mem), (void *)&d_T);
ciErrNum |= clSetKernelArg(ckBlackScholes, 5, sizeof(cl_float), (void *)&R);
ciErrNum |= clSetKernelArg(ckBlackScholes, 6, sizeof(cl_float), (void *)&V);
ciErrNum |= clSetKernelArg(ckBlackScholes, 7, sizeof(cl_uint), (void *)&optionCount);
shrCheckError(ciErrNum, CL_SUCCESS);
//Run the kernel
size_t globalWorkSize = 16;//60 * 1024;
size_t localWorkSize = 16;//128;
ciErrNum = clEnqueueNDRangeKernel(cqCommandQueue, ckBlackScholes, 1, NULL, &globalWorkSize, &localWorkSize, 0, NULL, NULL);
shrCheckError(ciErrNum, CL_SUCCESS);
}

View File

@@ -0,0 +1,806 @@
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
// *********************************************************************
// Utilities specific to OpenCL samples in NVIDIA GPU Computing SDK
// *********************************************************************
#include <fstream>
#include <vector>
#include <iostream>
#include <algorithm>
#include <stdarg.h>
#include "oclUtils.h"
//////////////////////////////////////////////////////////////////////////////
//! Gets the platform ID for NVIDIA if available, otherwise default
//!
//! @return the id
//! @param clSelectedPlatformID OpenCL platoform ID
//////////////////////////////////////////////////////////////////////////////
cl_int oclGetPlatformID(cl_platform_id* clSelectedPlatformID)
{
char chBuffer[1024];
cl_uint num_platforms;
cl_platform_id* clPlatformIDs;
cl_int ciErrNum;
*clSelectedPlatformID = NULL;
// Get OpenCL platform count
ciErrNum = clGetPlatformIDs (0, NULL, &num_platforms);
if (ciErrNum != CL_SUCCESS)
{
shrLog(" Error %i in clGetPlatformIDs Call !!!\n\n", ciErrNum);
return -1000;
}
else
{
if(num_platforms == 0)
{
shrLog("No OpenCL platform found!\n\n");
return -2000;
}
else
{
// if there's a platform or more, make space for ID's
if ((clPlatformIDs = (cl_platform_id*)malloc(num_platforms * sizeof(cl_platform_id))) == NULL)
{
shrLog("Failed to allocate memory for cl_platform ID's!\n\n");
return -3000;
}
// get platform info for each platform and trap the NVIDIA platform if found
ciErrNum = clGetPlatformIDs (num_platforms, clPlatformIDs, NULL);
for(cl_uint i = 0; i < num_platforms; ++i)
{
ciErrNum = clGetPlatformInfo (clPlatformIDs[i], CL_PLATFORM_NAME, 1024, &chBuffer, NULL);
if(ciErrNum == CL_SUCCESS)
{
if(strstr(chBuffer, "NVIDIA") != NULL)
{
*clSelectedPlatformID = clPlatformIDs[i];
break;
}
}
}
// default to zeroeth platform if NVIDIA not found
if(*clSelectedPlatformID == NULL)
{
shrLog("WARNING: NVIDIA OpenCL platform not found - defaulting to first platform!\n\n");
*clSelectedPlatformID = clPlatformIDs[0];
}
free(clPlatformIDs);
}
}
return CL_SUCCESS;
}
//////////////////////////////////////////////////////////////////////////////
//! Print the device name
//!
//! @param iLogMode enum LOGBOTH, LOGCONSOLE, LOGFILE
//! @param device OpenCL id of the device
//////////////////////////////////////////////////////////////////////////////
void oclPrintDevName(int iLogMode, cl_device_id device)
{
char device_string[1024];
clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_string), &device_string, NULL);
shrLogEx(iLogMode, 0, "%s", device_string);
}
//////////////////////////////////////////////////////////////////////////////
//! Print info about the device
//!
//! @param iLogMode enum LOGBOTH, LOGCONSOLE, LOGFILE
//! @param device OpenCL id of the device
//////////////////////////////////////////////////////////////////////////////
void oclPrintDevInfo(int iLogMode, cl_device_id device)
{
char device_string[1024];
bool nv_device_attibute_query = false;
// CL_DEVICE_NAME
clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_string), &device_string, NULL);
shrLogEx(iLogMode, 0, " CL_DEVICE_NAME: \t\t\t%s\n", device_string);
// CL_DEVICE_VENDOR
clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(device_string), &device_string, NULL);
shrLogEx(iLogMode, 0, " CL_DEVICE_VENDOR: \t\t\t%s\n", device_string);
// CL_DRIVER_VERSION
clGetDeviceInfo(device, CL_DRIVER_VERSION, sizeof(device_string), &device_string, NULL);
shrLogEx(iLogMode, 0, " CL_DRIVER_VERSION: \t\t\t%s\n", device_string);
// CL_DEVICE_VERSION
clGetDeviceInfo(device, CL_DEVICE_VERSION, sizeof(device_string), &device_string, NULL);
shrLogEx(iLogMode, 0, " CL_DEVICE_VERSION: \t\t\t%s\n", device_string);
// CL_DEVICE_OPENCL_C_VERSION (if CL_DEVICE_VERSION version > 1.0)
if(strncmp("OpenCL 1.0", device_string, 10) != 0)
{
// This code is unused for devices reporting OpenCL 1.0, but a def is needed anyway to allow compilation using v 1.0 headers
// This constant isn't #defined in 1.0
#ifndef CL_DEVICE_OPENCL_C_VERSION
#define CL_DEVICE_OPENCL_C_VERSION 0x103D
#endif
clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION, sizeof(device_string), &device_string, NULL);
shrLogEx(iLogMode, 0, " CL_DEVICE_OPENCL_C_VERSION: \t\t%s\n", device_string);
}
// CL_DEVICE_TYPE
cl_device_type type;
clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(type), &type, NULL);
if( type & CL_DEVICE_TYPE_CPU )
shrLogEx(iLogMode, 0, " CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_CPU");
if( type & CL_DEVICE_TYPE_GPU )
shrLogEx(iLogMode, 0, " CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_GPU");
if( type & CL_DEVICE_TYPE_ACCELERATOR )
shrLogEx(iLogMode, 0, " CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_ACCELERATOR");
if( type & CL_DEVICE_TYPE_DEFAULT )
shrLogEx(iLogMode, 0, " CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_DEFAULT");
// CL_DEVICE_MAX_COMPUTE_UNITS
cl_uint compute_units;
clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(compute_units), &compute_units, NULL);
shrLogEx(iLogMode, 0, " CL_DEVICE_MAX_COMPUTE_UNITS:\t\t%u\n", compute_units);
// CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS
size_t workitem_dims;
clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(workitem_dims), &workitem_dims, NULL);
shrLogEx(iLogMode, 0, " CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS:\t%u\n", workitem_dims);
// CL_DEVICE_MAX_WORK_ITEM_SIZES
size_t workitem_size[3];
clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(workitem_size), &workitem_size, NULL);
shrLogEx(iLogMode, 0, " CL_DEVICE_MAX_WORK_ITEM_SIZES:\t%u / %u / %u \n", workitem_size[0], workitem_size[1], workitem_size[2]);
// CL_DEVICE_MAX_WORK_GROUP_SIZE
size_t workgroup_size;
clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(workgroup_size), &workgroup_size, NULL);
shrLogEx(iLogMode, 0, " CL_DEVICE_MAX_WORK_GROUP_SIZE:\t%u\n", workgroup_size);
// CL_DEVICE_MAX_CLOCK_FREQUENCY
cl_uint clock_frequency;
clGetDeviceInfo(device, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clock_frequency), &clock_frequency, NULL);
shrLogEx(iLogMode, 0, " CL_DEVICE_MAX_CLOCK_FREQUENCY:\t%u MHz\n", clock_frequency);
// CL_DEVICE_ADDRESS_BITS
cl_uint addr_bits;
clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(addr_bits), &addr_bits, NULL);
shrLogEx(iLogMode, 0, " CL_DEVICE_ADDRESS_BITS:\t\t%u\n", addr_bits);
// CL_DEVICE_MAX_MEM_ALLOC_SIZE
cl_ulong max_mem_alloc_size;
clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(max_mem_alloc_size), &max_mem_alloc_size, NULL);
shrLogEx(iLogMode, 0, " CL_DEVICE_MAX_MEM_ALLOC_SIZE:\t\t%u MByte\n", (unsigned int)(max_mem_alloc_size / (1024 * 1024)));
// CL_DEVICE_GLOBAL_MEM_SIZE
cl_ulong mem_size;
clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(mem_size), &mem_size, NULL);
shrLogEx(iLogMode, 0, " CL_DEVICE_GLOBAL_MEM_SIZE:\t\t%u MByte\n", (unsigned int)(mem_size / (1024 * 1024)));
// CL_DEVICE_ERROR_CORRECTION_SUPPORT
cl_bool error_correction_support;
clGetDeviceInfo(device, CL_DEVICE_ERROR_CORRECTION_SUPPORT, sizeof(error_correction_support), &error_correction_support, NULL);
shrLogEx(iLogMode, 0, " CL_DEVICE_ERROR_CORRECTION_SUPPORT:\t%s\n", error_correction_support == CL_TRUE ? "yes" : "no");
// CL_DEVICE_LOCAL_MEM_TYPE
cl_device_local_mem_type local_mem_type;
clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_TYPE, sizeof(local_mem_type), &local_mem_type, NULL);
shrLogEx(iLogMode, 0, " CL_DEVICE_LOCAL_MEM_TYPE:\t\t%s\n", local_mem_type == 1 ? "local" : "global");
// CL_DEVICE_LOCAL_MEM_SIZE
clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(mem_size), &mem_size, NULL);
shrLogEx(iLogMode, 0, " CL_DEVICE_LOCAL_MEM_SIZE:\t\t%u KByte\n", (unsigned int)(mem_size / 1024));
// CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE
clGetDeviceInfo(device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(mem_size), &mem_size, NULL);
shrLogEx(iLogMode, 0, " CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE:\t%u KByte\n", (unsigned int)(mem_size / 1024));
// CL_DEVICE_QUEUE_PROPERTIES
cl_command_queue_properties queue_properties;
clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES, sizeof(queue_properties), &queue_properties, NULL);
if( queue_properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE )
shrLogEx(iLogMode, 0, " CL_DEVICE_QUEUE_PROPERTIES:\t\t%s\n", "CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE");
if( queue_properties & CL_QUEUE_PROFILING_ENABLE )
shrLogEx(iLogMode, 0, " CL_DEVICE_QUEUE_PROPERTIES:\t\t%s\n", "CL_QUEUE_PROFILING_ENABLE");
// CL_DEVICE_IMAGE_SUPPORT
cl_bool image_support;
clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(image_support), &image_support, NULL);
shrLogEx(iLogMode, 0, " CL_DEVICE_IMAGE_SUPPORT:\t\t%u\n", image_support);
// CL_DEVICE_MAX_READ_IMAGE_ARGS
cl_uint max_read_image_args;
clGetDeviceInfo(device, CL_DEVICE_MAX_READ_IMAGE_ARGS, sizeof(max_read_image_args), &max_read_image_args, NULL);
shrLogEx(iLogMode, 0, " CL_DEVICE_MAX_READ_IMAGE_ARGS:\t%u\n", max_read_image_args);
// CL_DEVICE_MAX_WRITE_IMAGE_ARGS
cl_uint max_write_image_args;
clGetDeviceInfo(device, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, sizeof(max_write_image_args), &max_write_image_args, NULL);
shrLogEx(iLogMode, 0, " CL_DEVICE_MAX_WRITE_IMAGE_ARGS:\t%u\n", max_write_image_args);
// CL_DEVICE_SINGLE_FP_CONFIG
cl_device_fp_config fp_config;
clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(cl_device_fp_config), &fp_config, NULL);
shrLogEx(iLogMode, 0, " CL_DEVICE_SINGLE_FP_CONFIG:\t\t%s%s%s%s%s%s\n",
fp_config & CL_FP_DENORM ? "denorms " : "",
fp_config & CL_FP_INF_NAN ? "INF-quietNaNs " : "",
fp_config & CL_FP_ROUND_TO_NEAREST ? "round-to-nearest " : "",
fp_config & CL_FP_ROUND_TO_ZERO ? "round-to-zero " : "",
fp_config & CL_FP_ROUND_TO_INF ? "round-to-inf " : "",
fp_config & CL_FP_FMA ? "fma " : "");
// CL_DEVICE_IMAGE2D_MAX_WIDTH, CL_DEVICE_IMAGE2D_MAX_HEIGHT, CL_DEVICE_IMAGE3D_MAX_WIDTH, CL_DEVICE_IMAGE3D_MAX_HEIGHT, CL_DEVICE_IMAGE3D_MAX_DEPTH
size_t szMaxDims[5];
shrLogEx(iLogMode, 0, "\n CL_DEVICE_IMAGE <dim>");
clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t), &szMaxDims[0], NULL);
shrLogEx(iLogMode, 0, "\t\t\t2D_MAX_WIDTH\t %u\n", szMaxDims[0]);
clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t), &szMaxDims[1], NULL);
shrLogEx(iLogMode, 0, "\t\t\t\t\t2D_MAX_HEIGHT\t %u\n", szMaxDims[1]);
clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(size_t), &szMaxDims[2], NULL);
shrLogEx(iLogMode, 0, "\t\t\t\t\t3D_MAX_WIDTH\t %u\n", szMaxDims[2]);
clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(size_t), &szMaxDims[3], NULL);
shrLogEx(iLogMode, 0, "\t\t\t\t\t3D_MAX_HEIGHT\t %u\n", szMaxDims[3]);
clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(size_t), &szMaxDims[4], NULL);
shrLogEx(iLogMode, 0, "\t\t\t\t\t3D_MAX_DEPTH\t %u\n", szMaxDims[4]);
// CL_DEVICE_EXTENSIONS: get device extensions, and if any then parse & log the string onto separate lines
clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, sizeof(device_string), &device_string, NULL);
if (device_string != 0)
{
shrLogEx(iLogMode, 0, "\n CL_DEVICE_EXTENSIONS:");
std::string stdDevString;
stdDevString = std::string(device_string);
size_t szOldPos = 0;
size_t szSpacePos = stdDevString.find(' ', szOldPos); // extensions string is space delimited
while (szSpacePos != stdDevString.npos)
{
if( strcmp("cl_nv_device_attribute_query", stdDevString.substr(szOldPos, szSpacePos - szOldPos).c_str()) == 0 )
nv_device_attibute_query = true;
if (szOldPos > 0)
{
shrLogEx(iLogMode, 0, "\t\t");
}
shrLogEx(iLogMode, 0, "\t\t\t%s\n", stdDevString.substr(szOldPos, szSpacePos - szOldPos).c_str());
do {
szOldPos = szSpacePos + 1;
szSpacePos = stdDevString.find(' ', szOldPos);
} while (szSpacePos == szOldPos);
}
shrLogEx(iLogMode, 0, "\n");
}
else
{
shrLogEx(iLogMode, 0, " CL_DEVICE_EXTENSIONS: None\n");
}
if(nv_device_attibute_query)
{
cl_uint compute_capability_major, compute_capability_minor;
clGetDeviceInfo(device, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, sizeof(cl_uint), &compute_capability_major, NULL);
clGetDeviceInfo(device, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, sizeof(cl_uint), &compute_capability_minor, NULL);
shrLogEx(iLogMode, 0, "\n CL_DEVICE_COMPUTE_CAPABILITY_NV:\t%u.%u\n", compute_capability_major, compute_capability_minor);
shrLogEx(iLogMode, 0, " NUMBER OF MULTIPROCESSORS:\t\t%u\n", compute_units); // this is the same value reported by CL_DEVICE_MAX_COMPUTE_UNITS
shrLogEx(iLogMode, 0, " NUMBER OF CUDA CORES:\t\t\t%u\n", ConvertSMVer2Cores(compute_capability_major, compute_capability_minor) * compute_units);
cl_uint regs_per_block;
clGetDeviceInfo(device, CL_DEVICE_REGISTERS_PER_BLOCK_NV, sizeof(cl_uint), &regs_per_block, NULL);
shrLogEx(iLogMode, 0, " CL_DEVICE_REGISTERS_PER_BLOCK_NV:\t%u\n", regs_per_block);
cl_uint warp_size;
clGetDeviceInfo(device, CL_DEVICE_WARP_SIZE_NV, sizeof(cl_uint), &warp_size, NULL);
shrLogEx(iLogMode, 0, " CL_DEVICE_WARP_SIZE_NV:\t\t%u\n", warp_size);
cl_bool gpu_overlap;
clGetDeviceInfo(device, CL_DEVICE_GPU_OVERLAP_NV, sizeof(cl_bool), &gpu_overlap, NULL);
shrLogEx(iLogMode, 0, " CL_DEVICE_GPU_OVERLAP_NV:\t\t%s\n", gpu_overlap == CL_TRUE ? "CL_TRUE" : "CL_FALSE");
cl_bool exec_timeout;
clGetDeviceInfo(device, CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV, sizeof(cl_bool), &exec_timeout, NULL);
shrLogEx(iLogMode, 0, " CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV:\t%s\n", exec_timeout == CL_TRUE ? "CL_TRUE" : "CL_FALSE");
cl_bool integrated_memory;
clGetDeviceInfo(device, CL_DEVICE_INTEGRATED_MEMORY_NV, sizeof(cl_bool), &integrated_memory, NULL);
shrLogEx(iLogMode, 0, " CL_DEVICE_INTEGRATED_MEMORY_NV:\t%s\n", integrated_memory == CL_TRUE ? "CL_TRUE" : "CL_FALSE");
}
// CL_DEVICE_PREFERRED_VECTOR_WIDTH_<type>
shrLogEx(iLogMode, 0, " CL_DEVICE_PREFERRED_VECTOR_WIDTH_<t>\t");
cl_uint vec_width [6];
clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, sizeof(cl_uint), &vec_width[0], NULL);
clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, sizeof(cl_uint), &vec_width[1], NULL);
clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), &vec_width[2], NULL);
clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, sizeof(cl_uint), &vec_width[3], NULL);
clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, sizeof(cl_uint), &vec_width[4], NULL);
clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, sizeof(cl_uint), &vec_width[5], NULL);
shrLogEx(iLogMode, 0, "CHAR %u, SHORT %u, INT %u, LONG %u, FLOAT %u, DOUBLE %u\n\n\n",
vec_width[0], vec_width[1], vec_width[2], vec_width[3], vec_width[4], vec_width[5]);
}
//////////////////////////////////////////////////////////////////////////////
//! Get and return device capability
//!
//! @return the 2 digit integer representation of device Cap (major minor). return -1 if NA
//! @param device OpenCL id of the device
//////////////////////////////////////////////////////////////////////////////
int oclGetDevCap(cl_device_id device)
{
char cDevString[1024];
bool bDevAttributeQuery = false;
int iDevArch = -1;
// Get device extensions, and if any then search for cl_nv_device_attribute_query
clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, sizeof(cDevString), &cDevString, NULL);
if (cDevString != 0)
{
std::string stdDevString;
stdDevString = std::string(cDevString);
size_t szOldPos = 0;
size_t szSpacePos = stdDevString.find(' ', szOldPos); // extensions string is space delimited
while (szSpacePos != stdDevString.npos)
{
if( strcmp("cl_nv_device_attribute_query", stdDevString.substr(szOldPos, szSpacePos - szOldPos).c_str()) == 0 )
{
bDevAttributeQuery = true;
}
do {
szOldPos = szSpacePos + 1;
szSpacePos = stdDevString.find(' ', szOldPos);
} while (szSpacePos == szOldPos);
}
}
// if search succeeded, get device caps
if(bDevAttributeQuery)
{
cl_int iComputeCapMajor, iComputeCapMinor;
clGetDeviceInfo(device, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, sizeof(cl_uint), (void*)&iComputeCapMajor, NULL);
clGetDeviceInfo(device, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, sizeof(cl_uint), (void*)&iComputeCapMinor, NULL);
iDevArch = (10 * iComputeCapMajor) + iComputeCapMinor;
}
return iDevArch;
}
//////////////////////////////////////////////////////////////////////////////
//! Gets the id of the first device from the context
//!
//! @return the id
//! @param cxGPUContext OpenCL context
//////////////////////////////////////////////////////////////////////////////
cl_device_id oclGetFirstDev(cl_context cxGPUContext)
{
size_t szParmDataBytes;
cl_device_id* cdDevices;
// get the list of GPU devices associated with context
clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes);
cdDevices = (cl_device_id*) malloc(szParmDataBytes);
clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL);
cl_device_id first = cdDevices[0];
free(cdDevices);
return first;
}
//////////////////////////////////////////////////////////////////////////////
//! Gets the id of device with maximal FLOPS from the context
//!
//! @return the id
//! @param cxGPUContext OpenCL context
//////////////////////////////////////////////////////////////////////////////
cl_device_id oclGetMaxFlopsDev(cl_context cxGPUContext)
{
size_t szParmDataBytes;
cl_device_id* cdDevices;
// get the list of GPU devices associated with context
clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes);
cdDevices = (cl_device_id*) malloc(szParmDataBytes);
size_t device_count = szParmDataBytes / sizeof(cl_device_id);
clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL);
cl_device_id max_flops_device = cdDevices[0];
int max_flops = 0;
size_t current_device = 0;
// CL_DEVICE_MAX_COMPUTE_UNITS
cl_uint compute_units;
clGetDeviceInfo(cdDevices[current_device], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(compute_units), &compute_units, NULL);
// CL_DEVICE_MAX_CLOCK_FREQUENCY
cl_uint clock_frequency;
clGetDeviceInfo(cdDevices[current_device], CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clock_frequency), &clock_frequency, NULL);
max_flops = compute_units * clock_frequency;
++current_device;
while( current_device < device_count )
{
// CL_DEVICE_MAX_COMPUTE_UNITS
cl_uint compute_units;
clGetDeviceInfo(cdDevices[current_device], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(compute_units), &compute_units, NULL);
// CL_DEVICE_MAX_CLOCK_FREQUENCY
cl_uint clock_frequency;
clGetDeviceInfo(cdDevices[current_device], CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clock_frequency), &clock_frequency, NULL);
int flops = compute_units * clock_frequency;
if( flops > max_flops )
{
max_flops = flops;
max_flops_device = cdDevices[current_device];
}
++current_device;
}
free(cdDevices);
return max_flops_device;
}
//////////////////////////////////////////////////////////////////////////////
//! Loads a Program file and prepends the cPreamble to the code.
//!
//! @return the source string if succeeded, 0 otherwise
//! @param cFilename program filename
//! @param cPreamble code that is prepended to the loaded file, typically a set of #defines or a header
//! @param szFinalLength returned length of the code string
//////////////////////////////////////////////////////////////////////////////
char* oclLoadProgSource(const char* cFilename, const char* cPreamble, size_t* szFinalLength)
{
// locals
FILE* pFileStream = NULL;
size_t szSourceLength;
// open the OpenCL source code file
#ifdef _WIN32 // Windows version
if(fopen_s(&pFileStream, cFilename, "rb") != 0)
{
return NULL;
}
#else // Linux version
pFileStream = fopen(cFilename, "rb");
if(pFileStream == 0)
{
return NULL;
}
#endif
size_t szPreambleLength = strlen(cPreamble);
// get the length of the source code
fseek(pFileStream, 0, SEEK_END);
szSourceLength = ftell(pFileStream);
fseek(pFileStream, 0, SEEK_SET);
// allocate a buffer for the source code string and read it in
char* cSourceString = (char *)malloc(szSourceLength + szPreambleLength + 1);
memcpy(cSourceString, cPreamble, szPreambleLength);
if (fread((cSourceString) + szPreambleLength, szSourceLength, 1, pFileStream) != 1)
{
fclose(pFileStream);
free(cSourceString);
return 0;
}
// close the file and return the total length of the combined (preamble + source) string
fclose(pFileStream);
if(szFinalLength != 0)
{
*szFinalLength = szSourceLength + szPreambleLength;
}
cSourceString[szSourceLength + szPreambleLength] = '\0';
return cSourceString;
}
//////////////////////////////////////////////////////////////////////////////
//! Gets the id of the nth device from the context
//!
//! @return the id or -1 when out of range
//! @param cxGPUContext OpenCL context
//! @param device_idx index of the device of interest
//////////////////////////////////////////////////////////////////////////////
cl_device_id oclGetDev(cl_context cxGPUContext, unsigned int nr)
{
size_t szParmDataBytes;
cl_device_id* cdDevices;
// get the list of GPU devices associated with context
clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes);
if( szParmDataBytes / sizeof(cl_device_id) <= nr ) {
return (cl_device_id)-1;
}
cdDevices = (cl_device_id*) malloc(szParmDataBytes);
clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL);
cl_device_id device = cdDevices[nr];
free(cdDevices);
return device;
}
//////////////////////////////////////////////////////////////////////////////
//! Get the binary (PTX) of the program associated with the device
//!
//! @param cpProgram OpenCL program
//! @param cdDevice device of interest
//! @param binary returned code
//! @param length length of returned code
//////////////////////////////////////////////////////////////////////////////
void oclGetProgBinary( cl_program cpProgram, cl_device_id cdDevice, char** binary, size_t* length)
{
// Grab the number of devices associated witht the program
cl_uint num_devices;
clGetProgramInfo(cpProgram, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &num_devices, NULL);
// Grab the device ids
cl_device_id* devices = (cl_device_id*) malloc(num_devices * sizeof(cl_device_id));
clGetProgramInfo(cpProgram, CL_PROGRAM_DEVICES, num_devices * sizeof(cl_device_id), devices, 0);
// Grab the sizes of the binaries
size_t* binary_sizes = (size_t*)malloc(num_devices * sizeof(size_t));
clGetProgramInfo(cpProgram, CL_PROGRAM_BINARY_SIZES, num_devices * sizeof(size_t), binary_sizes, NULL);
// Now get the binaries
char** ptx_code = (char**) malloc(num_devices * sizeof(char*));
for( unsigned int i=0; i<num_devices; ++i) {
ptx_code[i]= (char*)malloc(binary_sizes[i]);
}
clGetProgramInfo(cpProgram, CL_PROGRAM_BINARIES, 0, ptx_code, NULL);
// Find the index of the device of interest
unsigned int idx = 0;
while( idx<num_devices && devices[idx] != cdDevice ) ++idx;
// If it is associated prepare the result
if( idx < num_devices )
{
*binary = ptx_code[idx];
*length = binary_sizes[idx];
}
// Cleanup
free( devices );
free( binary_sizes );
for( unsigned int i=0; i<num_devices; ++i) {
if( i != idx ) free(ptx_code[i]);
}
free( ptx_code );
}
//////////////////////////////////////////////////////////////////////////////
//! Get and log the binary (PTX) from the OpenCL compiler for the requested program & device
//!
//! @param cpProgram OpenCL program
//! @param cdDevice device of interest
//! @param const char* cPtxFileName optional PTX file name
//////////////////////////////////////////////////////////////////////////////
void oclLogPtx(cl_program cpProgram, cl_device_id cdDevice, const char* cPtxFileName)
{
// Grab the number of devices associated with the program
cl_uint num_devices;
clGetProgramInfo(cpProgram, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &num_devices, NULL);
// Grab the device ids
cl_device_id* devices = (cl_device_id*) malloc(num_devices * sizeof(cl_device_id));
clGetProgramInfo(cpProgram, CL_PROGRAM_DEVICES, num_devices * sizeof(cl_device_id), devices, 0);
// Grab the sizes of the binaries
size_t* binary_sizes = (size_t*)malloc(num_devices * sizeof(size_t));
clGetProgramInfo(cpProgram, CL_PROGRAM_BINARY_SIZES, num_devices * sizeof(size_t), binary_sizes, NULL);
// Now get the binaries
char** ptx_code = (char**)malloc(num_devices * sizeof(char*));
for( unsigned int i=0; i<num_devices; ++i)
{
ptx_code[i] = (char*)malloc(binary_sizes[i]);
}
clGetProgramInfo(cpProgram, CL_PROGRAM_BINARIES, 0, ptx_code, NULL);
// Find the index of the device of interest
unsigned int idx = 0;
while((idx < num_devices) && (devices[idx] != cdDevice))
{
++idx;
}
// If the index is associated, log the result
if(idx < num_devices)
{
// if a separate filename is supplied, dump ptx there
if (NULL != cPtxFileName)
{
shrLog("\nWriting ptx to separate file: %s ...\n\n", cPtxFileName);
FILE* pFileStream = NULL;
#ifdef _WIN32
fopen_s(&pFileStream, cPtxFileName, "wb");
#else
pFileStream = fopen(cPtxFileName, "wb");
#endif
fwrite(ptx_code[idx], binary_sizes[idx], 1, pFileStream);
fclose(pFileStream);
}
else // log to logfile and console if no ptx file specified
{
shrLog("\n%s\nProgram Binary:\n%s\n%s\n", HDASHLINE, ptx_code[idx], HDASHLINE);
}
}
// Cleanup
free(devices);
free(binary_sizes);
for(unsigned int i = 0; i < num_devices; ++i)
{
free(ptx_code[i]);
}
free( ptx_code );
}
//////////////////////////////////////////////////////////////////////////////
//! Get and log the binary (PTX) from the OpenCL compiler for the requested program & device
//!
//! @param cpProgram OpenCL program
//! @param cdDevice device of interest
//////////////////////////////////////////////////////////////////////////////
void oclLogBuildInfo(cl_program cpProgram, cl_device_id cdDevice)
{
// write out the build log and ptx, then exit
char cBuildLog[10240];
clGetProgramBuildInfo(cpProgram, cdDevice, CL_PROGRAM_BUILD_LOG,
sizeof(cBuildLog), cBuildLog, NULL );
shrLog("\n%s\nBuild Log:\n%s\n%s\n", HDASHLINE, cBuildLog, HDASHLINE);
}
// Helper function for De-allocating cl objects
// *********************************************************************
void oclDeleteMemObjs(cl_mem* cmMemObjs, int iNumObjs)
{
int i;
for (i = 0; i < iNumObjs; i++)
{
if (cmMemObjs[i])clReleaseMemObject(cmMemObjs[i]);
}
}
// Helper function to get OpenCL error string from constant
// *********************************************************************
const char* oclErrorString(cl_int error)
{
static const char* errorString[] = {
"CL_SUCCESS",
"CL_DEVICE_NOT_FOUND",
"CL_DEVICE_NOT_AVAILABLE",
"CL_COMPILER_NOT_AVAILABLE",
"CL_MEM_OBJECT_ALLOCATION_FAILURE",
"CL_OUT_OF_RESOURCES",
"CL_OUT_OF_HOST_MEMORY",
"CL_PROFILING_INFO_NOT_AVAILABLE",
"CL_MEM_COPY_OVERLAP",
"CL_IMAGE_FORMAT_MISMATCH",
"CL_IMAGE_FORMAT_NOT_SUPPORTED",
"CL_BUILD_PROGRAM_FAILURE",
"CL_MAP_FAILURE",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"CL_INVALID_VALUE",
"CL_INVALID_DEVICE_TYPE",
"CL_INVALID_PLATFORM",
"CL_INVALID_DEVICE",
"CL_INVALID_CONTEXT",
"CL_INVALID_QUEUE_PROPERTIES",
"CL_INVALID_COMMAND_QUEUE",
"CL_INVALID_HOST_PTR",
"CL_INVALID_MEM_OBJECT",
"CL_INVALID_IMAGE_FORMAT_DESCRIPTOR",
"CL_INVALID_IMAGE_SIZE",
"CL_INVALID_SAMPLER",
"CL_INVALID_BINARY",
"CL_INVALID_BUILD_OPTIONS",
"CL_INVALID_PROGRAM",
"CL_INVALID_PROGRAM_EXECUTABLE",
"CL_INVALID_KERNEL_NAME",
"CL_INVALID_KERNEL_DEFINITION",
"CL_INVALID_KERNEL",
"CL_INVALID_ARG_INDEX",
"CL_INVALID_ARG_VALUE",
"CL_INVALID_ARG_SIZE",
"CL_INVALID_KERNEL_ARGS",
"CL_INVALID_WORK_DIMENSION",
"CL_INVALID_WORK_GROUP_SIZE",
"CL_INVALID_WORK_ITEM_SIZE",
"CL_INVALID_GLOBAL_OFFSET",
"CL_INVALID_EVENT_WAIT_LIST",
"CL_INVALID_EVENT",
"CL_INVALID_OPERATION",
"CL_INVALID_GL_OBJECT",
"CL_INVALID_BUFFER_SIZE",
"CL_INVALID_MIP_LEVEL",
"CL_INVALID_GLOBAL_WORK_SIZE",
};
const int errorCount = sizeof(errorString) / sizeof(errorString[0]);
const int index = -error;
return (index >= 0 && index < errorCount) ? errorString[index] : "Unspecified Error";
}
// Helper function to get OpenCL image format string (channel order and type) from constant
// *********************************************************************
const char* oclImageFormatString(cl_uint uiImageFormat)
{
// cl_channel_order
if (uiImageFormat == CL_R)return "CL_R";
if (uiImageFormat == CL_A)return "CL_A";
if (uiImageFormat == CL_RG)return "CL_RG";
if (uiImageFormat == CL_RA)return "CL_RA";
if (uiImageFormat == CL_RGB)return "CL_RGB";
if (uiImageFormat == CL_RGBA)return "CL_RGBA";
if (uiImageFormat == CL_BGRA)return "CL_BGRA";
if (uiImageFormat == CL_ARGB)return "CL_ARGB";
if (uiImageFormat == CL_INTENSITY)return "CL_INTENSITY";
if (uiImageFormat == CL_LUMINANCE)return "CL_LUMINANCE";
// cl_channel_type
if (uiImageFormat == CL_SNORM_INT8)return "CL_SNORM_INT8";
if (uiImageFormat == CL_SNORM_INT16)return "CL_SNORM_INT16";
if (uiImageFormat == CL_UNORM_INT8)return "CL_UNORM_INT8";
if (uiImageFormat == CL_UNORM_INT16)return "CL_UNORM_INT16";
if (uiImageFormat == CL_UNORM_SHORT_565)return "CL_UNORM_SHORT_565";
if (uiImageFormat == CL_UNORM_SHORT_555)return "CL_UNORM_SHORT_555";
if (uiImageFormat == CL_UNORM_INT_101010)return "CL_UNORM_INT_101010";
if (uiImageFormat == CL_SIGNED_INT8)return "CL_SIGNED_INT8";
if (uiImageFormat == CL_SIGNED_INT16)return "CL_SIGNED_INT16";
if (uiImageFormat == CL_SIGNED_INT32)return "CL_SIGNED_INT32";
if (uiImageFormat == CL_UNSIGNED_INT8)return "CL_UNSIGNED_INT8";
if (uiImageFormat == CL_UNSIGNED_INT16)return "CL_UNSIGNED_INT16";
if (uiImageFormat == CL_UNSIGNED_INT32)return "CL_UNSIGNED_INT32";
if (uiImageFormat == CL_HALF_FLOAT)return "CL_HALF_FLOAT";
if (uiImageFormat == CL_FLOAT)return "CL_FLOAT";
// unknown constant
return "Unknown";
}

View File

@@ -0,0 +1,198 @@
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#ifndef OCL_UTILS_H
#define OCL_UTILS_H
// *********************************************************************
// Utilities specific to OpenCL samples in NVIDIA GPU Computing SDK
// *********************************************************************
// Common headers: Cross-API utililties and OpenCL header
#include <shrUtils.h>
// All OpenCL headers
#if defined (__APPLE__) || defined(MACOSX)
#include <OpenCL/opencl.h>
#else
#include <CL/opencl.h>
#endif
// Includes
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
// For systems with CL_EXT that are not updated with these extensions, we copied these
// extensions from <CL/cl_ext.h>
#ifndef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV
/* cl_nv_device_attribute_query extension - no extension #define since it has no functions */
#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000
#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001
#define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002
#define CL_DEVICE_WARP_SIZE_NV 0x4003
#define CL_DEVICE_GPU_OVERLAP_NV 0x4004
#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005
#define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006
#endif
// reminders for build output window and log
#ifdef _WIN32
#pragma message ("Note: including shrUtils.h")
#pragma message ("Note: including opencl.h")
#endif
// SDK Revision #
#define OCL_SDKREVISION "7027912"
// Error and Exit Handling Macros...
// *********************************************************************
// Full error handling macro with Cleanup() callback (if supplied)...
// (Companion Inline Function lower on page)
#define oclCheckErrorEX(a, b, c) __oclCheckErrorEX(a, b, c, __FILE__ , __LINE__)
// Short version without Cleanup() callback pointer
// Both Input (a) and Reference (b) are specified as args
#define oclCheckError(a, b) oclCheckErrorEX(a, b, 0)
//////////////////////////////////////////////////////////////////////////////
//! Gets the platform ID for NVIDIA if available, otherwise default to platform 0
//!
//! @return the id
//! @param clSelectedPlatformID OpenCL platform ID
//////////////////////////////////////////////////////////////////////////////
extern "C" cl_int oclGetPlatformID(cl_platform_id* clSelectedPlatformID);
//////////////////////////////////////////////////////////////////////////////
//! Print info about the device
//!
//! @param iLogMode enum LOGBOTH, LOGCONSOLE, LOGFILE
//! @param device OpenCL id of the device
//////////////////////////////////////////////////////////////////////////////
extern "C" void oclPrintDevInfo(int iLogMode, cl_device_id device);
//////////////////////////////////////////////////////////////////////////////
//! Get and return device capability
//!
//! @return the 2 digit integer representation of device Cap (major minor). return -1 if NA
//! @param device OpenCL id of the device
//////////////////////////////////////////////////////////////////////////////
extern "C" int oclGetDevCap(cl_device_id device);
//////////////////////////////////////////////////////////////////////////////
//! Print the device name
//!
//! @param iLogMode enum LOGBOTH, LOGCONSOLE, LOGFILE
//! @param device OpenCL id of the device
//////////////////////////////////////////////////////////////////////////////
extern "C" void oclPrintDevName(int iLogMode, cl_device_id device);
//////////////////////////////////////////////////////////////////////////////
//! Gets the id of the first device from the context
//!
//! @return the id
//! @param cxGPUContext OpenCL context
//////////////////////////////////////////////////////////////////////////////
extern "C" cl_device_id oclGetFirstDev(cl_context cxGPUContext);
//////////////////////////////////////////////////////////////////////////////
//! Gets the id of the nth device from the context
//!
//! @return the id or -1 when out of range
//! @param cxGPUContext OpenCL context
//! @param device_idx index of the device of interest
//////////////////////////////////////////////////////////////////////////////
extern "C" cl_device_id oclGetDev(cl_context cxGPUContext, unsigned int device_idx);
//////////////////////////////////////////////////////////////////////////////
//! Gets the id of device with maximal FLOPS from the context
//!
//! @return the id
//! @param cxGPUContext OpenCL context
//////////////////////////////////////////////////////////////////////////////
extern "C" cl_device_id oclGetMaxFlopsDev(cl_context cxGPUContext);
//////////////////////////////////////////////////////////////////////////////
//! Loads a Program file and prepends the cPreamble to the code.
//!
//! @return the source string if succeeded, 0 otherwise
//! @param cFilename program filename
//! @param cPreamble code that is prepended to the loaded file, typically a set of #defines or a header
//! @param szFinalLength returned length of the code string
//////////////////////////////////////////////////////////////////////////////
extern "C" char* oclLoadProgSource(const char* cFilename, const char* cPreamble, size_t* szFinalLength);
//////////////////////////////////////////////////////////////////////////////
//! Get the binary (PTX) of the program associated with the device
//!
//! @param cpProgram OpenCL program
//! @param cdDevice device of interest
//! @param binary returned code
//! @param length length of returned code
//////////////////////////////////////////////////////////////////////////////
extern "C" void oclGetProgBinary( cl_program cpProgram, cl_device_id cdDevice, char** binary, size_t* length);
//////////////////////////////////////////////////////////////////////////////
//! Get and log the binary (PTX) from the OpenCL compiler for the requested program & device
//!
//! @param cpProgram OpenCL program
//! @param cdDevice device of interest
//! @param const char* cPtxFileName optional PTX file name
//////////////////////////////////////////////////////////////////////////////
extern "C" void oclLogPtx(cl_program cpProgram, cl_device_id cdDevice, const char* cPtxFileName);
//////////////////////////////////////////////////////////////////////////////
//! Get and log the Build Log from the OpenCL compiler for the requested program & device
//!
//! @param cpProgram OpenCL program
//! @param cdDevice device of interest
//////////////////////////////////////////////////////////////////////////////
extern "C" void oclLogBuildInfo(cl_program cpProgram, cl_device_id cdDevice);
// Helper function for De-allocating cl objects
// *********************************************************************
extern "C" void oclDeleteMemObjs(cl_mem* cmMemObjs, int iNumObjs);
// Helper function to get OpenCL error string from constant
// *********************************************************************
extern "C" const char* oclErrorString(cl_int error);
// Helper function to get OpenCL image format string (channel order and type) from constant
// *********************************************************************
extern "C" const char* oclImageFormatString(cl_uint uiImageFormat);
// companion inline function for error checking and exit on error WITH Cleanup Callback (if supplied)
// *********************************************************************
inline void __oclCheckErrorEX(cl_int iSample, cl_int iReference, void (*pCleanup)(int), const char* cFile, const int iLine)
{
// An error condition is defined by the sample/test value not equal to the reference
if (iReference != iSample)
{
// If the sample/test value isn't equal to the ref, it's an error by defnition, so override 0 sample/test value
iSample = (iSample == 0) ? -9999 : iSample;
// Log the error info
shrLog("\n !!! Error # %i (%s) at line %i , in file %s !!!\n\n", iSample, oclErrorString(iSample), iLine, cFile);
// Cleanup and exit, or just exit if no cleanup function pointer provided. Use iSample (error code in this case) as process exit code.
if (pCleanup != NULL)
{
pCleanup(iSample);
}
else
{
shrLogEx(LOGBOTH | CLOSELOG, 0, "Exiting...\n");
exit(iSample);
}
}
}
#endif

View File

@@ -0,0 +1,238 @@
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#ifndef SHR_QATEST_H
#define SHR_QATEST_H
// *********************************************************************
// Generic utilities for NVIDIA GPU Computing SDK
// *********************************************************************
// OS dependent includes
#ifdef _WIN32
#pragma message ("Note: including windows.h")
#pragma message ("Note: including math.h")
#pragma message ("Note: including assert.h")
#pragma message ("Note: including time.h")
// Headers needed for Windows
#include <windows.h>
#include <time.h>
#else
// Headers needed for Linux
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <unistd.h>
#include <time.h>
#endif
#ifndef STRCASECMP
#ifdef _WIN32
#define STRCASECMP _stricmp
#else
#define STRCASECMP strcasecmp
#endif
#endif
#ifndef STRNCASECMP
#ifdef _WIN32
#define STRNCASECMP _strnicmp
#else
#define STRNCASECMP strncasecmp
#endif
#endif
// Standardized QA Start/Finish for CUDA SDK tests
#define shrQAStart(a, b) __shrQAStart(a, b)
#define shrQAFinish(a, b, c) __shrQAFinish(a, b, c)
#define shrQAFinish2(a, b, c, d) __shrQAFinish2(a, b, c, d)
inline int findExeNameStart(const char *exec_name)
{
int exename_start = (int)strlen(exec_name);
while( (exename_start > 0) &&
(exec_name[exename_start] != '\\') &&
(exec_name[exename_start] != '/') )
{
exename_start--;
}
if (exec_name[exename_start] == '\\' ||
exec_name[exename_start] == '/')
{
return exename_start+1;
} else {
return exename_start;
}
}
inline int __shrQAStart(int argc, char **argv)
{
bool bQATest = false;
// First clear the output buffer
fflush(stdout);
fflush(stdout);
for (int i=1; i < argc; i++) {
int string_start = 0;
while (argv[i][string_start] == '-')
string_start++;
char *string_argv = &argv[i][string_start];
if (!STRCASECMP(string_argv, "qatest")) {
bQATest = true;
}
}
// We don't want to print the entire path, so we search for the first
int exename_start = findExeNameStart(argv[0]);
if (bQATest) {
fprintf(stdout, "&&&& RUNNING %s", &(argv[0][exename_start]));
for (int i=1; i < argc; i++) fprintf(stdout, " %s", argv[i]);
fprintf(stdout, "\n");
} else {
fprintf(stdout, "[%s] starting...\n", &(argv[0][exename_start]));
}
fflush(stdout);
printf("\n"); fflush(stdout);
return exename_start;
}
enum eQAstatus {
QA_FAILED = 0,
QA_PASSED = 1,
QA_WAIVED = 2
};
inline void __ExitInTime(int seconds)
{
fprintf(stdout, "> exiting in %d seconds: ", seconds);
fflush(stdout);
time_t t;
int count;
for (t=time(0)+seconds, count=seconds; time(0) < t; count--) {
fprintf(stdout, "%d...", count);
#ifdef WIN32
Sleep(1000);
#else
sleep(1);
#endif
}
fprintf(stdout,"done!\n\n");
fflush(stdout);
}
inline void __shrQAFinish(int argc, const char **argv, int iStatus)
{
// By default QATest is disabled and NoPrompt is Enabled (times out at seconds passed into __ExitInTime() )
bool bQATest = false, bNoPrompt = true, bQuitInTime = true;
const char *sStatus[] = { "FAILED", "PASSED", "WAIVED", NULL };
for (int i=1; i < argc; i++) {
int string_start = 0;
while (argv[i][string_start] == '-')
string_start++;
const char *string_argv = &argv[i][string_start];
if (!STRCASECMP(string_argv, "qatest")) {
bQATest = true;
}
// For SDK individual samples that don't specify -noprompt or -prompt,
// a 3 second delay will happen before exiting, giving a user time to view results
if (!STRCASECMP(string_argv, "noprompt") || !STRCASECMP(string_argv, "help")) {
bNoPrompt = true;
bQuitInTime = false;
}
if (!STRCASECMP(string_argv, "prompt")) {
bNoPrompt = false;
bQuitInTime = false;
}
}
int exename_start = findExeNameStart(argv[0]);
if (bQATest) {
fprintf(stdout, "&&&& %s %s", sStatus[iStatus], &(argv[0][exename_start]));
for (int i=1; i < argc; i++) fprintf(stdout, " %s", argv[i]);
fprintf(stdout, "\n");
} else {
fprintf(stdout, "[%s] test results...\n%s\n", &(argv[0][exename_start]), sStatus[iStatus]);
}
fflush(stdout);
printf("\n"); fflush(stdout);
if (bQuitInTime) {
__ExitInTime(3);
} else {
if (!bNoPrompt) {
fprintf(stdout, "\nPress <Enter> to exit...\n");
fflush(stdout);
getchar();
}
}
}
inline void __shrQAFinish2(bool bQATest, int argc, const char **argv, int iStatus)
{
bool bQuitInTime = true;
const char *sStatus[] = { "FAILED", "PASSED", "WAIVED", NULL };
for (int i=1; i < argc; i++) {
int string_start = 0;
while (argv[i][string_start] == '-')
string_start++;
const char *string_argv = &argv[i][string_start];
// For SDK individual samples that don't specify -noprompt or -prompt,
// a 3 second delay will happen before exiting, giving a user time to view results
if (!STRCASECMP(string_argv, "noprompt") || !STRCASECMP(string_argv, "help")) {
bQuitInTime = false;
}
if (!STRCASECMP(string_argv, "prompt")) {
bQuitInTime = false;
}
}
int exename_start = findExeNameStart(argv[0]);
if (bQATest) {
fprintf(stdout, "&&&& %s %s", sStatus[iStatus], &(argv[0][exename_start]));
for (int i=1; i < argc; i++) fprintf(stdout, " %s", argv[i]);
fprintf(stdout, "\n");
} else {
fprintf(stdout, "[%s] test results...\n%s\n", &(argv[0][exename_start]), sStatus[iStatus]);
}
fflush(stdout);
if (bQuitInTime) {
__ExitInTime(3);
}
}
inline void shrQAFinishExit(int argc, const char **argv, int iStatus)
{
__shrQAFinish(argc, argv, iStatus);
exit(iStatus ? EXIT_SUCCESS : EXIT_FAILURE);
}
inline void shrQAFinishExit2(bool bQAtest, int argc, const char **argv, int iStatus)
{
__shrQAFinish2(bQAtest, argc, argv, iStatus);
exit(iStatus ? EXIT_SUCCESS : EXIT_FAILURE);
}
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,642 @@
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#ifndef SHR_UTILS_H
#define SHR_UTILS_H
// *********************************************************************
// Generic utilities for NVIDIA GPU Computing SDK
// *********************************************************************
// reminders for output window and build log
#ifdef _WIN32
#pragma message ("Note: including windows.h")
#pragma message ("Note: including math.h")
#pragma message ("Note: including assert.h")
#endif
// OS dependent includes
#ifdef _WIN32
// Headers needed for Windows
#include <windows.h>
#else
// Headers needed for Linux
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#endif
// Other headers needed for both Windows and Linux
#include <math.h>
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
// Un-comment the following #define to enable profiling code in SDK apps
//#define GPU_PROFILING
// Beginning of GPU Architecture definitions
inline int ConvertSMVer2Cores(int major, int minor)
{
// Defines for GPU Architecture types (using the SM version to determine the # of cores per SM
typedef struct {
int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version
int Cores;
} sSMtoCores;
sSMtoCores nGpuArchCoresPerSM[] =
{ { 0x10, 8 }, // Tesla Generation (SM 1.0) G80 class
{ 0x11, 8 }, // Tesla Generation (SM 1.1) G8x class
{ 0x12, 8 }, // Tesla Generation (SM 1.2) G9x class
{ 0x13, 8 }, // Tesla Generation (SM 1.3) GT200 class
{ 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class
{ 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class
{ 0x30, 192}, // Fermi Generation (SM 3.0) GK10x class
{ -1, -1 }
};
int index = 0;
while (nGpuArchCoresPerSM[index].SM != -1) {
if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor) ) {
return nGpuArchCoresPerSM[index].Cores;
}
index++;
}
printf("MapSMtoCores SM %d.%d is undefined (please update to the latest SDK)!\n", major, minor);
return -1;
}
// end of GPU Architecture definitions
// Defines and enum for use with logging functions
// *********************************************************************
#define DEFAULTLOGFILE "SdkConsoleLog.txt"
#define MASTERLOGFILE "SdkMasterLog.csv"
enum LOGMODES
{
LOGCONSOLE = 1, // bit to signal "log to console"
LOGFILE = 2, // bit to signal "log to file"
LOGBOTH = 3, // convenience union of first 2 bits to signal "log to both"
APPENDMODE = 4, // bit to set "file append" mode instead of "replace mode" on open
MASTER = 8, // bit to signal master .csv log output
ERRORMSG = 16, // bit to signal "pre-pend Error"
CLOSELOG = 32 // bit to close log file, if open, after any requested file write
};
#define HDASHLINE "-----------------------------------------------------------\n"
// Standardized boolean
enum shrBOOL
{
shrFALSE = 0,
shrTRUE = 1
};
// Standardized MAX, MIN and CLAMP
#define MAX(a, b) ((a > b) ? a : b)
#define MIN(a, b) ((a < b) ? a : b)
#define CLAMP(a, b, c) MIN(MAX(a, b), c) // double sided clip of input a
#define TOPCLAMP(a, b) (a < b ? a:b) // single top side clip of input a
// Error and Exit Handling Macros...
// *********************************************************************
// Full error handling macro with Cleanup() callback (if supplied)...
// (Companion Inline Function lower on page)
#define shrCheckErrorEX(a, b, c) __shrCheckErrorEX(a, b, c, __FILE__ , __LINE__)
// Short version without Cleanup() callback pointer
// Both Input (a) and Reference (b) are specified as args
#define shrCheckError(a, b) shrCheckErrorEX(a, b, 0)
// Standardized Exit Macro for leaving main()... extended version
// (Companion Inline Function lower on page)
#define shrExitEX(a, b, c) __shrExitEX(a, b, c)
// Standardized Exit Macro for leaving main()... short version
// (Companion Inline Function lower on page)
#define shrEXIT(a, b) __shrExitEX(a, b, EXIT_SUCCESS)
// Simple argument checker macro
#define ARGCHECK(a) if((a) != shrTRUE)return shrFALSE
// Define for user-customized error handling
#define STDERROR "file %s, line %i\n\n" , __FILE__ , __LINE__
// Function to deallocate memory allocated within shrUtils
// *********************************************************************
extern "C" void shrFree(void* ptr);
// *********************************************************************
// Helper function to log standardized information to Console, to File or to both
//! Examples: shrLogEx(LOGBOTH, 0, "Function A\n");
//! : shrLogEx(LOGBOTH | ERRORMSG, ciErrNum, STDERROR);
//!
//! Automatically opens file and stores handle if needed and not done yet
//! Closes file and nulls handle on request
//!
//! @param 0 iLogMode: LOGCONSOLE, LOGFILE, LOGBOTH, APPENDMODE, MASTER, ERRORMSG, CLOSELOG.
//! LOGFILE and LOGBOTH may be | 'd with APPENDMODE to select file append mode instead of overwrite mode
//! LOGFILE and LOGBOTH may be | 'd with CLOSELOG to "write and close"
//! First 3 options may be | 'd with MASTER to enable independent write to master data log file
//! First 3 options may be | 'd with ERRORMSG to start line with standard error message
//! @param 2 dValue:
//! Positive val = double value for time in secs to be formatted to 6 decimals.
//! Negative val is an error code and this give error preformatting.
//! @param 3 cFormatString: String with formatting specifiers like printf or fprintf.
//! ALL printf flags, width, precision and type specifiers are supported with this exception:
//! Wide char type specifiers intended for wprintf (%S and %C) are NOT supported
//! Single byte char type specifiers (%s and %c) ARE supported
//! @param 4... variable args: like printf or fprintf. Must match format specifer type above.
//! @return 0 if OK, negative value on error or if error occurs or was passed in.
// *********************************************************************
extern "C" int shrLogEx(int iLogMode, int iErrNum, const char* cFormatString, ...);
// Short version of shrLogEx defaulting to shrLogEx(LOGBOTH, 0,
// *********************************************************************
extern "C" int shrLog(const char* cFormatString, ...);
// *********************************************************************
// Delta timer function for up to 3 independent timers using host high performance counters
// Maintains state for 3 independent counters
//! Example: double dElapsedTime = shrDeltaTime(0);
//!
//! @param 0 iCounterID: Which timer to check/reset. (0, 1, 2)
//! @return delta time of specified counter since last call in seconds. Otherwise -9999.0 if error
// *********************************************************************
extern "C" double shrDeltaT(int iCounterID);
// Optional LogFileNameOverride function
// *********************************************************************
extern "C" void shrSetLogFileName (const char* cOverRideName);
// Helper function to init data arrays
// *********************************************************************
extern "C" void shrFillArray(float* pfData, int iSize);
// Helper function to print data arrays
// *********************************************************************
extern "C" void shrPrintArray(float* pfData, int iSize);
////////////////////////////////////////////////////////////////////////////
//! Find the path for a filename
//! @return the path if succeeded, otherwise 0
//! @param filename name of the file
//! @param executablePath optional absolute path of the executable
////////////////////////////////////////////////////////////////////////////
extern "C" char* shrFindFilePath(const char* filename, const char* executablePath);
////////////////////////////////////////////////////////////////////////////
//! Read file \filename containing single precision floating point data
//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE
//! @param filename name of the source file
//! @param data uninitialized pointer, returned initialized and pointing to
//! the data read
//! @param len number of data elements in data, -1 on error
//! @note If a NULL pointer is passed to this function and it is initialized
//! within shrUtils, then free() has to be used to deallocate the memory
////////////////////////////////////////////////////////////////////////////
extern "C" shrBOOL shrReadFilef( const char* filename, float** data, unsigned int* len,
bool verbose = false);
////////////////////////////////////////////////////////////////////////////
//! Read file \filename containing double precision floating point data
//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE
//! @param filename name of the source file
//! @param data uninitialized pointer, returned initialized and pointing to
//! the data read
//! @param len number of data elements in data, -1 on error
//! @note If a NULL pointer is passed to this function and it is
//! @note If a NULL pointer is passed to this function and it is initialized
//! within shrUtils, then free() has to be used to deallocate the memory
////////////////////////////////////////////////////////////////////////////
extern "C" shrBOOL shrReadFiled( const char* filename, double** data, unsigned int* len,
bool verbose = false);
////////////////////////////////////////////////////////////////////////////
//! Read file \filename containing integer data
//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE
//! @param filename name of the source file
//! @param data uninitialized pointer, returned initialized and pointing to
//! the data read
//! @param len number of data elements in data, -1 on error
//! @note If a NULL pointer is passed to this function and it is
//! @note If a NULL pointer is passed to this function and it is initialized
//! within shrUtils, then free() has to be used to deallocate the memory
////////////////////////////////////////////////////////////////////////////
extern "C" shrBOOL shrReadFilei( const char* filename, int** data, unsigned int* len, bool verbose = false);
////////////////////////////////////////////////////////////////////////////
//! Read file \filename containing unsigned integer data
//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE
//! @param filename name of the source file
//! @param data uninitialized pointer, returned initialized and pointing to
//! the data read
//! @param len number of data elements in data, -1 on error
//! @note If a NULL pointer is passed to this function and it is
//! @note If a NULL pointer is passed to this function and it is initialized
//! within shrUtils, then free() has to be used to deallocate the memory
////////////////////////////////////////////////////////////////////////////
extern "C" shrBOOL shrReadFileui( const char* filename, unsigned int** data,
unsigned int* len, bool verbose = false);
////////////////////////////////////////////////////////////////////////////
//! Read file \filename containing char / byte data
//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE
//! @param filename name of the source file
//! @param data uninitialized pointer, returned initialized and pointing to
//! the data read
//! @param len number of data elements in data, -1 on error
//! @note If a NULL pointer is passed to this function and it is
//! @note If a NULL pointer is passed to this function and it is initialized
//! within shrUtils, then free() has to be used to deallocate the memory
////////////////////////////////////////////////////////////////////////////
extern "C" shrBOOL shrReadFileb( const char* filename, char** data, unsigned int* len,
bool verbose = false);
////////////////////////////////////////////////////////////////////////////
//! Read file \filename containing unsigned char / byte data
//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE
//! @param filename name of the source file
//! @param data uninitialized pointer, returned initialized and pointing to
//! the data read
//! @param len number of data elements in data, -1 on error
//! @note If a NULL pointer is passed to this function and it is
//! @note If a NULL pointer is passed to this function and it is initialized
//! within shrUtils, then free() has to be used to deallocate the memory
////////////////////////////////////////////////////////////////////////////
extern "C" shrBOOL shrReadFileub( const char* filename, unsigned char** data,
unsigned int* len, bool verbose = false);
////////////////////////////////////////////////////////////////////////////
//! Write a data file \filename containing single precision floating point
//! data
//! @return shrTRUE if writing the file succeeded, otherwise shrFALSE
//! @param filename name of the file to write
//! @param data pointer to data to write
//! @param len number of data elements in data, -1 on error
//! @param epsilon epsilon for comparison
////////////////////////////////////////////////////////////////////////////
extern "C" shrBOOL shrWriteFilef( const char* filename, const float* data, unsigned int len,
const float epsilon, bool verbose = false);
////////////////////////////////////////////////////////////////////////////
//! Write a data file \filename containing double precision floating point
//! data
//! @return shrTRUE if writing the file succeeded, otherwise shrFALSE
//! @param filename name of the file to write
//! @param data pointer to data to write
//! @param len number of data elements in data, -1 on error
//! @param epsilon epsilon for comparison
////////////////////////////////////////////////////////////////////////////
extern "C" shrBOOL shrWriteFiled( const char* filename, const float* data, unsigned int len,
const double epsilon, bool verbose = false);
////////////////////////////////////////////////////////////////////////////
//! Write a data file \filename containing integer data
//! @return shrTRUE if writing the file succeeded, otherwise shrFALSE
//! @param filename name of the file to write
//! @param data pointer to data to write
//! @param len number of data elements in data, -1 on error
////////////////////////////////////////////////////////////////////////////
extern "C" shrBOOL shrWriteFilei( const char* filename, const int* data, unsigned int len,
bool verbose = false);
////////////////////////////////////////////////////////////////////////////
//! Write a data file \filename containing unsigned integer data
//! @return shrTRUE if writing the file succeeded, otherwise shrFALSE
//! @param filename name of the file to write
//! @param data pointer to data to write
//! @param len number of data elements in data, -1 on error
////////////////////////////////////////////////////////////////////////////
extern "C" shrBOOL shrWriteFileui( const char* filename, const unsigned int* data,
unsigned int len, bool verbose = false);
////////////////////////////////////////////////////////////////////////////
//! Write a data file \filename containing char / byte data
//! @return shrTRUE if writing the file succeeded, otherwise shrFALSE
//! @param filename name of the file to write
//! @param data pointer to data to write
//! @param len number of data elements in data, -1 on error
////////////////////////////////////////////////////////////////////////////
extern "C" shrBOOL shrWriteFileb( const char* filename, const char* data, unsigned int len,
bool verbose = false);
////////////////////////////////////////////////////////////////////////////
//! Write a data file \filename containing unsigned char / byte data
//! @return shrTRUE if writing the file succeeded, otherwise shrFALSE
//! @param filename name of the file to write
//! @param data pointer to data to write
//! @param len number of data elements in data, -1 on error
////////////////////////////////////////////////////////////////////////////
extern "C" shrBOOL shrWriteFileub( const char* filename, const unsigned char* data,
unsigned int len, bool verbose = false);
////////////////////////////////////////////////////////////////////////////
//! Load PPM image file (with unsigned char as data element type), padding
//! 4th component
//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE
//! @param file name of the image file
//! @param OutData handle to the data read
//! @param w width of the image
//! @param h height of the image
//!
//! Note: If *OutData is NULL this function allocates buffer that must be freed by caller
////////////////////////////////////////////////////////////////////////////
extern "C" shrBOOL shrLoadPPM4ub(const char* file, unsigned char** OutData,
unsigned int *w, unsigned int *h);
////////////////////////////////////////////////////////////////////////////
//! Save PPM image file (with unsigned char as data element type, padded to
//! 4 bytes)
//! @return shrTRUE if saving the file succeeded, otherwise shrFALSE
//! @param file name of the image file
//! @param data handle to the data read
//! @param w width of the image
//! @param h height of the image
////////////////////////////////////////////////////////////////////////////
extern "C" shrBOOL shrSavePPM4ub( const char* file, unsigned char *data,
unsigned int w, unsigned int h);
////////////////////////////////////////////////////////////////////////////////
//! Save PGM image file (with unsigned char as data element type)
//! @return shrTRUE if saving the file succeeded, otherwise shrFALSE
//! @param file name of the image file
//! @param data handle to the data read
//! @param w width of the image
//! @param h height of the image
////////////////////////////////////////////////////////////////////////////////
extern "C" shrBOOL shrSavePGMub( const char* file, unsigned char *data,
unsigned int w, unsigned int h);
////////////////////////////////////////////////////////////////////////////
//! Load PGM image file (with unsigned char as data element type)
//! @return shrTRUE if saving the file succeeded, otherwise shrFALSE
//! @param file name of the image file
//! @param data handle to the data read
//! @param w width of the image
//! @param h height of the image
//! @note If a NULL pointer is passed to this function and it is initialized
//! within shrUtils, then free() has to be used to deallocate the memory
////////////////////////////////////////////////////////////////////////////
extern "C" shrBOOL shrLoadPGMub( const char* file, unsigned char** data,
unsigned int *w,unsigned int *h);
////////////////////////////////////////////////////////////////////////////
// Command line arguments: General notes
// * All command line arguments begin with '--' followed by the token;
// token and value are seperated by '='; example --samples=50
// * Arrays have the form --model=[one.obj,two.obj,three.obj]
// (without whitespaces)
////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////
//! Check if command line argument \a flag-name is given
//! @return shrTRUE if command line argument \a flag_name has been given,
//! otherwise shrFALSE
//! @param argc argc as passed to main()
//! @param argv argv as passed to main()
//! @param flag_name name of command line flag
////////////////////////////////////////////////////////////////////////////
extern "C" shrBOOL shrCheckCmdLineFlag( const int argc, const char** argv,
const char* flag_name);
////////////////////////////////////////////////////////////////////////////
//! Get the value of a command line argument of type int
//! @return shrTRUE if command line argument \a arg_name has been given and
//! is of the requested type, otherwise shrFALSE
//! @param argc argc as passed to main()
//! @param argv argv as passed to main()
//! @param arg_name name of the command line argument
//! @param val value of the command line argument
////////////////////////////////////////////////////////////////////////////
extern "C" shrBOOL shrGetCmdLineArgumenti( const int argc, const char** argv,
const char* arg_name, int* val);
////////////////////////////////////////////////////////////////////////////
//! Get the value of a command line argument of type unsigned int
//! @return shrTRUE if command line argument \a arg_name has been given and
//! is of the requested type, otherwise shrFALSE
//! @param argc argc as passed to main()
//! @param argv argv as passed to main()
//! @param arg_name name of the command line argument
//! @param val value of the command line argument
////////////////////////////////////////////////////////////////////////////
extern "C" shrBOOL shrGetCmdLineArgumentu( const int argc, const char** argv,
const char* arg_name, unsigned int* val);
////////////////////////////////////////////////////////////////////////////
//! Get the value of a command line argument of type float
//! @return shrTRUE if command line argument \a arg_name has been given and
//! is of the requested type, otherwise shrFALSE
//! @param argc argc as passed to main()
//! @param argv argv as passed to main()
//! @param arg_name name of the command line argument
//! @param val value of the command line argument
////////////////////////////////////////////////////////////////////////////
extern "C" shrBOOL shrGetCmdLineArgumentf( const int argc, const char** argv,
const char* arg_name, float* val);
////////////////////////////////////////////////////////////////////////////
//! Get the value of a command line argument of type string
//! @return shrTRUE if command line argument \a arg_name has been given and
//! is of the requested type, otherwise shrFALSE
//! @param argc argc as passed to main()
//! @param argv argv as passed to main()
//! @param arg_name name of the command line argument
//! @param val value of the command line argument
////////////////////////////////////////////////////////////////////////////
extern "C" shrBOOL shrGetCmdLineArgumentstr( const int argc, const char** argv,
const char* arg_name, char** val);
////////////////////////////////////////////////////////////////////////////
//! Get the value of a command line argument list those element are strings
//! @return shrTRUE if command line argument \a arg_name has been given and
//! is of the requested type, otherwise shrFALSE
//! @param argc argc as passed to main()
//! @param argv argv as passed to main()
//! @param arg_name name of the command line argument
//! @param val command line argument list
//! @param len length of the list / number of elements
////////////////////////////////////////////////////////////////////////////
extern "C" shrBOOL shrGetCmdLineArgumentListstr( const int argc, const char** argv,
const char* arg_name, char** val,
unsigned int* len);
////////////////////////////////////////////////////////////////////////////
//! Compare two float arrays
//! @return shrTRUEif \a reference and \a data are identical, otherwise shrFALSE
//! @param reference handle to the reference data / gold image
//! @param data handle to the computed data
//! @param len number of elements in reference and data
////////////////////////////////////////////////////////////////////////////
extern "C" shrBOOL shrComparef( const float* reference, const float* data,
const unsigned int len);
////////////////////////////////////////////////////////////////////////////
//! Compare two integer arrays
//! @return shrTRUEif \a reference and \a data are identical, otherwise shrFALSE
//! @param reference handle to the reference data / gold image
//! @param data handle to the computed data
//! @param len number of elements in reference and data
////////////////////////////////////////////////////////////////////////////
extern "C" shrBOOL shrComparei( const int* reference, const int* data,
const unsigned int len );
////////////////////////////////////////////////////////////////////////////////
//! Compare two unsigned integer arrays, with epsilon and threshold
//! @return shrTRUEif \a reference and \a data are identical, otherwise shrFALSE
//! @param reference handle to the reference data / gold image
//! @param data handle to the computed data
//! @param len number of elements in reference and data
//! @param threshold tolerance % # of comparison errors (0.15f = 15%)
////////////////////////////////////////////////////////////////////////////////
extern "C" shrBOOL shrCompareuit( const unsigned int* reference, const unsigned int* data,
const unsigned int len, const float epsilon, const float threshold );
////////////////////////////////////////////////////////////////////////////
//! Compare two unsigned char arrays
//! @return shrTRUEif \a reference and \a data are identical, otherwise shrFALSE
//! @param reference handle to the reference data / gold image
//! @param data handle to the computed data
//! @param len number of elements in reference and data
////////////////////////////////////////////////////////////////////////////
extern "C" shrBOOL shrCompareub( const unsigned char* reference, const unsigned char* data,
const unsigned int len );
////////////////////////////////////////////////////////////////////////////////
//! Compare two integers with a tolernance for # of byte errors
//! @return shrTRUEif \a reference and \a data are identical, otherwise shrFALSE
//! @param reference handle to the reference data / gold image
//! @param data handle to the computed data
//! @param len number of elements in reference and data
//! @param epsilon epsilon to use for the comparison
//! @param threshold tolerance % # of comparison errors (0.15f = 15%)
////////////////////////////////////////////////////////////////////////////////
extern "C" shrBOOL shrCompareubt( const unsigned char* reference, const unsigned char* data,
const unsigned int len, const float epsilon, const float threshold );
////////////////////////////////////////////////////////////////////////////////
//! Compare two integer arrays witha n epsilon tolerance for equality
//! @return shrTRUEif \a reference and \a data are identical, otherwise shrFALSE
//! @param reference handle to the reference data / gold image
//! @param data handle to the computed data
//! @param len number of elements in reference and data
//! @param epsilon epsilon to use for the comparison
////////////////////////////////////////////////////////////////////////////////
extern "C" shrBOOL shrCompareube( const unsigned char* reference, const unsigned char* data,
const unsigned int len, const float epsilon );
////////////////////////////////////////////////////////////////////////////
//! Compare two float arrays with an epsilon tolerance for equality
//! @return shrTRUEif \a reference and \a data are identical, otherwise shrFALSE
//! @param reference handle to the reference data / gold image
//! @param data handle to the computed data
//! @param len number of elements in reference and data
//! @param epsilon epsilon to use for the comparison
////////////////////////////////////////////////////////////////////////////
extern "C" shrBOOL shrComparefe( const float* reference, const float* data,
const unsigned int len, const float epsilon );
////////////////////////////////////////////////////////////////////////////////
//! Compare two float arrays with an epsilon tolerance for equality and a
//! threshold for # pixel errors
//! @return shrTRUEif \a reference and \a data are identical, otherwise shrFALSE
//! @param reference handle to the reference data / gold image
//! @param data handle to the computed data
//! @param len number of elements in reference and data
//! @param epsilon epsilon to use for the comparison
////////////////////////////////////////////////////////////////////////////////
extern "C" shrBOOL shrComparefet( const float* reference, const float* data,
const unsigned int len, const float epsilon, const float threshold );
////////////////////////////////////////////////////////////////////////////
//! Compare two float arrays using L2-norm with an epsilon tolerance for
//! equality
//! @return shrTRUEif \a reference and \a data are identical, otherwise shrFALSE
//! @param reference handle to the reference data / gold image
//! @param data handle to the computed data
//! @param len number of elements in reference and data
//! @param epsilon epsilon to use for the comparison
////////////////////////////////////////////////////////////////////////////
extern "C" shrBOOL shrCompareL2fe( const float* reference, const float* data,
const unsigned int len, const float epsilon );
////////////////////////////////////////////////////////////////////////////////
//! Compare two PPM image files with an epsilon tolerance for equality
//! @return shrTRUEif \a reference and \a data are identical, otherwise shrFALSE
//! @param src_file filename for the image to be compared
//! @param data filename for the reference data / gold image
//! @param epsilon epsilon to use for the comparison
//! @param threshold threshold of pixels that can still mismatch to pass (i.e. 0.15f = 15% must pass)
//! $param verboseErrors output details of image mismatch to std::err
////////////////////////////////////////////////////////////////////////////////
extern "C" shrBOOL shrComparePPM( const char *src_file, const char *ref_file, const float epsilon, const float threshold);
////////////////////////////////////////////////////////////////////////////////
//! Compare two PGM image files with an epsilon tolerance for equality
//! @return shrTRUEif \a reference and \a data are identical, otherwise shrFALSE
//! @param src_file filename for the image to be compared
//! @param data filename for the reference data / gold image
//! @param epsilon epsilon to use for the comparison
//! @param threshold threshold of pixels that can still mismatch to pass (i.e. 0.15f = 15% must pass)
//! $param verboseErrors output details of image mismatch to std::err
////////////////////////////////////////////////////////////////////////////////
extern "C" shrBOOL shrComparePGM( const char *src_file, const char *ref_file, const float epsilon, const float threshold);
extern "C" unsigned char* shrLoadRawFile(const char* filename, size_t size);
extern "C" size_t shrRoundUp(int group_size, int global_size);
// companion inline function for error checking and exit on error WITH Cleanup Callback (if supplied)
// *********************************************************************
inline void __shrCheckErrorEX(int iSample, int iReference, void (*pCleanup)(int), const char* cFile, const int iLine)
{
if (iReference != iSample)
{
shrLogEx(LOGBOTH | ERRORMSG, iSample, "line %i , in file %s !!!\n\n" , iLine, cFile);
if (pCleanup != NULL)
{
pCleanup(EXIT_FAILURE);
}
else
{
shrLogEx(LOGBOTH | CLOSELOG, 0, "Exiting...\n");
exit(EXIT_FAILURE);
}
}
}
// Standardized Exit
// *********************************************************************
inline void __shrExitEX(int argc, const char** argv, int iExitCode)
{
#ifdef WIN32
if (!shrCheckCmdLineFlag(argc, argv, "noprompt") && !shrCheckCmdLineFlag(argc, argv, "qatest"))
#else
if (shrCheckCmdLineFlag(argc, argv, "prompt") && !shrCheckCmdLineFlag(argc, argv, "qatest"))
#endif
{
shrLogEx(LOGBOTH | CLOSELOG, 0, "\nPress <Enter> to Quit...\n");
getchar();
}
else
{
shrLogEx(LOGBOTH | CLOSELOG, 0, "%s Exiting...\n", argv[0]);
}
fflush(stderr);
exit(iExitCode);
}
#endif