Vortex 2.0 changes:
+ Microarchitecture optimizations + 64-bit support + Xilinx FPGA support + LLVM-16 support + Refactoring and quality control fixes minor update minor update minor update minor update minor update minor update cleanup cleanup cache bindings and memory perf refactoring minor update minor update hw unit tests fixes minor update minor update minor update minor update minor update minor update minor update minor update minor update minor update minor update minor update minor update minor updates minor updates minor update minor update minor update minor update minor update minor update minor updates minor updates minor updates minor updates minor update minor update
This commit is contained in:
9
tests/opencl/blackscholes/Makefile
Normal file
9
tests/opencl/blackscholes/Makefile
Normal file
@@ -0,0 +1,9 @@
|
||||
# Name of the test program built from this directory.
PROJECT = blackscholes

# Host-side translation units that make up the test.
SRCS = main.cpp \
       oclUtils.cpp \
       shrUtils.cpp \
       cmd_arg_reader.cpp \
       oclBlackScholes_launcher.cpp \
       oclBlackScholes_gold.cpp

# Let the sources find the headers that live next to them.
CXXFLAGS += -I.

# Extra options; empty unless the caller already set OPTS.
OPTS ?=

# Rules come from the common makefile one directory up.
include ../common.mk
|
||||
152
tests/opencl/blackscholes/cmd_arg_reader.cpp
Normal file
152
tests/opencl/blackscholes/cmd_arg_reader.cpp
Normal file
@@ -0,0 +1,152 @@
|
||||
/*
|
||||
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Please refer to the NVIDIA end user license agreement (EULA) associated
|
||||
* with this source code for terms and conditions that govern your use of
|
||||
* this software. Any use, reproduction, disclosure, or distribution of
|
||||
* this software and related documentation outside the terms of the EULA
|
||||
* is strictly prohibited.
|
||||
*
|
||||
*/
|
||||
|
||||
/* CUda UTility Library */
|
||||
|
||||
// includes, file
|
||||
#include "cmd_arg_reader.h"
|
||||
|
||||
// includes, system
|
||||
#include <vector>
|
||||
|
||||
// internal unnamed namespace
|
||||
|
||||
namespace
|
||||
{
|
||||
// types, internal (class, enum, struct, union, typedef)
|
||||
|
||||
// variables, internal
|
||||
|
||||
} // namespace {
|
||||
|
||||
// variables, exported
|
||||
|
||||
/*static*/ CmdArgReader* CmdArgReader::self;
|
||||
/*static*/ char** CmdArgReader::rargv;
|
||||
/*static*/ int CmdArgReader::rargc;
|
||||
|
||||
// functions, exported
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Public construction interface
|
||||
//! @return a handle to the class instance
|
||||
//! @param argc number of command line arguments (as given to main())
|
||||
//! @param argv command line argument string (as given to main())
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/*static*/ void
|
||||
CmdArgReader::init( const int argc, const char** argv)
|
||||
{
|
||||
if ( NULL != self)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// command line arguments
|
||||
if (( 0 == argc) || ( 0 == argv))
|
||||
{
|
||||
LOGIC_EXCEPTION( "No command line arguments given.");
|
||||
}
|
||||
|
||||
self = new CmdArgReader();
|
||||
|
||||
self->createArgsMaps( argc, argv);
|
||||
|
||||
rargc = argc;
|
||||
rargv = const_cast<char**>( argv);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Constructor, default
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
CmdArgReader::CmdArgReader() :
|
||||
args(),
|
||||
unprocessed(),
|
||||
iter(),
|
||||
iter_unprocessed()
|
||||
{ }
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Destructor
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
CmdArgReader::~CmdArgReader()
|
||||
{
|
||||
for( iter = args.begin(); iter != args.end(); ++iter)
|
||||
{
|
||||
if( *(iter->second.first) == typeid( int))
|
||||
{
|
||||
delete static_cast<int*>( iter->second.second);
|
||||
break;
|
||||
}
|
||||
else if( *(iter->second.first) == typeid( bool))
|
||||
{
|
||||
delete static_cast<bool*>( iter->second.second);
|
||||
break;
|
||||
}
|
||||
else if( *(iter->second.first) == typeid( std::string))
|
||||
{
|
||||
delete static_cast<std::string*>( iter->second.second);
|
||||
break;
|
||||
}
|
||||
else if( *(iter->second.first) == typeid( std::vector< std::string>) )
|
||||
{
|
||||
delete static_cast< std::vector< std::string>* >( iter->second.second);
|
||||
break;
|
||||
}
|
||||
else if( *(iter->second.first) == typeid( std::vector<int>) )
|
||||
{
|
||||
delete static_cast< std::vector<int>* >( iter->second.second);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Read args as token value pair into map for better processing (Even the
|
||||
//! values remain strings until the parameter values is requested by the
|
||||
//! program.)
|
||||
//! @param argc the argument count (as given to 'main')
|
||||
//! @param argv the char* array containing the command line arguments
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
void
|
||||
CmdArgReader::createArgsMaps( const int argc, const char** argv) {
|
||||
|
||||
std::string token;
|
||||
std::string val_str;
|
||||
|
||||
std::map< std::string, std::string> args;
|
||||
|
||||
std::string::size_type pos;
|
||||
std::string arg;
|
||||
for( int i=1; i<argc; ++i)
|
||||
{
|
||||
arg = argv[i];
|
||||
|
||||
// check if valid command line argument: all arguments begin with - or --
|
||||
if (arg[0] != '-')
|
||||
{
|
||||
RUNTIME_EXCEPTION("Invalid command line argument.");
|
||||
}
|
||||
|
||||
int numDashes = (arg[1] == '-' ? 2 : 1);
|
||||
|
||||
// check if only flag or if a value is given
|
||||
if ( (pos = arg.find( '=')) == std::string::npos)
|
||||
{
|
||||
unprocessed[ std::string( arg, numDashes, arg.length()-numDashes)] = "FLAG";
|
||||
}
|
||||
else
|
||||
{
|
||||
unprocessed[ std::string( arg, numDashes, pos-numDashes)] =
|
||||
std::string( arg, pos+1, arg.length()-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
488
tests/opencl/blackscholes/cmd_arg_reader.h
Normal file
488
tests/opencl/blackscholes/cmd_arg_reader.h
Normal file
@@ -0,0 +1,488 @@
|
||||
/*
|
||||
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Please refer to the NVIDIA end user license agreement (EULA) associated
|
||||
* with this source code for terms and conditions that govern your use of
|
||||
* this software. Any use, reproduction, disclosure, or distribution of
|
||||
* this software and related documentation outside the terms of the EULA
|
||||
* is strictly prohibited.
|
||||
*
|
||||
*/
|
||||
|
||||
/* CUda UTility Library */
|
||||
|
||||
#ifndef _CMDARGREADER_H_
|
||||
#define _CMDARGREADER_H_
|
||||
|
||||
// includes, system
|
||||
#include <map>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <algorithm>
|
||||
#include <typeinfo>
|
||||
|
||||
// includes, project
|
||||
#include "exception.h"
|
||||
|
||||
//! Preprocessed command line arguments
//! @note Lazy evaluation: the arguments are converted from strings to the
//!       requested data type on demand. Converted values are stored in an
//!       additional map so that no further conversion is necessary. Arrays
//!       of command line arguments are stored in std::vectors.
//! @note Usage:
//!         const std::string* file =
//!             CmdArgReader::getArg< std::string>( "model")
//!         const std::vector< std::string>* files =
//!             CmdArgReader::getArg< std::vector< std::string> >( "model")
//! @note All command line arguments begin with '--' followed by the token;
//!       token and value are separated by '='; example --samples=50
//! @note Arrays have the form --model=[one.obj,two.obj,three.obj]
//!       (without whitespace)

//! Command line argument parser
class CmdArgReader
{
    template<class> friend class TestCmdArgReader;

protected:

    //! Handle to the only instance of this class
    static CmdArgReader* self;

public:

    //! Public construction interface
    //! @param argc  number of command line arguments (as given to main())
    //! @param argv  command line argument strings (as given to main())
    static void init( const int argc, const char** argv);

public:

    //! Look up the command line argument \a name as type T
    //! @return const handle to the value; NULL if the argument does not
    //!         exist or is not of type T
    //! @param name  name of the requested argument
    //! @note T is the type of the argument requested
    template<class T>
    static inline const T* getArg( const std::string& name);

    //! Tell whether a command line argument called \a name was given
    //! @return true if such an argument exists, otherwise false
    //! @param name  name of the command line argument in question
    static inline bool existArg( const std::string& name);

    //! Original / raw argc program argument
    static inline int& getRArgc();

    //! Original / raw argv program argument
    static inline char**& getRArgv();

public:

    //! Destructor
    ~CmdArgReader();

protected:

    //! Constructor, default
    CmdArgReader();

private:

    // private helper functions

    //! Member-level worker behind 'getArg'
    //! @return const handle to the value; NULL if the argument does not
    //!         exist or is not of type T
    //! @param name  name of the requested argument
    //! @note T is the type of the argument requested
    template<class T>
    inline const T* getArgHelper( const std::string& name);

    //! Member-level worker behind 'existArg'
    //! @return true if an argument called \a name exists, otherwise false
    //! @param name  name of the requested argument
    inline bool existArgHelper( const std::string& name) const;

    //! Split the command line into token / value string pairs; the values
    //! remain strings until the program requests them.
    //! @param argc  the argument count (as given to 'main')
    //! @param argv  the char* array containing the command line arguments
    void createArgsMaps( const int argc, const char** argv);

    //! "Cast" a string from the map of unprocessed values to type T
    //! @return true if the conversion succeeded, otherwise false
    //! @param element  the value as string
    //! @param val      receives the value as type T
    template<class T>
    static inline bool convertToT( const std::string& element, T& val);

public:

    // typedefs internal

    //! One processed argument: the type it was converted to plus the value.
    //! The type info makes it possible to check whether a repeated request
    //! asks for the same type as the first conversion.
    typedef std::pair< const std::type_info*, void*> ValType;
    //! Map of already converted values
    typedef std::map< std::string, ValType > ArgsMap;
    //! Iterators for the map of already converted values
    typedef ArgsMap::iterator ArgsMapIter;
    typedef ArgsMap::const_iterator ConstArgsMapIter;

    //! Map of unprocessed (i.e. unconverted) token-value pairs
    typedef std::map< std::string, std::string> UnpMap;
    //! Iterator for the map of unprocessed token-value pairs
    typedef UnpMap::iterator UnpMapIter;

private:

#ifdef _WIN32
#  pragma warning( disable: 4251)
#endif

    //! Original value of argc
    static int rargc;

    //! Command line arguments in raw format
    static char** rargv;

    //! Already converted token-value pairs
    ArgsMap args;

    //! Unprocessed / unconverted token-value pairs
    UnpMap unprocessed;

    //! Scratch iterator into 'args', kept as a member
    ArgsMapIter iter;

    //! Scratch iterator into 'unprocessed', kept as a member
    UnpMapIter iter_unprocessed;

#ifdef _WIN32
#  pragma warning( default: 4251)
#endif

private:

    //! Constructor, copy (not implemented)
    CmdArgReader( const CmdArgReader&);

    //! Assignment operator (not implemented)
    CmdArgReader& operator=( const CmdArgReader&);
};
|
||||
|
||||
// variables, exported (extern)
|
||||
|
||||
// functions, inlined (inline)
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Conversion function for command line argument arrays
|
||||
//! @note This function is used each type for which no template specialization
|
||||
//! exist (which will cause errors if the type does not fulfill the std::vector
|
||||
//! interface).
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
template<class T>
|
||||
/*static*/ inline bool
|
||||
CmdArgReader::convertToT( const std::string& element, T& val)
|
||||
{
|
||||
// preallocate storage
|
||||
val.resize( std::count( element.begin(), element.end(), ',') + 1);
|
||||
|
||||
unsigned int i = 0;
|
||||
std::string::size_type pos_start = 1; // leave array prefix '['
|
||||
std::string::size_type pos_end = 0;
|
||||
|
||||
// do for all elements of the comma seperated list
|
||||
while( std::string::npos != ( pos_end = element.find(',', pos_end+1)) )
|
||||
{
|
||||
// convert each element by the appropriate function
|
||||
if ( ! convertToT< typename T::value_type >(
|
||||
std::string( element, pos_start, pos_end - pos_start), val[i]))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
pos_start = pos_end + 1;
|
||||
++i;
|
||||
}
|
||||
|
||||
std::string tmp1( element, pos_start, element.length() - pos_start - 1);
|
||||
|
||||
// process last element (leave array postfix ']')
|
||||
if ( ! convertToT< typename T::value_type >( std::string( element,
|
||||
pos_start,
|
||||
element.length() - pos_start - 1),
|
||||
val[i]))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
// possible to process all elements?
|
||||
return true;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Conversion function for command line arguments of type int
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
template<>
|
||||
inline bool
|
||||
CmdArgReader::convertToT<int>( const std::string& element, int& val)
|
||||
{
|
||||
std::istringstream ios( element);
|
||||
ios >> val;
|
||||
|
||||
bool ret_val = false;
|
||||
if ( ios.eof())
|
||||
{
|
||||
ret_val = true;
|
||||
}
|
||||
|
||||
return ret_val;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Conversion function for command line arguments of type float
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
template<>
|
||||
inline bool
|
||||
CmdArgReader::convertToT<float>( const std::string& element, float& val)
|
||||
{
|
||||
std::istringstream ios( element);
|
||||
ios >> val;
|
||||
|
||||
bool ret_val = false;
|
||||
if ( ios.eof())
|
||||
{
|
||||
ret_val = true;
|
||||
}
|
||||
|
||||
return ret_val;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Conversion function for command line arguments of type double
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
template<>
|
||||
inline bool
|
||||
CmdArgReader::convertToT<double>( const std::string& element, double& val)
|
||||
{
|
||||
std::istringstream ios( element);
|
||||
ios >> val;
|
||||
|
||||
bool ret_val = false;
|
||||
if ( ios.eof())
|
||||
{
|
||||
ret_val = true;
|
||||
}
|
||||
|
||||
return ret_val;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Conversion function for command line arguments of type string
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
template<>
|
||||
inline bool
|
||||
CmdArgReader::convertToT<std::string>( const std::string& element,
|
||||
std::string& val)
|
||||
{
|
||||
val = element;
|
||||
return true;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Conversion function for command line arguments of type bool
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
template<>
|
||||
inline bool
|
||||
CmdArgReader::convertToT<bool>( const std::string& element, bool& val)
|
||||
{
|
||||
// check if value is given as string-type { true | false }
|
||||
if ( "true" == element)
|
||||
{
|
||||
val = true;
|
||||
return true;
|
||||
}
|
||||
else if ( "false" == element)
|
||||
{
|
||||
val = false;
|
||||
return true;
|
||||
}
|
||||
// check if argument is given as integer { 0 | 1 }
|
||||
else
|
||||
{
|
||||
int tmp;
|
||||
if ( convertToT<int>( element, tmp))
|
||||
{
|
||||
if ( 1 == tmp)
|
||||
{
|
||||
val = true;
|
||||
return true;
|
||||
}
|
||||
else if ( 0 == tmp)
|
||||
{
|
||||
val = false;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Get the value of the command line argument with given name
|
||||
//! @return A const handle to the requested argument. If the argument does
|
||||
//! not exist or if it is not from type T NULL is returned
|
||||
//! @param T the type of the argument requested
|
||||
//! @param name the name of the requested argument
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
template<class T>
|
||||
/*static*/ const T*
|
||||
CmdArgReader::getArg( const std::string& name)
|
||||
{
|
||||
if( ! self)
|
||||
{
|
||||
RUNTIME_EXCEPTION("CmdArgReader::getArg(): CmdArgReader not initialized.");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return self->getArgHelper<T>( name);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Check if a command line argument with the given name exists
|
||||
//! @return true if a command line argument with name \a name exists,
|
||||
//! otherwise false
|
||||
//! @param name name of the command line argument in question
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/*static*/ inline bool
|
||||
CmdArgReader::existArg( const std::string& name)
|
||||
{
|
||||
if( ! self)
|
||||
{
|
||||
RUNTIME_EXCEPTION("CmdArgReader::getArg(): CmdArgReader not initialized.");
|
||||
return false;
|
||||
}
|
||||
|
||||
return self->existArgHelper( name);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! @brief Get the value of the command line argument with given name
|
||||
//! @return A const handle to the requested argument. If the argument does
|
||||
//! not exist or if it is not from type T NULL is returned
|
||||
//! @param T the type of the argument requested
|
||||
//! @param name the name of the requested argument
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
template<class T>
|
||||
const T*
|
||||
CmdArgReader::getArgHelper( const std::string& name)
|
||||
{
|
||||
// check if argument already processed and stored in correct type
|
||||
if ( args.end() != (iter = args.find( name)))
|
||||
{
|
||||
if ( (*(iter->second.first)) == typeid( T) )
|
||||
{
|
||||
return (T*) iter->second.second;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
T* tmp = new T;
|
||||
|
||||
// check the array with unprocessed values
|
||||
if ( unprocessed.end() != (iter_unprocessed = unprocessed.find( name)))
|
||||
{
|
||||
// try to "cast" the string to the type requested
|
||||
if ( convertToT< T >( iter_unprocessed->second, *tmp))
|
||||
{
|
||||
// add the token element pair to map of already converted values
|
||||
args[name] = std::make_pair( &(typeid( T)), (void*) tmp);
|
||||
|
||||
return tmp;
|
||||
}
|
||||
}
|
||||
|
||||
// not used while not inserted into the map -> cleanup
|
||||
delete tmp;
|
||||
}
|
||||
|
||||
// failed, argument not available
|
||||
return NULL;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Check if a command line argument with name \a name exists
|
||||
//! @return true if a command line argument of name \a name exists,
|
||||
//! otherwise false
|
||||
//! @param name the name of the requested argument
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
inline bool
|
||||
CmdArgReader::existArgHelper( const std::string& name) const
|
||||
{
|
||||
bool ret_val = false;
|
||||
|
||||
// check if argument already processed and stored in correct type
|
||||
if( args.end() != args.find( name))
|
||||
{
|
||||
ret_val = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
// check the array with unprocessed values
|
||||
if ( unprocessed.end() != unprocessed.find( name))
|
||||
{
|
||||
ret_val = true;
|
||||
}
|
||||
}
|
||||
|
||||
return ret_val;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Get the original / raw argc program argument
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/*static*/ inline int&
|
||||
CmdArgReader::getRArgc()
|
||||
{
|
||||
if( ! self)
|
||||
{
|
||||
RUNTIME_EXCEPTION("CmdArgReader::getRArgc(): CmdArgReader not initialized.");
|
||||
}
|
||||
|
||||
return rargc;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Get the original / raw argv program argument
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/*static*/ inline char**&
|
||||
CmdArgReader::getRArgv()
|
||||
{
|
||||
if( ! self)
|
||||
{
|
||||
RUNTIME_EXCEPTION("CmdArgReader::getRArgc(): CmdArgReader not initialized.");
|
||||
}
|
||||
|
||||
return rargv;
|
||||
}
|
||||
|
||||
// functions, exported (extern)
|
||||
|
||||
#endif // #ifndef _CMDARGREADER_H_
|
||||
151
tests/opencl/blackscholes/exception.h
Normal file
151
tests/opencl/blackscholes/exception.h
Normal file
@@ -0,0 +1,151 @@
|
||||
/*
|
||||
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Please refer to the NVIDIA end user license agreement (EULA) associated
|
||||
* with this source code for terms and conditions that govern your use of
|
||||
* this software. Any use, reproduction, disclosure, or distribution of
|
||||
* this software and related documentation outside the terms of the EULA
|
||||
* is strictly prohibited.
|
||||
*
|
||||
*/
|
||||
|
||||
/* CUda UTility Library */
|
||||
#ifndef _EXCEPTION_H_
|
||||
#define _EXCEPTION_H_
|
||||
|
||||
// includes, system
|
||||
#include <exception>
|
||||
#include <stdexcept>
|
||||
#include <iostream>
|
||||
#include <stdlib.h>
|
||||
|
||||
//! Exception wrapper.
//! Instances can only be created through throw_it(), which builds the message
//! from the throwing location and a description and then throws.
//! @param Std_Exception Exception out of namespace std for easy typing.
template<class Std_Exception>
class Exception : public Std_Exception
{
public:

    //! @brief Static construction interface
    //! @return Never returns; always throws an Exception<Std_Exception>
    //! @param file     file in which the Exception occurs
    //! @param line     line in which the Exception occurs
    //! @param detailed details on the code fragment causing the Exception;
    //!                 a NULL pointer is reported as "-"
    static void throw_it( const char* file,
                          const int line,
                          const char* detailed = "-" );

    //! Static construction interface
    //! @return Never returns; always throws an Exception<Std_Exception>
    //! @param file     file in which the Exception occurs
    //! @param line     line in which the Exception occurs
    //! @param detailed details on the code fragment causing the Exception
    static void throw_it( const char* file,
                          const int line,
                          const std::string& detailed);

    //! Destructor
    virtual ~Exception() throw();

private:

    //! Constructor, default (private)
    Exception();

    //! Constructor, standard
    //! @param str string returned by what()
    Exception( const std::string& str);

};

////////////////////////////////////////////////////////////////////////////////
//! Exception handler function for arbitrary exceptions: prints what() to
//! std::cerr and terminates the program with EXIT_FAILURE.
//! @param ex exception to handle
////////////////////////////////////////////////////////////////////////////////
template<class Exception_Typ>
inline void
handleException( const Exception_Typ& ex)
{
    std::cerr << ex.what() << std::endl;

    exit( EXIT_FAILURE);
}

//! Convenience macros

//! Exception caused by dynamic program behavior, e.g. file does not exist
#define RUNTIME_EXCEPTION( msg) \
    Exception<std::runtime_error>::throw_it( __FILE__, __LINE__, msg)

//! Logic exception in program, e.g. an assert failed
#define LOGIC_EXCEPTION( msg) \
    Exception<std::logic_error>::throw_it( __FILE__, __LINE__, msg)

//! Out of range exception
#define RANGE_EXCEPTION( msg) \
    Exception<std::range_error>::throw_it( __FILE__, __LINE__, msg)

////////////////////////////////////////////////////////////////////////////////
//! Implementation

// includes, system
#include <sstream>

////////////////////////////////////////////////////////////////////////////////
//! Static construction interface.
//! Builds the what() text from file, line and description, then throws.
//! @param file     file in which the Exception occurs (NULL -> "<unknown>")
//! @param line     line in which the Exception occurs
//! @param detailed details on the cause (NULL -> "-")
////////////////////////////////////////////////////////////////////////////////
/*static*/ template<class Std_Exception>
void
Exception<Std_Exception>::
throw_it( const char* file, const int line, const char* detailed)
{
    std::stringstream s;

    // Streaming a null char pointer is not allowed, so replace missing
    // strings by placeholders before composing the message.
    const char* const where = ( 0 != file) ? file : "<unknown>";
    const char* const what_happened = ( 0 != detailed) ? detailed : "-";

    // Quite heavy-weight but exceptions are not for
    // performance / release versions
    s << "Exception in file '" << where << "' in line " << line << "\n"
      << "Detailed description: " << what_happened << "\n";

    throw Exception( s.str());
}

////////////////////////////////////////////////////////////////////////////////
//! Static construction interface.
//! Forwards to the const char* overload.
//! @param file file in which the Exception occurs
//! @param line line in which the Exception occurs
//! @param msg  details on the cause
////////////////////////////////////////////////////////////////////////////////
/*static*/ template<class Std_Exception>
void
Exception<Std_Exception>::
throw_it( const char* file, const int line, const std::string& msg)
{
    throw_it( file, line, msg.c_str());
}

////////////////////////////////////////////////////////////////////////////////
//! Constructor, default (private).
////////////////////////////////////////////////////////////////////////////////
template<class Std_Exception>
Exception<Std_Exception>::Exception() :
    Exception("Unknown Exception.\n")
{ }

////////////////////////////////////////////////////////////////////////////////
//! Constructor, standard (private).
//! @param s string returned by what()
////////////////////////////////////////////////////////////////////////////////
template<class Std_Exception>
Exception<Std_Exception>::Exception( const std::string& s) :
    Std_Exception( s)
{ }

////////////////////////////////////////////////////////////////////////////////
//! Destructor
////////////////////////////////////////////////////////////////////////////////
template<class Std_Exception>
Exception<Std_Exception>::~Exception() throw() { }
|
||||
|
||||
// functions, exported
|
||||
|
||||
#endif // #ifndef _EXCEPTION_H_
|
||||
|
||||
101
tests/opencl/blackscholes/kernel.cl
Normal file
101
tests/opencl/blackscholes/kernel.cl
Normal file
@@ -0,0 +1,101 @@
|
||||
/*
|
||||
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Please refer to the NVIDIA end user license agreement (EULA) associated
|
||||
* with this source code for terms and conditions that govern your use of
|
||||
* this software. Any use, reproduction, disclosure, or distribution of
|
||||
* this software and related documentation outside the terms of the EULA
|
||||
* is strictly prohibited.
|
||||
*
|
||||
*/
|
||||
|
||||
// Math wrappers used by the kernel code. The first branch would map them to
// the native_ built-ins; it is disabled, so the regular built-ins are used.
#if 0
    #define EXP(a)  native_exp(a)
    #define LOG(a)  native_log(a)
    #define SQRT(a) native_sqrt(a)
#else
    #define EXP(a)  exp(a)
    #define LOG(a)  log(a)
    #define SQRT(a) sqrt(a)
#endif
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
// Forward declarations; according to the original note these avoid a bug in
// the OpenCL compiler on Mac OSX 10.7 systems
///////////////////////////////////////////////////////////////////////////////
float CND(float d);

void BlackScholesBody(
    __global float *call,
    __global float *put,
    float S,
    float X,
    float T,
    float R,
    float V
);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
// Rational approximation of cumulative normal distribution function.
// The polynomial is evaluated for |d|; for positive d the result is
// mirrored as 1 - value.
///////////////////////////////////////////////////////////////////////////////
float CND(float d){
    // coefficients of the approximation polynomial
    const float A1 =  0.31938153f;
    const float A2 = -0.356563782f;
    const float A3 =  1.781477937f;
    const float A4 = -1.821255978f;
    const float A5 =  1.330274429f;
    const float RSQRT2PI = 0.39894228040143267793994605993438f;

    float K = 1.0f / (1.0f + 0.2316419f * fabs(d));

    // same expression as before, only laid out over several lines
    float tail = RSQRT2PI * EXP(- 0.5f * d * d) *
                 (K * (A1 + K * (A2 + K * (A3 + K * (A4 + K * A5)))));

    return (d > 0) ? (1.0f - tail) : tail;
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
// Black-Scholes formula: computes the call and the put price of one option
// and writes them through the two output pointers.
///////////////////////////////////////////////////////////////////////////////
void BlackScholesBody(
    __global float *call, // out: call option price
    __global float *put,  // out: put option price
    float S,              // current stock price
    float X,              // option strike price
    float T,              // option years
    float R,              // riskless rate of return
    float V               // stock volatility
){
    float rootT = SQRT(T);

    float d1 = (LOG(S / X) + (R + 0.5f * V * V) * T) / (V * rootT);
    float d2 = d1 - V * rootT;

    float cndD1 = CND(d1);
    float cndD2 = CND(d2);

    // discount factor shared by both prices
    float discount = EXP(- R * T);

    // call and put are derived from the same intermediate values
    *call = (S * cndD1 - X * discount * cndD2);
    *put  = (X * discount * (1.0f - cndD2) - S * (1.0f - cndD1));
}
|
||||
|
||||
|
||||
|
||||
// Kernel entry point. Each work-item starts at its global id and advances by
// the global size until all optN options have been priced.
__kernel void BlackScholes(
    __global float *d_Call, //Call option price
    __global float *d_Put,  //Put option price
    __global float *d_S,    //Current stock price
    __global float *d_X,    //Option strike price
    __global float *d_T,    //Option years
    float R,                //Riskless rate of return
    float V,                //Stock volatility
    unsigned int optN
){
    unsigned int idx = get_global_id(0);

    while(idx < optN){
        BlackScholesBody(&d_Call[idx], &d_Put[idx], d_S[idx], d_X[idx], d_T[idx], R, V);
        idx += get_global_size(0);
    }
}
|
||||
248
tests/opencl/blackscholes/main.cpp
Normal file
248
tests/opencl/blackscholes/main.cpp
Normal file
@@ -0,0 +1,248 @@
|
||||
/*
|
||||
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Please refer to the NVIDIA end user license agreement (EULA) associated
|
||||
* with this source code for terms and conditions that govern your use of
|
||||
* this software. Any use, reproduction, disclosure, or distribution of
|
||||
* this software and related documentation outside the terms of the EULA
|
||||
* is strictly prohibited.
|
||||
*
|
||||
*/
|
||||
|
||||
// standard utilities and systems includes
|
||||
#include <oclUtils.h>
|
||||
#include <shrQATest.h>
|
||||
#include "oclBlackScholes_common.h"
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Helper functions
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Returns the device execution time of `event` in seconds, computed from its
// CL_PROFILING_COMMAND_START / CL_PROFILING_COMMAND_END timestamps.
// If either profiling query fails, 0.0 is returned instead of a value derived
// from uninitialized timestamps.
double executionTime(cl_event &event){
    cl_ulong start = 0, end = 0;

    const cl_int endStatus   = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, NULL);
    const cl_int startStatus = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, NULL);

    if(endStatus != CL_SUCCESS || startStatus != CL_SUCCESS)
        return 0.0;

    return (double)1.0e-9 * (end - start); // convert nanoseconds to seconds on return
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Random float helper
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Returns a pseudo-random value interpolated between low and high, using the
// next rand() result scaled by RAND_MAX as the blend factor.
float randFloat(float low, float high){
    const float blend = (float)rand() / (float)RAND_MAX;
    const float lowPart = (1.0f - blend) * low;
    const float highPart = blend * high;
    return lowPart + highPart;
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Main program
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
cl_platform_id cpPlatform; //OpenCL platform
|
||||
cl_device_id* cdDevices = NULL; //OpenCL devices list (array)
|
||||
cl_context cxGPUContext; //OpenCL context
|
||||
cl_command_queue cqCommandQueue; //OpenCL command que
|
||||
cl_mem //OpenCL memory buffer objects
|
||||
d_Call,
|
||||
d_Put,
|
||||
d_S,
|
||||
d_X,
|
||||
d_T;
|
||||
|
||||
cl_int ciErrNum;
|
||||
|
||||
float
|
||||
*h_CallCPU,
|
||||
*h_PutCPU,
|
||||
*h_CallGPU,
|
||||
*h_PutGPU,
|
||||
*h_S,
|
||||
*h_X,
|
||||
*h_T;
|
||||
|
||||
const unsigned int optionCount = 64;
|
||||
const float R = 0.02f;
|
||||
const float V = 0.30f;
|
||||
|
||||
shrQAStart(argc, argv);
|
||||
|
||||
// Get the NVIDIA platform
|
||||
ciErrNum = oclGetPlatformID(&cpPlatform);
|
||||
oclCheckErrorEX(ciErrNum, CL_SUCCESS, NULL);
|
||||
shrLog("clGetPlatformID...\n");
|
||||
|
||||
//Get all the devices
|
||||
cl_uint uiNumDevices = 0; // Number of devices available
|
||||
cl_uint uiTargetDevice = 0; // Default Device to compute on
|
||||
cl_uint uiNumComputeUnits; // Number of compute units (SM's on NV GPU)
|
||||
shrLog("Get the Device info and select Device...\n");
|
||||
ciErrNum = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_DEFAULT, 0, NULL, &uiNumDevices);
|
||||
oclCheckErrorEX(ciErrNum, CL_SUCCESS, NULL);
|
||||
cdDevices = (cl_device_id *)malloc(uiNumDevices * sizeof(cl_device_id) );
|
||||
ciErrNum = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_DEFAULT, uiNumDevices, cdDevices, NULL);
|
||||
oclCheckErrorEX(ciErrNum, CL_SUCCESS, NULL);
|
||||
|
||||
// Get command line device options and config accordingly
|
||||
shrLog(" # of Devices Available = %u\n", uiNumDevices);
|
||||
if(shrGetCmdLineArgumentu(argc, (const char**)argv, "device", &uiTargetDevice)== shrTRUE)
|
||||
{
|
||||
uiTargetDevice = CLAMP(uiTargetDevice, 0, (uiNumDevices - 1));
|
||||
}
|
||||
shrLog(" Using Device %u: ", uiTargetDevice);
|
||||
oclPrintDevName(LOGBOTH, cdDevices[uiTargetDevice]);
|
||||
ciErrNum = clGetDeviceInfo(cdDevices[uiTargetDevice], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(uiNumComputeUnits), &uiNumComputeUnits, NULL);
|
||||
oclCheckErrorEX(ciErrNum, CL_SUCCESS, NULL);
|
||||
shrLog("\n # of Compute Units = %u\n", uiNumComputeUnits);
|
||||
|
||||
// set logfile name and start logs
|
||||
shrSetLogFileName ("oclBlackScholes.txt");
|
||||
shrLog("%s Starting...\n\n", argv[0]);
|
||||
|
||||
shrLog("Allocating and initializing host memory...\n");
|
||||
h_CallCPU = (float *)malloc(optionCount * sizeof(float));
|
||||
h_PutCPU = (float *)malloc(optionCount * sizeof(float));
|
||||
h_CallGPU = (float *)malloc(optionCount * sizeof(float));
|
||||
h_PutGPU = (float *)malloc(optionCount * sizeof(float));
|
||||
h_S = (float *)malloc(optionCount * sizeof(float));
|
||||
h_X = (float *)malloc(optionCount * sizeof(float));
|
||||
h_T = (float *)malloc(optionCount * sizeof(float));
|
||||
|
||||
srand(2009);
|
||||
for(unsigned int i = 0; i < optionCount; i++){
|
||||
h_CallCPU[i] = -1.0f;
|
||||
h_PutCPU[i] = -1.0f;
|
||||
h_S[i] = randFloat(5.0f, 30.0f);
|
||||
h_X[i] = randFloat(1.0f, 100.0f);
|
||||
h_T[i] = randFloat(0.25f, 10.0f);
|
||||
}
|
||||
|
||||
shrLog("Initializing OpenCL...\n");
|
||||
// Get the NVIDIA platform
|
||||
ciErrNum = oclGetPlatformID(&cpPlatform);
|
||||
oclCheckError(ciErrNum, CL_SUCCESS);
|
||||
|
||||
// Get a GPU device
|
||||
ciErrNum = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_DEFAULT, 1, &cdDevices[uiTargetDevice], NULL);
|
||||
oclCheckError(ciErrNum, CL_SUCCESS);
|
||||
|
||||
// Create the context
|
||||
cxGPUContext = clCreateContext(0, 1, &cdDevices[uiTargetDevice], NULL, NULL, &ciErrNum);
|
||||
oclCheckError(ciErrNum, CL_SUCCESS);
|
||||
|
||||
//Create a command-queue
|
||||
cqCommandQueue = clCreateCommandQueue(cxGPUContext, cdDevices[uiTargetDevice], CL_QUEUE_PROFILING_ENABLE, &ciErrNum);
|
||||
oclCheckError(ciErrNum, CL_SUCCESS);
|
||||
|
||||
shrLog("Creating OpenCL memory objects...\n");
|
||||
d_Call = clCreateBuffer(cxGPUContext, CL_MEM_READ_WRITE, optionCount * sizeof(float), NULL, &ciErrNum);
|
||||
oclCheckError(ciErrNum, CL_SUCCESS);
|
||||
d_Put = clCreateBuffer(cxGPUContext, CL_MEM_READ_WRITE, optionCount * sizeof(float), NULL, &ciErrNum);
|
||||
oclCheckError(ciErrNum, CL_SUCCESS);
|
||||
d_S = clCreateBuffer(cxGPUContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, optionCount * sizeof(float), h_S, &ciErrNum);
|
||||
oclCheckError(ciErrNum, CL_SUCCESS);
|
||||
d_X = clCreateBuffer(cxGPUContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, optionCount * sizeof(float), h_X, &ciErrNum);
|
||||
oclCheckError(ciErrNum, CL_SUCCESS);
|
||||
d_T = clCreateBuffer(cxGPUContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, optionCount * sizeof(float), h_T, &ciErrNum);
|
||||
oclCheckError(ciErrNum, CL_SUCCESS);
|
||||
|
||||
shrLog("Starting up BlackScholes...\n");
|
||||
initBlackScholes(cxGPUContext, cqCommandQueue, (const char **)argv);
|
||||
|
||||
shrLog("Running OpenCL BlackScholes...\n\n");
|
||||
//Just a single run or a warmup iteration
|
||||
BlackScholes(
|
||||
NULL,
|
||||
d_Call,
|
||||
d_Put,
|
||||
d_S,
|
||||
d_X,
|
||||
d_T,
|
||||
R,
|
||||
V,
|
||||
optionCount
|
||||
);
|
||||
|
||||
#ifdef GPU_PROFILING
|
||||
const int numIterations = 16;
|
||||
cl_event startMark, endMark;
|
||||
ciErrNum = clEnqueueMarker(cqCommandQueue, &startMark);
|
||||
ciErrNum |= clFinish(cqCommandQueue);
|
||||
shrCheckError(ciErrNum, CL_SUCCESS);
|
||||
shrDeltaT(0);
|
||||
|
||||
for(int i = 0; i < numIterations; i++){
|
||||
BlackScholes(
|
||||
cqCommandQueue,
|
||||
d_Call,
|
||||
d_Put,
|
||||
d_S,
|
||||
d_X,
|
||||
d_T,
|
||||
R,
|
||||
V,
|
||||
optionCount
|
||||
);
|
||||
}
|
||||
|
||||
ciErrNum = clEnqueueMarker(cqCommandQueue, &endMark);
|
||||
ciErrNum |= clFinish(cqCommandQueue);
|
||||
shrCheckError(ciErrNum, CL_SUCCESS);
|
||||
|
||||
//Calculate performance metrics by wallclock time
|
||||
double gpuTime = shrDeltaT(0) / numIterations;
|
||||
shrLogEx(LOGBOTH | MASTER, 0, "oclBlackScholes, Throughput = %.4f GOptions/s, Time = %.5f s, Size = %u options, NumDevsUsed = %i, Workgroup = %u\n",
|
||||
(double)(2.0 * optionCount * 1.0e-9)/gpuTime, gpuTime, (2 * optionCount), 1, 0);
|
||||
|
||||
//Get profiling info
|
||||
cl_ulong startTime = 0, endTime = 0;
|
||||
ciErrNum = clGetEventProfilingInfo(startMark, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &startTime, NULL);
|
||||
ciErrNum |= clGetEventProfilingInfo(endMark, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &endTime, NULL);
|
||||
shrCheckError(ciErrNum, CL_SUCCESS);
|
||||
shrLog("\nOpenCL time: %.5f s\n\n", 1.0e-9 * ((double)endTime - (double)startTime) / (double)numIterations);
|
||||
#endif
|
||||
|
||||
shrLog("\nReading back OpenCL BlackScholes results...\n");
|
||||
ciErrNum = clEnqueueReadBuffer(cqCommandQueue, d_Call, CL_TRUE, 0, optionCount * sizeof(float), h_CallGPU, 0, NULL, NULL);
|
||||
oclCheckError(ciErrNum, CL_SUCCESS);
|
||||
ciErrNum = clEnqueueReadBuffer(cqCommandQueue, d_Put, CL_TRUE, 0, optionCount * sizeof(float), h_PutGPU, 0, NULL, NULL);
|
||||
oclCheckError(ciErrNum, CL_SUCCESS);
|
||||
|
||||
shrLog("Comparing against Host/C++ computation...\n");
|
||||
BlackScholesCPU(h_CallCPU, h_PutCPU, h_S, h_X, h_T, R, V, optionCount);
|
||||
double deltaCall = 0, deltaPut = 0, sumCall = 0, sumPut = 0;
|
||||
double L1call, L1put;
|
||||
for(unsigned int i = 0; i < optionCount; i++)
|
||||
{
|
||||
sumCall += fabs(h_CallCPU[i]);
|
||||
sumPut += fabs(h_PutCPU[i]);
|
||||
deltaCall += fabs(h_CallCPU[i] - h_CallGPU[i]);
|
||||
deltaPut += fabs(h_PutCPU[i] - h_PutGPU[i]);
|
||||
}
|
||||
L1call = deltaCall / sumCall;
|
||||
L1put = deltaPut / sumPut;
|
||||
shrLog("Relative L1 (call, put) = (%.3e, %.3e)\n\n", L1call, L1put);
|
||||
|
||||
shrLog("Shutting down...\n");
|
||||
closeBlackScholes();
|
||||
ciErrNum = clReleaseMemObject(d_T);
|
||||
ciErrNum |= clReleaseMemObject(d_X);
|
||||
ciErrNum |= clReleaseMemObject(d_S);
|
||||
ciErrNum |= clReleaseMemObject(d_Put);
|
||||
ciErrNum |= clReleaseMemObject(d_Call);
|
||||
ciErrNum |= clReleaseCommandQueue(cqCommandQueue);
|
||||
ciErrNum |= clReleaseContext(cxGPUContext);
|
||||
oclCheckError(ciErrNum, CL_SUCCESS);
|
||||
|
||||
free(h_T);
|
||||
free(h_X);
|
||||
free(h_S);
|
||||
free(h_PutGPU);
|
||||
free(h_CallGPU);
|
||||
free(h_PutCPU);
|
||||
free(h_CallCPU);
|
||||
|
||||
if(cdDevices)free(cdDevices);
|
||||
|
||||
shrQAFinishExit(argc, (const char **)argv, ((L1call < 1E-6) && (L1put < 1E-6)) ? QA_PASSED : QA_FAILED );
|
||||
}
|
||||
BIN
tests/opencl/blackscholes/oclBlackScholes.pdf
Normal file
BIN
tests/opencl/blackscholes/oclBlackScholes.pdf
Normal file
Binary file not shown.
50
tests/opencl/blackscholes/oclBlackScholes_common.h
Normal file
50
tests/opencl/blackscholes/oclBlackScholes_common.h
Normal file
@@ -0,0 +1,50 @@
|
||||
/*
|
||||
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Please refer to the NVIDIA end user license agreement (EULA) associated
|
||||
* with this source code for terms and conditions that govern your use of
|
||||
* this software. Any use, reproduction, disclosure, or distribution of
|
||||
* this software and related documentation outside the terms of the EULA
|
||||
* is strictly prohibited.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#include <oclUtils.h>
|
||||
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Process an array of optN options on CPU
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" void BlackScholesCPU(
|
||||
float *h_Call, //Call option price
|
||||
float *h_Put, //Put option price
|
||||
float *h_S, //Current stock price
|
||||
float *h_X, //Option strike price
|
||||
float *h_T, //Option years
|
||||
float R, //Riskless rate of return
|
||||
float V, //Stock volatility
|
||||
unsigned int optionCount
|
||||
);
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// OpenCL Black-Scholes kernel launcher
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" void initBlackScholes(cl_context cxGPUContext, cl_command_queue cqParamCommandQue, const char **argv);
|
||||
|
||||
extern "C" void closeBlackScholes(void);
|
||||
|
||||
extern "C" void BlackScholes(
|
||||
cl_command_queue cqCommandQueue,
|
||||
cl_mem d_Call, //Call option price
|
||||
cl_mem d_Put, //Put option price
|
||||
cl_mem d_S, //Current stock price
|
||||
cl_mem d_X, //Option strike price
|
||||
cl_mem d_T, //Option years
|
||||
cl_float R, //Riskless rate of return
|
||||
cl_float V, //Stock volatility
|
||||
cl_uint optionCount
|
||||
);
|
||||
92
tests/opencl/blackscholes/oclBlackScholes_gold.cpp
Normal file
92
tests/opencl/blackscholes/oclBlackScholes_gold.cpp
Normal file
@@ -0,0 +1,92 @@
|
||||
/*
|
||||
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Please refer to the NVIDIA end user license agreement (EULA) associated
|
||||
* with this source code for terms and conditions that govern your use of
|
||||
* this software. Any use, reproduction, disclosure, or distribution of
|
||||
* this software and related documentation outside the terms of the EULA
|
||||
* is strictly prohibited.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#include <math.h>
|
||||
#include "oclBlackScholes_common.h"
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Rational approximation of cumulative normal distribution function
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Double-precision approximation of the cumulative normal distribution at d.
// A fifth-degree polynomial in K = 1 / (1 + 0.2316419 * |d|) is scaled by the
// Gaussian density; for d > 0 the result is reflected as 1 - value.
static double CND(double d){
    // Polynomial coefficients, ordered from the K^5 term down to the K^1 term
    static const double kCoeff[5] = {
        1.330274429,
        -1.821255978,
        1.781477937,
        -0.356563782,
        0.31938153
    };
    const double RSQRT2PI = 0.39894228040143267793994605993438;

    const double K = 1.0 / (1.0 + 0.2316419 * fabs(d));

    // Horner evaluation of A1 + K * (A2 + K * (A3 + K * (A4 + K * A5)))
    double poly = 0.0;
    for(int i = 0; i < 5; i++)
        poly = kCoeff[i] + K * poly;

    const double tail = RSQRT2PI * exp(- 0.5 * d * d) * (K * poly);

    if(d > 0)
        return 1.0 - tail;

    return tail;
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Black-Scholes formula for both call and put
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
static void BlackScholesBodyCPU(
|
||||
float& call, //Call option price
|
||||
float& put, //Put option price
|
||||
float Sf, //Current stock price
|
||||
float Xf, //Option strike price
|
||||
float Tf, //Option years
|
||||
float Rf, //Riskless rate of return
|
||||
float Vf //Stock volatility
|
||||
){
|
||||
double S = Sf, X = Xf, T = Tf, R = Rf, V = Vf;
|
||||
|
||||
double sqrtT = sqrt(T);
|
||||
double d1 = (log(S / X) + (R + 0.5 * V * V) * T) / (V * sqrtT);
|
||||
double d2 = d1 - V * sqrtT;
|
||||
double CNDD1 = CND(d1);
|
||||
double CNDD2 = CND(d2);
|
||||
|
||||
//Calculate Call and Put simultaneously
|
||||
double expRT = exp(- R * T);
|
||||
call = (float)(S * CNDD1 - X * expRT * CNDD2);
|
||||
put = (float)(X * expRT * (1.0 - CNDD2) - S * (1.0 - CNDD1));
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Process an array of optN options
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" void BlackScholesCPU(
|
||||
float *h_Call, //Call option price
|
||||
float *h_Put, //Put option price
|
||||
float *h_S, //Current stock price
|
||||
float *h_X, //Option strike price
|
||||
float *h_T, //Option years
|
||||
float R, //Riskless rate of return
|
||||
float V, //Stock volatility
|
||||
unsigned int optionCount
|
||||
){
|
||||
for(unsigned int i = 0; i < optionCount; i++)
|
||||
BlackScholesBodyCPU(
|
||||
h_Call[i],
|
||||
h_Put[i],
|
||||
h_S[i],
|
||||
h_X[i],
|
||||
h_T[i],
|
||||
R,
|
||||
V
|
||||
);
|
||||
}
|
||||
151
tests/opencl/blackscholes/oclBlackScholes_launcher.cpp
Normal file
151
tests/opencl/blackscholes/oclBlackScholes_launcher.cpp
Normal file
@@ -0,0 +1,151 @@
|
||||
/*
|
||||
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Please refer to the NVIDIA end user license agreement (EULA) associated
|
||||
* with this source code for terms and conditions that govern your use of
|
||||
* this software. Any use, reproduction, disclosure, or distribution of
|
||||
* this software and related documentation outside the terms of the EULA
|
||||
* is strictly prohibited.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <oclUtils.h>
|
||||
#include "oclBlackScholes_common.h"
|
||||
|
||||
static cl_program cpBlackScholes; //OpenCL program
|
||||
static cl_kernel ckBlackScholes; //OpenCL kernel
|
||||
static cl_command_queue cqDefaultCommandQueue;
|
||||
|
||||
static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) {
|
||||
if (NULL == filename || NULL == data || 0 == size)
|
||||
return CL_INVALID_VALUE;
|
||||
|
||||
FILE* fp = fopen(filename, "r");
|
||||
if (NULL == fp) {
|
||||
fprintf(stderr, "Failed to load kernel.");
|
||||
return CL_INVALID_VALUE;
|
||||
}
|
||||
fseek(fp , 0 , SEEK_END);
|
||||
long fsize = ftell(fp);
|
||||
rewind(fp);
|
||||
|
||||
*data = (uint8_t*)malloc(fsize);
|
||||
*size = fread(*data, 1, fsize, fp);
|
||||
|
||||
fclose(fp);
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
// Creates the BlackScholes program from the prebuilt binary "kernel.pocl",
// builds it, creates the "BlackScholes" kernel and records
// cqParamCommandQueue as the default queue for later BlackScholes() calls.
//
// cxGPUContext        - context the program is created in
// cqParamCommandQueue - queue used when BlackScholes() is called with a NULL queue
// argv                - unused here; it was only needed by the disabled
//                       load-from-source path (BlackScholes.cl)
//
// A failed build prints the build log and terminates the process with exit(1).
extern "C" void initBlackScholes(cl_context cxGPUContext, cl_command_queue cqParamCommandQueue, const char **argv){
    cl_int ciErrNum;

    (void)argv;

    shrLog("...creating BlackScholes program\n");
    uint8_t *kernel_bin = NULL;
    size_t kernel_size;
    cl_int binary_status = 0;
    ciErrNum = read_kernel_file("kernel.pocl", &kernel_bin, &kernel_size);
    shrCheckError(ciErrNum, CL_SUCCESS);
    cl_device_id device_id = oclGetFirstDev(cxGPUContext);
    cpBlackScholes = clCreateProgramWithBinary(
        cxGPUContext, 1, &device_id, &kernel_size, (const uint8_t**)&kernel_bin, &binary_status, &ciErrNum);
    shrCheckError(ciErrNum, CL_SUCCESS);

    shrLog("...building BlackScholes program\n");
    ciErrNum = clBuildProgram(cpBlackScholes, 0, NULL, "-cl-fast-relaxed-math -Werror", NULL, NULL);

    if(ciErrNum != CL_BUILD_SUCCESS){
        shrLog("*** Compilation failure ***\n");

        // CL_CONTEXT_DEVICES reports its size in bytes, not a device count
        size_t deviceBytes;
        cl_device_id *cdDevices;
        ciErrNum = clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, 0, NULL, &deviceBytes);
        shrCheckError(ciErrNum, CL_SUCCESS);

        cdDevices = (cl_device_id *)malloc(deviceBytes);
        shrCheckError(cdDevices != NULL, shrTRUE);

        ciErrNum = clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, deviceBytes, cdDevices, NULL);
        shrCheckError(ciErrNum, CL_SUCCESS);

        // Fetch and print the build log of the first device in the context
        size_t logSize;
        char *logTxt;

        ciErrNum = clGetProgramBuildInfo(cpBlackScholes, cdDevices[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);
        shrCheckError(ciErrNum, CL_SUCCESS);

        logTxt = (char *)malloc(logSize);
        shrCheckError(logTxt != NULL, shrTRUE);

        ciErrNum = clGetProgramBuildInfo(cpBlackScholes, cdDevices[0], CL_PROGRAM_BUILD_LOG, logSize, logTxt, NULL);
        shrCheckError(ciErrNum, CL_SUCCESS);

        shrLog("%s\n", logTxt);
        shrLog("*** Exiting ***\n");
        free(logTxt);
        free(cdDevices);
        free(kernel_bin);
        exit(1);
    }

    // The program object holds the loaded binary; the host copy returned by
    // read_kernel_file() is no longer needed once the build has succeeded.
    free(kernel_bin);

    //Save ptx code to separate file
    oclLogPtx(cpBlackScholes, oclGetFirstDev(cxGPUContext), "BlackScholes.ptx");

    shrLog("...creating BlackScholes kernels\n");
    ckBlackScholes = clCreateKernel(cpBlackScholes, "BlackScholes", &ciErrNum);
    shrCheckError(ciErrNum, CL_SUCCESS);

    cqDefaultCommandQueue = cqParamCommandQueue;
}
|
||||
|
||||
extern "C" void closeBlackScholes(void){
|
||||
cl_int ciErrNum;
|
||||
ciErrNum = clReleaseKernel(ckBlackScholes);
|
||||
ciErrNum |= clReleaseProgram(cpBlackScholes);
|
||||
shrCheckError(ciErrNum, CL_SUCCESS);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// OpenCL Black-Scholes kernel launcher
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Binds the eight kernel arguments and enqueues one 1-D launch of the
// BlackScholes kernel (global size 16, local size 16). A NULL cqCommandQueue
// selects the queue recorded by initBlackScholes().
extern "C" void BlackScholes(
    cl_command_queue cqCommandQueue,
    cl_mem d_Call,   //Call option price
    cl_mem d_Put,    //Put option price
    cl_mem d_S,      //Current stock price
    cl_mem d_X,      //Option strike price
    cl_mem d_T,      //Option years
    cl_float R,      //Riskless rate of return
    cl_float V,      //Stock volatility
    cl_uint optionCount
){
    cl_command_queue queue = cqCommandQueue ? cqCommandQueue : cqDefaultCommandQueue;

    // Arguments are bound in kernel-signature order; statuses are OR-ed together
    cl_uint argIndex = 0;
    cl_int status;
    status  = clSetKernelArg(ckBlackScholes, argIndex++, sizeof(cl_mem), (void *)&d_Call);
    status |= clSetKernelArg(ckBlackScholes, argIndex++, sizeof(cl_mem), (void *)&d_Put);
    status |= clSetKernelArg(ckBlackScholes, argIndex++, sizeof(cl_mem), (void *)&d_S);
    status |= clSetKernelArg(ckBlackScholes, argIndex++, sizeof(cl_mem), (void *)&d_X);
    status |= clSetKernelArg(ckBlackScholes, argIndex++, sizeof(cl_mem), (void *)&d_T);
    status |= clSetKernelArg(ckBlackScholes, argIndex++, sizeof(cl_float), (void *)&R);
    status |= clSetKernelArg(ckBlackScholes, argIndex++, sizeof(cl_float), (void *)&V);
    status |= clSetKernelArg(ckBlackScholes, argIndex++, sizeof(cl_uint), (void *)&optionCount);
    shrCheckError(status, CL_SUCCESS);

    //Run the kernel
    const size_t globalWorkSize = 16;//60 * 1024;
    const size_t localWorkSize = 16;//128;
    status = clEnqueueNDRangeKernel(queue, ckBlackScholes, 1, NULL, &globalWorkSize, &localWorkSize, 0, NULL, NULL);
    shrCheckError(status, CL_SUCCESS);
}
|
||||
806
tests/opencl/blackscholes/oclUtils.cpp
Normal file
806
tests/opencl/blackscholes/oclUtils.cpp
Normal file
@@ -0,0 +1,806 @@
|
||||
/*
|
||||
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Please refer to the NVIDIA end user license agreement (EULA) associated
|
||||
* with this source code for terms and conditions that govern your use of
|
||||
* this software. Any use, reproduction, disclosure, or distribution of
|
||||
* this software and related documentation outside the terms of the EULA
|
||||
* is strictly prohibited.
|
||||
*
|
||||
*/
|
||||
|
||||
// *********************************************************************
|
||||
// Utilities specific to OpenCL samples in NVIDIA GPU Computing SDK
|
||||
// *********************************************************************
|
||||
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
#include <stdarg.h>
|
||||
#include "oclUtils.h"
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Gets the platform ID for NVIDIA if available, otherwise default
|
||||
//!
|
||||
//! @return the id
|
||||
//! @param clSelectedPlatformID OpenCL platform ID
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// Selects an OpenCL platform: the first one whose CL_PLATFORM_NAME contains
// "NVIDIA", otherwise the first platform reported (with a warning).
//
// clSelectedPlatformID - receives the chosen platform; set to NULL first
//
// Returns CL_SUCCESS on success, -1000 if clGetPlatformIDs fails (either the
// count query or the ID query), -2000 if no platform exists, and -3000 if the
// ID array cannot be allocated.
cl_int oclGetPlatformID(cl_platform_id* clSelectedPlatformID)
{
    char chBuffer[1024];
    cl_uint num_platforms;
    cl_platform_id* clPlatformIDs;
    cl_int ciErrNum;
    *clSelectedPlatformID = NULL;

    // Get OpenCL platform count
    ciErrNum = clGetPlatformIDs (0, NULL, &num_platforms);
    if (ciErrNum != CL_SUCCESS)
    {
        shrLog(" Error %i in clGetPlatformIDs Call !!!\n\n", ciErrNum);
        return -1000;
    }

    if(num_platforms == 0)
    {
        shrLog("No OpenCL platform found!\n\n");
        return -2000;
    }

    // if there's a platform or more, make space for ID's
    if ((clPlatformIDs = (cl_platform_id*)malloc(num_platforms * sizeof(cl_platform_id))) == NULL)
    {
        shrLog("Failed to allocate memory for cl_platform ID's!\n\n");
        return -3000;
    }

    // Fetch the IDs; on failure the array contents are unusable, so bail out
    ciErrNum = clGetPlatformIDs (num_platforms, clPlatformIDs, NULL);
    if (ciErrNum != CL_SUCCESS)
    {
        shrLog(" Error %i in clGetPlatformIDs Call !!!\n\n", ciErrNum);
        free(clPlatformIDs);
        return -1000;
    }

    // get platform info for each platform and trap the NVIDIA platform if found
    for(cl_uint i = 0; i < num_platforms; ++i)
    {
        ciErrNum = clGetPlatformInfo (clPlatformIDs[i], CL_PLATFORM_NAME, sizeof(chBuffer), chBuffer, NULL);
        // A platform whose name cannot be queried is simply skipped
        if(ciErrNum == CL_SUCCESS && strstr(chBuffer, "NVIDIA") != NULL)
        {
            *clSelectedPlatformID = clPlatformIDs[i];
            break;
        }
    }

    // default to zeroeth platform if NVIDIA not found
    if(*clSelectedPlatformID == NULL)
    {
        shrLog("WARNING: NVIDIA OpenCL platform not found - defaulting to first platform!\n\n");
        *clSelectedPlatformID = clPlatformIDs[0];
    }

    free(clPlatformIDs);

    return CL_SUCCESS;
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Print the device name
|
||||
//!
|
||||
//! @param iLogMode enum LOGBOTH, LOGCONSOLE, LOGFILE
|
||||
//! @param device OpenCL id of the device
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
void oclPrintDevName(int iLogMode, cl_device_id device)
|
||||
{
|
||||
char device_string[1024];
|
||||
clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_string), &device_string, NULL);
|
||||
shrLogEx(iLogMode, 0, "%s", device_string);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Print info about the device
|
||||
//!
|
||||
//! @param iLogMode enum LOGBOTH, LOGCONSOLE, LOGFILE
|
||||
//! @param device OpenCL id of the device
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
void oclPrintDevInfo(int iLogMode, cl_device_id device)
|
||||
{
|
||||
char device_string[1024];
|
||||
bool nv_device_attibute_query = false;
|
||||
|
||||
// CL_DEVICE_NAME
|
||||
clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_string), &device_string, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_NAME: \t\t\t%s\n", device_string);
|
||||
|
||||
// CL_DEVICE_VENDOR
|
||||
clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(device_string), &device_string, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_VENDOR: \t\t\t%s\n", device_string);
|
||||
|
||||
// CL_DRIVER_VERSION
|
||||
clGetDeviceInfo(device, CL_DRIVER_VERSION, sizeof(device_string), &device_string, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DRIVER_VERSION: \t\t\t%s\n", device_string);
|
||||
|
||||
// CL_DEVICE_VERSION
|
||||
clGetDeviceInfo(device, CL_DEVICE_VERSION, sizeof(device_string), &device_string, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_VERSION: \t\t\t%s\n", device_string);
|
||||
|
||||
// CL_DEVICE_OPENCL_C_VERSION (if CL_DEVICE_VERSION version > 1.0)
|
||||
if(strncmp("OpenCL 1.0", device_string, 10) != 0)
|
||||
{
|
||||
// This code is unused for devices reporting OpenCL 1.0, but a def is needed anyway to allow compilation using v 1.0 headers
|
||||
// This constant isn't #defined in 1.0
|
||||
#ifndef CL_DEVICE_OPENCL_C_VERSION
|
||||
#define CL_DEVICE_OPENCL_C_VERSION 0x103D
|
||||
#endif
|
||||
|
||||
clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION, sizeof(device_string), &device_string, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_OPENCL_C_VERSION: \t\t%s\n", device_string);
|
||||
}
|
||||
|
||||
// CL_DEVICE_TYPE
|
||||
cl_device_type type;
|
||||
clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(type), &type, NULL);
|
||||
if( type & CL_DEVICE_TYPE_CPU )
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_CPU");
|
||||
if( type & CL_DEVICE_TYPE_GPU )
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_GPU");
|
||||
if( type & CL_DEVICE_TYPE_ACCELERATOR )
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_ACCELERATOR");
|
||||
if( type & CL_DEVICE_TYPE_DEFAULT )
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_DEFAULT");
|
||||
|
||||
// CL_DEVICE_MAX_COMPUTE_UNITS
|
||||
cl_uint compute_units;
|
||||
clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(compute_units), &compute_units, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_MAX_COMPUTE_UNITS:\t\t%u\n", compute_units);
|
||||
|
||||
// CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS
|
||||
size_t workitem_dims;
|
||||
clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(workitem_dims), &workitem_dims, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS:\t%u\n", workitem_dims);
|
||||
|
||||
// CL_DEVICE_MAX_WORK_ITEM_SIZES
|
||||
size_t workitem_size[3];
|
||||
clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(workitem_size), &workitem_size, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_MAX_WORK_ITEM_SIZES:\t%u / %u / %u \n", workitem_size[0], workitem_size[1], workitem_size[2]);
|
||||
|
||||
// CL_DEVICE_MAX_WORK_GROUP_SIZE
|
||||
size_t workgroup_size;
|
||||
clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(workgroup_size), &workgroup_size, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_MAX_WORK_GROUP_SIZE:\t%u\n", workgroup_size);
|
||||
|
||||
// CL_DEVICE_MAX_CLOCK_FREQUENCY
|
||||
cl_uint clock_frequency;
|
||||
clGetDeviceInfo(device, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clock_frequency), &clock_frequency, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_MAX_CLOCK_FREQUENCY:\t%u MHz\n", clock_frequency);
|
||||
|
||||
// CL_DEVICE_ADDRESS_BITS
|
||||
cl_uint addr_bits;
|
||||
clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(addr_bits), &addr_bits, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_ADDRESS_BITS:\t\t%u\n", addr_bits);
|
||||
|
||||
// CL_DEVICE_MAX_MEM_ALLOC_SIZE
|
||||
cl_ulong max_mem_alloc_size;
|
||||
clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(max_mem_alloc_size), &max_mem_alloc_size, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_MAX_MEM_ALLOC_SIZE:\t\t%u MByte\n", (unsigned int)(max_mem_alloc_size / (1024 * 1024)));
|
||||
|
||||
// CL_DEVICE_GLOBAL_MEM_SIZE
|
||||
cl_ulong mem_size;
|
||||
clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(mem_size), &mem_size, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_GLOBAL_MEM_SIZE:\t\t%u MByte\n", (unsigned int)(mem_size / (1024 * 1024)));
|
||||
|
||||
// CL_DEVICE_ERROR_CORRECTION_SUPPORT
|
||||
cl_bool error_correction_support;
|
||||
clGetDeviceInfo(device, CL_DEVICE_ERROR_CORRECTION_SUPPORT, sizeof(error_correction_support), &error_correction_support, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_ERROR_CORRECTION_SUPPORT:\t%s\n", error_correction_support == CL_TRUE ? "yes" : "no");
|
||||
|
||||
// CL_DEVICE_LOCAL_MEM_TYPE
|
||||
cl_device_local_mem_type local_mem_type;
|
||||
clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_TYPE, sizeof(local_mem_type), &local_mem_type, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_LOCAL_MEM_TYPE:\t\t%s\n", local_mem_type == 1 ? "local" : "global");
|
||||
|
||||
// CL_DEVICE_LOCAL_MEM_SIZE
|
||||
clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(mem_size), &mem_size, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_LOCAL_MEM_SIZE:\t\t%u KByte\n", (unsigned int)(mem_size / 1024));
|
||||
|
||||
// CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE
|
||||
clGetDeviceInfo(device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(mem_size), &mem_size, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE:\t%u KByte\n", (unsigned int)(mem_size / 1024));
|
||||
|
||||
// CL_DEVICE_QUEUE_PROPERTIES
|
||||
cl_command_queue_properties queue_properties;
|
||||
clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES, sizeof(queue_properties), &queue_properties, NULL);
|
||||
if( queue_properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE )
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_QUEUE_PROPERTIES:\t\t%s\n", "CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE");
|
||||
if( queue_properties & CL_QUEUE_PROFILING_ENABLE )
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_QUEUE_PROPERTIES:\t\t%s\n", "CL_QUEUE_PROFILING_ENABLE");
|
||||
|
||||
// CL_DEVICE_IMAGE_SUPPORT
|
||||
cl_bool image_support;
|
||||
clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(image_support), &image_support, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_IMAGE_SUPPORT:\t\t%u\n", image_support);
|
||||
|
||||
// CL_DEVICE_MAX_READ_IMAGE_ARGS
|
||||
cl_uint max_read_image_args;
|
||||
clGetDeviceInfo(device, CL_DEVICE_MAX_READ_IMAGE_ARGS, sizeof(max_read_image_args), &max_read_image_args, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_MAX_READ_IMAGE_ARGS:\t%u\n", max_read_image_args);
|
||||
|
||||
// CL_DEVICE_MAX_WRITE_IMAGE_ARGS
|
||||
cl_uint max_write_image_args;
|
||||
clGetDeviceInfo(device, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, sizeof(max_write_image_args), &max_write_image_args, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_MAX_WRITE_IMAGE_ARGS:\t%u\n", max_write_image_args);
|
||||
|
||||
// CL_DEVICE_SINGLE_FP_CONFIG
|
||||
cl_device_fp_config fp_config;
|
||||
clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(cl_device_fp_config), &fp_config, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_SINGLE_FP_CONFIG:\t\t%s%s%s%s%s%s\n",
|
||||
fp_config & CL_FP_DENORM ? "denorms " : "",
|
||||
fp_config & CL_FP_INF_NAN ? "INF-quietNaNs " : "",
|
||||
fp_config & CL_FP_ROUND_TO_NEAREST ? "round-to-nearest " : "",
|
||||
fp_config & CL_FP_ROUND_TO_ZERO ? "round-to-zero " : "",
|
||||
fp_config & CL_FP_ROUND_TO_INF ? "round-to-inf " : "",
|
||||
fp_config & CL_FP_FMA ? "fma " : "");
|
||||
|
||||
// CL_DEVICE_IMAGE2D_MAX_WIDTH, CL_DEVICE_IMAGE2D_MAX_HEIGHT, CL_DEVICE_IMAGE3D_MAX_WIDTH, CL_DEVICE_IMAGE3D_MAX_HEIGHT, CL_DEVICE_IMAGE3D_MAX_DEPTH
|
||||
size_t szMaxDims[5];
|
||||
shrLogEx(iLogMode, 0, "\n CL_DEVICE_IMAGE <dim>");
|
||||
clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t), &szMaxDims[0], NULL);
|
||||
shrLogEx(iLogMode, 0, "\t\t\t2D_MAX_WIDTH\t %u\n", szMaxDims[0]);
|
||||
clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t), &szMaxDims[1], NULL);
|
||||
shrLogEx(iLogMode, 0, "\t\t\t\t\t2D_MAX_HEIGHT\t %u\n", szMaxDims[1]);
|
||||
clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(size_t), &szMaxDims[2], NULL);
|
||||
shrLogEx(iLogMode, 0, "\t\t\t\t\t3D_MAX_WIDTH\t %u\n", szMaxDims[2]);
|
||||
clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(size_t), &szMaxDims[3], NULL);
|
||||
shrLogEx(iLogMode, 0, "\t\t\t\t\t3D_MAX_HEIGHT\t %u\n", szMaxDims[3]);
|
||||
clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(size_t), &szMaxDims[4], NULL);
|
||||
shrLogEx(iLogMode, 0, "\t\t\t\t\t3D_MAX_DEPTH\t %u\n", szMaxDims[4]);
|
||||
|
||||
// CL_DEVICE_EXTENSIONS: get device extensions, and if any then parse & log the string onto separate lines
|
||||
clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, sizeof(device_string), &device_string, NULL);
|
||||
if (device_string != 0)
|
||||
{
|
||||
shrLogEx(iLogMode, 0, "\n CL_DEVICE_EXTENSIONS:");
|
||||
std::string stdDevString;
|
||||
stdDevString = std::string(device_string);
|
||||
size_t szOldPos = 0;
|
||||
size_t szSpacePos = stdDevString.find(' ', szOldPos); // extensions string is space delimited
|
||||
while (szSpacePos != stdDevString.npos)
|
||||
{
|
||||
if( strcmp("cl_nv_device_attribute_query", stdDevString.substr(szOldPos, szSpacePos - szOldPos).c_str()) == 0 )
|
||||
nv_device_attibute_query = true;
|
||||
|
||||
if (szOldPos > 0)
|
||||
{
|
||||
shrLogEx(iLogMode, 0, "\t\t");
|
||||
}
|
||||
shrLogEx(iLogMode, 0, "\t\t\t%s\n", stdDevString.substr(szOldPos, szSpacePos - szOldPos).c_str());
|
||||
|
||||
do {
|
||||
szOldPos = szSpacePos + 1;
|
||||
szSpacePos = stdDevString.find(' ', szOldPos);
|
||||
} while (szSpacePos == szOldPos);
|
||||
}
|
||||
shrLogEx(iLogMode, 0, "\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_EXTENSIONS: None\n");
|
||||
}
|
||||
|
||||
if(nv_device_attibute_query)
|
||||
{
|
||||
cl_uint compute_capability_major, compute_capability_minor;
|
||||
clGetDeviceInfo(device, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, sizeof(cl_uint), &compute_capability_major, NULL);
|
||||
clGetDeviceInfo(device, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, sizeof(cl_uint), &compute_capability_minor, NULL);
|
||||
shrLogEx(iLogMode, 0, "\n CL_DEVICE_COMPUTE_CAPABILITY_NV:\t%u.%u\n", compute_capability_major, compute_capability_minor);
|
||||
|
||||
shrLogEx(iLogMode, 0, " NUMBER OF MULTIPROCESSORS:\t\t%u\n", compute_units); // this is the same value reported by CL_DEVICE_MAX_COMPUTE_UNITS
|
||||
shrLogEx(iLogMode, 0, " NUMBER OF CUDA CORES:\t\t\t%u\n", ConvertSMVer2Cores(compute_capability_major, compute_capability_minor) * compute_units);
|
||||
|
||||
cl_uint regs_per_block;
|
||||
clGetDeviceInfo(device, CL_DEVICE_REGISTERS_PER_BLOCK_NV, sizeof(cl_uint), ®s_per_block, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_REGISTERS_PER_BLOCK_NV:\t%u\n", regs_per_block);
|
||||
|
||||
cl_uint warp_size;
|
||||
clGetDeviceInfo(device, CL_DEVICE_WARP_SIZE_NV, sizeof(cl_uint), &warp_size, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_WARP_SIZE_NV:\t\t%u\n", warp_size);
|
||||
|
||||
cl_bool gpu_overlap;
|
||||
clGetDeviceInfo(device, CL_DEVICE_GPU_OVERLAP_NV, sizeof(cl_bool), &gpu_overlap, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_GPU_OVERLAP_NV:\t\t%s\n", gpu_overlap == CL_TRUE ? "CL_TRUE" : "CL_FALSE");
|
||||
|
||||
cl_bool exec_timeout;
|
||||
clGetDeviceInfo(device, CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV, sizeof(cl_bool), &exec_timeout, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV:\t%s\n", exec_timeout == CL_TRUE ? "CL_TRUE" : "CL_FALSE");
|
||||
|
||||
cl_bool integrated_memory;
|
||||
clGetDeviceInfo(device, CL_DEVICE_INTEGRATED_MEMORY_NV, sizeof(cl_bool), &integrated_memory, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_INTEGRATED_MEMORY_NV:\t%s\n", integrated_memory == CL_TRUE ? "CL_TRUE" : "CL_FALSE");
|
||||
}
|
||||
|
||||
// CL_DEVICE_PREFERRED_VECTOR_WIDTH_<type>
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_PREFERRED_VECTOR_WIDTH_<t>\t");
|
||||
cl_uint vec_width [6];
|
||||
clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, sizeof(cl_uint), &vec_width[0], NULL);
|
||||
clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, sizeof(cl_uint), &vec_width[1], NULL);
|
||||
clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), &vec_width[2], NULL);
|
||||
clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, sizeof(cl_uint), &vec_width[3], NULL);
|
||||
clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, sizeof(cl_uint), &vec_width[4], NULL);
|
||||
clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, sizeof(cl_uint), &vec_width[5], NULL);
|
||||
shrLogEx(iLogMode, 0, "CHAR %u, SHORT %u, INT %u, LONG %u, FLOAT %u, DOUBLE %u\n\n\n",
|
||||
vec_width[0], vec_width[1], vec_width[2], vec_width[3], vec_width[4], vec_width[5]);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Get and return device capability
|
||||
//!
|
||||
//! @return the 2 digit integer representation of device Cap (major minor). return -1 if NA
|
||||
//! @param device OpenCL id of the device
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
int oclGetDevCap(cl_device_id device)
|
||||
{
|
||||
char cDevString[1024];
|
||||
bool bDevAttributeQuery = false;
|
||||
int iDevArch = -1;
|
||||
|
||||
// Get device extensions, and if any then search for cl_nv_device_attribute_query
|
||||
clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, sizeof(cDevString), &cDevString, NULL);
|
||||
if (cDevString != 0)
|
||||
{
|
||||
std::string stdDevString;
|
||||
stdDevString = std::string(cDevString);
|
||||
size_t szOldPos = 0;
|
||||
size_t szSpacePos = stdDevString.find(' ', szOldPos); // extensions string is space delimited
|
||||
while (szSpacePos != stdDevString.npos)
|
||||
{
|
||||
if( strcmp("cl_nv_device_attribute_query", stdDevString.substr(szOldPos, szSpacePos - szOldPos).c_str()) == 0 )
|
||||
{
|
||||
bDevAttributeQuery = true;
|
||||
}
|
||||
|
||||
do {
|
||||
szOldPos = szSpacePos + 1;
|
||||
szSpacePos = stdDevString.find(' ', szOldPos);
|
||||
} while (szSpacePos == szOldPos);
|
||||
}
|
||||
}
|
||||
|
||||
// if search succeeded, get device caps
|
||||
if(bDevAttributeQuery)
|
||||
{
|
||||
cl_int iComputeCapMajor, iComputeCapMinor;
|
||||
clGetDeviceInfo(device, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, sizeof(cl_uint), (void*)&iComputeCapMajor, NULL);
|
||||
clGetDeviceInfo(device, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, sizeof(cl_uint), (void*)&iComputeCapMinor, NULL);
|
||||
iDevArch = (10 * iComputeCapMajor) + iComputeCapMinor;
|
||||
}
|
||||
|
||||
return iDevArch;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Gets the id of the first device from the context
|
||||
//!
|
||||
//! @return the id
|
||||
//! @param cxGPUContext OpenCL context
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
cl_device_id oclGetFirstDev(cl_context cxGPUContext)
|
||||
{
|
||||
size_t szParmDataBytes;
|
||||
cl_device_id* cdDevices;
|
||||
|
||||
// get the list of GPU devices associated with context
|
||||
clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes);
|
||||
cdDevices = (cl_device_id*) malloc(szParmDataBytes);
|
||||
|
||||
clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL);
|
||||
|
||||
cl_device_id first = cdDevices[0];
|
||||
free(cdDevices);
|
||||
|
||||
return first;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Gets the id of device with maximal FLOPS from the context
|
||||
//!
|
||||
//! @return the id
|
||||
//! @param cxGPUContext OpenCL context
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
cl_device_id oclGetMaxFlopsDev(cl_context cxGPUContext)
|
||||
{
|
||||
size_t szParmDataBytes;
|
||||
cl_device_id* cdDevices;
|
||||
|
||||
// get the list of GPU devices associated with context
|
||||
clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes);
|
||||
cdDevices = (cl_device_id*) malloc(szParmDataBytes);
|
||||
size_t device_count = szParmDataBytes / sizeof(cl_device_id);
|
||||
|
||||
clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL);
|
||||
|
||||
cl_device_id max_flops_device = cdDevices[0];
|
||||
int max_flops = 0;
|
||||
|
||||
size_t current_device = 0;
|
||||
|
||||
// CL_DEVICE_MAX_COMPUTE_UNITS
|
||||
cl_uint compute_units;
|
||||
clGetDeviceInfo(cdDevices[current_device], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(compute_units), &compute_units, NULL);
|
||||
|
||||
// CL_DEVICE_MAX_CLOCK_FREQUENCY
|
||||
cl_uint clock_frequency;
|
||||
clGetDeviceInfo(cdDevices[current_device], CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clock_frequency), &clock_frequency, NULL);
|
||||
|
||||
max_flops = compute_units * clock_frequency;
|
||||
++current_device;
|
||||
|
||||
while( current_device < device_count )
|
||||
{
|
||||
// CL_DEVICE_MAX_COMPUTE_UNITS
|
||||
cl_uint compute_units;
|
||||
clGetDeviceInfo(cdDevices[current_device], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(compute_units), &compute_units, NULL);
|
||||
|
||||
// CL_DEVICE_MAX_CLOCK_FREQUENCY
|
||||
cl_uint clock_frequency;
|
||||
clGetDeviceInfo(cdDevices[current_device], CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clock_frequency), &clock_frequency, NULL);
|
||||
|
||||
int flops = compute_units * clock_frequency;
|
||||
if( flops > max_flops )
|
||||
{
|
||||
max_flops = flops;
|
||||
max_flops_device = cdDevices[current_device];
|
||||
}
|
||||
++current_device;
|
||||
}
|
||||
|
||||
free(cdDevices);
|
||||
|
||||
return max_flops_device;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
//! Loads a Program file and prepends the cPreamble to the code.
//!
//! The returned buffer is allocated with malloc() and NUL terminated; the
//! caller releases it with free().
//!
//! @return the source string if succeeded, 0 otherwise
//! @param cFilename        program filename
//! @param cPreamble        code that is prepended to the loaded file, typically a set of #defines or a header
//!                         (may be NULL, which is treated as an empty preamble)
//! @param szFinalLength    returned length of the code string (preamble + file, terminator excluded);
//!                         may be NULL when the length is not needed
//////////////////////////////////////////////////////////////////////////////
char* oclLoadProgSource(const char* cFilename, const char* cPreamble, size_t* szFinalLength)
{
    if (cFilename == NULL)
    {
        return NULL;
    }

    // open the OpenCL source code file
    FILE* pFileStream = NULL;
    #ifdef _WIN32   // Windows version
        if (fopen_s(&pFileStream, cFilename, "rb") != 0)
        {
            return NULL;
        }
    #else           // Linux version
        pFileStream = fopen(cFilename, "rb");
        if (pFileStream == NULL)
        {
            return NULL;
        }
    #endif

    const size_t szPreambleLength = (cPreamble != NULL) ? strlen(cPreamble) : 0;

    // get the length of the source code; ftell reports failure as -1, which
    // must not be converted to a (huge) size_t
    long lSourceLength = -1;
    if (fseek(pFileStream, 0, SEEK_END) == 0)
    {
        lSourceLength = ftell(pFileStream);
    }
    if (lSourceLength < 0 || fseek(pFileStream, 0, SEEK_SET) != 0)
    {
        fclose(pFileStream);
        return NULL;
    }
    const size_t szSourceLength = (size_t)lSourceLength;
    const size_t szTotalLength = szSourceLength + szPreambleLength;

    // allocate a buffer for the source code string and read it in
    char* cSourceString = (char*)malloc(szTotalLength + 1);
    if (cSourceString == NULL)
    {
        fclose(pFileStream);
        return NULL;
    }
    if (szPreambleLength > 0)
    {
        memcpy(cSourceString, cPreamble, szPreambleLength);
    }
    // fread() with an element size of 0 returns 0, so an empty file must skip
    // the read instead of being reported as an error
    if (szSourceLength > 0 &&
        fread(cSourceString + szPreambleLength, szSourceLength, 1, pFileStream) != 1)
    {
        fclose(pFileStream);
        free(cSourceString);
        return NULL;
    }

    // close the file and return the total length of the combined (preamble + source) string
    fclose(pFileStream);
    cSourceString[szTotalLength] = '\0';
    if (szFinalLength != NULL)
    {
        *szFinalLength = szTotalLength;
    }

    return cSourceString;
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Gets the id of the nth device from the context
|
||||
//!
|
||||
//! @return the id or -1 when out of range
|
||||
//! @param cxGPUContext OpenCL context
|
||||
//! @param device_idx index of the device of interest
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
cl_device_id oclGetDev(cl_context cxGPUContext, unsigned int nr)
|
||||
{
|
||||
size_t szParmDataBytes;
|
||||
cl_device_id* cdDevices;
|
||||
|
||||
// get the list of GPU devices associated with context
|
||||
clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes);
|
||||
|
||||
if( szParmDataBytes / sizeof(cl_device_id) <= nr ) {
|
||||
return (cl_device_id)-1;
|
||||
}
|
||||
|
||||
cdDevices = (cl_device_id*) malloc(szParmDataBytes);
|
||||
|
||||
clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL);
|
||||
|
||||
cl_device_id device = cdDevices[nr];
|
||||
free(cdDevices);
|
||||
|
||||
return device;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Get the binary (PTX) of the program associated with the device
|
||||
//!
|
||||
//! @param cpProgram OpenCL program
|
||||
//! @param cdDevice device of interest
|
||||
//! @param binary returned code
|
||||
//! @param length length of returned code
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
void oclGetProgBinary( cl_program cpProgram, cl_device_id cdDevice, char** binary, size_t* length)
|
||||
{
|
||||
// Grab the number of devices associated witht the program
|
||||
cl_uint num_devices;
|
||||
clGetProgramInfo(cpProgram, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &num_devices, NULL);
|
||||
|
||||
// Grab the device ids
|
||||
cl_device_id* devices = (cl_device_id*) malloc(num_devices * sizeof(cl_device_id));
|
||||
clGetProgramInfo(cpProgram, CL_PROGRAM_DEVICES, num_devices * sizeof(cl_device_id), devices, 0);
|
||||
|
||||
// Grab the sizes of the binaries
|
||||
size_t* binary_sizes = (size_t*)malloc(num_devices * sizeof(size_t));
|
||||
clGetProgramInfo(cpProgram, CL_PROGRAM_BINARY_SIZES, num_devices * sizeof(size_t), binary_sizes, NULL);
|
||||
|
||||
// Now get the binaries
|
||||
char** ptx_code = (char**) malloc(num_devices * sizeof(char*));
|
||||
for( unsigned int i=0; i<num_devices; ++i) {
|
||||
ptx_code[i]= (char*)malloc(binary_sizes[i]);
|
||||
}
|
||||
clGetProgramInfo(cpProgram, CL_PROGRAM_BINARIES, 0, ptx_code, NULL);
|
||||
|
||||
// Find the index of the device of interest
|
||||
unsigned int idx = 0;
|
||||
while( idx<num_devices && devices[idx] != cdDevice ) ++idx;
|
||||
|
||||
// If it is associated prepare the result
|
||||
if( idx < num_devices )
|
||||
{
|
||||
*binary = ptx_code[idx];
|
||||
*length = binary_sizes[idx];
|
||||
}
|
||||
|
||||
// Cleanup
|
||||
free( devices );
|
||||
free( binary_sizes );
|
||||
for( unsigned int i=0; i<num_devices; ++i) {
|
||||
if( i != idx ) free(ptx_code[i]);
|
||||
}
|
||||
free( ptx_code );
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Get and log the binary (PTX) from the OpenCL compiler for the requested program & device
|
||||
//!
|
||||
//! @param cpProgram OpenCL program
|
||||
//! @param cdDevice device of interest
|
||||
//! @param const char* cPtxFileName optional PTX file name
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
void oclLogPtx(cl_program cpProgram, cl_device_id cdDevice, const char* cPtxFileName)
|
||||
{
|
||||
// Grab the number of devices associated with the program
|
||||
cl_uint num_devices;
|
||||
clGetProgramInfo(cpProgram, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &num_devices, NULL);
|
||||
|
||||
// Grab the device ids
|
||||
cl_device_id* devices = (cl_device_id*) malloc(num_devices * sizeof(cl_device_id));
|
||||
clGetProgramInfo(cpProgram, CL_PROGRAM_DEVICES, num_devices * sizeof(cl_device_id), devices, 0);
|
||||
|
||||
// Grab the sizes of the binaries
|
||||
size_t* binary_sizes = (size_t*)malloc(num_devices * sizeof(size_t));
|
||||
clGetProgramInfo(cpProgram, CL_PROGRAM_BINARY_SIZES, num_devices * sizeof(size_t), binary_sizes, NULL);
|
||||
|
||||
// Now get the binaries
|
||||
char** ptx_code = (char**)malloc(num_devices * sizeof(char*));
|
||||
for( unsigned int i=0; i<num_devices; ++i)
|
||||
{
|
||||
ptx_code[i] = (char*)malloc(binary_sizes[i]);
|
||||
}
|
||||
clGetProgramInfo(cpProgram, CL_PROGRAM_BINARIES, 0, ptx_code, NULL);
|
||||
|
||||
// Find the index of the device of interest
|
||||
unsigned int idx = 0;
|
||||
while((idx < num_devices) && (devices[idx] != cdDevice))
|
||||
{
|
||||
++idx;
|
||||
}
|
||||
|
||||
// If the index is associated, log the result
|
||||
if(idx < num_devices)
|
||||
{
|
||||
|
||||
// if a separate filename is supplied, dump ptx there
|
||||
if (NULL != cPtxFileName)
|
||||
{
|
||||
shrLog("\nWriting ptx to separate file: %s ...\n\n", cPtxFileName);
|
||||
FILE* pFileStream = NULL;
|
||||
#ifdef _WIN32
|
||||
fopen_s(&pFileStream, cPtxFileName, "wb");
|
||||
#else
|
||||
pFileStream = fopen(cPtxFileName, "wb");
|
||||
#endif
|
||||
|
||||
fwrite(ptx_code[idx], binary_sizes[idx], 1, pFileStream);
|
||||
fclose(pFileStream);
|
||||
}
|
||||
else // log to logfile and console if no ptx file specified
|
||||
{
|
||||
shrLog("\n%s\nProgram Binary:\n%s\n%s\n", HDASHLINE, ptx_code[idx], HDASHLINE);
|
||||
}
|
||||
}
|
||||
|
||||
// Cleanup
|
||||
free(devices);
|
||||
free(binary_sizes);
|
||||
for(unsigned int i = 0; i < num_devices; ++i)
|
||||
{
|
||||
free(ptx_code[i]);
|
||||
}
|
||||
free( ptx_code );
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Get and log the binary (PTX) from the OpenCL compiler for the requested program & device
|
||||
//!
|
||||
//! @param cpProgram OpenCL program
|
||||
//! @param cdDevice device of interest
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
void oclLogBuildInfo(cl_program cpProgram, cl_device_id cdDevice)
|
||||
{
|
||||
// write out the build log and ptx, then exit
|
||||
char cBuildLog[10240];
|
||||
clGetProgramBuildInfo(cpProgram, cdDevice, CL_PROGRAM_BUILD_LOG,
|
||||
sizeof(cBuildLog), cBuildLog, NULL );
|
||||
shrLog("\n%s\nBuild Log:\n%s\n%s\n", HDASHLINE, cBuildLog, HDASHLINE);
|
||||
}
|
||||
|
||||
// Helper function for De-allocating cl objects
|
||||
// *********************************************************************
|
||||
void oclDeleteMemObjs(cl_mem* cmMemObjs, int iNumObjs)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < iNumObjs; i++)
|
||||
{
|
||||
if (cmMemObjs[i])clReleaseMemObject(cmMemObjs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Helper function to get OpenCL error string from constant
|
||||
// *********************************************************************
|
||||
const char* oclErrorString(cl_int error)
|
||||
{
|
||||
static const char* errorString[] = {
|
||||
"CL_SUCCESS",
|
||||
"CL_DEVICE_NOT_FOUND",
|
||||
"CL_DEVICE_NOT_AVAILABLE",
|
||||
"CL_COMPILER_NOT_AVAILABLE",
|
||||
"CL_MEM_OBJECT_ALLOCATION_FAILURE",
|
||||
"CL_OUT_OF_RESOURCES",
|
||||
"CL_OUT_OF_HOST_MEMORY",
|
||||
"CL_PROFILING_INFO_NOT_AVAILABLE",
|
||||
"CL_MEM_COPY_OVERLAP",
|
||||
"CL_IMAGE_FORMAT_MISMATCH",
|
||||
"CL_IMAGE_FORMAT_NOT_SUPPORTED",
|
||||
"CL_BUILD_PROGRAM_FAILURE",
|
||||
"CL_MAP_FAILURE",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"CL_INVALID_VALUE",
|
||||
"CL_INVALID_DEVICE_TYPE",
|
||||
"CL_INVALID_PLATFORM",
|
||||
"CL_INVALID_DEVICE",
|
||||
"CL_INVALID_CONTEXT",
|
||||
"CL_INVALID_QUEUE_PROPERTIES",
|
||||
"CL_INVALID_COMMAND_QUEUE",
|
||||
"CL_INVALID_HOST_PTR",
|
||||
"CL_INVALID_MEM_OBJECT",
|
||||
"CL_INVALID_IMAGE_FORMAT_DESCRIPTOR",
|
||||
"CL_INVALID_IMAGE_SIZE",
|
||||
"CL_INVALID_SAMPLER",
|
||||
"CL_INVALID_BINARY",
|
||||
"CL_INVALID_BUILD_OPTIONS",
|
||||
"CL_INVALID_PROGRAM",
|
||||
"CL_INVALID_PROGRAM_EXECUTABLE",
|
||||
"CL_INVALID_KERNEL_NAME",
|
||||
"CL_INVALID_KERNEL_DEFINITION",
|
||||
"CL_INVALID_KERNEL",
|
||||
"CL_INVALID_ARG_INDEX",
|
||||
"CL_INVALID_ARG_VALUE",
|
||||
"CL_INVALID_ARG_SIZE",
|
||||
"CL_INVALID_KERNEL_ARGS",
|
||||
"CL_INVALID_WORK_DIMENSION",
|
||||
"CL_INVALID_WORK_GROUP_SIZE",
|
||||
"CL_INVALID_WORK_ITEM_SIZE",
|
||||
"CL_INVALID_GLOBAL_OFFSET",
|
||||
"CL_INVALID_EVENT_WAIT_LIST",
|
||||
"CL_INVALID_EVENT",
|
||||
"CL_INVALID_OPERATION",
|
||||
"CL_INVALID_GL_OBJECT",
|
||||
"CL_INVALID_BUFFER_SIZE",
|
||||
"CL_INVALID_MIP_LEVEL",
|
||||
"CL_INVALID_GLOBAL_WORK_SIZE",
|
||||
};
|
||||
|
||||
const int errorCount = sizeof(errorString) / sizeof(errorString[0]);
|
||||
|
||||
const int index = -error;
|
||||
|
||||
return (index >= 0 && index < errorCount) ? errorString[index] : "Unspecified Error";
|
||||
}
|
||||
|
||||
// Helper function to get OpenCL image format string (channel order and type) from constant
|
||||
// *********************************************************************
|
||||
const char* oclImageFormatString(cl_uint uiImageFormat)
|
||||
{
|
||||
// cl_channel_order
|
||||
if (uiImageFormat == CL_R)return "CL_R";
|
||||
if (uiImageFormat == CL_A)return "CL_A";
|
||||
if (uiImageFormat == CL_RG)return "CL_RG";
|
||||
if (uiImageFormat == CL_RA)return "CL_RA";
|
||||
if (uiImageFormat == CL_RGB)return "CL_RGB";
|
||||
if (uiImageFormat == CL_RGBA)return "CL_RGBA";
|
||||
if (uiImageFormat == CL_BGRA)return "CL_BGRA";
|
||||
if (uiImageFormat == CL_ARGB)return "CL_ARGB";
|
||||
if (uiImageFormat == CL_INTENSITY)return "CL_INTENSITY";
|
||||
if (uiImageFormat == CL_LUMINANCE)return "CL_LUMINANCE";
|
||||
|
||||
// cl_channel_type
|
||||
if (uiImageFormat == CL_SNORM_INT8)return "CL_SNORM_INT8";
|
||||
if (uiImageFormat == CL_SNORM_INT16)return "CL_SNORM_INT16";
|
||||
if (uiImageFormat == CL_UNORM_INT8)return "CL_UNORM_INT8";
|
||||
if (uiImageFormat == CL_UNORM_INT16)return "CL_UNORM_INT16";
|
||||
if (uiImageFormat == CL_UNORM_SHORT_565)return "CL_UNORM_SHORT_565";
|
||||
if (uiImageFormat == CL_UNORM_SHORT_555)return "CL_UNORM_SHORT_555";
|
||||
if (uiImageFormat == CL_UNORM_INT_101010)return "CL_UNORM_INT_101010";
|
||||
if (uiImageFormat == CL_SIGNED_INT8)return "CL_SIGNED_INT8";
|
||||
if (uiImageFormat == CL_SIGNED_INT16)return "CL_SIGNED_INT16";
|
||||
if (uiImageFormat == CL_SIGNED_INT32)return "CL_SIGNED_INT32";
|
||||
if (uiImageFormat == CL_UNSIGNED_INT8)return "CL_UNSIGNED_INT8";
|
||||
if (uiImageFormat == CL_UNSIGNED_INT16)return "CL_UNSIGNED_INT16";
|
||||
if (uiImageFormat == CL_UNSIGNED_INT32)return "CL_UNSIGNED_INT32";
|
||||
if (uiImageFormat == CL_HALF_FLOAT)return "CL_HALF_FLOAT";
|
||||
if (uiImageFormat == CL_FLOAT)return "CL_FLOAT";
|
||||
|
||||
// unknown constant
|
||||
return "Unknown";
|
||||
}
|
||||
198
tests/opencl/blackscholes/oclUtils.h
Normal file
198
tests/opencl/blackscholes/oclUtils.h
Normal file
@@ -0,0 +1,198 @@
|
||||
/*
|
||||
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Please refer to the NVIDIA end user license agreement (EULA) associated
|
||||
* with this source code for terms and conditions that govern your use of
|
||||
* this software. Any use, reproduction, disclosure, or distribution of
|
||||
* this software and related documentation outside the terms of the EULA
|
||||
* is strictly prohibited.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef OCL_UTILS_H
|
||||
#define OCL_UTILS_H
|
||||
|
||||
// *********************************************************************
|
||||
// Utilities specific to OpenCL samples in NVIDIA GPU Computing SDK
|
||||
// *********************************************************************
|
||||
|
||||
// Common headers: Cross-API utilities and OpenCL header
|
||||
#include <shrUtils.h>
|
||||
|
||||
// All OpenCL headers
|
||||
#if defined (__APPLE__) || defined(MACOSX)
|
||||
#include <OpenCL/opencl.h>
|
||||
#else
|
||||
#include <CL/opencl.h>
|
||||
#endif
|
||||
|
||||
// Includes
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
// For systems with CL_EXT that are not updated with these extensions, we copied these
|
||||
// extensions from <CL/cl_ext.h>
|
||||
#ifndef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV
|
||||
/* cl_nv_device_attribute_query extension - no extension #define since it has no functions */
|
||||
#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000
|
||||
#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001
|
||||
#define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002
|
||||
#define CL_DEVICE_WARP_SIZE_NV 0x4003
|
||||
#define CL_DEVICE_GPU_OVERLAP_NV 0x4004
|
||||
#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005
|
||||
#define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006
|
||||
#endif
|
||||
|
||||
// reminders for build output window and log
|
||||
#ifdef _WIN32
|
||||
#pragma message ("Note: including shrUtils.h")
|
||||
#pragma message ("Note: including opencl.h")
|
||||
#endif
|
||||
|
||||
// SDK Revision #
|
||||
#define OCL_SDKREVISION "7027912"
|
||||
|
||||
// Error and Exit Handling Macros...
|
||||
// *********************************************************************
|
||||
// Full error handling macro with Cleanup() callback (if supplied)...
|
||||
// (Companion Inline Function lower on page)
|
||||
#define oclCheckErrorEX(a, b, c) __oclCheckErrorEX(a, b, c, __FILE__ , __LINE__)
|
||||
|
||||
// Short version without Cleanup() callback pointer
|
||||
// Both Input (a) and Reference (b) are specified as args
|
||||
#define oclCheckError(a, b) oclCheckErrorEX(a, b, 0)
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Gets the platform ID for NVIDIA if available, otherwise default to platform 0
|
||||
//!
|
||||
//! @return the id
|
||||
//! @param clSelectedPlatformID OpenCL platform ID
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" cl_int oclGetPlatformID(cl_platform_id* clSelectedPlatformID);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Print info about the device
|
||||
//!
|
||||
//! @param iLogMode enum LOGBOTH, LOGCONSOLE, LOGFILE
|
||||
//! @param device OpenCL id of the device
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" void oclPrintDevInfo(int iLogMode, cl_device_id device);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Get and return device capability
|
||||
//!
|
||||
//! @return the 2 digit integer representation of device Cap (major minor). return -1 if NA
|
||||
//! @param device OpenCL id of the device
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" int oclGetDevCap(cl_device_id device);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Print the device name
|
||||
//!
|
||||
//! @param iLogMode enum LOGBOTH, LOGCONSOLE, LOGFILE
|
||||
//! @param device OpenCL id of the device
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" void oclPrintDevName(int iLogMode, cl_device_id device);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Gets the id of the first device from the context
|
||||
//!
|
||||
//! @return the id
|
||||
//! @param cxGPUContext OpenCL context
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" cl_device_id oclGetFirstDev(cl_context cxGPUContext);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Gets the id of the nth device from the context
|
||||
//!
|
||||
//! @return the id or -1 when out of range
|
||||
//! @param cxGPUContext OpenCL context
|
||||
//! @param device_idx index of the device of interest
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" cl_device_id oclGetDev(cl_context cxGPUContext, unsigned int device_idx);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Gets the id of device with maximal FLOPS from the context
|
||||
//!
|
||||
//! @return the id
|
||||
//! @param cxGPUContext OpenCL context
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" cl_device_id oclGetMaxFlopsDev(cl_context cxGPUContext);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Loads a Program file and prepends the cPreamble to the code.
|
||||
//!
|
||||
//! @return the source string if succeeded, 0 otherwise
|
||||
//! @param cFilename program filename
|
||||
//! @param cPreamble code that is prepended to the loaded file, typically a set of #defines or a header
|
||||
//! @param szFinalLength returned length of the code string
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" char* oclLoadProgSource(const char* cFilename, const char* cPreamble, size_t* szFinalLength);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Get the binary (PTX) of the program associated with the device
|
||||
//!
|
||||
//! @param cpProgram OpenCL program
|
||||
//! @param cdDevice device of interest
|
||||
//! @param binary returned code
|
||||
//! @param length length of returned code
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" void oclGetProgBinary( cl_program cpProgram, cl_device_id cdDevice, char** binary, size_t* length);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Get and log the binary (PTX) from the OpenCL compiler for the requested program & device
|
||||
//!
|
||||
//! @param cpProgram OpenCL program
|
||||
//! @param cdDevice device of interest
|
||||
//! @param cPtxFileName optional PTX file name
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" void oclLogPtx(cl_program cpProgram, cl_device_id cdDevice, const char* cPtxFileName);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Get and log the Build Log from the OpenCL compiler for the requested program & device
|
||||
//!
|
||||
//! @param cpProgram OpenCL program
|
||||
//! @param cdDevice device of interest
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" void oclLogBuildInfo(cl_program cpProgram, cl_device_id cdDevice);
|
||||
|
||||
// Helper function for De-allocating cl objects
|
||||
// *********************************************************************
|
||||
extern "C" void oclDeleteMemObjs(cl_mem* cmMemObjs, int iNumObjs);
|
||||
|
||||
// Helper function to get OpenCL error string from constant
|
||||
// *********************************************************************
|
||||
extern "C" const char* oclErrorString(cl_int error);
|
||||
|
||||
// Helper function to get OpenCL image format string (channel order and type) from constant
|
||||
// *********************************************************************
|
||||
extern "C" const char* oclImageFormatString(cl_uint uiImageFormat);
|
||||
|
||||
// companion inline function for error checking and exit on error WITH Cleanup Callback (if supplied)
|
||||
// *********************************************************************
|
||||
// Compare a sample/test value against a reference value and treat any mismatch as a fatal error.
//
// @param iSample     value under test (an OpenCL status code)
// @param iReference  value iSample is expected to equal
// @param pCleanup    optional callback invoked with the error code on mismatch; may be NULL
// @param cFile       source file name reported in the log message
// @param iLine       source line number reported in the log message
//
// On mismatch the error is logged; then either pCleanup is called, or (when pCleanup is NULL)
// the log is closed and the process exits with the error code as its exit status.
inline void __oclCheckErrorEX(cl_int iSample, cl_int iReference, void (*pCleanup)(int), const char* cFile, const int iLine)
{
    // Equal values mean there is nothing to report
    if (iReference == iSample)
    {
        return;
    }

    // A mismatch is an error by definition, so a sample value of 0 is replaced by -9999
    if (iSample == 0)
    {
        iSample = -9999;
    }

    // Log the error info
    shrLog("\n !!! Error # %i (%s) at line %i , in file %s !!!\n\n", iSample, oclErrorString(iSample), iLine, cFile);

    // Without a cleanup callback: close the log and exit, using the error code as the process exit code
    if (pCleanup == NULL)
    {
        shrLogEx(LOGBOTH | CLOSELOG, 0, "Exiting...\n");
        exit(iSample);
    }

    // Otherwise hand the error code to the caller-supplied cleanup routine
    pCleanup(iSample);
}
|
||||
|
||||
#endif
|
||||
238
tests/opencl/blackscholes/shrQATest.h
Normal file
238
tests/opencl/blackscholes/shrQATest.h
Normal file
@@ -0,0 +1,238 @@
|
||||
/*
|
||||
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Please refer to the NVIDIA end user license agreement (EULA) associated
|
||||
* with this source code for terms and conditions that govern your use of
|
||||
* this software. Any use, reproduction, disclosure, or distribution of
|
||||
* this software and related documentation outside the terms of the EULA
|
||||
* is strictly prohibited.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef SHR_QATEST_H
|
||||
#define SHR_QATEST_H
|
||||
|
||||
// *********************************************************************
|
||||
// Generic utilities for NVIDIA GPU Computing SDK
|
||||
// *********************************************************************
|
||||
|
||||
// OS dependent includes
|
||||
#ifdef _WIN32
|
||||
#pragma message ("Note: including windows.h")
|
||||
#pragma message ("Note: including math.h")
|
||||
#pragma message ("Note: including assert.h")
|
||||
#pragma message ("Note: including time.h")
|
||||
|
||||
// Headers needed for Windows
|
||||
#include <windows.h>
|
||||
#include <time.h>
|
||||
#else
|
||||
// Headers needed for Linux
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/time.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdarg.h>
|
||||
#include <unistd.h>
|
||||
#include <time.h>
|
||||
#endif
|
||||
|
||||
// Case-insensitive string comparison helpers, mapped to the platform's native routines.
// Each name is only defined here when the including code has not already defined it.
#ifdef _WIN32
#  ifndef STRCASECMP
#    define STRCASECMP  _stricmp
#  endif
#  ifndef STRNCASECMP
#    define STRNCASECMP _strnicmp
#  endif
#else
#  ifndef STRCASECMP
#    define STRCASECMP  strcasecmp
#  endif
#  ifndef STRNCASECMP
#    define STRNCASECMP strncasecmp
#  endif
#endif
|
||||
|
||||
|
||||
// Standardized QA Start/Finish for CUDA SDK tests
|
||||
#define shrQAStart(a, b) __shrQAStart(a, b)
|
||||
#define shrQAFinish(a, b, c) __shrQAFinish(a, b, c)
|
||||
#define shrQAFinish2(a, b, c, d) __shrQAFinish2(a, b, c, d)
|
||||
|
||||
// Locate where the bare executable name begins inside a path.
//
// @param exec_name  path of the executable (typically argv[0]); may be NULL
// @return index of the character following the last '/' or '\\' in exec_name,
//         or 0 when the path contains no separator, is empty, or is NULL
inline int findExeNameStart(const char *exec_name)
{
    // argv[0] is allowed to be a null pointer (argc == 0), so do not assume a string is present
    if (exec_name == NULL)
    {
        return 0;
    }

    // Both Windows and POSIX separators are accepted; whichever occurs last wins
    const char *last_slash     = strrchr(exec_name, '/');
    const char *last_backslash = strrchr(exec_name, '\\');

    const char *last_sep = last_slash;
    if (last_backslash != NULL && (last_sep == NULL || last_backslash > last_sep))
    {
        last_sep = last_backslash;
    }

    // No separator at all: the name starts at the beginning of the string
    if (last_sep == NULL)
    {
        return 0;
    }

    return (int)(last_sep - exec_name) + 1;
}
|
||||
|
||||
inline int __shrQAStart(int argc, char **argv)
|
||||
{
|
||||
bool bQATest = false;
|
||||
// First clear the output buffer
|
||||
fflush(stdout);
|
||||
fflush(stdout);
|
||||
|
||||
for (int i=1; i < argc; i++) {
|
||||
int string_start = 0;
|
||||
while (argv[i][string_start] == '-')
|
||||
string_start++;
|
||||
char *string_argv = &argv[i][string_start];
|
||||
|
||||
if (!STRCASECMP(string_argv, "qatest")) {
|
||||
bQATest = true;
|
||||
}
|
||||
}
|
||||
|
||||
// We don't want to print the entire path, so we search for the first
|
||||
int exename_start = findExeNameStart(argv[0]);
|
||||
if (bQATest) {
|
||||
fprintf(stdout, "&&&& RUNNING %s", &(argv[0][exename_start]));
|
||||
for (int i=1; i < argc; i++) fprintf(stdout, " %s", argv[i]);
|
||||
fprintf(stdout, "\n");
|
||||
} else {
|
||||
fprintf(stdout, "[%s] starting...\n", &(argv[0][exename_start]));
|
||||
}
|
||||
fflush(stdout);
|
||||
printf("\n"); fflush(stdout);
|
||||
return exename_start;
|
||||
}
|
||||
|
||||
// Result codes for a QA test run.
// The numeric values are used as indices into the sStatus[] string tables
// in __shrQAFinish / __shrQAFinish2, so they must stay 0, 1, 2.
enum eQAstatus
{
    QA_FAILED = 0,  // reported as "FAILED"
    QA_PASSED = 1,  // reported as "PASSED"
    QA_WAIVED = 2   // reported as "WAIVED"
};
|
||||
|
||||
// Print a once-per-second countdown on stdout and return once `seconds` seconds have elapsed.
// Despite its name this function does not terminate the process; it only delays the caller.
//
// @param seconds  length of the countdown; zero or a negative value returns without waiting
inline void __ExitInTime(int seconds)
{
    fprintf(stdout, "> exiting in %d seconds: ", seconds);
    fflush(stdout);

    const time_t deadline = time(0) + seconds;
    for (int count = seconds; time(0) < deadline; count--) {
        fprintf(stdout, "%d...", count);
        // The tick has no trailing newline, so flush explicitly to make it visible right away
        fflush(stdout);
        // _WIN32 is the macro the rest of this header tests to select the Windows headers
#ifdef _WIN32
        Sleep(1000);
#else
        sleep(1);
#endif
    }
    fprintf(stdout,"done!\n\n");
    fflush(stdout);
}
|
||||
|
||||
|
||||
inline void __shrQAFinish(int argc, const char **argv, int iStatus)
|
||||
{
|
||||
// By default QATest is disabled and NoPrompt is Enabled (times out at seconds passed into __ExitInTime() )
|
||||
bool bQATest = false, bNoPrompt = true, bQuitInTime = true;
|
||||
const char *sStatus[] = { "FAILED", "PASSED", "WAIVED", NULL };
|
||||
|
||||
for (int i=1; i < argc; i++) {
|
||||
int string_start = 0;
|
||||
while (argv[i][string_start] == '-')
|
||||
string_start++;
|
||||
|
||||
const char *string_argv = &argv[i][string_start];
|
||||
if (!STRCASECMP(string_argv, "qatest")) {
|
||||
bQATest = true;
|
||||
}
|
||||
// For SDK individual samples that don't specify -noprompt or -prompt,
|
||||
// a 3 second delay will happen before exiting, giving a user time to view results
|
||||
if (!STRCASECMP(string_argv, "noprompt") || !STRCASECMP(string_argv, "help")) {
|
||||
bNoPrompt = true;
|
||||
bQuitInTime = false;
|
||||
}
|
||||
if (!STRCASECMP(string_argv, "prompt")) {
|
||||
bNoPrompt = false;
|
||||
bQuitInTime = false;
|
||||
}
|
||||
}
|
||||
|
||||
int exename_start = findExeNameStart(argv[0]);
|
||||
if (bQATest) {
|
||||
fprintf(stdout, "&&&& %s %s", sStatus[iStatus], &(argv[0][exename_start]));
|
||||
for (int i=1; i < argc; i++) fprintf(stdout, " %s", argv[i]);
|
||||
fprintf(stdout, "\n");
|
||||
} else {
|
||||
fprintf(stdout, "[%s] test results...\n%s\n", &(argv[0][exename_start]), sStatus[iStatus]);
|
||||
}
|
||||
fflush(stdout);
|
||||
printf("\n"); fflush(stdout);
|
||||
if (bQuitInTime) {
|
||||
__ExitInTime(3);
|
||||
} else {
|
||||
if (!bNoPrompt) {
|
||||
fprintf(stdout, "\nPress <Enter> to exit...\n");
|
||||
fflush(stdout);
|
||||
getchar();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline void __shrQAFinish2(bool bQATest, int argc, const char **argv, int iStatus)
|
||||
{
|
||||
bool bQuitInTime = true;
|
||||
const char *sStatus[] = { "FAILED", "PASSED", "WAIVED", NULL };
|
||||
|
||||
for (int i=1; i < argc; i++) {
|
||||
int string_start = 0;
|
||||
while (argv[i][string_start] == '-')
|
||||
string_start++;
|
||||
|
||||
const char *string_argv = &argv[i][string_start];
|
||||
// For SDK individual samples that don't specify -noprompt or -prompt,
|
||||
// a 3 second delay will happen before exiting, giving a user time to view results
|
||||
if (!STRCASECMP(string_argv, "noprompt") || !STRCASECMP(string_argv, "help")) {
|
||||
bQuitInTime = false;
|
||||
}
|
||||
if (!STRCASECMP(string_argv, "prompt")) {
|
||||
bQuitInTime = false;
|
||||
}
|
||||
}
|
||||
|
||||
int exename_start = findExeNameStart(argv[0]);
|
||||
if (bQATest) {
|
||||
fprintf(stdout, "&&&& %s %s", sStatus[iStatus], &(argv[0][exename_start]));
|
||||
for (int i=1; i < argc; i++) fprintf(stdout, " %s", argv[i]);
|
||||
fprintf(stdout, "\n");
|
||||
} else {
|
||||
fprintf(stdout, "[%s] test results...\n%s\n", &(argv[0][exename_start]), sStatus[iStatus]);
|
||||
}
|
||||
fflush(stdout);
|
||||
|
||||
if (bQuitInTime) {
|
||||
__ExitInTime(3);
|
||||
}
|
||||
}
|
||||
|
||||
inline void shrQAFinishExit(int argc, const char **argv, int iStatus)
|
||||
{
|
||||
__shrQAFinish(argc, argv, iStatus);
|
||||
|
||||
exit(iStatus ? EXIT_SUCCESS : EXIT_FAILURE);
|
||||
}
|
||||
|
||||
inline void shrQAFinishExit2(bool bQAtest, int argc, const char **argv, int iStatus)
|
||||
{
|
||||
__shrQAFinish2(bQAtest, argc, argv, iStatus);
|
||||
|
||||
exit(iStatus ? EXIT_SUCCESS : EXIT_FAILURE);
|
||||
}
|
||||
|
||||
#endif
|
||||
1954
tests/opencl/blackscholes/shrUtils.cpp
Normal file
1954
tests/opencl/blackscholes/shrUtils.cpp
Normal file
File diff suppressed because it is too large
Load Diff
642
tests/opencl/blackscholes/shrUtils.h
Normal file
642
tests/opencl/blackscholes/shrUtils.h
Normal file
@@ -0,0 +1,642 @@
|
||||
/*
|
||||
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Please refer to the NVIDIA end user license agreement (EULA) associated
|
||||
* with this source code for terms and conditions that govern your use of
|
||||
* this software. Any use, reproduction, disclosure, or distribution of
|
||||
* this software and related documentation outside the terms of the EULA
|
||||
* is strictly prohibited.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef SHR_UTILS_H
|
||||
#define SHR_UTILS_H
|
||||
|
||||
// *********************************************************************
|
||||
// Generic utilities for NVIDIA GPU Computing SDK
|
||||
// *********************************************************************
|
||||
|
||||
// reminders for output window and build log
|
||||
#ifdef _WIN32
|
||||
#pragma message ("Note: including windows.h")
|
||||
#pragma message ("Note: including math.h")
|
||||
#pragma message ("Note: including assert.h")
|
||||
#endif
|
||||
|
||||
// OS dependent includes
|
||||
#ifdef _WIN32
|
||||
// Headers needed for Windows
|
||||
#include <windows.h>
|
||||
#else
|
||||
// Headers needed for Linux
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/time.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdarg.h>
|
||||
#endif
|
||||
|
||||
// Other headers needed for both Windows and Linux
|
||||
#include <math.h>
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
// Un-comment the following #define to enable profiling code in SDK apps
|
||||
//#define GPU_PROFILING
|
||||
|
||||
// Beginning of GPU Architecture definitions
|
||||
// Map an SM version to the number of cores per SM.
//
// @param major  SM major version
// @param minor  SM minor version
// @return cores per SM for a version listed in the table below; otherwise -1,
//         after printing a diagnostic to stdout
inline int ConvertSMVer2Cores(int major, int minor)
{
    // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM)
    typedef struct {
        int SM;    // 0xMm (hexadecimal notation), M = SM Major version, and m = SM minor version
        int Cores;
    } sSMtoCores;

    // Built once rather than on every call; the { -1, -1 } entry terminates the table
    static const sSMtoCores nGpuArchCoresPerSM[] =
    { { 0x10,   8 }, // Tesla Generation (SM 1.0) G80 class
      { 0x11,   8 }, // Tesla Generation (SM 1.1) G8x class
      { 0x12,   8 }, // Tesla Generation (SM 1.2) G9x class
      { 0x13,   8 }, // Tesla Generation (SM 1.3) GT200 class
      { 0x20,  32 }, // Fermi Generation (SM 2.0) GF100 class
      { 0x21,  48 }, // Fermi Generation (SM 2.1) GF10x class
      { 0x30, 192 }, // Kepler Generation (SM 3.0) GK10x class
      {   -1,  -1 }
    };

    for (int index = 0; nGpuArchCoresPerSM[index].SM != -1; index++) {
        // Decode the table entry instead of encoding the inputs: an out-of-range minor
        // (e.g. 1.16) can then never alias another entry (0x20), and no shift is applied
        // to a possibly negative major.
        const int entryMajor = nGpuArchCoresPerSM[index].SM >> 4;
        const int entryMinor = nGpuArchCoresPerSM[index].SM & 0xF;
        if (entryMajor == major && entryMinor == minor) {
            return nGpuArchCoresPerSM[index].Cores;
        }
    }
    printf("MapSMtoCores SM %d.%d is undefined (please update to the latest SDK)!\n", major, minor);
    return -1;
}
|
||||
// end of GPU Architecture definitions
|
||||
|
||||
|
||||
// Defines and enum for use with logging functions
|
||||
// *********************************************************************
|
||||
#define DEFAULTLOGFILE "SdkConsoleLog.txt"
|
||||
#define MASTERLOGFILE "SdkMasterLog.csv"
|
||||
// Bit flags accepted by the logging functions; values may be combined with '|'.
enum LOGMODES
{
    LOGCONSOLE = 1 << 0,               // bit to signal "log to console"
    LOGFILE    = 1 << 1,               // bit to signal "log to file"
    LOGBOTH    = LOGCONSOLE | LOGFILE, // convenience union of first 2 bits to signal "log to both"
    APPENDMODE = 1 << 2,               // bit to set "file append" mode instead of "replace mode" on open
    MASTER     = 1 << 3,               // bit to signal master .csv log output
    ERRORMSG   = 1 << 4,               // bit to signal "pre-pend Error"
    CLOSELOG   = 1 << 5                // bit to close log file, if open, after any requested file write
};
|
||||
#define HDASHLINE "-----------------------------------------------------------\n"
|
||||
|
||||
// Standardized boolean
|
||||
// Two-valued result type used by the shr* helper functions.
enum shrBOOL
{
    shrFALSE = 0,   // failure / false
    shrTRUE         // success / true; takes the next value, 1
};
|
||||
|
||||
// Standardized MAX, MIN and CLAMP
|
||||
// Every argument is parenthesized so that an argument containing a lower-precedence
// operator (e.g. MIN(x & 3, y)) is compared as a whole.
// NOTE: these are macros, so an argument may be evaluated more than once; do not pass
// expressions with side effects such as MAX(i++, j).
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#define CLAMP(a, b, c) MIN(MAX((a), (b)), (c))   // double sided clip of input a
#define TOPCLAMP(a, b) (((a) < (b)) ? (a) : (b)) // single top side clip of input a
|
||||
|
||||
// Error and Exit Handling Macros...
|
||||
// *********************************************************************
|
||||
// Full error handling macro with Cleanup() callback (if supplied)...
|
||||
// (Companion Inline Function lower on page)
|
||||
#define shrCheckErrorEX(a, b, c) __shrCheckErrorEX(a, b, c, __FILE__ , __LINE__)
|
||||
|
||||
// Short version without Cleanup() callback pointer
|
||||
// Both Input (a) and Reference (b) are specified as args
|
||||
#define shrCheckError(a, b) shrCheckErrorEX(a, b, 0)
|
||||
|
||||
// Standardized Exit Macro for leaving main()... extended version
|
||||
// (Companion Inline Function lower on page)
|
||||
#define shrExitEX(a, b, c) __shrExitEX(a, b, c)
|
||||
|
||||
// Standardized Exit Macro for leaving main()... short version
|
||||
// (Companion Inline Function lower on page)
|
||||
#define shrEXIT(a, b) __shrExitEX(a, b, EXIT_SUCCESS)
|
||||
|
||||
// Simple argument checker macro
|
||||
// Returns shrFALSE from the enclosing function unless (a) equals shrTRUE.
// Wrapped in do { } while (0) so the macro acts as a single statement and cannot
// capture an 'else' that follows it; use as "ARGCHECK(expr);".
#define ARGCHECK(a) do { if ((a) != shrTRUE) return shrFALSE; } while (0)
|
||||
|
||||
// Define for user-customized error handling
|
||||
#define STDERROR "file %s, line %i\n\n" , __FILE__ , __LINE__
|
||||
|
||||
// Function to deallocate memory allocated within shrUtils
|
||||
// *********************************************************************
|
||||
extern "C" void shrFree(void* ptr);
|
||||
|
||||
// *********************************************************************
|
||||
// Helper function to log standardized information to Console, to File or to both
|
||||
//! Examples: shrLogEx(LOGBOTH, 0, "Function A\n");
|
||||
//! : shrLogEx(LOGBOTH | ERRORMSG, ciErrNum, STDERROR);
|
||||
//!
|
||||
//! Automatically opens file and stores handle if needed and not done yet
|
||||
//! Closes file and nulls handle on request
|
||||
//!
|
||||
//! @param 0 iLogMode: LOGCONSOLE, LOGFILE, LOGBOTH, APPENDMODE, MASTER, ERRORMSG, CLOSELOG.
|
||||
//! LOGFILE and LOGBOTH may be | 'd with APPENDMODE to select file append mode instead of overwrite mode
|
||||
//! LOGFILE and LOGBOTH may be | 'd with CLOSELOG to "write and close"
|
||||
//! First 3 options may be | 'd with MASTER to enable independent write to master data log file
|
||||
//! First 3 options may be | 'd with ERRORMSG to start line with standard error message
|
||||
//! @param 2 dValue:
|
||||
//! Positive val = double value for time in secs to be formatted to 6 decimals.
|
||||
//! Negative val is an error code and this give error preformatting.
|
||||
//! @param 3 cFormatString: String with formatting specifiers like printf or fprintf.
|
||||
//! ALL printf flags, width, precision and type specifiers are supported with this exception:
|
||||
//! Wide char type specifiers intended for wprintf (%S and %C) are NOT supported
|
||||
//! Single byte char type specifiers (%s and %c) ARE supported
|
||||
//! @param 4... variable args: like printf or fprintf. Must match format specifier type above.
|
||||
//! @return 0 if OK, negative value on error or if error occurs or was passed in.
|
||||
// *********************************************************************
|
||||
extern "C" int shrLogEx(int iLogMode, int iErrNum, const char* cFormatString, ...);
|
||||
|
||||
// Short version of shrLogEx defaulting to shrLogEx(LOGBOTH, 0,
|
||||
// *********************************************************************
|
||||
extern "C" int shrLog(const char* cFormatString, ...);
|
||||
|
||||
// *********************************************************************
|
||||
// Delta timer function for up to 3 independent timers using host high performance counters
|
||||
// Maintains state for 3 independent counters
|
||||
//! Example: double dElapsedTime = shrDeltaT(0);
|
||||
//!
|
||||
//! @param 0 iCounterID: Which timer to check/reset. (0, 1, 2)
|
||||
//! @return delta time of specified counter since last call in seconds. Otherwise -9999.0 if error
|
||||
// *********************************************************************
|
||||
extern "C" double shrDeltaT(int iCounterID);
|
||||
|
||||
// Optional LogFileNameOverride function
|
||||
// *********************************************************************
|
||||
extern "C" void shrSetLogFileName (const char* cOverRideName);
|
||||
|
||||
// Helper function to init data arrays
|
||||
// *********************************************************************
|
||||
extern "C" void shrFillArray(float* pfData, int iSize);
|
||||
|
||||
// Helper function to print data arrays
|
||||
// *********************************************************************
|
||||
extern "C" void shrPrintArray(float* pfData, int iSize);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Find the path for a filename
|
||||
//! @return the path if succeeded, otherwise 0
|
||||
//! @param filename name of the file
|
||||
//! @param executablePath optional absolute path of the executable
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" char* shrFindFilePath(const char* filename, const char* executablePath);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Read file \filename containing single precision floating point data
|
||||
//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE
|
||||
//! @param filename name of the source file
|
||||
//! @param data uninitialized pointer, returned initialized and pointing to
|
||||
//! the data read
|
||||
//! @param len number of data elements in data, -1 on error
|
||||
//! @note If a NULL pointer is passed to this function and it is initialized
|
||||
//! within shrUtils, then free() has to be used to deallocate the memory
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrReadFilef( const char* filename, float** data, unsigned int* len,
|
||||
bool verbose = false);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Read file \filename containing double precision floating point data
|
||||
//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE
|
||||
//! @param filename name of the source file
|
||||
//! @param data uninitialized pointer, returned initialized and pointing to
|
||||
//! the data read
|
||||
//! @param len number of data elements in data, -1 on error
|
||||
//! @note If a NULL pointer is passed to this function and it is initialized
|
||||
//! within shrUtils, then free() has to be used to deallocate the memory
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrReadFiled( const char* filename, double** data, unsigned int* len,
|
||||
bool verbose = false);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Read file \filename containing integer data
|
||||
//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE
|
||||
//! @param filename name of the source file
|
||||
//! @param data uninitialized pointer, returned initialized and pointing to
|
||||
//! the data read
|
||||
//! @param len number of data elements in data, -1 on error
|
||||
//! @note If a NULL pointer is passed to this function and it is initialized
|
||||
//! within shrUtils, then free() has to be used to deallocate the memory
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrReadFilei( const char* filename, int** data, unsigned int* len, bool verbose = false);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Read file \filename containing unsigned integer data
|
||||
//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE
|
||||
//! @param filename name of the source file
|
||||
//! @param data uninitialized pointer, returned initialized and pointing to
|
||||
//! the data read
|
||||
//! @param len number of data elements in data, -1 on error
|
||||
//! @note If a NULL pointer is passed to this function and it is initialized
|
||||
//! within shrUtils, then free() has to be used to deallocate the memory
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrReadFileui( const char* filename, unsigned int** data,
|
||||
unsigned int* len, bool verbose = false);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Read file \filename containing char / byte data
|
||||
//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE
|
||||
//! @param filename name of the source file
|
||||
//! @param data uninitialized pointer, returned initialized and pointing to
|
||||
//! the data read
|
||||
//! @param len number of data elements in data, -1 on error
|
||||
//! @note If a NULL pointer is passed to this function and it is initialized
|
||||
//! within shrUtils, then free() has to be used to deallocate the memory
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrReadFileb( const char* filename, char** data, unsigned int* len,
|
||||
bool verbose = false);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Read file \filename containing unsigned char / byte data
|
||||
//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE
|
||||
//! @param filename name of the source file
|
||||
//! @param data uninitialized pointer, returned initialized and pointing to
|
||||
//! the data read
|
||||
//! @param len number of data elements in data, -1 on error
|
||||
//! @note If a NULL pointer is passed to this function and it is initialized
|
||||
//! within shrUtils, then free() has to be used to deallocate the memory
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrReadFileub( const char* filename, unsigned char** data,
|
||||
unsigned int* len, bool verbose = false);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Write a data file \filename containing single precision floating point
|
||||
//! data
|
||||
//! @return shrTRUE if writing the file succeeded, otherwise shrFALSE
|
||||
//! @param filename name of the file to write
|
||||
//! @param data pointer to data to write
|
||||
//! @param len number of data elements in data, -1 on error
|
||||
//! @param epsilon epsilon for comparison
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrWriteFilef( const char* filename, const float* data, unsigned int len,
|
||||
const float epsilon, bool verbose = false);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Write a data file \filename containing double precision floating point
|
||||
//! data
|
||||
//! @return shrTRUE if writing the file succeeded, otherwise shrFALSE
|
||||
//! @param filename name of the file to write
|
||||
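//! @param data pointer to data to write
//! NOTE(review): data is declared as const float* although this is the double precision variant - confirm against the definition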
|
||||
//! @param len number of data elements in data, -1 on error
|
||||
//! @param epsilon epsilon for comparison
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrWriteFiled( const char* filename, const float* data, unsigned int len,
|
||||
const double epsilon, bool verbose = false);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Write a data file \filename containing integer data
|
||||
//! @return shrTRUE if writing the file succeeded, otherwise shrFALSE
|
||||
//! @param filename name of the file to write
|
||||
//! @param data pointer to data to write
|
||||
//! @param len number of data elements in data, -1 on error
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrWriteFilei( const char* filename, const int* data, unsigned int len,
|
||||
bool verbose = false);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Write a data file \filename containing unsigned integer data
|
||||
//! @return shrTRUE if writing the file succeeded, otherwise shrFALSE
|
||||
//! @param filename name of the file to write
|
||||
//! @param data pointer to data to write
|
||||
//! @param len number of data elements in data, -1 on error
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrWriteFileui( const char* filename, const unsigned int* data,
|
||||
unsigned int len, bool verbose = false);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Write a data file \filename containing char / byte data
|
||||
//! @return shrTRUE if writing the file succeeded, otherwise shrFALSE
|
||||
//! @param filename name of the file to write
|
||||
//! @param data pointer to data to write
|
||||
//! @param len number of data elements in data, -1 on error
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrWriteFileb( const char* filename, const char* data, unsigned int len,
|
||||
bool verbose = false);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Write a data file \filename containing unsigned char / byte data
|
||||
//! @return shrTRUE if writing the file succeeded, otherwise shrFALSE
|
||||
//! @param filename name of the file to write
|
||||
//! @param data pointer to data to write
|
||||
//! @param len number of data elements in data, -1 on error
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrWriteFileub( const char* filename, const unsigned char* data,
|
||||
unsigned int len, bool verbose = false);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Load PPM image file (with unsigned char as data element type), padding
|
||||
//! 4th component
|
||||
//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE
|
||||
//! @param file name of the image file
|
||||
//! @param OutData handle to the data read
|
||||
//! @param w width of the image
|
||||
//! @param h height of the image
|
||||
//!
|
||||
//! Note: If *OutData is NULL this function allocates a buffer that must be freed by the caller
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrLoadPPM4ub(const char* file, unsigned char** OutData,
|
||||
unsigned int *w, unsigned int *h);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Save PPM image file (with unsigned char as data element type, padded to
|
||||
//! 4 bytes)
|
||||
//! @return shrTRUE if saving the file succeeded, otherwise shrFALSE
|
||||
//! @param file name of the image file
|
||||
//! @param data pointer to the image data to save
|
||||
//! @param w width of the image
|
||||
//! @param h height of the image
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrSavePPM4ub( const char* file, unsigned char *data,
|
||||
unsigned int w, unsigned int h);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Save PGM image file (with unsigned char as data element type)
|
||||
//! @return shrTRUE if saving the file succeeded, otherwise shrFALSE
|
||||
//! @param file name of the image file
|
||||
//! @param data pointer to the image data to save
|
||||
//! @param w width of the image
|
||||
//! @param h height of the image
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrSavePGMub( const char* file, unsigned char *data,
|
||||
unsigned int w, unsigned int h);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Load PGM image file (with unsigned char as data element type)
|
||||
//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE
|
||||
//! @param file name of the image file
|
||||
//! @param data handle to the data read
|
||||
//! @param w width of the image
|
||||
//! @param h height of the image
|
||||
//! @note If a NULL pointer is passed to this function and it is initialized
|
||||
//! within shrUtils, then free() has to be used to deallocate the memory
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrLoadPGMub( const char* file, unsigned char** data,
|
||||
unsigned int *w,unsigned int *h);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// Command line arguments: General notes
|
||||
// * All command line arguments begin with '--' followed by the token;
|
||||
// token and value are separated by '='; example --samples=50
|
||||
// * Arrays have the form --model=[one.obj,two.obj,three.obj]
|
||||
// (without whitespaces)
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Check if command line argument \a flag_name is given
|
||||
//! @return shrTRUE if command line argument \a flag_name has been given,
|
||||
//! otherwise shrFALSE
|
||||
//! @param argc argc as passed to main()
|
||||
//! @param argv argv as passed to main()
|
||||
//! @param flag_name name of command line flag
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrCheckCmdLineFlag( const int argc, const char** argv,
|
||||
const char* flag_name);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Get the value of a command line argument of type int
|
||||
//! @return shrTRUE if command line argument \a arg_name has been given and
|
||||
//! is of the requested type, otherwise shrFALSE
|
||||
//! @param argc argc as passed to main()
|
||||
//! @param argv argv as passed to main()
|
||||
//! @param arg_name name of the command line argument
|
||||
//! @param val value of the command line argument
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrGetCmdLineArgumenti( const int argc, const char** argv,
|
||||
const char* arg_name, int* val);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Get the value of a command line argument of type unsigned int
|
||||
//! @return shrTRUE if command line argument \a arg_name has been given and
|
||||
//! is of the requested type, otherwise shrFALSE
|
||||
//! @param argc argc as passed to main()
|
||||
//! @param argv argv as passed to main()
|
||||
//! @param arg_name name of the command line argument
|
||||
//! @param val value of the command line argument
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrGetCmdLineArgumentu( const int argc, const char** argv,
|
||||
const char* arg_name, unsigned int* val);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Get the value of a command line argument of type float
|
||||
//! @return shrTRUE if command line argument \a arg_name has been given and
|
||||
//! is of the requested type, otherwise shrFALSE
|
||||
//! @param argc argc as passed to main()
|
||||
//! @param argv argv as passed to main()
|
||||
//! @param arg_name name of the command line argument
|
||||
//! @param val value of the command line argument
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrGetCmdLineArgumentf( const int argc, const char** argv,
|
||||
const char* arg_name, float* val);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Get the value of a command line argument of type string
|
||||
//! @return shrTRUE if command line argument \a arg_name has been given and
|
||||
//! is of the requested type, otherwise shrFALSE
|
||||
//! @param argc argc as passed to main()
|
||||
//! @param argv argv as passed to main()
|
||||
//! @param arg_name name of the command line argument
|
||||
//! @param val value of the command line argument
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrGetCmdLineArgumentstr( const int argc, const char** argv,
|
||||
const char* arg_name, char** val);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Get the value of a command line argument list whose elements are strings
|
||||
//! @return shrTRUE if command line argument \a arg_name has been given and
|
||||
//! is of the requested type, otherwise shrFALSE
|
||||
//! @param argc argc as passed to main()
|
||||
//! @param argv argv as passed to main()
|
||||
//! @param arg_name name of the command line argument
|
||||
//! @param val command line argument list
|
||||
//! @param len length of the list / number of elements
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrGetCmdLineArgumentListstr( const int argc, const char** argv,
|
||||
const char* arg_name, char** val,
|
||||
unsigned int* len);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Compare two float arrays
|
||||
//! @return shrTRUE if \a reference and \a data are identical, otherwise shrFALSE
|
||||
//! @param reference handle to the reference data / gold image
|
||||
//! @param data handle to the computed data
|
||||
//! @param len number of elements in reference and data
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrComparef( const float* reference, const float* data,
|
||||
const unsigned int len);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Compare two integer arrays
|
||||
//! @return shrTRUE if \a reference and \a data are identical, otherwise shrFALSE
|
||||
//! @param reference handle to the reference data / gold image
|
||||
//! @param data handle to the computed data
|
||||
//! @param len number of elements in reference and data
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrComparei( const int* reference, const int* data,
|
||||
const unsigned int len );
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Compare two unsigned integer arrays, with epsilon and threshold
|
||||
//! @return shrTRUE if \a reference and \a data are identical, otherwise shrFALSE
|
||||
//! @param reference handle to the reference data / gold image
|
||||
//! @param data handle to the computed data
|
||||
//! @param len number of elements in reference and data
|
||||
//! @param threshold tolerance % # of comparison errors (0.15f = 15%)
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrCompareuit( const unsigned int* reference, const unsigned int* data,
|
||||
const unsigned int len, const float epsilon, const float threshold );
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Compare two unsigned char arrays
|
||||
//! @return shrTRUE if \a reference and \a data are identical, otherwise shrFALSE
|
||||
//! @param reference handle to the reference data / gold image
|
||||
//! @param data handle to the computed data
|
||||
//! @param len number of elements in reference and data
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrCompareub( const unsigned char* reference, const unsigned char* data,
|
||||
const unsigned int len );
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Compare two unsigned char arrays with a tolerance for # of byte errors
|
||||
//! @return shrTRUE if \a reference and \a data are identical, otherwise shrFALSE
|
||||
//! @param reference handle to the reference data / gold image
|
||||
//! @param data handle to the computed data
|
||||
//! @param len number of elements in reference and data
|
||||
//! @param epsilon epsilon to use for the comparison
|
||||
//! @param threshold tolerance % # of comparison errors (0.15f = 15%)
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrCompareubt( const unsigned char* reference, const unsigned char* data,
|
||||
const unsigned int len, const float epsilon, const float threshold );
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Compare two unsigned char arrays with an epsilon tolerance for equality
|
||||
//! @return shrTRUE if \a reference and \a data are identical, otherwise shrFALSE
|
||||
//! @param reference handle to the reference data / gold image
|
||||
//! @param data handle to the computed data
|
||||
//! @param len number of elements in reference and data
|
||||
//! @param epsilon epsilon to use for the comparison
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrCompareube( const unsigned char* reference, const unsigned char* data,
|
||||
const unsigned int len, const float epsilon );
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Compare two float arrays with an epsilon tolerance for equality
|
||||
//! @return shrTRUE if \a reference and \a data are identical, otherwise shrFALSE
|
||||
//! @param reference handle to the reference data / gold image
|
||||
//! @param data handle to the computed data
|
||||
//! @param len number of elements in reference and data
|
||||
//! @param epsilon epsilon to use for the comparison
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrComparefe( const float* reference, const float* data,
|
||||
const unsigned int len, const float epsilon );
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Compare two float arrays with an epsilon tolerance for equality and a
|
||||
//! threshold for # pixel errors
|
||||
//! @return shrTRUE if \a reference and \a data are identical, otherwise shrFALSE
|
||||
//! @param reference handle to the reference data / gold image
|
||||
//! @param data handle to the computed data
|
||||
//! @param len number of elements in reference and data
|
||||
//! @param epsilon epsilon to use for the comparison
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrComparefet( const float* reference, const float* data,
|
||||
const unsigned int len, const float epsilon, const float threshold );
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Compare two float arrays using L2-norm with an epsilon tolerance for
|
||||
//! equality
|
||||
//! @return shrTRUE if \a reference and \a data are identical, otherwise shrFALSE
|
||||
//! @param reference handle to the reference data / gold image
|
||||
//! @param data handle to the computed data
|
||||
//! @param len number of elements in reference and data
|
||||
//! @param epsilon epsilon to use for the comparison
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrCompareL2fe( const float* reference, const float* data,
|
||||
const unsigned int len, const float epsilon );
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Compare two PPM image files with an epsilon tolerance for equality
|
||||
//! @return shrTRUE if \a src_file matches \a ref_file within the given tolerances, otherwise shrFALSE
|
||||
//! @param src_file filename for the image to be compared
|
||||
//! @param ref_file filename for the reference data / gold image
|
||||
//! @param epsilon epsilon to use for the comparison
|
||||
//! @param threshold threshold of pixels that can still mismatch to pass (i.e. 0.15f = 15% must pass)
|
||||
//! NOTE(review): a verboseErrors parameter (output details of image mismatch to std::err) was documented here, but this declaration takes no such parameter
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrComparePPM( const char *src_file, const char *ref_file, const float epsilon, const float threshold);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Compare two PGM image files with an epsilon tolerance for equality
|
||||
//! @return shrTRUE if \a src_file matches \a ref_file within the given tolerances, otherwise shrFALSE
|
||||
//! @param src_file filename for the image to be compared
|
||||
//! @param ref_file filename for the reference data / gold image
|
||||
//! @param epsilon epsilon to use for the comparison
|
||||
//! @param threshold threshold of pixels that can still mismatch to pass (i.e. 0.15f = 15% must pass)
|
||||
//! NOTE(review): a verboseErrors parameter (output details of image mismatch to std::err) was documented here, but this declaration takes no such parameter
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrComparePGM( const char *src_file, const char *ref_file, const float epsilon, const float threshold);
|
||||
|
||||
extern "C" unsigned char* shrLoadRawFile(const char* filename, size_t size);
|
||||
|
||||
extern "C" size_t shrRoundUp(int group_size, int global_size);
|
||||
|
||||
// companion inline function for error checking and exit on error WITH Cleanup Callback (if supplied)
|
||||
// *********************************************************************
|
||||
inline void __shrCheckErrorEX(int iSample, int iReference, void (*pCleanup)(int), const char* cFile, const int iLine)
|
||||
{
|
||||
if (iReference != iSample)
|
||||
{
|
||||
shrLogEx(LOGBOTH | ERRORMSG, iSample, "line %i , in file %s !!!\n\n" , iLine, cFile);
|
||||
if (pCleanup != NULL)
|
||||
{
|
||||
pCleanup(EXIT_FAILURE);
|
||||
}
|
||||
else
|
||||
{
|
||||
shrLogEx(LOGBOTH | CLOSELOG, 0, "Exiting...\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Standardized Exit
|
||||
// *********************************************************************
|
||||
inline void __shrExitEX(int argc, const char** argv, int iExitCode)
|
||||
{
|
||||
#ifdef WIN32
|
||||
if (!shrCheckCmdLineFlag(argc, argv, "noprompt") && !shrCheckCmdLineFlag(argc, argv, "qatest"))
|
||||
#else
|
||||
if (shrCheckCmdLineFlag(argc, argv, "prompt") && !shrCheckCmdLineFlag(argc, argv, "qatest"))
|
||||
#endif
|
||||
{
|
||||
shrLogEx(LOGBOTH | CLOSELOG, 0, "\nPress <Enter> to Quit...\n");
|
||||
getchar();
|
||||
}
|
||||
else
|
||||
{
|
||||
shrLogEx(LOGBOTH | CLOSELOG, 0, "%s Exiting...\n", argv[0]);
|
||||
}
|
||||
fflush(stderr);
|
||||
exit(iExitCode);
|
||||
}
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user