Vortex 2.0 changes:
+ Microarchitecture optimizations + 64-bit support + Xilinx FPGA support + LLVM-16 support + Refactoring and quality control fixes
This commit is contained in:
29
tests/opencl/dotproduct/DotProduct.cl
Normal file
29
tests/opencl/dotproduct/DotProduct.cl
Normal file
@@ -0,0 +1,29 @@
|
||||
/*
|
||||
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Please refer to the NVIDIA end user license agreement (EULA) associated
|
||||
* with this source code for terms and conditions that govern your use of
|
||||
* this software. Any use, reproduction, disclosure, or distribution of
|
||||
* this software and related documentation outside the terms of the EULA
|
||||
* is strictly prohibited.
|
||||
*
|
||||
*/
|
||||
|
||||
__kernel void DotProduct (__global float* a, __global float* b, __global float* c, int iNumElements)
|
||||
{
|
||||
// find position in global arrays
|
||||
int iGID = get_global_id(0);
|
||||
|
||||
// bound check (equivalent to the limit on a 'for' loop for standard/serial C code
|
||||
if (iGID >= iNumElements)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// process
|
||||
int iInOffset = iGID << 2;
|
||||
c[iGID] = a[iInOffset] * b[iInOffset]
|
||||
+ a[iInOffset + 1] * b[iInOffset + 1]
|
||||
+ a[iInOffset + 2] * b[iInOffset + 2]
|
||||
+ a[iInOffset + 3] * b[iInOffset + 3];
|
||||
}
|
||||
7
tests/opencl/dotproduct/Makefile
Normal file
7
tests/opencl/dotproduct/Makefile
Normal file
@@ -0,0 +1,7 @@
|
||||
PROJECT = dotproduct
|
||||
|
||||
SRCS = main.cc oclUtils.cpp shrUtils.cpp cmd_arg_reader.cpp
|
||||
|
||||
OPTS ?= -n64
|
||||
|
||||
include ../common.mk
|
||||
152
tests/opencl/dotproduct/cmd_arg_reader.cpp
Normal file
152
tests/opencl/dotproduct/cmd_arg_reader.cpp
Normal file
@@ -0,0 +1,152 @@
|
||||
/*
|
||||
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Please refer to the NVIDIA end user license agreement (EULA) associated
|
||||
* with this source code for terms and conditions that govern your use of
|
||||
* this software. Any use, reproduction, disclosure, or distribution of
|
||||
* this software and related documentation outside the terms of the EULA
|
||||
* is strictly prohibited.
|
||||
*
|
||||
*/
|
||||
|
||||
/* CUda UTility Library */
|
||||
|
||||
// includes, file
|
||||
#include "cmd_arg_reader.h"
|
||||
|
||||
// includes, system
|
||||
#include <vector>
|
||||
|
||||
// internal unnamed namespace
|
||||
|
||||
namespace
|
||||
{
|
||||
// types, internal (class, enum, struct, union, typedef)
|
||||
|
||||
// variables, internal
|
||||
|
||||
} // namespace {
|
||||
|
||||
// variables, exported
|
||||
|
||||
/*static*/ CmdArgReader* CmdArgReader::self;
|
||||
/*static*/ char** CmdArgReader::rargv;
|
||||
/*static*/ int CmdArgReader::rargc;
|
||||
|
||||
// functions, exported
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Public construction interface
|
||||
//! @return a handle to the class instance
|
||||
//! @param argc number of command line arguments (as given to main())
|
||||
//! @param argv command line argument string (as given to main())
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/*static*/ void
|
||||
CmdArgReader::init( const int argc, const char** argv)
|
||||
{
|
||||
if ( NULL != self)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// command line arguments
|
||||
if (( 0 == argc) || ( 0 == argv))
|
||||
{
|
||||
LOGIC_EXCEPTION( "No command line arguments given.");
|
||||
}
|
||||
|
||||
self = new CmdArgReader();
|
||||
|
||||
self->createArgsMaps( argc, argv);
|
||||
|
||||
rargc = argc;
|
||||
rargv = const_cast<char**>( argv);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Constructor, default
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
CmdArgReader::CmdArgReader() :
|
||||
args(),
|
||||
unprocessed(),
|
||||
iter(),
|
||||
iter_unprocessed()
|
||||
{ }
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Destructor
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
CmdArgReader::~CmdArgReader()
|
||||
{
|
||||
for( iter = args.begin(); iter != args.end(); ++iter)
|
||||
{
|
||||
if( *(iter->second.first) == typeid( int))
|
||||
{
|
||||
delete static_cast<int*>( iter->second.second);
|
||||
break;
|
||||
}
|
||||
else if( *(iter->second.first) == typeid( bool))
|
||||
{
|
||||
delete static_cast<bool*>( iter->second.second);
|
||||
break;
|
||||
}
|
||||
else if( *(iter->second.first) == typeid( std::string))
|
||||
{
|
||||
delete static_cast<std::string*>( iter->second.second);
|
||||
break;
|
||||
}
|
||||
else if( *(iter->second.first) == typeid( std::vector< std::string>) )
|
||||
{
|
||||
delete static_cast< std::vector< std::string>* >( iter->second.second);
|
||||
break;
|
||||
}
|
||||
else if( *(iter->second.first) == typeid( std::vector<int>) )
|
||||
{
|
||||
delete static_cast< std::vector<int>* >( iter->second.second);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Read args as token value pair into map for better processing (Even the
|
||||
//! values remain strings until the parameter values is requested by the
|
||||
//! program.)
|
||||
//! @param argc the argument count (as given to 'main')
|
||||
//! @param argv the char* array containing the command line arguments
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
void
|
||||
CmdArgReader::createArgsMaps( const int argc, const char** argv) {
|
||||
|
||||
std::string token;
|
||||
std::string val_str;
|
||||
|
||||
std::map< std::string, std::string> args;
|
||||
|
||||
std::string::size_type pos;
|
||||
std::string arg;
|
||||
for( int i=1; i<argc; ++i)
|
||||
{
|
||||
arg = argv[i];
|
||||
|
||||
// check if valid command line argument: all arguments begin with - or --
|
||||
if (arg[0] != '-')
|
||||
{
|
||||
RUNTIME_EXCEPTION("Invalid command line argument.");
|
||||
}
|
||||
|
||||
int numDashes = (arg[1] == '-' ? 2 : 1);
|
||||
|
||||
// check if only flag or if a value is given
|
||||
if ( (pos = arg.find( '=')) == std::string::npos)
|
||||
{
|
||||
unprocessed[ std::string( arg, numDashes, arg.length()-numDashes)] = "FLAG";
|
||||
}
|
||||
else
|
||||
{
|
||||
unprocessed[ std::string( arg, numDashes, pos-numDashes)] =
|
||||
std::string( arg, pos+1, arg.length()-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
488
tests/opencl/dotproduct/cmd_arg_reader.h
Normal file
488
tests/opencl/dotproduct/cmd_arg_reader.h
Normal file
@@ -0,0 +1,488 @@
|
||||
/*
|
||||
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Please refer to the NVIDIA end user license agreement (EULA) associated
|
||||
* with this source code for terms and conditions that govern your use of
|
||||
* this software. Any use, reproduction, disclosure, or distribution of
|
||||
* this software and related documentation outside the terms of the EULA
|
||||
* is strictly prohibited.
|
||||
*
|
||||
*/
|
||||
|
||||
/* CUda UTility Library */
|
||||
|
||||
#ifndef _CMDARGREADER_H_
|
||||
#define _CMDARGREADER_H_
|
||||
|
||||
// includes, system
|
||||
#include <map>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <algorithm>
|
||||
#include <typeinfo>
|
||||
|
||||
// includes, project
|
||||
#include "exception.h"
|
||||
|
||||
//! Preprocessed command line arguments
|
||||
//! @note Lazy evaluation: The arguments are converted from strings to
|
||||
//! the correct data type upon request. Converted values are stored
|
||||
//! in an additonal map so that no additional conversion is
|
||||
//! necessary. Arrays of command line arguments are stored in
|
||||
//! std::vectors
|
||||
//! @note Usage:
|
||||
//! const std::string* file =
|
||||
//! CmdArgReader::getArg< std::string>( "model")
|
||||
//! const std::vector< std::string>* files =
|
||||
//! CmdArgReader::getArg< std::vector< std::string> >( "model")
|
||||
//! @note All command line arguments begin with '--' followed by the token;
|
||||
//! token and value are seperated by '='; example --samples=50
|
||||
//! @note Arrays have the form --model=[one.obj,two.obj,three.obj]
|
||||
//! (without whitespaces)
|
||||
|
||||
//! Command line argument parser
|
||||
class CmdArgReader
|
||||
{
|
||||
template<class> friend class TestCmdArgReader;
|
||||
|
||||
protected:
|
||||
|
||||
//! @param self handle to the only instance of this class
|
||||
static CmdArgReader* self;
|
||||
|
||||
public:
|
||||
|
||||
//! Public construction interface
|
||||
//! @return a handle to the class instance
|
||||
//! @param argc number of command line arguments (as given to main())
|
||||
//! @param argv command line argument string (as given to main())
|
||||
static void init( const int argc, const char** argv);
|
||||
|
||||
public:
|
||||
|
||||
//! Get the value of the command line argument with given name
|
||||
//! @return A const handle to the requested argument.
|
||||
//! If the argument does not exist or if it
|
||||
//! is not from type T NULL is returned
|
||||
//! @param name the name of the requested argument
|
||||
//! @note T the type of the argument requested
|
||||
template<class T>
|
||||
static inline const T* getArg( const std::string& name);
|
||||
|
||||
//! Check if a command line argument with the given name exists
|
||||
//! @return true if a command line argument with name \a name exists,
|
||||
//! otherwise false
|
||||
//! @param name name of the command line argument in question
|
||||
static inline bool existArg( const std::string& name);
|
||||
|
||||
//! Get the original / raw argc program argument
|
||||
static inline int& getRArgc();
|
||||
|
||||
//! Get the original / raw argv program argument
|
||||
static inline char**& getRArgv();
|
||||
|
||||
public:
|
||||
|
||||
//! Destructor
|
||||
~CmdArgReader();
|
||||
|
||||
protected:
|
||||
|
||||
//! Constructor, default
|
||||
CmdArgReader();
|
||||
|
||||
private:
|
||||
|
||||
// private helper functions
|
||||
|
||||
//! Get the value of the command line argument with given name
|
||||
//! @note Private helper function for 'getArg' to work on the members
|
||||
//! @return A const handle to the requested argument. If the argument
|
||||
//! does not exist or if it is not from type T a NULL pointer
|
||||
//! is returned.
|
||||
//! @param name the name of the requested argument
|
||||
//! @note T the type of the argument requested
|
||||
template<class T>
|
||||
inline const T* getArgHelper( const std::string& name);
|
||||
|
||||
//! Check if a command line argument with name \a name exists
|
||||
//! @return true if a command line argument of name \a name exists,
|
||||
//! otherwise false
|
||||
//! @param name the name of the requested argument
|
||||
inline bool existArgHelper( const std::string& name) const;
|
||||
|
||||
//! Read args as token value pair into map for better processing
|
||||
//! (Even the values remain strings until the parameter values is
|
||||
//! requested by the program.)
|
||||
//! @param argc the argument count (as given to 'main')
|
||||
//! @param argv the char* array containing the command line arguments
|
||||
void createArgsMaps( const int argc, const char** argv);
|
||||
|
||||
//! Helper for "casting" the strings from the map with the unprocessed
|
||||
//! values to the correct
|
||||
//! data type.
|
||||
//! @return true if conversion succeeded, otherwise false
|
||||
//! @param element the value as string
|
||||
//! @param val the value as type T
|
||||
template<class T>
|
||||
static inline bool convertToT( const std::string& element, T& val);
|
||||
|
||||
public:
|
||||
|
||||
// typedefs internal
|
||||
|
||||
//! container for a processed command line argument
|
||||
//! typeid is used to easily be able to decide if a re-requested token-value
|
||||
//! pair match the type of the first conversion
|
||||
typedef std::pair< const std::type_info*, void*> ValType;
|
||||
//! map of already converted values
|
||||
typedef std::map< std::string, ValType > ArgsMap;
|
||||
//! iterator for the map of already converted values
|
||||
typedef ArgsMap::iterator ArgsMapIter;
|
||||
typedef ArgsMap::const_iterator ConstArgsMapIter;
|
||||
|
||||
//! map of unprocessed (means unconverted) token-value pairs
|
||||
typedef std::map< std::string, std::string> UnpMap;
|
||||
//! iterator for the map of unprocessed (means unconverted) token-value pairs
|
||||
typedef std::map< std::string, std::string>::iterator UnpMapIter;
|
||||
|
||||
private:
|
||||
|
||||
#ifdef _WIN32
|
||||
# pragma warning( disable: 4251)
|
||||
#endif
|
||||
|
||||
//! rargc original value of argc
|
||||
static int rargc;
|
||||
|
||||
//! rargv contains command line arguments in raw format
|
||||
static char** rargv;
|
||||
|
||||
//! args Map containing the already converted token-value pairs
|
||||
ArgsMap args;
|
||||
|
||||
//! args Map containing the unprocessed / unconverted token-value pairs
|
||||
UnpMap unprocessed;
|
||||
|
||||
//! iter Iterator for the map with the already converted token-value
|
||||
//! pairs (to avoid frequent reallocation)
|
||||
ArgsMapIter iter;
|
||||
|
||||
//! iter Iterator for the map with the unconverted token-value
|
||||
//! pairs (to avoid frequent reallocation)
|
||||
UnpMapIter iter_unprocessed;
|
||||
|
||||
#ifdef _WIN32
|
||||
# pragma warning( default: 4251)
|
||||
#endif
|
||||
|
||||
private:
|
||||
|
||||
//! Constructor, copy (not implemented)
|
||||
CmdArgReader( const CmdArgReader&);
|
||||
|
||||
//! Assignment operator (not implemented)
|
||||
CmdArgReader& operator=( const CmdArgReader&);
|
||||
};
|
||||
|
||||
// variables, exported (extern)
|
||||
|
||||
// functions, inlined (inline)
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Conversion function for command line argument arrays
|
||||
//! @note This function is used each type for which no template specialization
|
||||
//! exist (which will cause errors if the type does not fulfill the std::vector
|
||||
//! interface).
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
template<class T>
|
||||
/*static*/ inline bool
|
||||
CmdArgReader::convertToT( const std::string& element, T& val)
|
||||
{
|
||||
// preallocate storage
|
||||
val.resize( std::count( element.begin(), element.end(), ',') + 1);
|
||||
|
||||
unsigned int i = 0;
|
||||
std::string::size_type pos_start = 1; // leave array prefix '['
|
||||
std::string::size_type pos_end = 0;
|
||||
|
||||
// do for all elements of the comma seperated list
|
||||
while( std::string::npos != ( pos_end = element.find(',', pos_end+1)) )
|
||||
{
|
||||
// convert each element by the appropriate function
|
||||
if ( ! convertToT< typename T::value_type >(
|
||||
std::string( element, pos_start, pos_end - pos_start), val[i]))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
pos_start = pos_end + 1;
|
||||
++i;
|
||||
}
|
||||
|
||||
std::string tmp1( element, pos_start, element.length() - pos_start - 1);
|
||||
|
||||
// process last element (leave array postfix ']')
|
||||
if ( ! convertToT< typename T::value_type >( std::string( element,
|
||||
pos_start,
|
||||
element.length() - pos_start - 1),
|
||||
val[i]))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
// possible to process all elements?
|
||||
return true;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Conversion function for command line arguments of type int
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
template<>
|
||||
inline bool
|
||||
CmdArgReader::convertToT<int>( const std::string& element, int& val)
|
||||
{
|
||||
std::istringstream ios( element);
|
||||
ios >> val;
|
||||
|
||||
bool ret_val = false;
|
||||
if ( ios.eof())
|
||||
{
|
||||
ret_val = true;
|
||||
}
|
||||
|
||||
return ret_val;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Conversion function for command line arguments of type float
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
template<>
|
||||
inline bool
|
||||
CmdArgReader::convertToT<float>( const std::string& element, float& val)
|
||||
{
|
||||
std::istringstream ios( element);
|
||||
ios >> val;
|
||||
|
||||
bool ret_val = false;
|
||||
if ( ios.eof())
|
||||
{
|
||||
ret_val = true;
|
||||
}
|
||||
|
||||
return ret_val;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Conversion function for command line arguments of type double
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
template<>
|
||||
inline bool
|
||||
CmdArgReader::convertToT<double>( const std::string& element, double& val)
|
||||
{
|
||||
std::istringstream ios( element);
|
||||
ios >> val;
|
||||
|
||||
bool ret_val = false;
|
||||
if ( ios.eof())
|
||||
{
|
||||
ret_val = true;
|
||||
}
|
||||
|
||||
return ret_val;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Conversion function for command line arguments of type string
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
template<>
|
||||
inline bool
|
||||
CmdArgReader::convertToT<std::string>( const std::string& element,
|
||||
std::string& val)
|
||||
{
|
||||
val = element;
|
||||
return true;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Conversion function for command line arguments of type bool
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
template<>
|
||||
inline bool
|
||||
CmdArgReader::convertToT<bool>( const std::string& element, bool& val)
|
||||
{
|
||||
// check if value is given as string-type { true | false }
|
||||
if ( "true" == element)
|
||||
{
|
||||
val = true;
|
||||
return true;
|
||||
}
|
||||
else if ( "false" == element)
|
||||
{
|
||||
val = false;
|
||||
return true;
|
||||
}
|
||||
// check if argument is given as integer { 0 | 1 }
|
||||
else
|
||||
{
|
||||
int tmp;
|
||||
if ( convertToT<int>( element, tmp))
|
||||
{
|
||||
if ( 1 == tmp)
|
||||
{
|
||||
val = true;
|
||||
return true;
|
||||
}
|
||||
else if ( 0 == tmp)
|
||||
{
|
||||
val = false;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Get the value of the command line argument with given name
|
||||
//! @return A const handle to the requested argument. If the argument does
|
||||
//! not exist or if it is not from type T NULL is returned
|
||||
//! @param T the type of the argument requested
|
||||
//! @param name the name of the requested argument
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
template<class T>
|
||||
/*static*/ const T*
|
||||
CmdArgReader::getArg( const std::string& name)
|
||||
{
|
||||
if( ! self)
|
||||
{
|
||||
RUNTIME_EXCEPTION("CmdArgReader::getArg(): CmdArgReader not initialized.");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return self->getArgHelper<T>( name);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Check if a command line argument with the given name exists
|
||||
//! @return true if a command line argument with name \a name exists,
|
||||
//! otherwise false
|
||||
//! @param name name of the command line argument in question
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/*static*/ inline bool
|
||||
CmdArgReader::existArg( const std::string& name)
|
||||
{
|
||||
if( ! self)
|
||||
{
|
||||
RUNTIME_EXCEPTION("CmdArgReader::getArg(): CmdArgReader not initialized.");
|
||||
return false;
|
||||
}
|
||||
|
||||
return self->existArgHelper( name);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! @brief Get the value of the command line argument with given name
|
||||
//! @return A const handle to the requested argument. If the argument does
|
||||
//! not exist or if it is not from type T NULL is returned
|
||||
//! @param T the type of the argument requested
|
||||
//! @param name the name of the requested argument
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
template<class T>
|
||||
const T*
|
||||
CmdArgReader::getArgHelper( const std::string& name)
|
||||
{
|
||||
// check if argument already processed and stored in correct type
|
||||
if ( args.end() != (iter = args.find( name)))
|
||||
{
|
||||
if ( (*(iter->second.first)) == typeid( T) )
|
||||
{
|
||||
return (T*) iter->second.second;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
T* tmp = new T;
|
||||
|
||||
// check the array with unprocessed values
|
||||
if ( unprocessed.end() != (iter_unprocessed = unprocessed.find( name)))
|
||||
{
|
||||
// try to "cast" the string to the type requested
|
||||
if ( convertToT< T >( iter_unprocessed->second, *tmp))
|
||||
{
|
||||
// add the token element pair to map of already converted values
|
||||
args[name] = std::make_pair( &(typeid( T)), (void*) tmp);
|
||||
|
||||
return tmp;
|
||||
}
|
||||
}
|
||||
|
||||
// not used while not inserted into the map -> cleanup
|
||||
delete tmp;
|
||||
}
|
||||
|
||||
// failed, argument not available
|
||||
return NULL;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Check if a command line argument with name \a name exists
|
||||
//! @return true if a command line argument of name \a name exists,
|
||||
//! otherwise false
|
||||
//! @param name the name of the requested argument
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
inline bool
|
||||
CmdArgReader::existArgHelper( const std::string& name) const
|
||||
{
|
||||
bool ret_val = false;
|
||||
|
||||
// check if argument already processed and stored in correct type
|
||||
if( args.end() != args.find( name))
|
||||
{
|
||||
ret_val = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
// check the array with unprocessed values
|
||||
if ( unprocessed.end() != unprocessed.find( name))
|
||||
{
|
||||
ret_val = true;
|
||||
}
|
||||
}
|
||||
|
||||
return ret_val;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Get the original / raw argc program argument
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/*static*/ inline int&
|
||||
CmdArgReader::getRArgc()
|
||||
{
|
||||
if( ! self)
|
||||
{
|
||||
RUNTIME_EXCEPTION("CmdArgReader::getRArgc(): CmdArgReader not initialized.");
|
||||
}
|
||||
|
||||
return rargc;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Get the original / raw argv program argument
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/*static*/ inline char**&
|
||||
CmdArgReader::getRArgv()
|
||||
{
|
||||
if( ! self)
|
||||
{
|
||||
RUNTIME_EXCEPTION("CmdArgReader::getRArgc(): CmdArgReader not initialized.");
|
||||
}
|
||||
|
||||
return rargv;
|
||||
}
|
||||
|
||||
// functions, exported (extern)
|
||||
|
||||
#endif // #ifndef _CMDARGREADER_H_
|
||||
151
tests/opencl/dotproduct/exception.h
Normal file
151
tests/opencl/dotproduct/exception.h
Normal file
@@ -0,0 +1,151 @@
|
||||
/*
|
||||
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Please refer to the NVIDIA end user license agreement (EULA) associated
|
||||
* with this source code for terms and conditions that govern your use of
|
||||
* this software. Any use, reproduction, disclosure, or distribution of
|
||||
* this software and related documentation outside the terms of the EULA
|
||||
* is strictly prohibited.
|
||||
*
|
||||
*/
|
||||
|
||||
/* CUda UTility Library */
|
||||
#ifndef _EXCEPTION_H_
|
||||
#define _EXCEPTION_H_
|
||||
|
||||
// includes, system
|
||||
#include <exception>
|
||||
#include <stdexcept>
|
||||
#include <iostream>
|
||||
#include <stdlib.h>
|
||||
|
||||
//! Exception wrapper.
|
||||
//! @param Std_Exception Exception out of namespace std for easy typing.
|
||||
template<class Std_Exception>
|
||||
class Exception : public Std_Exception
|
||||
{
|
||||
public:
|
||||
|
||||
//! @brief Static construction interface
|
||||
//! @return Alwayss throws ( Located_Exception<Exception>)
|
||||
//! @param file file in which the Exception occurs
|
||||
//! @param line line in which the Exception occurs
|
||||
//! @param detailed details on the code fragment causing the Exception
|
||||
static void throw_it( const char* file,
|
||||
const int line,
|
||||
const char* detailed = "-" );
|
||||
|
||||
//! Static construction interface
|
||||
//! @return Alwayss throws ( Located_Exception<Exception>)
|
||||
//! @param file file in which the Exception occurs
|
||||
//! @param line line in which the Exception occurs
|
||||
//! @param detailed details on the code fragment causing the Exception
|
||||
static void throw_it( const char* file,
|
||||
const int line,
|
||||
const std::string& detailed);
|
||||
|
||||
//! Destructor
|
||||
virtual ~Exception() throw();
|
||||
|
||||
private:
|
||||
|
||||
//! Constructor, default (private)
|
||||
Exception();
|
||||
|
||||
//! Constructor, standard
|
||||
//! @param str string returned by what()
|
||||
Exception( const std::string& str);
|
||||
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Exception handler function for arbitrary exceptions
|
||||
//! @param ex exception to handle
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Exception_Typ>
|
||||
inline void
|
||||
handleException( const Exception_Typ& ex)
|
||||
{
|
||||
std::cerr << ex.what() << std::endl;
|
||||
|
||||
exit( EXIT_FAILURE);
|
||||
}
|
||||
|
||||
//! Convenience macros
|
||||
|
||||
//! Exception caused by dynamic program behavior, e.g. file does not exist
|
||||
#define RUNTIME_EXCEPTION( msg) \
|
||||
Exception<std::runtime_error>::throw_it( __FILE__, __LINE__, msg)
|
||||
|
||||
//! Logic exception in program, e.g. an assert failed
|
||||
#define LOGIC_EXCEPTION( msg) \
|
||||
Exception<std::logic_error>::throw_it( __FILE__, __LINE__, msg)
|
||||
|
||||
//! Out of range exception
|
||||
#define RANGE_EXCEPTION( msg) \
|
||||
Exception<std::range_error>::throw_it( __FILE__, __LINE__, msg)
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Implementation
|
||||
|
||||
// includes, system
|
||||
#include <sstream>
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Static construction interface.
|
||||
//! @param Exception causing code fragment (file and line) and detailed infos.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/*static*/ template<class Std_Exception>
|
||||
void
|
||||
Exception<Std_Exception>::
|
||||
throw_it( const char* file, const int line, const char* detailed)
|
||||
{
|
||||
std::stringstream s;
|
||||
|
||||
// Quiet heavy-weight but exceptions are not for
|
||||
// performance / release versions
|
||||
s << "Exception in file '" << file << "' in line " << line << "\n"
|
||||
<< "Detailed description: " << detailed << "\n";
|
||||
|
||||
throw Exception( s.str());
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Static construction interface.
|
||||
//! @param Exception causing code fragment (file and line) and detailed infos.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/*static*/ template<class Std_Exception>
|
||||
void
|
||||
Exception<Std_Exception>::
|
||||
throw_it( const char* file, const int line, const std::string& msg)
|
||||
{
|
||||
throw_it( file, line, msg.c_str());
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Constructor, default (private).
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Std_Exception>
|
||||
Exception<Std_Exception>::Exception() :
|
||||
Exception("Unknown Exception.\n")
|
||||
{ }
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Constructor, standard (private).
|
||||
//! String returned by what().
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Std_Exception>
|
||||
Exception<Std_Exception>::Exception( const std::string& s) :
|
||||
Std_Exception( s)
|
||||
{ }
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Destructor
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Std_Exception>
|
||||
Exception<Std_Exception>::~Exception() throw() { }
|
||||
|
||||
// functions, exported
|
||||
|
||||
#endif // #ifndef _EXCEPTION_H_
|
||||
|
||||
22
tests/opencl/dotproduct/kernel.cl
Normal file
22
tests/opencl/dotproduct/kernel.cl
Normal file
@@ -0,0 +1,22 @@
|
||||
__kernel void DotProduct (__global float* a, __global float* b, __global float* c, int iNumElements)
|
||||
{
|
||||
// find position in global arrays
|
||||
int iGID = get_global_id(0);
|
||||
|
||||
// bound check (equivalent to the limit on a 'for' loop for standard/serial C code
|
||||
//printf("%d, %d\n", iGID, iNumElements);
|
||||
if (iGID >= iNumElements)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// process
|
||||
int iInOffset = iGID << 2;
|
||||
c[iGID] = a[iInOffset] * b[iInOffset]
|
||||
+ a[iInOffset + 1] * b[iInOffset + 1]
|
||||
+ a[iInOffset + 2] * b[iInOffset + 2]
|
||||
+ a[iInOffset + 3] * b[iInOffset + 3];
|
||||
//float cc = c[iGID];
|
||||
|
||||
//printf("c[%d]=%f\n", iGID, cc);
|
||||
}
|
||||
267
tests/opencl/dotproduct/main.cc
Normal file
267
tests/opencl/dotproduct/main.cc
Normal file
@@ -0,0 +1,267 @@
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/*
|
||||
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Please refer to the NVIDIA end user license agreement (EULA) associated
|
||||
* with this source code for terms and conditions that govern your use of
|
||||
* this software. Any use, reproduction, disclosure, or distribution of
|
||||
* this software and related documentation outside the terms of the EULA
|
||||
* is strictly prohibited.
|
||||
*
|
||||
*/
|
||||
|
||||
// *********************************************************************
|
||||
// oclDotProduct Notes:
|
||||
//
|
||||
// A simple OpenCL API demo application that implements a
|
||||
// vector dot product computation between 2 float arrays.
|
||||
//
|
||||
// Runs computations with OpenCL on the GPU device and then checks results
|
||||
// against basic host CPU/C++ computation.
|
||||
//
|
||||
// Uses 'shr' and 'ocl' functions from oclUtils and shrUtils libraries for compactness.
|
||||
// But these are NOT required libs for OpenCL developement in general.
|
||||
// *********************************************************************
|
||||
|
||||
// standard utilities and systems includes
|
||||
|
||||
#include "oclUtils.h"
|
||||
#include "shrQATest.h"
|
||||
|
||||
// Name of the file with the source code for the computation kernel
|
||||
// *********************************************************************
|
||||
const char* cSourceFile = "kernel.pocl";
|
||||
|
||||
// Host buffers for demo
|
||||
// *********************************************************************
|
||||
void *srcA, *srcB, *dst; // Host buffers for OpenCL test
|
||||
void* Golden; // Host buffer for host golden processing cross check
|
||||
|
||||
// OpenCL Vars
|
||||
cl_platform_id cpPlatform; // OpenCL platform
|
||||
cl_device_id *cdDevices; // OpenCL device
|
||||
cl_context cxGPUContext; // OpenCL context
|
||||
cl_command_queue cqCommandQueue;// OpenCL command que
|
||||
cl_program program; // OpenCL program
|
||||
cl_kernel ckKernel; // OpenCL kernel
|
||||
cl_mem cmDevSrcA; // OpenCL device source buffer A
|
||||
cl_mem cmDevSrcB; // OpenCL device source buffer B
|
||||
cl_mem cmDevDst; // OpenCL device destination buffer
|
||||
size_t szGlobalWorkSize; // Total # of work items in the 1D range
|
||||
size_t szLocalWorkSize; // # of work items in the 1D work group
|
||||
size_t szParmDataBytes; // Byte size of context information
|
||||
size_t szKernelLength; // Byte size of kernel code
|
||||
cl_int ciErrNum; // Error code var
|
||||
char* cPathAndName = NULL; // var for full paths to data, src, etc.
|
||||
char* cSourceCL = NULL; // Buffer to hold source for compilation
|
||||
const char* cExecutableName = NULL;
|
||||
|
||||
// demo config vars
|
||||
int iNumElements= 1024; // Length of float arrays to process (odd # for illustration)
|
||||
shrBOOL bNoPrompt = shrFALSE;
|
||||
|
||||
// Forward Declarations
|
||||
// *********************************************************************
|
||||
void DotProductHost(const float* pfData1, const float* pfData2, float* pfResult, int iNumElements);
|
||||
void Cleanup (int iExitCode);
|
||||
void (*pCleanup)(int) = &Cleanup;
|
||||
|
||||
int *gp_argc = NULL;
|
||||
char ***gp_argv = NULL;
|
||||
|
||||
// Main function
|
||||
// *********************************************************************
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
gp_argc = &argc;
|
||||
gp_argv = &argv;
|
||||
|
||||
shrQAStart(argc, argv);
|
||||
|
||||
cl_uint uiNumComputeUnits;
|
||||
|
||||
ciErrNum = clGetPlatformIDs(1, &cpPlatform, NULL);
|
||||
oclCheckErrorEX(ciErrNum, CL_SUCCESS, NULL);
|
||||
|
||||
cl_uint uiNumDevices = 1;
|
||||
cdDevices = (cl_device_id *)malloc(uiNumDevices * sizeof(cl_device_id));
|
||||
cl_uint uiTargetDevice = 0;
|
||||
ciErrNum = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_DEFAULT, 1, &cdDevices[uiTargetDevice], NULL);
|
||||
oclCheckErrorEX(ciErrNum, CL_SUCCESS, NULL);
|
||||
|
||||
|
||||
// Get command line device options and config accordingly
|
||||
shrLog(" # of Devices Available = %u\n", uiNumDevices);
|
||||
if(shrGetCmdLineArgumentu(argc, (const char**)argv, "device", &uiTargetDevice)== shrTRUE)
|
||||
{
|
||||
uiTargetDevice = CLAMP(uiTargetDevice, 0, (uiNumDevices - 1));
|
||||
}
|
||||
shrLog(" Using Device %u: ", uiTargetDevice);
|
||||
oclPrintDevName(LOGBOTH, cdDevices[uiTargetDevice]);
|
||||
ciErrNum = clGetDeviceInfo(cdDevices[uiTargetDevice], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(uiNumComputeUnits), &uiNumComputeUnits, NULL);
|
||||
oclCheckErrorEX(ciErrNum, CL_SUCCESS, NULL);
|
||||
shrLog("\n # of Compute Units = %u\n", uiNumComputeUnits);
|
||||
|
||||
// get command line arg for quick test, if provided
|
||||
bNoPrompt = shrCheckCmdLineFlag(argc, (const char**)argv, "noprompt");
|
||||
|
||||
// start logs
|
||||
cExecutableName = argv[0];
|
||||
shrSetLogFileName ("oclDotProduct.txt");
|
||||
shrLog("%s Starting...\n\n# of float elements per Array \t= %u\n", argv[0], iNumElements);
|
||||
|
||||
// set and log Global and Local work size dimensions
|
||||
szLocalWorkSize = 16;
|
||||
szGlobalWorkSize = shrRoundUp((int)szLocalWorkSize, iNumElements); // rounded up to the nearest multiple of the LocalWorkSize
|
||||
shrLog("Global Work Size \t\t= %u\nLocal Work Size \t\t= %u\n# of Work Groups \t\t= %u\n\n",
|
||||
szGlobalWorkSize, szLocalWorkSize, (szGlobalWorkSize % szLocalWorkSize + szGlobalWorkSize/szLocalWorkSize));
|
||||
|
||||
// Allocate and initialize host arrays
|
||||
shrLog( "Allocate and Init Host Mem...\n");
|
||||
srcA = (void *)malloc(sizeof(cl_float4) * szGlobalWorkSize);
|
||||
srcB = (void *)malloc(sizeof(cl_float4) * szGlobalWorkSize);
|
||||
dst = (void *)malloc(sizeof(cl_float) * szGlobalWorkSize);
|
||||
Golden = (void *)malloc(sizeof(cl_float) * iNumElements);
|
||||
shrFillArray((float*)srcA, 4 * iNumElements);
|
||||
shrFillArray((float*)srcB, 4 * iNumElements);
|
||||
|
||||
// Get the NVIDIA platform
|
||||
ciErrNum = oclGetPlatformID(&cpPlatform);
|
||||
oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup);
|
||||
|
||||
// Get a GPU device
|
||||
ciErrNum = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_DEFAULT, 1, &cdDevices[uiTargetDevice], NULL);
|
||||
oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup);
|
||||
|
||||
// Create the context
|
||||
cxGPUContext = clCreateContext(0, 1, &cdDevices[uiTargetDevice], NULL, NULL, &ciErrNum);
|
||||
oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup);
|
||||
|
||||
// Create a command-queue
|
||||
shrLog("clCreateCommandQueue...\n");
|
||||
cqCommandQueue = clCreateCommandQueue(cxGPUContext, cdDevices[uiTargetDevice], 0, &ciErrNum);
|
||||
oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup);
|
||||
|
||||
// Allocate the OpenCL buffer memory objects for source and result on the device GMEM
|
||||
shrLog("clCreateBuffer (SrcA, SrcB and Dst in Device GMEM)...\n");
|
||||
cmDevSrcA = clCreateBuffer(cxGPUContext, CL_MEM_READ_ONLY, sizeof(cl_float) * szGlobalWorkSize * 4, NULL, &ciErrNum);
|
||||
oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup);
|
||||
cmDevSrcB = clCreateBuffer(cxGPUContext, CL_MEM_READ_ONLY, sizeof(cl_float) * szGlobalWorkSize * 4, NULL, &ciErrNum);
|
||||
oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup);
|
||||
cmDevDst = clCreateBuffer(cxGPUContext, CL_MEM_WRITE_ONLY, sizeof(cl_float) * szGlobalWorkSize, NULL, &ciErrNum);
|
||||
oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup);
|
||||
|
||||
// Read the OpenCL kernel in from source file
|
||||
shrLog("oclLoadProgSource (%s)...\n", cSourceFile);
|
||||
cPathAndName = shrFindFilePath(cSourceFile, argv[0]);
|
||||
oclCheckErrorEX(cPathAndName != NULL, shrTRUE, pCleanup);
|
||||
cSourceCL = oclLoadProgSource(cPathAndName, "", &szKernelLength);
|
||||
oclCheckErrorEX(cSourceCL != NULL, shrTRUE, pCleanup);
|
||||
|
||||
// Create the program
|
||||
shrLog("clCreateProgramWithSource...\n");
|
||||
cl_int binary_status;
|
||||
cl_program program =
|
||||
clCreateProgramWithBinary(cxGPUContext, 1, cdDevices, &szKernelLength, (const uint8_t**)&cSourceCL, &binary_status, &ciErrNum);
|
||||
oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup);
|
||||
// Build the program with 'mad' Optimization option
|
||||
#ifdef MAC
|
||||
char* flags = "-cl-fast-relaxed-math -DMAC";
|
||||
#else
|
||||
char* flags = "-cl-fast-relaxed-math";
|
||||
#endif
|
||||
shrLog("clBuildProgram...\n");
|
||||
ciErrNum = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
|
||||
if (ciErrNum != CL_SUCCESS)
|
||||
{
|
||||
// write out standard error, Build Log and PTX, then cleanup and exit
|
||||
shrLogEx(LOGBOTH | ERRORMSG, ciErrNum, STDERROR);
|
||||
oclLogBuildInfo(program, oclGetFirstDev(cxGPUContext));
|
||||
oclLogPtx(program, oclGetFirstDev(cxGPUContext), "oclDotProduct.ptx");
|
||||
Cleanup(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
// Create the kernel
|
||||
shrLog("clCreateKernel (nDotProduct)...\n");
|
||||
ckKernel = clCreateKernel(program, "DotProduct", &ciErrNum);
|
||||
oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup);
|
||||
|
||||
// Set the Argument values
|
||||
shrLog("clSetKernelArg 0 - 3...\n\n");
|
||||
ciErrNum = clSetKernelArg(ckKernel, 0, sizeof(cl_mem), (void*)&cmDevSrcA);
|
||||
ciErrNum |= clSetKernelArg(ckKernel, 1, sizeof(cl_mem), (void*)&cmDevSrcB);
|
||||
ciErrNum |= clSetKernelArg(ckKernel, 2, sizeof(cl_mem), (void*)&cmDevDst);
|
||||
ciErrNum |= clSetKernelArg(ckKernel, 3, sizeof(cl_int), (void*)&iNumElements);
|
||||
oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup);
|
||||
|
||||
// --------------------------------------------------------
|
||||
// Core sequence... copy input data to GPU, compute, copy results back
|
||||
|
||||
// Asynchronous write of data to GPU device
|
||||
shrLog("clEnqueueWriteBuffer (SrcA and SrcB)...\n");
|
||||
ciErrNum = clEnqueueWriteBuffer(cqCommandQueue, cmDevSrcA, CL_FALSE, 0, sizeof(cl_float) * szGlobalWorkSize * 4, srcA, 0, NULL, NULL);
|
||||
ciErrNum |= clEnqueueWriteBuffer(cqCommandQueue, cmDevSrcB, CL_FALSE, 0, sizeof(cl_float) * szGlobalWorkSize * 4, srcB, 0, NULL, NULL);
|
||||
oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup);
|
||||
|
||||
// Launch kernel
|
||||
shrLog("clEnqueueNDRangeKernel (DotProduct)...\n");
|
||||
ciErrNum = clEnqueueNDRangeKernel(cqCommandQueue, ckKernel, 1, NULL, &szGlobalWorkSize, &szLocalWorkSize, 0, NULL, NULL);
|
||||
oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup);
|
||||
|
||||
// Read back results and check accumulated errors
|
||||
shrLog("clEnqueueReadBuffer (Dst)...\n\n");
|
||||
ciErrNum = clEnqueueReadBuffer(cqCommandQueue, cmDevDst, CL_TRUE, 0, sizeof(cl_float) * szGlobalWorkSize, dst, 0, NULL, NULL);
|
||||
oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup);
|
||||
|
||||
// Compute and compare results for golden-host and report errors and pass/fail
|
||||
shrLog("Comparing against Host/C++ computation...\n\n");
|
||||
DotProductHost ((const float*)srcA, (const float*)srcB, (float*)Golden, iNumElements);
|
||||
shrBOOL bMatch = shrComparefet((const float*)Golden, (const float*)dst, (unsigned int)iNumElements, 0.0f, 0);
|
||||
|
||||
// Cleanup and leave
|
||||
Cleanup (EXIT_SUCCESS);
|
||||
|
||||
return (bMatch == shrTRUE) ? 0 : 1;
|
||||
}
|
||||
|
||||
// "Golden" Host processing dot product function for comparison purposes
|
||||
// *********************************************************************
|
||||
void DotProductHost(const float* pfData1, const float* pfData2, float* pfResult, int iNumElements)
|
||||
{
|
||||
int i, j, k;
|
||||
for (i = 0, j = 0; i < iNumElements; i++)
|
||||
{
|
||||
pfResult[i] = 0.0f;
|
||||
for (k = 0; k < 4; k++, j++)
|
||||
{
|
||||
pfResult[i] += pfData1[j] * pfData2[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Cleanup and exit code
|
||||
// *********************************************************************
|
||||
void Cleanup(int iExitCode)
|
||||
{
|
||||
// Cleanup allocated objects
|
||||
shrLog("Starting Cleanup...\n\n");
|
||||
if(cPathAndName)free(cPathAndName);
|
||||
if(cSourceCL)free(cSourceCL);
|
||||
if(ckKernel)clReleaseKernel(ckKernel);
|
||||
if(program)clReleaseProgram(program);
|
||||
if(cqCommandQueue)clReleaseCommandQueue(cqCommandQueue);
|
||||
if(cxGPUContext)clReleaseContext(cxGPUContext);
|
||||
if (cmDevSrcA)clReleaseMemObject(cmDevSrcA);
|
||||
if (cmDevSrcB)clReleaseMemObject(cmDevSrcB);
|
||||
if (cmDevDst)clReleaseMemObject(cmDevDst);
|
||||
|
||||
// Free host memory
|
||||
free(srcA);
|
||||
free(srcB);
|
||||
free (dst);
|
||||
free(Golden);
|
||||
|
||||
if (cdDevices) free(cdDevices);
|
||||
|
||||
shrQAFinishExit(*gp_argc, (const char **)*gp_argv, (iExitCode == EXIT_SUCCESS) ? QA_PASSED : QA_FAILED);
|
||||
}
|
||||
806
tests/opencl/dotproduct/oclUtils.cpp
Normal file
806
tests/opencl/dotproduct/oclUtils.cpp
Normal file
@@ -0,0 +1,806 @@
|
||||
/*
|
||||
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Please refer to the NVIDIA end user license agreement (EULA) associated
|
||||
* with this source code for terms and conditions that govern your use of
|
||||
* this software. Any use, reproduction, disclosure, or distribution of
|
||||
* this software and related documentation outside the terms of the EULA
|
||||
* is strictly prohibited.
|
||||
*
|
||||
*/
|
||||
|
||||
// *********************************************************************
|
||||
// Utilities specific to OpenCL samples in NVIDIA GPU Computing SDK
|
||||
// *********************************************************************
|
||||
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
#include <stdarg.h>
|
||||
#include "oclUtils.h"
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Gets the platform ID for NVIDIA if available, otherwise default
|
||||
//!
|
||||
//! @return the id
|
||||
//! @param clSelectedPlatformID OpenCL platoform ID
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
cl_int oclGetPlatformID(cl_platform_id* clSelectedPlatformID)
|
||||
{
|
||||
char chBuffer[1024];
|
||||
cl_uint num_platforms;
|
||||
cl_platform_id* clPlatformIDs;
|
||||
cl_int ciErrNum;
|
||||
*clSelectedPlatformID = NULL;
|
||||
|
||||
// Get OpenCL platform count
|
||||
ciErrNum = clGetPlatformIDs (0, NULL, &num_platforms);
|
||||
if (ciErrNum != CL_SUCCESS)
|
||||
{
|
||||
shrLog(" Error %i in clGetPlatformIDs Call !!!\n\n", ciErrNum);
|
||||
return -1000;
|
||||
}
|
||||
else
|
||||
{
|
||||
if(num_platforms == 0)
|
||||
{
|
||||
shrLog("No OpenCL platform found!\n\n");
|
||||
return -2000;
|
||||
}
|
||||
else
|
||||
{
|
||||
// if there's a platform or more, make space for ID's
|
||||
if ((clPlatformIDs = (cl_platform_id*)malloc(num_platforms * sizeof(cl_platform_id))) == NULL)
|
||||
{
|
||||
shrLog("Failed to allocate memory for cl_platform ID's!\n\n");
|
||||
return -3000;
|
||||
}
|
||||
|
||||
// get platform info for each platform and trap the NVIDIA platform if found
|
||||
ciErrNum = clGetPlatformIDs (num_platforms, clPlatformIDs, NULL);
|
||||
for(cl_uint i = 0; i < num_platforms; ++i)
|
||||
{
|
||||
ciErrNum = clGetPlatformInfo (clPlatformIDs[i], CL_PLATFORM_NAME, 1024, &chBuffer, NULL);
|
||||
if(ciErrNum == CL_SUCCESS)
|
||||
{
|
||||
if(strstr(chBuffer, "NVIDIA") != NULL)
|
||||
{
|
||||
*clSelectedPlatformID = clPlatformIDs[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// default to zeroeth platform if NVIDIA not found
|
||||
if(*clSelectedPlatformID == NULL)
|
||||
{
|
||||
shrLog("WARNING: NVIDIA OpenCL platform not found - defaulting to first platform!\n\n");
|
||||
*clSelectedPlatformID = clPlatformIDs[0];
|
||||
}
|
||||
|
||||
free(clPlatformIDs);
|
||||
}
|
||||
}
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Print the device name
|
||||
//!
|
||||
//! @param iLogMode enum LOGBOTH, LOGCONSOLE, LOGFILE
|
||||
//! @param device OpenCL id of the device
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
void oclPrintDevName(int iLogMode, cl_device_id device)
|
||||
{
|
||||
char device_string[1024];
|
||||
clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_string), &device_string, NULL);
|
||||
shrLogEx(iLogMode, 0, "%s", device_string);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Print info about the device
|
||||
//!
|
||||
//! @param iLogMode enum LOGBOTH, LOGCONSOLE, LOGFILE
|
||||
//! @param device OpenCL id of the device
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
void oclPrintDevInfo(int iLogMode, cl_device_id device)
|
||||
{
|
||||
char device_string[1024];
|
||||
bool nv_device_attibute_query = false;
|
||||
|
||||
// CL_DEVICE_NAME
|
||||
clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_string), &device_string, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_NAME: \t\t\t%s\n", device_string);
|
||||
|
||||
// CL_DEVICE_VENDOR
|
||||
clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(device_string), &device_string, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_VENDOR: \t\t\t%s\n", device_string);
|
||||
|
||||
// CL_DRIVER_VERSION
|
||||
clGetDeviceInfo(device, CL_DRIVER_VERSION, sizeof(device_string), &device_string, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DRIVER_VERSION: \t\t\t%s\n", device_string);
|
||||
|
||||
// CL_DEVICE_VERSION
|
||||
clGetDeviceInfo(device, CL_DEVICE_VERSION, sizeof(device_string), &device_string, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_VERSION: \t\t\t%s\n", device_string);
|
||||
|
||||
// CL_DEVICE_OPENCL_C_VERSION (if CL_DEVICE_VERSION version > 1.0)
|
||||
if(strncmp("OpenCL 1.0", device_string, 10) != 0)
|
||||
{
|
||||
// This code is unused for devices reporting OpenCL 1.0, but a def is needed anyway to allow compilation using v 1.0 headers
|
||||
// This constant isn't #defined in 1.0
|
||||
#ifndef CL_DEVICE_OPENCL_C_VERSION
|
||||
#define CL_DEVICE_OPENCL_C_VERSION 0x103D
|
||||
#endif
|
||||
|
||||
clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION, sizeof(device_string), &device_string, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_OPENCL_C_VERSION: \t\t%s\n", device_string);
|
||||
}
|
||||
|
||||
// CL_DEVICE_TYPE
|
||||
cl_device_type type;
|
||||
clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(type), &type, NULL);
|
||||
if( type & CL_DEVICE_TYPE_CPU )
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_CPU");
|
||||
if( type & CL_DEVICE_TYPE_GPU )
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_GPU");
|
||||
if( type & CL_DEVICE_TYPE_ACCELERATOR )
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_ACCELERATOR");
|
||||
if( type & CL_DEVICE_TYPE_DEFAULT )
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_DEFAULT");
|
||||
|
||||
// CL_DEVICE_MAX_COMPUTE_UNITS
|
||||
cl_uint compute_units;
|
||||
clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(compute_units), &compute_units, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_MAX_COMPUTE_UNITS:\t\t%u\n", compute_units);
|
||||
|
||||
// CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS
|
||||
size_t workitem_dims;
|
||||
clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(workitem_dims), &workitem_dims, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS:\t%u\n", workitem_dims);
|
||||
|
||||
// CL_DEVICE_MAX_WORK_ITEM_SIZES
|
||||
size_t workitem_size[3];
|
||||
clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(workitem_size), &workitem_size, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_MAX_WORK_ITEM_SIZES:\t%u / %u / %u \n", workitem_size[0], workitem_size[1], workitem_size[2]);
|
||||
|
||||
// CL_DEVICE_MAX_WORK_GROUP_SIZE
|
||||
size_t workgroup_size;
|
||||
clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(workgroup_size), &workgroup_size, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_MAX_WORK_GROUP_SIZE:\t%u\n", workgroup_size);
|
||||
|
||||
// CL_DEVICE_MAX_CLOCK_FREQUENCY
|
||||
cl_uint clock_frequency;
|
||||
clGetDeviceInfo(device, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clock_frequency), &clock_frequency, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_MAX_CLOCK_FREQUENCY:\t%u MHz\n", clock_frequency);
|
||||
|
||||
// CL_DEVICE_ADDRESS_BITS
|
||||
cl_uint addr_bits;
|
||||
clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(addr_bits), &addr_bits, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_ADDRESS_BITS:\t\t%u\n", addr_bits);
|
||||
|
||||
// CL_DEVICE_MAX_MEM_ALLOC_SIZE
|
||||
cl_ulong max_mem_alloc_size;
|
||||
clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(max_mem_alloc_size), &max_mem_alloc_size, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_MAX_MEM_ALLOC_SIZE:\t\t%u MByte\n", (unsigned int)(max_mem_alloc_size / (1024 * 1024)));
|
||||
|
||||
// CL_DEVICE_GLOBAL_MEM_SIZE
|
||||
cl_ulong mem_size;
|
||||
clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(mem_size), &mem_size, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_GLOBAL_MEM_SIZE:\t\t%u MByte\n", (unsigned int)(mem_size / (1024 * 1024)));
|
||||
|
||||
// CL_DEVICE_ERROR_CORRECTION_SUPPORT
|
||||
cl_bool error_correction_support;
|
||||
clGetDeviceInfo(device, CL_DEVICE_ERROR_CORRECTION_SUPPORT, sizeof(error_correction_support), &error_correction_support, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_ERROR_CORRECTION_SUPPORT:\t%s\n", error_correction_support == CL_TRUE ? "yes" : "no");
|
||||
|
||||
// CL_DEVICE_LOCAL_MEM_TYPE
|
||||
cl_device_local_mem_type local_mem_type;
|
||||
clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_TYPE, sizeof(local_mem_type), &local_mem_type, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_LOCAL_MEM_TYPE:\t\t%s\n", local_mem_type == 1 ? "local" : "global");
|
||||
|
||||
// CL_DEVICE_LOCAL_MEM_SIZE
|
||||
clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(mem_size), &mem_size, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_LOCAL_MEM_SIZE:\t\t%u KByte\n", (unsigned int)(mem_size / 1024));
|
||||
|
||||
// CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE
|
||||
clGetDeviceInfo(device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(mem_size), &mem_size, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE:\t%u KByte\n", (unsigned int)(mem_size / 1024));
|
||||
|
||||
// CL_DEVICE_QUEUE_PROPERTIES
|
||||
cl_command_queue_properties queue_properties;
|
||||
clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES, sizeof(queue_properties), &queue_properties, NULL);
|
||||
if( queue_properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE )
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_QUEUE_PROPERTIES:\t\t%s\n", "CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE");
|
||||
if( queue_properties & CL_QUEUE_PROFILING_ENABLE )
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_QUEUE_PROPERTIES:\t\t%s\n", "CL_QUEUE_PROFILING_ENABLE");
|
||||
|
||||
// CL_DEVICE_IMAGE_SUPPORT
|
||||
cl_bool image_support;
|
||||
clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(image_support), &image_support, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_IMAGE_SUPPORT:\t\t%u\n", image_support);
|
||||
|
||||
// CL_DEVICE_MAX_READ_IMAGE_ARGS
|
||||
cl_uint max_read_image_args;
|
||||
clGetDeviceInfo(device, CL_DEVICE_MAX_READ_IMAGE_ARGS, sizeof(max_read_image_args), &max_read_image_args, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_MAX_READ_IMAGE_ARGS:\t%u\n", max_read_image_args);
|
||||
|
||||
// CL_DEVICE_MAX_WRITE_IMAGE_ARGS
|
||||
cl_uint max_write_image_args;
|
||||
clGetDeviceInfo(device, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, sizeof(max_write_image_args), &max_write_image_args, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_MAX_WRITE_IMAGE_ARGS:\t%u\n", max_write_image_args);
|
||||
|
||||
// CL_DEVICE_SINGLE_FP_CONFIG
|
||||
cl_device_fp_config fp_config;
|
||||
clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(cl_device_fp_config), &fp_config, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_SINGLE_FP_CONFIG:\t\t%s%s%s%s%s%s\n",
|
||||
fp_config & CL_FP_DENORM ? "denorms " : "",
|
||||
fp_config & CL_FP_INF_NAN ? "INF-quietNaNs " : "",
|
||||
fp_config & CL_FP_ROUND_TO_NEAREST ? "round-to-nearest " : "",
|
||||
fp_config & CL_FP_ROUND_TO_ZERO ? "round-to-zero " : "",
|
||||
fp_config & CL_FP_ROUND_TO_INF ? "round-to-inf " : "",
|
||||
fp_config & CL_FP_FMA ? "fma " : "");
|
||||
|
||||
// CL_DEVICE_IMAGE2D_MAX_WIDTH, CL_DEVICE_IMAGE2D_MAX_HEIGHT, CL_DEVICE_IMAGE3D_MAX_WIDTH, CL_DEVICE_IMAGE3D_MAX_HEIGHT, CL_DEVICE_IMAGE3D_MAX_DEPTH
|
||||
size_t szMaxDims[5];
|
||||
shrLogEx(iLogMode, 0, "\n CL_DEVICE_IMAGE <dim>");
|
||||
clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t), &szMaxDims[0], NULL);
|
||||
shrLogEx(iLogMode, 0, "\t\t\t2D_MAX_WIDTH\t %u\n", szMaxDims[0]);
|
||||
clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t), &szMaxDims[1], NULL);
|
||||
shrLogEx(iLogMode, 0, "\t\t\t\t\t2D_MAX_HEIGHT\t %u\n", szMaxDims[1]);
|
||||
clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(size_t), &szMaxDims[2], NULL);
|
||||
shrLogEx(iLogMode, 0, "\t\t\t\t\t3D_MAX_WIDTH\t %u\n", szMaxDims[2]);
|
||||
clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(size_t), &szMaxDims[3], NULL);
|
||||
shrLogEx(iLogMode, 0, "\t\t\t\t\t3D_MAX_HEIGHT\t %u\n", szMaxDims[3]);
|
||||
clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(size_t), &szMaxDims[4], NULL);
|
||||
shrLogEx(iLogMode, 0, "\t\t\t\t\t3D_MAX_DEPTH\t %u\n", szMaxDims[4]);
|
||||
|
||||
// CL_DEVICE_EXTENSIONS: get device extensions, and if any then parse & log the string onto separate lines
|
||||
clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, sizeof(device_string), &device_string, NULL);
|
||||
if (device_string != 0)
|
||||
{
|
||||
shrLogEx(iLogMode, 0, "\n CL_DEVICE_EXTENSIONS:");
|
||||
std::string stdDevString;
|
||||
stdDevString = std::string(device_string);
|
||||
size_t szOldPos = 0;
|
||||
size_t szSpacePos = stdDevString.find(' ', szOldPos); // extensions string is space delimited
|
||||
while (szSpacePos != stdDevString.npos)
|
||||
{
|
||||
if( strcmp("cl_nv_device_attribute_query", stdDevString.substr(szOldPos, szSpacePos - szOldPos).c_str()) == 0 )
|
||||
nv_device_attibute_query = true;
|
||||
|
||||
if (szOldPos > 0)
|
||||
{
|
||||
shrLogEx(iLogMode, 0, "\t\t");
|
||||
}
|
||||
shrLogEx(iLogMode, 0, "\t\t\t%s\n", stdDevString.substr(szOldPos, szSpacePos - szOldPos).c_str());
|
||||
|
||||
do {
|
||||
szOldPos = szSpacePos + 1;
|
||||
szSpacePos = stdDevString.find(' ', szOldPos);
|
||||
} while (szSpacePos == szOldPos);
|
||||
}
|
||||
shrLogEx(iLogMode, 0, "\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_EXTENSIONS: None\n");
|
||||
}
|
||||
|
||||
if(nv_device_attibute_query)
|
||||
{
|
||||
cl_uint compute_capability_major, compute_capability_minor;
|
||||
clGetDeviceInfo(device, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, sizeof(cl_uint), &compute_capability_major, NULL);
|
||||
clGetDeviceInfo(device, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, sizeof(cl_uint), &compute_capability_minor, NULL);
|
||||
shrLogEx(iLogMode, 0, "\n CL_DEVICE_COMPUTE_CAPABILITY_NV:\t%u.%u\n", compute_capability_major, compute_capability_minor);
|
||||
|
||||
shrLogEx(iLogMode, 0, " NUMBER OF MULTIPROCESSORS:\t\t%u\n", compute_units); // this is the same value reported by CL_DEVICE_MAX_COMPUTE_UNITS
|
||||
shrLogEx(iLogMode, 0, " NUMBER OF CUDA CORES:\t\t\t%u\n", ConvertSMVer2Cores(compute_capability_major, compute_capability_minor) * compute_units);
|
||||
|
||||
cl_uint regs_per_block;
|
||||
clGetDeviceInfo(device, CL_DEVICE_REGISTERS_PER_BLOCK_NV, sizeof(cl_uint), ®s_per_block, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_REGISTERS_PER_BLOCK_NV:\t%u\n", regs_per_block);
|
||||
|
||||
cl_uint warp_size;
|
||||
clGetDeviceInfo(device, CL_DEVICE_WARP_SIZE_NV, sizeof(cl_uint), &warp_size, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_WARP_SIZE_NV:\t\t%u\n", warp_size);
|
||||
|
||||
cl_bool gpu_overlap;
|
||||
clGetDeviceInfo(device, CL_DEVICE_GPU_OVERLAP_NV, sizeof(cl_bool), &gpu_overlap, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_GPU_OVERLAP_NV:\t\t%s\n", gpu_overlap == CL_TRUE ? "CL_TRUE" : "CL_FALSE");
|
||||
|
||||
cl_bool exec_timeout;
|
||||
clGetDeviceInfo(device, CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV, sizeof(cl_bool), &exec_timeout, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV:\t%s\n", exec_timeout == CL_TRUE ? "CL_TRUE" : "CL_FALSE");
|
||||
|
||||
cl_bool integrated_memory;
|
||||
clGetDeviceInfo(device, CL_DEVICE_INTEGRATED_MEMORY_NV, sizeof(cl_bool), &integrated_memory, NULL);
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_INTEGRATED_MEMORY_NV:\t%s\n", integrated_memory == CL_TRUE ? "CL_TRUE" : "CL_FALSE");
|
||||
}
|
||||
|
||||
// CL_DEVICE_PREFERRED_VECTOR_WIDTH_<type>
|
||||
shrLogEx(iLogMode, 0, " CL_DEVICE_PREFERRED_VECTOR_WIDTH_<t>\t");
|
||||
cl_uint vec_width [6];
|
||||
clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, sizeof(cl_uint), &vec_width[0], NULL);
|
||||
clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, sizeof(cl_uint), &vec_width[1], NULL);
|
||||
clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), &vec_width[2], NULL);
|
||||
clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, sizeof(cl_uint), &vec_width[3], NULL);
|
||||
clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, sizeof(cl_uint), &vec_width[4], NULL);
|
||||
clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, sizeof(cl_uint), &vec_width[5], NULL);
|
||||
shrLogEx(iLogMode, 0, "CHAR %u, SHORT %u, INT %u, LONG %u, FLOAT %u, DOUBLE %u\n\n\n",
|
||||
vec_width[0], vec_width[1], vec_width[2], vec_width[3], vec_width[4], vec_width[5]);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Get and return device capability
|
||||
//!
|
||||
//! @return the 2 digit integer representation of device Cap (major minor). return -1 if NA
|
||||
//! @param device OpenCL id of the device
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
int oclGetDevCap(cl_device_id device)
|
||||
{
|
||||
char cDevString[1024];
|
||||
bool bDevAttributeQuery = false;
|
||||
int iDevArch = -1;
|
||||
|
||||
// Get device extensions, and if any then search for cl_nv_device_attribute_query
|
||||
clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, sizeof(cDevString), &cDevString, NULL);
|
||||
if (cDevString != 0)
|
||||
{
|
||||
std::string stdDevString;
|
||||
stdDevString = std::string(cDevString);
|
||||
size_t szOldPos = 0;
|
||||
size_t szSpacePos = stdDevString.find(' ', szOldPos); // extensions string is space delimited
|
||||
while (szSpacePos != stdDevString.npos)
|
||||
{
|
||||
if( strcmp("cl_nv_device_attribute_query", stdDevString.substr(szOldPos, szSpacePos - szOldPos).c_str()) == 0 )
|
||||
{
|
||||
bDevAttributeQuery = true;
|
||||
}
|
||||
|
||||
do {
|
||||
szOldPos = szSpacePos + 1;
|
||||
szSpacePos = stdDevString.find(' ', szOldPos);
|
||||
} while (szSpacePos == szOldPos);
|
||||
}
|
||||
}
|
||||
|
||||
// if search succeeded, get device caps
|
||||
if(bDevAttributeQuery)
|
||||
{
|
||||
cl_int iComputeCapMajor, iComputeCapMinor;
|
||||
clGetDeviceInfo(device, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, sizeof(cl_uint), (void*)&iComputeCapMajor, NULL);
|
||||
clGetDeviceInfo(device, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, sizeof(cl_uint), (void*)&iComputeCapMinor, NULL);
|
||||
iDevArch = (10 * iComputeCapMajor) + iComputeCapMinor;
|
||||
}
|
||||
|
||||
return iDevArch;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Gets the id of the first device from the context
|
||||
//!
|
||||
//! @return the id
|
||||
//! @param cxGPUContext OpenCL context
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
cl_device_id oclGetFirstDev(cl_context cxGPUContext)
|
||||
{
|
||||
size_t szParmDataBytes;
|
||||
cl_device_id* cdDevices;
|
||||
|
||||
// get the list of GPU devices associated with context
|
||||
clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes);
|
||||
cdDevices = (cl_device_id*) malloc(szParmDataBytes);
|
||||
|
||||
clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL);
|
||||
|
||||
cl_device_id first = cdDevices[0];
|
||||
free(cdDevices);
|
||||
|
||||
return first;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Gets the id of device with maximal FLOPS from the context
|
||||
//!
|
||||
//! @return the id
|
||||
//! @param cxGPUContext OpenCL context
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
cl_device_id oclGetMaxFlopsDev(cl_context cxGPUContext)
|
||||
{
|
||||
size_t szParmDataBytes;
|
||||
cl_device_id* cdDevices;
|
||||
|
||||
// get the list of GPU devices associated with context
|
||||
clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes);
|
||||
cdDevices = (cl_device_id*) malloc(szParmDataBytes);
|
||||
size_t device_count = szParmDataBytes / sizeof(cl_device_id);
|
||||
|
||||
clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL);
|
||||
|
||||
cl_device_id max_flops_device = cdDevices[0];
|
||||
int max_flops = 0;
|
||||
|
||||
size_t current_device = 0;
|
||||
|
||||
// CL_DEVICE_MAX_COMPUTE_UNITS
|
||||
cl_uint compute_units;
|
||||
clGetDeviceInfo(cdDevices[current_device], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(compute_units), &compute_units, NULL);
|
||||
|
||||
// CL_DEVICE_MAX_CLOCK_FREQUENCY
|
||||
cl_uint clock_frequency;
|
||||
clGetDeviceInfo(cdDevices[current_device], CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clock_frequency), &clock_frequency, NULL);
|
||||
|
||||
max_flops = compute_units * clock_frequency;
|
||||
++current_device;
|
||||
|
||||
while( current_device < device_count )
|
||||
{
|
||||
// CL_DEVICE_MAX_COMPUTE_UNITS
|
||||
cl_uint compute_units;
|
||||
clGetDeviceInfo(cdDevices[current_device], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(compute_units), &compute_units, NULL);
|
||||
|
||||
// CL_DEVICE_MAX_CLOCK_FREQUENCY
|
||||
cl_uint clock_frequency;
|
||||
clGetDeviceInfo(cdDevices[current_device], CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clock_frequency), &clock_frequency, NULL);
|
||||
|
||||
int flops = compute_units * clock_frequency;
|
||||
if( flops > max_flops )
|
||||
{
|
||||
max_flops = flops;
|
||||
max_flops_device = cdDevices[current_device];
|
||||
}
|
||||
++current_device;
|
||||
}
|
||||
|
||||
free(cdDevices);
|
||||
|
||||
return max_flops_device;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Loads a Program file and prepends the cPreamble to the code.
|
||||
//!
|
||||
//! @return the source string if succeeded, 0 otherwise
|
||||
//! @param cFilename program filename
|
||||
//! @param cPreamble code that is prepended to the loaded file, typically a set of #defines or a header
|
||||
//! @param szFinalLength returned length of the code string
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
char* oclLoadProgSource(const char* cFilename, const char* cPreamble, size_t* szFinalLength)
|
||||
{
|
||||
// locals
|
||||
FILE* pFileStream = NULL;
|
||||
size_t szSourceLength;
|
||||
|
||||
// open the OpenCL source code file
|
||||
#ifdef _WIN32 // Windows version
|
||||
if(fopen_s(&pFileStream, cFilename, "rb") != 0)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
#else // Linux version
|
||||
pFileStream = fopen(cFilename, "rb");
|
||||
if(pFileStream == 0)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
size_t szPreambleLength = strlen(cPreamble);
|
||||
|
||||
// get the length of the source code
|
||||
fseek(pFileStream, 0, SEEK_END);
|
||||
szSourceLength = ftell(pFileStream);
|
||||
fseek(pFileStream, 0, SEEK_SET);
|
||||
|
||||
// allocate a buffer for the source code string and read it in
|
||||
char* cSourceString = (char *)malloc(szSourceLength + szPreambleLength + 1);
|
||||
memcpy(cSourceString, cPreamble, szPreambleLength);
|
||||
if (fread((cSourceString) + szPreambleLength, szSourceLength, 1, pFileStream) != 1)
|
||||
{
|
||||
fclose(pFileStream);
|
||||
free(cSourceString);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// close the file and return the total length of the combined (preamble + source) string
|
||||
fclose(pFileStream);
|
||||
if(szFinalLength != 0)
|
||||
{
|
||||
*szFinalLength = szSourceLength + szPreambleLength;
|
||||
}
|
||||
cSourceString[szSourceLength + szPreambleLength] = '\0';
|
||||
|
||||
return cSourceString;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Gets the id of the nth device from the context
|
||||
//!
|
||||
//! @return the id or -1 when out of range
|
||||
//! @param cxGPUContext OpenCL context
|
||||
//! @param device_idx index of the device of interest
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
cl_device_id oclGetDev(cl_context cxGPUContext, unsigned int nr)
|
||||
{
|
||||
size_t szParmDataBytes;
|
||||
cl_device_id* cdDevices;
|
||||
|
||||
// get the list of GPU devices associated with context
|
||||
clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes);
|
||||
|
||||
if( szParmDataBytes / sizeof(cl_device_id) <= nr ) {
|
||||
return (cl_device_id)-1;
|
||||
}
|
||||
|
||||
cdDevices = (cl_device_id*) malloc(szParmDataBytes);
|
||||
|
||||
clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL);
|
||||
|
||||
cl_device_id device = cdDevices[nr];
|
||||
free(cdDevices);
|
||||
|
||||
return device;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Get the binary (PTX) of the program associated with the device
|
||||
//!
|
||||
//! @param cpProgram OpenCL program
|
||||
//! @param cdDevice device of interest
|
||||
//! @param binary returned code
|
||||
//! @param length length of returned code
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
void oclGetProgBinary( cl_program cpProgram, cl_device_id cdDevice, char** binary, size_t* length)
|
||||
{
|
||||
// Grab the number of devices associated witht the program
|
||||
cl_uint num_devices;
|
||||
clGetProgramInfo(cpProgram, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &num_devices, NULL);
|
||||
|
||||
// Grab the device ids
|
||||
cl_device_id* devices = (cl_device_id*) malloc(num_devices * sizeof(cl_device_id));
|
||||
clGetProgramInfo(cpProgram, CL_PROGRAM_DEVICES, num_devices * sizeof(cl_device_id), devices, 0);
|
||||
|
||||
// Grab the sizes of the binaries
|
||||
size_t* binary_sizes = (size_t*)malloc(num_devices * sizeof(size_t));
|
||||
clGetProgramInfo(cpProgram, CL_PROGRAM_BINARY_SIZES, num_devices * sizeof(size_t), binary_sizes, NULL);
|
||||
|
||||
// Now get the binaries
|
||||
char** ptx_code = (char**) malloc(num_devices * sizeof(char*));
|
||||
for( unsigned int i=0; i<num_devices; ++i) {
|
||||
ptx_code[i]= (char*)malloc(binary_sizes[i]);
|
||||
}
|
||||
clGetProgramInfo(cpProgram, CL_PROGRAM_BINARIES, 0, ptx_code, NULL);
|
||||
|
||||
// Find the index of the device of interest
|
||||
unsigned int idx = 0;
|
||||
while( idx<num_devices && devices[idx] != cdDevice ) ++idx;
|
||||
|
||||
// If it is associated prepare the result
|
||||
if( idx < num_devices )
|
||||
{
|
||||
*binary = ptx_code[idx];
|
||||
*length = binary_sizes[idx];
|
||||
}
|
||||
|
||||
// Cleanup
|
||||
free( devices );
|
||||
free( binary_sizes );
|
||||
for( unsigned int i=0; i<num_devices; ++i) {
|
||||
if( i != idx ) free(ptx_code[i]);
|
||||
}
|
||||
free( ptx_code );
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Get and log the binary (PTX) from the OpenCL compiler for the requested program & device
|
||||
//!
|
||||
//! @param cpProgram OpenCL program
|
||||
//! @param cdDevice device of interest
|
||||
//! @param const char* cPtxFileName optional PTX file name
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
void oclLogPtx(cl_program cpProgram, cl_device_id cdDevice, const char* cPtxFileName)
|
||||
{
|
||||
// Grab the number of devices associated with the program
|
||||
cl_uint num_devices;
|
||||
clGetProgramInfo(cpProgram, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &num_devices, NULL);
|
||||
|
||||
// Grab the device ids
|
||||
cl_device_id* devices = (cl_device_id*) malloc(num_devices * sizeof(cl_device_id));
|
||||
clGetProgramInfo(cpProgram, CL_PROGRAM_DEVICES, num_devices * sizeof(cl_device_id), devices, 0);
|
||||
|
||||
// Grab the sizes of the binaries
|
||||
size_t* binary_sizes = (size_t*)malloc(num_devices * sizeof(size_t));
|
||||
clGetProgramInfo(cpProgram, CL_PROGRAM_BINARY_SIZES, num_devices * sizeof(size_t), binary_sizes, NULL);
|
||||
|
||||
// Now get the binaries
|
||||
char** ptx_code = (char**)malloc(num_devices * sizeof(char*));
|
||||
for( unsigned int i=0; i<num_devices; ++i)
|
||||
{
|
||||
ptx_code[i] = (char*)malloc(binary_sizes[i]);
|
||||
}
|
||||
clGetProgramInfo(cpProgram, CL_PROGRAM_BINARIES, 0, ptx_code, NULL);
|
||||
|
||||
// Find the index of the device of interest
|
||||
unsigned int idx = 0;
|
||||
while((idx < num_devices) && (devices[idx] != cdDevice))
|
||||
{
|
||||
++idx;
|
||||
}
|
||||
|
||||
// If the index is associated, log the result
|
||||
if(idx < num_devices)
|
||||
{
|
||||
|
||||
// if a separate filename is supplied, dump ptx there
|
||||
if (NULL != cPtxFileName)
|
||||
{
|
||||
shrLog("\nWriting ptx to separate file: %s ...\n\n", cPtxFileName);
|
||||
FILE* pFileStream = NULL;
|
||||
#ifdef _WIN32
|
||||
fopen_s(&pFileStream, cPtxFileName, "wb");
|
||||
#else
|
||||
pFileStream = fopen(cPtxFileName, "wb");
|
||||
#endif
|
||||
|
||||
fwrite(ptx_code[idx], binary_sizes[idx], 1, pFileStream);
|
||||
fclose(pFileStream);
|
||||
}
|
||||
else // log to logfile and console if no ptx file specified
|
||||
{
|
||||
shrLog("\n%s\nProgram Binary:\n%s\n%s\n", HDASHLINE, ptx_code[idx], HDASHLINE);
|
||||
}
|
||||
}
|
||||
|
||||
// Cleanup
|
||||
free(devices);
|
||||
free(binary_sizes);
|
||||
for(unsigned int i = 0; i < num_devices; ++i)
|
||||
{
|
||||
free(ptx_code[i]);
|
||||
}
|
||||
free( ptx_code );
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Get and log the binary (PTX) from the OpenCL compiler for the requested program & device
|
||||
//!
|
||||
//! @param cpProgram OpenCL program
|
||||
//! @param cdDevice device of interest
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
void oclLogBuildInfo(cl_program cpProgram, cl_device_id cdDevice)
|
||||
{
|
||||
// write out the build log and ptx, then exit
|
||||
char cBuildLog[10240];
|
||||
clGetProgramBuildInfo(cpProgram, cdDevice, CL_PROGRAM_BUILD_LOG,
|
||||
sizeof(cBuildLog), cBuildLog, NULL );
|
||||
shrLog("\n%s\nBuild Log:\n%s\n%s\n", HDASHLINE, cBuildLog, HDASHLINE);
|
||||
}
|
||||
|
||||
// Helper function for De-allocating cl objects
|
||||
// *********************************************************************
|
||||
void oclDeleteMemObjs(cl_mem* cmMemObjs, int iNumObjs)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < iNumObjs; i++)
|
||||
{
|
||||
if (cmMemObjs[i])clReleaseMemObject(cmMemObjs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Helper function to get OpenCL error string from constant
|
||||
// *********************************************************************
|
||||
const char* oclErrorString(cl_int error)
|
||||
{
|
||||
static const char* errorString[] = {
|
||||
"CL_SUCCESS",
|
||||
"CL_DEVICE_NOT_FOUND",
|
||||
"CL_DEVICE_NOT_AVAILABLE",
|
||||
"CL_COMPILER_NOT_AVAILABLE",
|
||||
"CL_MEM_OBJECT_ALLOCATION_FAILURE",
|
||||
"CL_OUT_OF_RESOURCES",
|
||||
"CL_OUT_OF_HOST_MEMORY",
|
||||
"CL_PROFILING_INFO_NOT_AVAILABLE",
|
||||
"CL_MEM_COPY_OVERLAP",
|
||||
"CL_IMAGE_FORMAT_MISMATCH",
|
||||
"CL_IMAGE_FORMAT_NOT_SUPPORTED",
|
||||
"CL_BUILD_PROGRAM_FAILURE",
|
||||
"CL_MAP_FAILURE",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"CL_INVALID_VALUE",
|
||||
"CL_INVALID_DEVICE_TYPE",
|
||||
"CL_INVALID_PLATFORM",
|
||||
"CL_INVALID_DEVICE",
|
||||
"CL_INVALID_CONTEXT",
|
||||
"CL_INVALID_QUEUE_PROPERTIES",
|
||||
"CL_INVALID_COMMAND_QUEUE",
|
||||
"CL_INVALID_HOST_PTR",
|
||||
"CL_INVALID_MEM_OBJECT",
|
||||
"CL_INVALID_IMAGE_FORMAT_DESCRIPTOR",
|
||||
"CL_INVALID_IMAGE_SIZE",
|
||||
"CL_INVALID_SAMPLER",
|
||||
"CL_INVALID_BINARY",
|
||||
"CL_INVALID_BUILD_OPTIONS",
|
||||
"CL_INVALID_PROGRAM",
|
||||
"CL_INVALID_PROGRAM_EXECUTABLE",
|
||||
"CL_INVALID_KERNEL_NAME",
|
||||
"CL_INVALID_KERNEL_DEFINITION",
|
||||
"CL_INVALID_KERNEL",
|
||||
"CL_INVALID_ARG_INDEX",
|
||||
"CL_INVALID_ARG_VALUE",
|
||||
"CL_INVALID_ARG_SIZE",
|
||||
"CL_INVALID_KERNEL_ARGS",
|
||||
"CL_INVALID_WORK_DIMENSION",
|
||||
"CL_INVALID_WORK_GROUP_SIZE",
|
||||
"CL_INVALID_WORK_ITEM_SIZE",
|
||||
"CL_INVALID_GLOBAL_OFFSET",
|
||||
"CL_INVALID_EVENT_WAIT_LIST",
|
||||
"CL_INVALID_EVENT",
|
||||
"CL_INVALID_OPERATION",
|
||||
"CL_INVALID_GL_OBJECT",
|
||||
"CL_INVALID_BUFFER_SIZE",
|
||||
"CL_INVALID_MIP_LEVEL",
|
||||
"CL_INVALID_GLOBAL_WORK_SIZE",
|
||||
};
|
||||
|
||||
const int errorCount = sizeof(errorString) / sizeof(errorString[0]);
|
||||
|
||||
const int index = -error;
|
||||
|
||||
return (index >= 0 && index < errorCount) ? errorString[index] : "Unspecified Error";
|
||||
}
|
||||
|
||||
// Helper function to get OpenCL image format string (channel order and type) from constant
|
||||
// *********************************************************************
|
||||
const char* oclImageFormatString(cl_uint uiImageFormat)
|
||||
{
|
||||
// cl_channel_order
|
||||
if (uiImageFormat == CL_R)return "CL_R";
|
||||
if (uiImageFormat == CL_A)return "CL_A";
|
||||
if (uiImageFormat == CL_RG)return "CL_RG";
|
||||
if (uiImageFormat == CL_RA)return "CL_RA";
|
||||
if (uiImageFormat == CL_RGB)return "CL_RGB";
|
||||
if (uiImageFormat == CL_RGBA)return "CL_RGBA";
|
||||
if (uiImageFormat == CL_BGRA)return "CL_BGRA";
|
||||
if (uiImageFormat == CL_ARGB)return "CL_ARGB";
|
||||
if (uiImageFormat == CL_INTENSITY)return "CL_INTENSITY";
|
||||
if (uiImageFormat == CL_LUMINANCE)return "CL_LUMINANCE";
|
||||
|
||||
// cl_channel_type
|
||||
if (uiImageFormat == CL_SNORM_INT8)return "CL_SNORM_INT8";
|
||||
if (uiImageFormat == CL_SNORM_INT16)return "CL_SNORM_INT16";
|
||||
if (uiImageFormat == CL_UNORM_INT8)return "CL_UNORM_INT8";
|
||||
if (uiImageFormat == CL_UNORM_INT16)return "CL_UNORM_INT16";
|
||||
if (uiImageFormat == CL_UNORM_SHORT_565)return "CL_UNORM_SHORT_565";
|
||||
if (uiImageFormat == CL_UNORM_SHORT_555)return "CL_UNORM_SHORT_555";
|
||||
if (uiImageFormat == CL_UNORM_INT_101010)return "CL_UNORM_INT_101010";
|
||||
if (uiImageFormat == CL_SIGNED_INT8)return "CL_SIGNED_INT8";
|
||||
if (uiImageFormat == CL_SIGNED_INT16)return "CL_SIGNED_INT16";
|
||||
if (uiImageFormat == CL_SIGNED_INT32)return "CL_SIGNED_INT32";
|
||||
if (uiImageFormat == CL_UNSIGNED_INT8)return "CL_UNSIGNED_INT8";
|
||||
if (uiImageFormat == CL_UNSIGNED_INT16)return "CL_UNSIGNED_INT16";
|
||||
if (uiImageFormat == CL_UNSIGNED_INT32)return "CL_UNSIGNED_INT32";
|
||||
if (uiImageFormat == CL_HALF_FLOAT)return "CL_HALF_FLOAT";
|
||||
if (uiImageFormat == CL_FLOAT)return "CL_FLOAT";
|
||||
|
||||
// unknown constant
|
||||
return "Unknown";
|
||||
}
|
||||
198
tests/opencl/dotproduct/oclUtils.h
Normal file
198
tests/opencl/dotproduct/oclUtils.h
Normal file
@@ -0,0 +1,198 @@
|
||||
/*
|
||||
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Please refer to the NVIDIA end user license agreement (EULA) associated
|
||||
* with this source code for terms and conditions that govern your use of
|
||||
* this software. Any use, reproduction, disclosure, or distribution of
|
||||
* this software and related documentation outside the terms of the EULA
|
||||
* is strictly prohibited.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef OCL_UTILS_H
|
||||
#define OCL_UTILS_H
|
||||
|
||||
// *********************************************************************
|
||||
// Utilities specific to OpenCL samples in NVIDIA GPU Computing SDK
|
||||
// *********************************************************************
|
||||
|
||||
// Common headers: Cross-API utililties and OpenCL header
|
||||
#include "shrUtils.h"
|
||||
|
||||
// All OpenCL headers
|
||||
#if defined (__APPLE__) || defined(MACOSX)
|
||||
#include <OpenCL/opencl.h>
|
||||
#else
|
||||
#include <CL/opencl.h>
|
||||
#endif
|
||||
|
||||
// Includes
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
// For systems with CL_EXT that are not updated with these extensions, we copied these
|
||||
// extensions from <CL/cl_ext.h>
|
||||
#ifndef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV
|
||||
/* cl_nv_device_attribute_query extension - no extension #define since it has no functions */
|
||||
#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000
|
||||
#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001
|
||||
#define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002
|
||||
#define CL_DEVICE_WARP_SIZE_NV 0x4003
|
||||
#define CL_DEVICE_GPU_OVERLAP_NV 0x4004
|
||||
#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005
|
||||
#define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006
|
||||
#endif
|
||||
|
||||
// reminders for build output window and log
|
||||
#ifdef _WIN32
|
||||
#pragma message ("Note: including shrUtils.h")
|
||||
#pragma message ("Note: including opencl.h")
|
||||
#endif
|
||||
|
||||
// SDK Revision #
|
||||
#define OCL_SDKREVISION "7027912"
|
||||
|
||||
// Error and Exit Handling Macros...
|
||||
// *********************************************************************
|
||||
// Full error handling macro with Cleanup() callback (if supplied)...
|
||||
// (Companion Inline Function lower on page)
|
||||
#define oclCheckErrorEX(a, b, c) __oclCheckErrorEX(a, b, c, __FILE__ , __LINE__)
|
||||
|
||||
// Short version without Cleanup() callback pointer
|
||||
// Both Input (a) and Reference (b) are specified as args
|
||||
#define oclCheckError(a, b) oclCheckErrorEX(a, b, 0)
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Gets the platform ID for NVIDIA if available, otherwise default to platform 0
|
||||
//!
|
||||
//! @return the id
|
||||
//! @param clSelectedPlatformID OpenCL platform ID
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" cl_int oclGetPlatformID(cl_platform_id* clSelectedPlatformID);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Print info about the device
|
||||
//!
|
||||
//! @param iLogMode enum LOGBOTH, LOGCONSOLE, LOGFILE
|
||||
//! @param device OpenCL id of the device
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" void oclPrintDevInfo(int iLogMode, cl_device_id device);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Get and return device capability
|
||||
//!
|
||||
//! @return the 2 digit integer representation of device Cap (major minor). return -1 if NA
|
||||
//! @param device OpenCL id of the device
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" int oclGetDevCap(cl_device_id device);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Print the device name
|
||||
//!
|
||||
//! @param iLogMode enum LOGBOTH, LOGCONSOLE, LOGFILE
|
||||
//! @param device OpenCL id of the device
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" void oclPrintDevName(int iLogMode, cl_device_id device);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Gets the id of the first device from the context
|
||||
//!
|
||||
//! @return the id
|
||||
//! @param cxGPUContext OpenCL context
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" cl_device_id oclGetFirstDev(cl_context cxGPUContext);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Gets the id of the nth device from the context
|
||||
//!
|
||||
//! @return the id or -1 when out of range
|
||||
//! @param cxGPUContext OpenCL context
|
||||
//! @param device_idx index of the device of interest
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" cl_device_id oclGetDev(cl_context cxGPUContext, unsigned int device_idx);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Gets the id of device with maximal FLOPS from the context
|
||||
//!
|
||||
//! @return the id
|
||||
//! @param cxGPUContext OpenCL context
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" cl_device_id oclGetMaxFlopsDev(cl_context cxGPUContext);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Loads a Program file and prepends the cPreamble to the code.
|
||||
//!
|
||||
//! @return the source string if succeeded, 0 otherwise
|
||||
//! @param cFilename program filename
|
||||
//! @param cPreamble code that is prepended to the loaded file, typically a set of #defines or a header
|
||||
//! @param szFinalLength returned length of the code string
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" char* oclLoadProgSource(const char* cFilename, const char* cPreamble, size_t* szFinalLength);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Get the binary (PTX) of the program associated with the device
|
||||
//!
|
||||
//! @param cpProgram OpenCL program
|
||||
//! @param cdDevice device of interest
|
||||
//! @param binary returned code
|
||||
//! @param length length of returned code
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" void oclGetProgBinary( cl_program cpProgram, cl_device_id cdDevice, char** binary, size_t* length);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Get and log the binary (PTX) from the OpenCL compiler for the requested program & device
|
||||
//!
|
||||
//! @param cpProgram OpenCL program
|
||||
//! @param cdDevice device of interest
|
||||
//! @param const char* cPtxFileName optional PTX file name
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" void oclLogPtx(cl_program cpProgram, cl_device_id cdDevice, const char* cPtxFileName);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//! Get and log the Build Log from the OpenCL compiler for the requested program & device
|
||||
//!
|
||||
//! @param cpProgram OpenCL program
|
||||
//! @param cdDevice device of interest
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" void oclLogBuildInfo(cl_program cpProgram, cl_device_id cdDevice);
|
||||
|
||||
// Helper function for De-allocating cl objects
|
||||
// *********************************************************************
|
||||
extern "C" void oclDeleteMemObjs(cl_mem* cmMemObjs, int iNumObjs);
|
||||
|
||||
// Helper function to get OpenCL error string from constant
|
||||
// *********************************************************************
|
||||
extern "C" const char* oclErrorString(cl_int error);
|
||||
|
||||
// Helper function to get OpenCL image format string (channel order and type) from constant
|
||||
// *********************************************************************
|
||||
extern "C" const char* oclImageFormatString(cl_uint uiImageFormat);
|
||||
|
||||
// companion inline function for error checking and exit on error WITH Cleanup Callback (if supplied)
|
||||
// *********************************************************************
|
||||
inline void __oclCheckErrorEX(cl_int iSample, cl_int iReference, void (*pCleanup)(int), const char* cFile, const int iLine)
|
||||
{
|
||||
// An error condition is defined by the sample/test value not equal to the reference
|
||||
if (iReference != iSample)
|
||||
{
|
||||
// If the sample/test value isn't equal to the ref, it's an error by defnition, so override 0 sample/test value
|
||||
iSample = (iSample == 0) ? -9999 : iSample;
|
||||
|
||||
// Log the error info
|
||||
shrLog("\n !!! Error # %i (%s) at line %i , in file %s !!!\n\n", iSample, oclErrorString(iSample), iLine, cFile);
|
||||
|
||||
// Cleanup and exit, or just exit if no cleanup function pointer provided. Use iSample (error code in this case) as process exit code.
|
||||
if (pCleanup != NULL)
|
||||
{
|
||||
pCleanup(iSample);
|
||||
}
|
||||
else
|
||||
{
|
||||
shrLogEx(LOGBOTH | CLOSELOG, 0, "Exiting...\n");
|
||||
exit(iSample);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
238
tests/opencl/dotproduct/shrQATest.h
Normal file
238
tests/opencl/dotproduct/shrQATest.h
Normal file
@@ -0,0 +1,238 @@
|
||||
/*
|
||||
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Please refer to the NVIDIA end user license agreement (EULA) associated
|
||||
* with this source code for terms and conditions that govern your use of
|
||||
* this software. Any use, reproduction, disclosure, or distribution of
|
||||
* this software and related documentation outside the terms of the EULA
|
||||
* is strictly prohibited.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef SHR_QATEST_H
|
||||
#define SHR_QATEST_H
|
||||
|
||||
// *********************************************************************
|
||||
// Generic utilities for NVIDIA GPU Computing SDK
|
||||
// *********************************************************************
|
||||
|
||||
// OS dependent includes
|
||||
#ifdef _WIN32
|
||||
#pragma message ("Note: including windows.h")
|
||||
#pragma message ("Note: including math.h")
|
||||
#pragma message ("Note: including assert.h")
|
||||
#pragma message ("Note: including time.h")
|
||||
|
||||
// Headers needed for Windows
|
||||
#include <windows.h>
|
||||
#include <time.h>
|
||||
#else
|
||||
// Headers needed for Linux
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/time.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdarg.h>
|
||||
#include <unistd.h>
|
||||
#include <time.h>
|
||||
#endif
|
||||
|
||||
#ifndef STRCASECMP
|
||||
#ifdef _WIN32
|
||||
#define STRCASECMP _stricmp
|
||||
#else
|
||||
#define STRCASECMP strcasecmp
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef STRNCASECMP
|
||||
#ifdef _WIN32
|
||||
#define STRNCASECMP _strnicmp
|
||||
#else
|
||||
#define STRNCASECMP strncasecmp
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
// Standardized QA Start/Finish for CUDA SDK tests
|
||||
#define shrQAStart(a, b) __shrQAStart(a, b)
|
||||
#define shrQAFinish(a, b, c) __shrQAFinish(a, b, c)
|
||||
#define shrQAFinish2(a, b, c, d) __shrQAFinish2(a, b, c, d)
|
||||
|
||||
inline int findExeNameStart(const char *exec_name)
|
||||
{
|
||||
int exename_start = (int)strlen(exec_name);
|
||||
|
||||
while( (exename_start > 0) &&
|
||||
(exec_name[exename_start] != '\\') &&
|
||||
(exec_name[exename_start] != '/') )
|
||||
{
|
||||
exename_start--;
|
||||
}
|
||||
if (exec_name[exename_start] == '\\' ||
|
||||
exec_name[exename_start] == '/')
|
||||
{
|
||||
return exename_start+1;
|
||||
} else {
|
||||
return exename_start;
|
||||
}
|
||||
}
|
||||
|
||||
inline int __shrQAStart(int argc, char **argv)
|
||||
{
|
||||
bool bQATest = false;
|
||||
// First clear the output buffer
|
||||
fflush(stdout);
|
||||
fflush(stdout);
|
||||
|
||||
for (int i=1; i < argc; i++) {
|
||||
int string_start = 0;
|
||||
while (argv[i][string_start] == '-')
|
||||
string_start++;
|
||||
char *string_argv = &argv[i][string_start];
|
||||
|
||||
if (!STRCASECMP(string_argv, "qatest")) {
|
||||
bQATest = true;
|
||||
}
|
||||
}
|
||||
|
||||
// We don't want to print the entire path, so we search for the first
|
||||
int exename_start = findExeNameStart(argv[0]);
|
||||
if (bQATest) {
|
||||
fprintf(stdout, "&&&& RUNNING %s", &(argv[0][exename_start]));
|
||||
for (int i=1; i < argc; i++) fprintf(stdout, " %s", argv[i]);
|
||||
fprintf(stdout, "\n");
|
||||
} else {
|
||||
fprintf(stdout, "[%s] starting...\n", &(argv[0][exename_start]));
|
||||
}
|
||||
fflush(stdout);
|
||||
printf("\n"); fflush(stdout);
|
||||
return exename_start;
|
||||
}
|
||||
|
||||
enum eQAstatus {
|
||||
QA_FAILED = 0,
|
||||
QA_PASSED = 1,
|
||||
QA_WAIVED = 2
|
||||
};
|
||||
|
||||
inline void __ExitInTime(int seconds)
|
||||
{
|
||||
fprintf(stdout, "> exiting in %d seconds: ", seconds);
|
||||
fflush(stdout);
|
||||
time_t t;
|
||||
int count;
|
||||
for (t=time(0)+seconds, count=seconds; time(0) < t; count--) {
|
||||
fprintf(stdout, "%d...", count);
|
||||
#ifdef WIN32
|
||||
Sleep(1000);
|
||||
#else
|
||||
sleep(1);
|
||||
#endif
|
||||
}
|
||||
fprintf(stdout,"done!\n\n");
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
|
||||
inline void __shrQAFinish(int argc, const char **argv, int iStatus)
|
||||
{
|
||||
// By default QATest is disabled and NoPrompt is Enabled (times out at seconds passed into __ExitInTime() )
|
||||
bool bQATest = false, bNoPrompt = true, bQuitInTime = true;
|
||||
const char *sStatus[] = { "FAILED", "PASSED", "WAIVED", NULL };
|
||||
|
||||
for (int i=1; i < argc; i++) {
|
||||
int string_start = 0;
|
||||
while (argv[i][string_start] == '-')
|
||||
string_start++;
|
||||
|
||||
const char *string_argv = &argv[i][string_start];
|
||||
if (!STRCASECMP(string_argv, "qatest")) {
|
||||
bQATest = true;
|
||||
}
|
||||
// For SDK individual samples that don't specify -noprompt or -prompt,
|
||||
// a 3 second delay will happen before exiting, giving a user time to view results
|
||||
if (!STRCASECMP(string_argv, "noprompt") || !STRCASECMP(string_argv, "help")) {
|
||||
bNoPrompt = true;
|
||||
bQuitInTime = false;
|
||||
}
|
||||
if (!STRCASECMP(string_argv, "prompt")) {
|
||||
bNoPrompt = false;
|
||||
bQuitInTime = false;
|
||||
}
|
||||
}
|
||||
|
||||
int exename_start = findExeNameStart(argv[0]);
|
||||
if (bQATest) {
|
||||
fprintf(stdout, "&&&& %s %s", sStatus[iStatus], &(argv[0][exename_start]));
|
||||
for (int i=1; i < argc; i++) fprintf(stdout, " %s", argv[i]);
|
||||
fprintf(stdout, "\n");
|
||||
} else {
|
||||
fprintf(stdout, "[%s] test results...\n%s\n", &(argv[0][exename_start]), sStatus[iStatus]);
|
||||
}
|
||||
fflush(stdout);
|
||||
printf("\n"); fflush(stdout);
|
||||
if (bQuitInTime) {
|
||||
__ExitInTime(3);
|
||||
} else {
|
||||
if (!bNoPrompt) {
|
||||
fprintf(stdout, "\nPress <Enter> to exit...\n");
|
||||
fflush(stdout);
|
||||
getchar();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline void __shrQAFinish2(bool bQATest, int argc, const char **argv, int iStatus)
|
||||
{
|
||||
bool bQuitInTime = true;
|
||||
const char *sStatus[] = { "FAILED", "PASSED", "WAIVED", NULL };
|
||||
|
||||
for (int i=1; i < argc; i++) {
|
||||
int string_start = 0;
|
||||
while (argv[i][string_start] == '-')
|
||||
string_start++;
|
||||
|
||||
const char *string_argv = &argv[i][string_start];
|
||||
// For SDK individual samples that don't specify -noprompt or -prompt,
|
||||
// a 3 second delay will happen before exiting, giving a user time to view results
|
||||
if (!STRCASECMP(string_argv, "noprompt") || !STRCASECMP(string_argv, "help")) {
|
||||
bQuitInTime = false;
|
||||
}
|
||||
if (!STRCASECMP(string_argv, "prompt")) {
|
||||
bQuitInTime = false;
|
||||
}
|
||||
}
|
||||
|
||||
int exename_start = findExeNameStart(argv[0]);
|
||||
if (bQATest) {
|
||||
fprintf(stdout, "&&&& %s %s", sStatus[iStatus], &(argv[0][exename_start]));
|
||||
for (int i=1; i < argc; i++) fprintf(stdout, " %s", argv[i]);
|
||||
fprintf(stdout, "\n");
|
||||
} else {
|
||||
fprintf(stdout, "[%s] test results...\n%s\n", &(argv[0][exename_start]), sStatus[iStatus]);
|
||||
}
|
||||
fflush(stdout);
|
||||
|
||||
if (bQuitInTime) {
|
||||
__ExitInTime(3);
|
||||
}
|
||||
}
|
||||
|
||||
inline void shrQAFinishExit(int argc, const char **argv, int iStatus)
|
||||
{
|
||||
__shrQAFinish(argc, argv, iStatus);
|
||||
|
||||
exit(iStatus ? EXIT_SUCCESS : EXIT_FAILURE);
|
||||
}
|
||||
|
||||
inline void shrQAFinishExit2(bool bQAtest, int argc, const char **argv, int iStatus)
|
||||
{
|
||||
__shrQAFinish2(bQAtest, argc, argv, iStatus);
|
||||
|
||||
exit(iStatus ? EXIT_SUCCESS : EXIT_FAILURE);
|
||||
}
|
||||
|
||||
#endif
|
||||
1954
tests/opencl/dotproduct/shrUtils.cpp
Normal file
1954
tests/opencl/dotproduct/shrUtils.cpp
Normal file
File diff suppressed because it is too large
Load Diff
642
tests/opencl/dotproduct/shrUtils.h
Normal file
642
tests/opencl/dotproduct/shrUtils.h
Normal file
@@ -0,0 +1,642 @@
|
||||
/*
|
||||
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Please refer to the NVIDIA end user license agreement (EULA) associated
|
||||
* with this source code for terms and conditions that govern your use of
|
||||
* this software. Any use, reproduction, disclosure, or distribution of
|
||||
* this software and related documentation outside the terms of the EULA
|
||||
* is strictly prohibited.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef SHR_UTILS_H
|
||||
#define SHR_UTILS_H
|
||||
|
||||
// *********************************************************************
|
||||
// Generic utilities for NVIDIA GPU Computing SDK
|
||||
// *********************************************************************
|
||||
|
||||
// reminders for output window and build log
|
||||
#ifdef _WIN32
|
||||
#pragma message ("Note: including windows.h")
|
||||
#pragma message ("Note: including math.h")
|
||||
#pragma message ("Note: including assert.h")
|
||||
#endif
|
||||
|
||||
// OS dependent includes
|
||||
#ifdef _WIN32
|
||||
// Headers needed for Windows
|
||||
#include <windows.h>
|
||||
#else
|
||||
// Headers needed for Linux
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/time.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdarg.h>
|
||||
#endif
|
||||
|
||||
// Other headers needed for both Windows and Linux
|
||||
#include <math.h>
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
// Un-comment the following #define to enable profiling code in SDK apps
|
||||
//#define GPU_PROFILING
|
||||
|
||||
// Beginning of GPU Architecture definitions
|
||||
inline int ConvertSMVer2Cores(int major, int minor)
|
||||
{
|
||||
// Defines for GPU Architecture types (using the SM version to determine the # of cores per SM
|
||||
typedef struct {
|
||||
int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version
|
||||
int Cores;
|
||||
} sSMtoCores;
|
||||
|
||||
sSMtoCores nGpuArchCoresPerSM[] =
|
||||
{ { 0x10, 8 }, // Tesla Generation (SM 1.0) G80 class
|
||||
{ 0x11, 8 }, // Tesla Generation (SM 1.1) G8x class
|
||||
{ 0x12, 8 }, // Tesla Generation (SM 1.2) G9x class
|
||||
{ 0x13, 8 }, // Tesla Generation (SM 1.3) GT200 class
|
||||
{ 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class
|
||||
{ 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class
|
||||
{ 0x30, 192}, // Fermi Generation (SM 3.0) GK10x class
|
||||
{ -1, -1 }
|
||||
};
|
||||
|
||||
int index = 0;
|
||||
while (nGpuArchCoresPerSM[index].SM != -1) {
|
||||
if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor) ) {
|
||||
return nGpuArchCoresPerSM[index].Cores;
|
||||
}
|
||||
index++;
|
||||
}
|
||||
printf("MapSMtoCores SM %d.%d is undefined (please update to the latest SDK)!\n", major, minor);
|
||||
return -1;
|
||||
}
|
||||
// end of GPU Architecture definitions
|
||||
|
||||
|
||||
// Defines and enum for use with logging functions
|
||||
// *********************************************************************
|
||||
#define DEFAULTLOGFILE "SdkConsoleLog.txt"
|
||||
#define MASTERLOGFILE "SdkMasterLog.csv"
|
||||
enum LOGMODES
|
||||
{
|
||||
LOGCONSOLE = 1, // bit to signal "log to console"
|
||||
LOGFILE = 2, // bit to signal "log to file"
|
||||
LOGBOTH = 3, // convenience union of first 2 bits to signal "log to both"
|
||||
APPENDMODE = 4, // bit to set "file append" mode instead of "replace mode" on open
|
||||
MASTER = 8, // bit to signal master .csv log output
|
||||
ERRORMSG = 16, // bit to signal "pre-pend Error"
|
||||
CLOSELOG = 32 // bit to close log file, if open, after any requested file write
|
||||
};
|
||||
#define HDASHLINE "-----------------------------------------------------------\n"
|
||||
|
||||
// Standardized boolean
|
||||
enum shrBOOL
|
||||
{
|
||||
shrFALSE = 0,
|
||||
shrTRUE = 1
|
||||
};
|
||||
|
||||
// Standardized MAX, MIN and CLAMP
|
||||
#define MAX(a, b) ((a > b) ? a : b)
|
||||
#define MIN(a, b) ((a < b) ? a : b)
|
||||
#define CLAMP(a, b, c) MIN(MAX(a, b), c) // double sided clip of input a
|
||||
#define TOPCLAMP(a, b) (a < b ? a:b) // single top side clip of input a
|
||||
|
||||
// Error and Exit Handling Macros...
|
||||
// *********************************************************************
|
||||
// Full error handling macro with Cleanup() callback (if supplied)...
|
||||
// (Companion Inline Function lower on page)
|
||||
#define shrCheckErrorEX(a, b, c) __shrCheckErrorEX(a, b, c, __FILE__ , __LINE__)
|
||||
|
||||
// Short version without Cleanup() callback pointer
|
||||
// Both Input (a) and Reference (b) are specified as args
|
||||
#define shrCheckError(a, b) shrCheckErrorEX(a, b, 0)
|
||||
|
||||
// Standardized Exit Macro for leaving main()... extended version
|
||||
// (Companion Inline Function lower on page)
|
||||
#define shrExitEX(a, b, c) __shrExitEX(a, b, c)
|
||||
|
||||
// Standardized Exit Macro for leaving main()... short version
|
||||
// (Companion Inline Function lower on page)
|
||||
#define shrEXIT(a, b) __shrExitEX(a, b, EXIT_SUCCESS)
|
||||
|
||||
// Simple argument checker macro
|
||||
#define ARGCHECK(a) if((a) != shrTRUE)return shrFALSE
|
||||
|
||||
// Define for user-customized error handling
|
||||
#define STDERROR "file %s, line %i\n\n" , __FILE__ , __LINE__
|
||||
|
||||
// Function to deallocate memory allocated within shrUtils
|
||||
// *********************************************************************
|
||||
extern "C" void shrFree(void* ptr);
|
||||
|
||||
// *********************************************************************
|
||||
// Helper function to log standardized information to Console, to File or to both
|
||||
//! Examples: shrLogEx(LOGBOTH, 0, "Function A\n");
|
||||
//! : shrLogEx(LOGBOTH | ERRORMSG, ciErrNum, STDERROR);
|
||||
//!
|
||||
//! Automatically opens file and stores handle if needed and not done yet
|
||||
//! Closes file and nulls handle on request
|
||||
//!
|
||||
//! @param 0 iLogMode: LOGCONSOLE, LOGFILE, LOGBOTH, APPENDMODE, MASTER, ERRORMSG, CLOSELOG.
|
||||
//! LOGFILE and LOGBOTH may be | 'd with APPENDMODE to select file append mode instead of overwrite mode
|
||||
//! LOGFILE and LOGBOTH may be | 'd with CLOSELOG to "write and close"
|
||||
//! First 3 options may be | 'd with MASTER to enable independent write to master data log file
|
||||
//! First 3 options may be | 'd with ERRORMSG to start line with standard error message
|
||||
//! @param 2 dValue:
|
||||
//! Positive val = double value for time in secs to be formatted to 6 decimals.
|
||||
//! Negative val is an error code and this give error preformatting.
|
||||
//! @param 3 cFormatString: String with formatting specifiers like printf or fprintf.
|
||||
//! ALL printf flags, width, precision and type specifiers are supported with this exception:
|
||||
//! Wide char type specifiers intended for wprintf (%S and %C) are NOT supported
|
||||
//! Single byte char type specifiers (%s and %c) ARE supported
|
||||
//! @param 4... variable args: like printf or fprintf. Must match format specifer type above.
|
||||
//! @return 0 if OK, negative value on error or if error occurs or was passed in.
|
||||
// *********************************************************************
|
||||
extern "C" int shrLogEx(int iLogMode, int iErrNum, const char* cFormatString, ...);
|
||||
|
||||
// Short version of shrLogEx defaulting to shrLogEx(LOGBOTH, 0,
|
||||
// *********************************************************************
|
||||
extern "C" int shrLog(const char* cFormatString, ...);
|
||||
|
||||
// *********************************************************************
|
||||
// Delta timer function for up to 3 independent timers using host high performance counters
|
||||
// Maintains state for 3 independent counters
|
||||
//! Example: double dElapsedTime = shrDeltaTime(0);
|
||||
//!
|
||||
//! @param 0 iCounterID: Which timer to check/reset. (0, 1, 2)
|
||||
//! @return delta time of specified counter since last call in seconds. Otherwise -9999.0 if error
|
||||
// *********************************************************************
|
||||
extern "C" double shrDeltaT(int iCounterID);
|
||||
|
||||
// Optional LogFileNameOverride function
|
||||
// *********************************************************************
|
||||
extern "C" void shrSetLogFileName (const char* cOverRideName);
|
||||
|
||||
// Helper function to init data arrays
|
||||
// *********************************************************************
|
||||
extern "C" void shrFillArray(float* pfData, int iSize);
|
||||
|
||||
// Helper function to print data arrays
|
||||
// *********************************************************************
|
||||
extern "C" void shrPrintArray(float* pfData, int iSize);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Find the path for a filename
|
||||
//! @return the path if succeeded, otherwise 0
|
||||
//! @param filename name of the file
|
||||
//! @param executablePath optional absolute path of the executable
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" char* shrFindFilePath(const char* filename, const char* executablePath);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Read file \filename containing single precision floating point data
|
||||
//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE
|
||||
//! @param filename name of the source file
|
||||
//! @param data uninitialized pointer, returned initialized and pointing to
|
||||
//! the data read
|
||||
//! @param len number of data elements in data, -1 on error
|
||||
//! @note If a NULL pointer is passed to this function and it is initialized
|
||||
//! within shrUtils, then free() has to be used to deallocate the memory
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrReadFilef( const char* filename, float** data, unsigned int* len,
|
||||
bool verbose = false);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Read file \filename containing double precision floating point data
|
||||
//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE
|
||||
//! @param filename name of the source file
|
||||
//! @param data uninitialized pointer, returned initialized and pointing to
|
||||
//! the data read
|
||||
//! @param len number of data elements in data, -1 on error
|
||||
//! @note If a NULL pointer is passed to this function and it is
|
||||
//! @note If a NULL pointer is passed to this function and it is initialized
|
||||
//! within shrUtils, then free() has to be used to deallocate the memory
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrReadFiled( const char* filename, double** data, unsigned int* len,
|
||||
bool verbose = false);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Read file \filename containing integer data
|
||||
//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE
|
||||
//! @param filename name of the source file
|
||||
//! @param data uninitialized pointer, returned initialized and pointing to
|
||||
//! the data read
|
||||
//! @param len number of data elements in data, -1 on error
|
||||
//! @note If a NULL pointer is passed to this function and it is
|
||||
//! @note If a NULL pointer is passed to this function and it is initialized
|
||||
//! within shrUtils, then free() has to be used to deallocate the memory
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrReadFilei( const char* filename, int** data, unsigned int* len, bool verbose = false);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Read file \filename containing unsigned integer data
|
||||
//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE
|
||||
//! @param filename name of the source file
|
||||
//! @param data uninitialized pointer, returned initialized and pointing to
|
||||
//! the data read
|
||||
//! @param len number of data elements in data, -1 on error
|
||||
//! @note If a NULL pointer is passed to this function and it is
|
||||
//! @note If a NULL pointer is passed to this function and it is initialized
|
||||
//! within shrUtils, then free() has to be used to deallocate the memory
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrReadFileui( const char* filename, unsigned int** data,
|
||||
unsigned int* len, bool verbose = false);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Read file \filename containing char / byte data
|
||||
//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE
|
||||
//! @param filename name of the source file
|
||||
//! @param data uninitialized pointer, returned initialized and pointing to
|
||||
//! the data read
|
||||
//! @param len number of data elements in data, -1 on error
|
||||
//! @note If a NULL pointer is passed to this function and it is
|
||||
//! @note If a NULL pointer is passed to this function and it is initialized
|
||||
//! within shrUtils, then free() has to be used to deallocate the memory
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrReadFileb( const char* filename, char** data, unsigned int* len,
|
||||
bool verbose = false);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Read file \filename containing unsigned char / byte data
|
||||
//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE
|
||||
//! @param filename name of the source file
|
||||
//! @param data uninitialized pointer, returned initialized and pointing to
|
||||
//! the data read
|
||||
//! @param len number of data elements in data, -1 on error
|
||||
//! @note If a NULL pointer is passed to this function and it is
|
||||
//! @note If a NULL pointer is passed to this function and it is initialized
|
||||
//! within shrUtils, then free() has to be used to deallocate the memory
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrReadFileub( const char* filename, unsigned char** data,
|
||||
unsigned int* len, bool verbose = false);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Write a data file \filename containing single precision floating point
|
||||
//! data
|
||||
//! @return shrTRUE if writing the file succeeded, otherwise shrFALSE
|
||||
//! @param filename name of the file to write
|
||||
//! @param data pointer to data to write
|
||||
//! @param len number of data elements in data, -1 on error
|
||||
//! @param epsilon epsilon for comparison
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrWriteFilef( const char* filename, const float* data, unsigned int len,
|
||||
const float epsilon, bool verbose = false);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Write a data file \filename containing double precision floating point
|
||||
//! data
|
||||
//! @return shrTRUE if writing the file succeeded, otherwise shrFALSE
|
||||
//! @param filename name of the file to write
|
||||
//! @param data pointer to data to write
|
||||
//! @param len number of data elements in data, -1 on error
|
||||
//! @param epsilon epsilon for comparison
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrWriteFiled( const char* filename, const float* data, unsigned int len,
|
||||
const double epsilon, bool verbose = false);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Write a data file \filename containing integer data
|
||||
//! @return shrTRUE if writing the file succeeded, otherwise shrFALSE
|
||||
//! @param filename name of the file to write
|
||||
//! @param data pointer to data to write
|
||||
//! @param len number of data elements in data, -1 on error
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrWriteFilei( const char* filename, const int* data, unsigned int len,
|
||||
bool verbose = false);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Write a data file \filename containing unsigned integer data
|
||||
//! @return shrTRUE if writing the file succeeded, otherwise shrFALSE
|
||||
//! @param filename name of the file to write
|
||||
//! @param data pointer to data to write
|
||||
//! @param len number of data elements in data, -1 on error
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrWriteFileui( const char* filename, const unsigned int* data,
|
||||
unsigned int len, bool verbose = false);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Write a data file \filename containing char / byte data
|
||||
//! @return shrTRUE if writing the file succeeded, otherwise shrFALSE
|
||||
//! @param filename name of the file to write
|
||||
//! @param data pointer to data to write
|
||||
//! @param len number of data elements in data, -1 on error
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrWriteFileb( const char* filename, const char* data, unsigned int len,
|
||||
bool verbose = false);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Write a data file \filename containing unsigned char / byte data
|
||||
//! @return shrTRUE if writing the file succeeded, otherwise shrFALSE
|
||||
//! @param filename name of the file to write
|
||||
//! @param data pointer to data to write
|
||||
//! @param len number of data elements in data, -1 on error
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrWriteFileub( const char* filename, const unsigned char* data,
|
||||
unsigned int len, bool verbose = false);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Load PPM image file (with unsigned char as data element type), padding
|
||||
//! 4th component
|
||||
//! @return shrTRUE if reading the file succeeded, otherwise shrFALSE
|
||||
//! @param file name of the image file
|
||||
//! @param OutData handle to the data read
|
||||
//! @param w width of the image
|
||||
//! @param h height of the image
|
||||
//!
|
||||
//! Note: If *OutData is NULL this function allocates buffer that must be freed by caller
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrLoadPPM4ub(const char* file, unsigned char** OutData,
|
||||
unsigned int *w, unsigned int *h);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Save PPM image file (with unsigned char as data element type, padded to
|
||||
//! 4 bytes)
|
||||
//! @return shrTRUE if saving the file succeeded, otherwise shrFALSE
|
||||
//! @param file name of the image file
|
||||
//! @param data handle to the data read
|
||||
//! @param w width of the image
|
||||
//! @param h height of the image
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrSavePPM4ub( const char* file, unsigned char *data,
|
||||
unsigned int w, unsigned int h);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Save PGM image file (with unsigned char as data element type)
|
||||
//! @return shrTRUE if saving the file succeeded, otherwise shrFALSE
|
||||
//! @param file name of the image file
|
||||
//! @param data handle to the data read
|
||||
//! @param w width of the image
|
||||
//! @param h height of the image
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrSavePGMub( const char* file, unsigned char *data,
|
||||
unsigned int w, unsigned int h);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Load PGM image file (with unsigned char as data element type)
|
||||
//! @return shrTRUE if saving the file succeeded, otherwise shrFALSE
|
||||
//! @param file name of the image file
|
||||
//! @param data handle to the data read
|
||||
//! @param w width of the image
|
||||
//! @param h height of the image
|
||||
//! @note If a NULL pointer is passed to this function and it is initialized
|
||||
//! within shrUtils, then free() has to be used to deallocate the memory
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrLoadPGMub( const char* file, unsigned char** data,
|
||||
unsigned int *w,unsigned int *h);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// Command line arguments: General notes
|
||||
// * All command line arguments begin with '--' followed by the token;
|
||||
// token and value are seperated by '='; example --samples=50
|
||||
// * Arrays have the form --model=[one.obj,two.obj,three.obj]
|
||||
// (without whitespaces)
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Check if command line argument \a flag-name is given
|
||||
//! @return shrTRUE if command line argument \a flag_name has been given,
|
||||
//! otherwise shrFALSE
|
||||
//! @param argc argc as passed to main()
|
||||
//! @param argv argv as passed to main()
|
||||
//! @param flag_name name of command line flag
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrCheckCmdLineFlag( const int argc, const char** argv,
|
||||
const char* flag_name);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Get the value of a command line argument of type int
|
||||
//! @return shrTRUE if command line argument \a arg_name has been given and
|
||||
//! is of the requested type, otherwise shrFALSE
|
||||
//! @param argc argc as passed to main()
|
||||
//! @param argv argv as passed to main()
|
||||
//! @param arg_name name of the command line argument
|
||||
//! @param val value of the command line argument
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrGetCmdLineArgumenti( const int argc, const char** argv,
|
||||
const char* arg_name, int* val);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Get the value of a command line argument of type unsigned int
|
||||
//! @return shrTRUE if command line argument \a arg_name has been given and
|
||||
//! is of the requested type, otherwise shrFALSE
|
||||
//! @param argc argc as passed to main()
|
||||
//! @param argv argv as passed to main()
|
||||
//! @param arg_name name of the command line argument
|
||||
//! @param val value of the command line argument
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrGetCmdLineArgumentu( const int argc, const char** argv,
|
||||
const char* arg_name, unsigned int* val);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Get the value of a command line argument of type float
|
||||
//! @return shrTRUE if command line argument \a arg_name has been given and
|
||||
//! is of the requested type, otherwise shrFALSE
|
||||
//! @param argc argc as passed to main()
|
||||
//! @param argv argv as passed to main()
|
||||
//! @param arg_name name of the command line argument
|
||||
//! @param val value of the command line argument
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrGetCmdLineArgumentf( const int argc, const char** argv,
|
||||
const char* arg_name, float* val);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Get the value of a command line argument of type string
|
||||
//! @return shrTRUE if command line argument \a arg_name has been given and
|
||||
//! is of the requested type, otherwise shrFALSE
|
||||
//! @param argc argc as passed to main()
|
||||
//! @param argv argv as passed to main()
|
||||
//! @param arg_name name of the command line argument
|
||||
//! @param val value of the command line argument
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrGetCmdLineArgumentstr( const int argc, const char** argv,
|
||||
const char* arg_name, char** val);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Get the value of a command line argument list those element are strings
|
||||
//! @return shrTRUE if command line argument \a arg_name has been given and
|
||||
//! is of the requested type, otherwise shrFALSE
|
||||
//! @param argc argc as passed to main()
|
||||
//! @param argv argv as passed to main()
|
||||
//! @param arg_name name of the command line argument
|
||||
//! @param val command line argument list
|
||||
//! @param len length of the list / number of elements
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrGetCmdLineArgumentListstr( const int argc, const char** argv,
|
||||
const char* arg_name, char** val,
|
||||
unsigned int* len);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Compare two float arrays
|
||||
//! @return shrTRUEif \a reference and \a data are identical, otherwise shrFALSE
|
||||
//! @param reference handle to the reference data / gold image
|
||||
//! @param data handle to the computed data
|
||||
//! @param len number of elements in reference and data
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrComparef( const float* reference, const float* data,
|
||||
const unsigned int len);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Compare two integer arrays
|
||||
//! @return shrTRUEif \a reference and \a data are identical, otherwise shrFALSE
|
||||
//! @param reference handle to the reference data / gold image
|
||||
//! @param data handle to the computed data
|
||||
//! @param len number of elements in reference and data
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrComparei( const int* reference, const int* data,
|
||||
const unsigned int len );
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Compare two unsigned integer arrays, with epsilon and threshold
|
||||
//! @return shrTRUEif \a reference and \a data are identical, otherwise shrFALSE
|
||||
//! @param reference handle to the reference data / gold image
|
||||
//! @param data handle to the computed data
|
||||
//! @param len number of elements in reference and data
|
||||
//! @param threshold tolerance % # of comparison errors (0.15f = 15%)
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrCompareuit( const unsigned int* reference, const unsigned int* data,
|
||||
const unsigned int len, const float epsilon, const float threshold );
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Compare two unsigned char arrays
|
||||
//! @return shrTRUEif \a reference and \a data are identical, otherwise shrFALSE
|
||||
//! @param reference handle to the reference data / gold image
|
||||
//! @param data handle to the computed data
|
||||
//! @param len number of elements in reference and data
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrCompareub( const unsigned char* reference, const unsigned char* data,
|
||||
const unsigned int len );
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Compare two integers with a tolernance for # of byte errors
|
||||
//! @return shrTRUEif \a reference and \a data are identical, otherwise shrFALSE
|
||||
//! @param reference handle to the reference data / gold image
|
||||
//! @param data handle to the computed data
|
||||
//! @param len number of elements in reference and data
|
||||
//! @param epsilon epsilon to use for the comparison
|
||||
//! @param threshold tolerance % # of comparison errors (0.15f = 15%)
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrCompareubt( const unsigned char* reference, const unsigned char* data,
|
||||
const unsigned int len, const float epsilon, const float threshold );
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Compare two integer arrays witha n epsilon tolerance for equality
|
||||
//! @return shrTRUEif \a reference and \a data are identical, otherwise shrFALSE
|
||||
//! @param reference handle to the reference data / gold image
|
||||
//! @param data handle to the computed data
|
||||
//! @param len number of elements in reference and data
|
||||
//! @param epsilon epsilon to use for the comparison
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrCompareube( const unsigned char* reference, const unsigned char* data,
|
||||
const unsigned int len, const float epsilon );
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Compare two float arrays with an epsilon tolerance for equality
|
||||
//! @return shrTRUEif \a reference and \a data are identical, otherwise shrFALSE
|
||||
//! @param reference handle to the reference data / gold image
|
||||
//! @param data handle to the computed data
|
||||
//! @param len number of elements in reference and data
|
||||
//! @param epsilon epsilon to use for the comparison
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrComparefe( const float* reference, const float* data,
|
||||
const unsigned int len, const float epsilon );
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Compare two float arrays with an epsilon tolerance for equality and a
|
||||
//! threshold for # pixel errors
|
||||
//! @return shrTRUEif \a reference and \a data are identical, otherwise shrFALSE
|
||||
//! @param reference handle to the reference data / gold image
|
||||
//! @param data handle to the computed data
|
||||
//! @param len number of elements in reference and data
|
||||
//! @param epsilon epsilon to use for the comparison
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrComparefet( const float* reference, const float* data,
|
||||
const unsigned int len, const float epsilon, const float threshold );
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//! Compare two float arrays using L2-norm with an epsilon tolerance for
|
||||
//! equality
|
||||
//! @return shrTRUEif \a reference and \a data are identical, otherwise shrFALSE
|
||||
//! @param reference handle to the reference data / gold image
|
||||
//! @param data handle to the computed data
|
||||
//! @param len number of elements in reference and data
|
||||
//! @param epsilon epsilon to use for the comparison
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrCompareL2fe( const float* reference, const float* data,
|
||||
const unsigned int len, const float epsilon );
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Compare two PPM image files with an epsilon tolerance for equality
|
||||
//! @return shrTRUEif \a reference and \a data are identical, otherwise shrFALSE
|
||||
//! @param src_file filename for the image to be compared
|
||||
//! @param data filename for the reference data / gold image
|
||||
//! @param epsilon epsilon to use for the comparison
|
||||
//! @param threshold threshold of pixels that can still mismatch to pass (i.e. 0.15f = 15% must pass)
|
||||
//! $param verboseErrors output details of image mismatch to std::err
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrComparePPM( const char *src_file, const char *ref_file, const float epsilon, const float threshold);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Compare two PGM image files with an epsilon tolerance for equality
|
||||
//! @return shrTRUEif \a reference and \a data are identical, otherwise shrFALSE
|
||||
//! @param src_file filename for the image to be compared
|
||||
//! @param data filename for the reference data / gold image
|
||||
//! @param epsilon epsilon to use for the comparison
|
||||
//! @param threshold threshold of pixels that can still mismatch to pass (i.e. 0.15f = 15% must pass)
|
||||
//! $param verboseErrors output details of image mismatch to std::err
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" shrBOOL shrComparePGM( const char *src_file, const char *ref_file, const float epsilon, const float threshold);
|
||||
|
||||
extern "C" unsigned char* shrLoadRawFile(const char* filename, size_t size);
|
||||
|
||||
extern "C" size_t shrRoundUp(int group_size, int global_size);
|
||||
|
||||
// companion inline function for error checking and exit on error WITH Cleanup Callback (if supplied)
|
||||
// *********************************************************************
|
||||
inline void __shrCheckErrorEX(int iSample, int iReference, void (*pCleanup)(int), const char* cFile, const int iLine)
|
||||
{
|
||||
if (iReference != iSample)
|
||||
{
|
||||
shrLogEx(LOGBOTH | ERRORMSG, iSample, "line %i , in file %s !!!\n\n" , iLine, cFile);
|
||||
if (pCleanup != NULL)
|
||||
{
|
||||
pCleanup(EXIT_FAILURE);
|
||||
}
|
||||
else
|
||||
{
|
||||
shrLogEx(LOGBOTH | CLOSELOG, 0, "Exiting...\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Standardized Exit
|
||||
// *********************************************************************
|
||||
inline void __shrExitEX(int argc, const char** argv, int iExitCode)
|
||||
{
|
||||
#ifdef WIN32
|
||||
if (!shrCheckCmdLineFlag(argc, argv, "noprompt") && !shrCheckCmdLineFlag(argc, argv, "qatest"))
|
||||
#else
|
||||
if (shrCheckCmdLineFlag(argc, argv, "prompt") && !shrCheckCmdLineFlag(argc, argv, "qatest"))
|
||||
#endif
|
||||
{
|
||||
shrLogEx(LOGBOTH | CLOSELOG, 0, "\nPress <Enter> to Quit...\n");
|
||||
getchar();
|
||||
}
|
||||
else
|
||||
{
|
||||
shrLogEx(LOGBOTH | CLOSELOG, 0, "%s Exiting...\n", argv[0]);
|
||||
}
|
||||
fflush(stderr);
|
||||
exit(iExitCode);
|
||||
}
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user