Files
vortex/tests/opencl/vectorhypot/kernel.cl
Blaise Tine c1e168fdbe Vortex 2.0 changes:
+ Microarchitecture optimizations
+ 64-bit support
+ Xilinx FPGA support
+ LLVM-16 support
+ Refactoring and quality control fixes

minor update

minor update

minor update

minor update

minor update

minor update

cleanup

cleanup

cache bindings and memory perf refactory

minor update

minor update

hw unit tests fixes

minor update

minor update

minor update

minor update

minor update

minor udpate

minor update

minor update

minor update

minor update

minor update

minor update

minor update

minor updates

minor updates

minor update

minor update

minor update

minor update

minor update

minor update

minor updates

minor updates

minor updates

minor updates

minor update

minor update
2023-11-10 02:47:05 -08:00

41 lines
1.5 KiB
Common Lisp

/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
// OpenCL Kernel Function Naive Implementation for hyptenuse
__kernel void VectorHypot(__global float4* fg4A, __global float4* fg4B, __global float4* fg4Hypot, unsigned int uiOffset, int iInnerLoopCount, unsigned int uiNumElements)
{
// get index into global data array
size_t szGlobalOffset = get_global_id(0) + uiOffset;
// bound check
if (szGlobalOffset >= uiNumElements)
{
return;
}
// Processing 4 elements per work item, so read fgA and fgB source values from GMEM
float4 f4A = fg4A[szGlobalOffset];
float4 f4B = fg4B[szGlobalOffset];
float4 f4H = (float4)0.0f;
// Get the hypotenuses the vectors of 'legs', but exaggerate the time needed with loop
for (int i = 0; i < iInnerLoopCount; i++)
{
// compute the 4 hypotenuses using built-in function
f4H.x = hypot (f4A.x, f4B.x);
f4H.y = hypot (f4A.y, f4B.y);
f4H.z = hypot (f4A.z, f4B.z);
f4H.w = hypot (f4A.w, f4B.w);
}
// Write 4 result values back out to GMEM
fg4Hypot[szGlobalOffset] = f4H;
}