[tests] Add compute-bound variant of vecadd
Each work-item loops 1000 times over `sum += A[addr] + B[addr]`, where `addr` alternates between `gid` and `gid + 1`, so that every memory operation hits in the L1 cache.
This commit is contained in:
12
tests/opencl/vecadd/kernel.alll1hit.loop1000.cl
Normal file
12
tests/opencl/vecadd/kernel.alll1hit.loop1000.cl
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
/*
 * vecadd, compute-bound variant.
 *
 * Each work-item runs 1000 iterations, adding A[k] + B[k] to a running
 * float total, where k is the work-item's global id on even iterations
 * and global id + 1 on odd iterations. The total is then stored to C at
 * the work-item's global id.
 *
 * Re-reading the same two elements of A and B is presumably what keeps
 * the loads cache-resident, so the arithmetic dominates the run time.
 *
 * NOTE(review): odd iterations read index (global id + 1), so the
 * work-item with the highest id reads one element beyond its own index
 * in both A and B -- confirm the host buffers are sized for that.
 */
__kernel void vecadd(__global const float *A,
                     __global const float *B,
                     __global float *C)
{
    const int idx = get_global_id(0);
    float acc = 0.0f;
    int iter = 0;

    while (iter < 1000) {
        /* Alternate between this work-item's element and its neighbour. */
        const int pos = idx + (iter % 2);

        /* Keep the pairwise add first so the float rounding order is unchanged. */
        acc += A[pos] + B[pos];
        ++iter;
    }

    C[idx] = acc;
}
|
||||||
Reference in New Issue
Block a user