Multiple fixes: parallel printf, fixed cycle in cache, OpenCL refactored vecadd and sgemm, regenerated OpenCL kernels with hard-float, fixed Vortex I/O bus interface, fixed DPI floats API to support multi-core mode, made vlsim multi-core by default, made rtlsim multi-core by default, removed POCL binaries from repository, updated Makefiles to use external POCL

This commit is contained in:
Blaise Tine
2020-09-19 14:45:42 -04:00
parent 80f929eb61
commit f6f95e0c46
146 changed files with 116779 additions and 194258 deletions

View File

@@ -1,9 +1,18 @@
-__kernel void sgemm(__global float *A, __global float *B, __global float *C, int ldc)
+__kernel void sgemm (__global const float *A,
+__global const float *B,
+__global float *C,
+int N)
 {
-long i = get_global_id(0);
-long m = get_global_id(1);
-long n = get_global_id(2);
-float a = A[m+n*ldc];
-float b = B[m*ldc+i];
-C[i+n*ldc] = C[i+n*ldc] + a * b;
+// Thread identifiers
+const int r = get_global_id(0); // Row ID
+const int c = get_global_id(1); // Col ID
+// Compute a single element (loop a K)
+float acc = 0.0f;
+for (int k = 0; k < N; k++) {
+acc += A[k * N + r] * B[c * N + k];
+}
+// Store the result
+C[c * N + r] = acc;
 }