Vortex 2.0 changes:
+ Microarchitecture optimizations
+ 64-bit support
+ Xilinx FPGA support
+ LLVM-16 support
+ Refactoring and quality control fixes
This commit is contained in:
@@ -1,6 +1,8 @@
|
||||
__kernel void sgemm (__global const float *A,
|
||||
__global const float *B,
|
||||
__global float *C,
|
||||
#include "common.h"
|
||||
|
||||
__kernel void sgemm (__global const TYPE *A,
|
||||
__global const TYPE *B,
|
||||
__global TYPE *C,
|
||||
int N)
|
||||
{
|
||||
// Thread identifiers
|
||||
@@ -8,7 +10,7 @@ __kernel void sgemm (__global const float *A,
|
||||
const int c = get_global_id(1); // Col ID
|
||||
|
||||
// Compute a single element (loop over K)
|
||||
float acc = 0.0f;
|
||||
TYPE acc = 0;
|
||||
for (int k = 0; k < N; k++) {
|
||||
acc += A[k * N + r] * B[c * N + k];
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user