Vortex 2.0 changes:

+ Microarchitecture optimizations
+ 64-bit support
+ Xilinx FPGA support
+ LLVM-16 support
+ Refactoring and quality control fixes
This commit is contained in:
Blaise Tine
2023-10-19 20:51:22 -07:00
parent d69a64c32c
commit d47cccc157
1300 changed files with 247321 additions and 311189 deletions

View File

@@ -1,6 +1,8 @@
__kernel void sgemm (__global const float *A,
__global const float *B,
__global float *C,
#include "common.h"
__kernel void sgemm (__global const TYPE *A,
__global const TYPE *B,
__global TYPE *C,
int N)
{
// Thread identifiers
@@ -8,7 +10,7 @@ __kernel void sgemm (__global const float *A,
const int c = get_global_id(1); // Col ID
// Compute a single element (loop over K)
float acc = 0.0f;
TYPE acc = 0;
for (int k = 0; k < N; k++) {
acc += A[k * N + r] * B[c * N + k];
}