project tests refactoring

This commit is contained in:
Blaise Tine
2021-06-13 17:42:04 -07:00
parent 47c3234659
commit 03406c0a3f
631 changed files with 394471 additions and 653511 deletions

View File

@@ -0,0 +1,32 @@
# Build rules for the vx_vec_saxpy test: links the runtime sources, the
# assembly kernel and the C driver into an ELF, then derives an Intel HEX
# image and a disassembly listing from it.
#
# The toolchain location is overridable, e.g.
#   make RISCV_TOOLCHAIN_PATH=/opt/riscv
# and defaults to the path this file originally hard-coded.
RISCV_TOOLCHAIN_PATH ?= /home/priya/dev/riscv_vec/riscv-gnu
TOOL_PREFIX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-
TOOL_LIBDIR = $(RISCV_TOOLCHAIN_PATH)/riscv32-unknown-elf/lib

LIB_PATH = ../../../runtime

COMP = $(TOOL_PREFIX)gcc
DMP = $(TOOL_PREFIX)objdump
CPY = $(TOOL_PREFIX)objcopy

CC_FLAGS = -ffreestanding -O0 -Wl,--gc-sections -nostartfiles -nostdlib -nodefaultlibs -Wl,-Bstatic,-T,$(LIB_PATH)/startup/vx_link.ld -march=rv32imv -mabi=ilp32

NEWLIB = $(LIB_PATH)/newlib/newlib.c
VX_STR = $(LIB_PATH)/startup/vx_start.S
VX_INT = $(LIB_PATH)/intrinsics/vx_intrinsics.S
VX_IO = $(LIB_PATH)/io/vx_io.S $(LIB_PATH)/io/vx_io.c
VX_API = $(LIB_PATH)/vx_api/vx_api.c
VX_FIO = $(LIB_PATH)/fileio/fileio.S
# Integer port of the kernel (float --> int).
VX_VEC = vx_vec_saxpy.s

LIBS = $(TOOL_LIBDIR)/libc.a $(TOOL_LIBDIR)/libstdc++.a -static-libgcc -lgcc

VX_MAIN = vx_vec_saxpy

# None of these targets names a file that gets created, so declare them phony.
.PHONY: all HEX DUMP ELF

all: HEX DUMP ELF

DUMP: ELF
	$(DMP) -D $(VX_MAIN).elf > $(VX_MAIN).dump

HEX: ELF
	$(CPY) -O ihex $(VX_MAIN).elf $(VX_MAIN).hex

ELF:
	$(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf

View File

@@ -0,0 +1,78 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "../../../runtime/intrinsics/vx_intrinsics.h"
#include "vx_vec_saxpy.h"
//---------------------------------------------------------------
/* # void saxpy(size_t n, const float a, const float *x, float *y)
# ==> convert to int!!
# void saxpy(size_t n, const int a, const int *x, int *y)
# { size_t i;
# for (i=0; i<n; i++) y[i] = a * x[i] + y[i]; } */
//---------------------------------------------------------------
/*
 * Test driver for the integer SAXPY vector kernel.
 *
 * Fills a[] with pseudo-random integers, runs vx_vec_saxpy(n, factor, a, b)
 * between cycle/instruction counter reads, and checks every b[i] against the
 * scalar reference a[i] * factor + c[i] (c[] holds the initial, all-zero
 * contents of b[]).
 *
 * Returns 0 when every element matches, 1 on allocation failure or mismatch.
 */
int main()
{
    vx_tmc(1);

    int n = 64; //#define NUM_DATA 65536

    int *a = (int*)malloc(sizeof(int) * n);
    int *b = (int*)malloc(sizeof(int) * n);
    int *c = (int*)malloc(sizeof(int) * n); //verification

    // Bail out before touching the buffers if any allocation failed.
    if (a == NULL || b == NULL || c == NULL)
    {
        printf("\n<saxpy> FAILED: out of memory! \n");
        free(a); free(b); free(c);
        return 1;
    }

    // float factor = ((float)rand()/(float)(RAND_MAX)) * 100.0;
    int factor = ((float)rand()/(RAND_MAX)) * 100.0;

    for (int i = 0; i < n; ++i) {
        a[i] = ((float)rand()/(RAND_MAX)) * 100.0;
        b[i] = 0;
        c[i] = 0;
    }

#if 1
    printf("saxpy\nfactor: %d\na[%d]: ", factor, n);
    for(int i = 0; i < n; ++i) printf("%d ", a[i]);
    // printf("\nb[%d]: ", n);
    // for(int i = 0; i < n; ++i) printf("%d \n", b[i]);
#endif

    // Only the kernel call sits between the two counter samples.
    int startCycles = vx_num_cycles();
    int startInst = vx_num_instrs();
    vx_vec_saxpy(n, factor, a, b);
    int endCycles = vx_num_cycles();
    int endInst = vx_num_instrs();

    int totalInst = (endInst - startInst);
    int totalCycles = (endCycles - startCycles);
    printf("\nCycles = %d, Instructions = %d", totalCycles, totalInst);

#if 0
    printf("\nsaxpy\na[%d]: ", n);
    for(int i = 0; i < n; ++i) printf("%d ", a[i]);
    printf("\n\nb[%d]: ", n);
    for(int i = 0; i < n; ++i) printf("%d ", b[i]);
#endif

    // Compare against the scalar reference; stop at the first mismatch.
    for(int i = 0; i < n; ++i)
    {
        if(b[i] != ((a[i] * factor) + c[i]))
        {
            printf("\n<saxpy> FAILED at <index: %d>! \n", i);
            free(a); free(b); free(c);
            return 1;
        }
    }

    printf("\nPASSED.......................... <saxpy> \n");
    free(a); free(b); free(c);
    vx_tmc(0);
    return 0;
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,12 @@
#pragma once
// C declaration of the assembly SAXPY kernel (integer variant).
#ifdef __cplusplus
extern "C" {
#endif
// For every i in [0, n): b[i] = scalar * a[i] + b[i].
// a is only read; b is read and overwritten in place.
void vx_vec_saxpy(int n, int scalar, int* a, int* b);
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,26 @@
.type vx_vec_saxpy, @function
.global vx_vec_saxpy
# void
# saxpy(size_t n, int factor, int *a, int *b)
# { for (int i=0; i<n; i++) { y[i] = a * x[i] + y[i];} }
#
# register arguments:
# a0 n
# a1 factor
# a2 a
# a3 b
vx_vec_saxpy:
loop:
vsetvli a4, a0, e32
vlw.v v0, (a2)
sub a0, a0, a4
slli a4, a4, 2
add a2, a2, a4
vlw.v v1, (a3)
vmul.vx v0, v0, a1
vadd.vv v1, v0, v1
# vmacc.vx v1, rs1, v0
vsw.v v1, (a3)
add a3, a3, a4
bnez a0, loop
ret

View File

@@ -0,0 +1,32 @@
# Build rules for the vx_vec_sfilter test: links the runtime sources, the
# assembly kernel and the C driver into an ELF, then derives an Intel HEX
# image and a disassembly listing from it.
#
# The toolchain location is overridable, e.g.
#   make RISCV_TOOLCHAIN_PATH=/opt/riscv
# and defaults to the path this file originally hard-coded.
RISCV_TOOLCHAIN_PATH ?= /home/priya/dev/riscv_vec/riscv-gnu
TOOL_PREFIX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-
TOOL_LIBDIR = $(RISCV_TOOLCHAIN_PATH)/riscv32-unknown-elf/lib

LIB_PATH = ../../../runtime

COMP = $(TOOL_PREFIX)gcc
DMP = $(TOOL_PREFIX)objdump
CPY = $(TOOL_PREFIX)objcopy

CC_FLAGS = -ffreestanding -O0 -Wl,--gc-sections -nostartfiles -nostdlib -nodefaultlibs -Wl,-Bstatic,-T,$(LIB_PATH)/startup/vx_link.ld -march=rv32imv -mabi=ilp32

NEWLIB = $(LIB_PATH)/newlib/newlib.c
VX_STR = $(LIB_PATH)/startup/vx_start.S
VX_INT = $(LIB_PATH)/intrinsics/vx_intrinsics.S
VX_IO = $(LIB_PATH)/io/vx_io.S $(LIB_PATH)/io/vx_io.c
VX_API = $(LIB_PATH)/vx_api/vx_api.c
VX_FIO = $(LIB_PATH)/fileio/fileio.S
# Integer port of the kernel (float --> int).
VX_VEC = vx_vec_sfilter.s

LIBS = $(TOOL_LIBDIR)/libc.a $(TOOL_LIBDIR)/libstdc++.a -static-libgcc -lgcc

VX_MAIN = vx_vec_sfilter

# None of these targets names a file that gets created, so declare them phony.
.PHONY: all HEX DUMP ELF

all: HEX DUMP ELF

DUMP: ELF
	$(DMP) -D $(VX_MAIN).elf > $(VX_MAIN).dump

HEX: ELF
	$(CPY) -O ihex $(VX_MAIN).elf $(VX_MAIN).hex

ELF:
	$(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf

View File

@@ -0,0 +1,89 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "../../../runtime/intrinsics/vx_intrinsics.h"
#include "vx_vec_sfilter.h"
#define NUM_DATA 66
/*
 * Test driver for the 3x3 filter vector kernel.
 *
 * a[] is a NUM_DATA x NUM_DATA row-major image initialised to a[i] = i.
 * The kernel is invoked for every interior pixel row, N columns at a time,
 * writing into b[]. Each interior b[] element is then compared against a
 * scalar reference (the sum of the nine neighbours, each scaled by m) that
 * is computed into c[].
 *
 * Returns 0 when every interior element matches, 1 on allocation failure or
 * mismatch.
 */
int main()
{
    vx_tmc(1);

    int n = NUM_DATA*NUM_DATA;
    int ldc = NUM_DATA;     // row pitch, in elements
    int m = 1;              // weight applied to each of the nine taps

    int *a = (int*)malloc(sizeof(int) * n);
    int *b = (int*)malloc(sizeof(int) * n);
    int *c = (int*)malloc(sizeof(int) * n);

    // Bail out before touching the buffers if any allocation failed.
    if (a == NULL || b == NULL || c == NULL)
    {
        printf("\n<sfilter> FAILED: out of memory! \n");
        free(a); free(b); free(c);
        return 1;
    }

    for (int i = 0; i < n; ++i) {
        a[i] = i;
        b[i] = 0;
        c[i] = 0;
    }

    int N = 4;              // columns requested per kernel call

    int startCycles = vx_num_cycles();
    int startInst = vx_num_instrs();
    for(int y = 1; y < (NUM_DATA-1); y++){
        for(int x = 1; x < (NUM_DATA-1); x = x+N) {
            vx_vec_sfilter(a, b, ldc, m, x, y, N);
        }
    }
    int endCycles = vx_num_cycles();
    int endInst = vx_num_instrs();

    int totalInst = (endInst - startInst);
    int totalCycles = (endCycles - startCycles);
    printf("\nCycles = %d, Instructions = %d", totalCycles, totalInst);

    // Scalar reference over the interior pixels; stop at the first mismatch.
    for(int y = 1; y < (NUM_DATA-1); ++y)
    {
        for(int x = 1; x < (NUM_DATA-1); ++x){
            int i0 = a[(x-1)+(y-1)*ldc]*m;
            int i1 = a[(x)  +(y-1)*ldc]*m;
            int i2 = a[(x+1)+(y-1)*ldc]*m;
            int i3 = a[(x-1)+(y)  *ldc]*m;
            int i4 = a[(x)  + y * ldc]*m;
            int i5 = a[(x+1)+(y)  *ldc]*m;
            int i6 = a[(x-1)+(y+1)*ldc]*m;
            int i7 = a[(x)  +(y+1)*ldc]*m;
            int i8 = a[(x+1)+(y+1)*ldc]*m;
            c[x+y*ldc] = i0 + i1 + i2 + i3 + i4 + i5 + i6 + i7 + i8;
            //printf("\nc[%d] = %d",(x+y*ldc), c[x+y*ldc] );
            //printf("\nb[%d] = %d",(x+y*ldc), b[x+y*ldc] );
            //printf("%d, %d, %d, %d, %d, %d, %d, %d, %d", i0, i1, i2, i3, i4, i5, i6, i7, i8);
            if(c[x+y*ldc] != b[x+y*ldc] )
            {
                // Report the flat index of the failing element, not just its column.
                printf("\n<sfilter> FAILED at <index: %d>! \n", x + y*ldc);
                free(a); free(b); free(c);
                return 1;
            }
        }
    }

    printf("\nPASSED.......................... <sfilter> \n");
    free(a); free(b); free(c);
    vx_tmc(0);
    return 0;
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,12 @@
#pragma once
// C declaration of the assembly 3x3 filter kernel.
#ifdef __cplusplus
extern "C" {
#endif
// a, b : row-major int images with a row pitch of ldc elements.
// For a run of consecutive columns starting at (x, y) — N columns are
// requested — writes to b[x + y*ldc ...] the sum of the nine a[] neighbours
// of each pixel, every neighbour multiplied by m.
void vx_vec_sfilter(int* a, int* b, int ldc, int m, int x, int y, int N);
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,111 @@
.type vx_vec_saxpy, @function
.global vx_vec_sfilter
#vx_vec_sfilter(a, b, ldc, m, x, y, N);
#a0 - a
#a1 - b
#a2 - ldc
#a3 - m
#a4 - x
#a5 - y
#a6 - N
vx_vec_sfilter:
vsetvli t0, a6, e32
li t1, 1
sub t2, a4, t1 #(x-1)
add t3, a4, t1 #(x+1)
sub t4, a5, t1 #(y-1)
add t5, a5, t1 #(y+1)
#i0
mul t6, t4, a2 #(y-1)*ldc
add a7, t6, t2 #(x-1) + (y-1)*ldc
slli a7, a7, 2
add a0, a0, a7
vlw.v v0, (a0)
vmul.vx v0, v0, a3
sub a0, a0, a7
#i1
add a7, t6, a4 #(x + (y-1)*ldc)
slli a7, a7, 2
add a0, a0, a7
vlw.v v1, (a0)
vmul.vx v1, v1, a3
sub a0, a0, a7
#i2
add a7, t3, t6 #((x+1) + (y-1)*ldc)
slli a7, a7, 2
add a0, a0, a7
vlw.v v2, (a0)
vmul.vx v2, v2, a3
sub a0, a0, a7
#i3
mul t6, a5, a2 #y*ldc
add a7, t6, t2 #(x-1) + y*ldc
slli a7, a7, 2
add a0, a0, a7
vlw.v v3, (a0)
vmul.vx v3, v3, a3
sub a0, a0, a7
#i4
add a7, t6, a4 #(x + y*ldc)
slli a7, a7, 2
add a0, a0, a7
vlw.v v4, (a0)
vmul.vx v4, v4, a3
sub a0, a0, a7
#i5
add a7, t6, t3 #((x+1) + (y*ldc))
slli a7, a7, 2
add a0, a0, a7
vlw.v v5, (a0)
vmul.vx v5, v5, a3
sub a0, a0, a7
#i6
mul t6, t5, a2 #(y+1)*ldc
add a7, t6, t2 #(x-1) + (y+1)*ldc
slli a7, a7, 2
add a0, a0, a7
vlw.v v6, (a0)
vmul.vx v6, v6, a3
sub a0, a0, a7
#i7
add a7, t6, a4 #(y+1)*ldc + x
slli a7, a7, 2
add a0, a0, a7
vlw.v v7, (a0)
vmul.vx v7, v7, a3
sub a0, a0, a7
#i8
add a7, t6, t3 #(x+1) + (y+1)*ldc
slli a7, a7, 2
add a0, a0, a7
vlw.v v8, (a0)
vmul.vx v8, v8, a3
sub a0, a0, a7
#c
mul t6, a5, a2 #y*ldc
add a7, t6, a4 # x + y*ldc
vadd.vv v9, v0, v1
vadd.vv v9, v9, v2
vadd.vv v9, v9, v3
vadd.vv v9, v9, v4
vadd.vv v9, v9, v5
vadd.vv v9, v9, v6
vadd.vv v9, v9, v7
vadd.vv v9, v9, v8
slli a7, a7, 2
add a1, a1, a7
vsw.v v9, (a1)
ret

View File

@@ -0,0 +1,32 @@
# Build rules for the vx_vec_sgemm_nn test: links the runtime sources, the
# assembly kernel and the C driver into an ELF, then derives an Intel HEX
# image and a disassembly listing from it.
#
# The toolchain location is overridable, e.g.
#   make RISCV_TOOLCHAIN_PATH=/opt/riscv
# and defaults to the path this file originally hard-coded.
RISCV_TOOLCHAIN_PATH ?= /home/priya/dev/riscv_vec/riscv-gnu
TOOL_PREFIX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-
TOOL_LIBDIR = $(RISCV_TOOLCHAIN_PATH)/riscv32-unknown-elf/lib

LIB_PATH = ../../../runtime

COMP = $(TOOL_PREFIX)gcc
DMP = $(TOOL_PREFIX)objdump
CPY = $(TOOL_PREFIX)objcopy

CC_FLAGS = -ffreestanding -O0 -Wl,--gc-sections -nostartfiles -nostdlib -nodefaultlibs -Wl,-Bstatic,-T,$(LIB_PATH)/startup/vx_link.ld -march=rv32imv -mabi=ilp32

NEWLIB = $(LIB_PATH)/newlib/newlib.c
VX_STR = $(LIB_PATH)/startup/vx_start.S
VX_INT = $(LIB_PATH)/intrinsics/vx_intrinsics.S
VX_IO = $(LIB_PATH)/io/vx_io.S $(LIB_PATH)/io/vx_io.c
VX_API = $(LIB_PATH)/vx_api/vx_api.c
VX_FIO = $(LIB_PATH)/fileio/fileio.S
# Integer port of the kernel (float --> int).
VX_VEC = vx_vec_sgemm_nn.s

LIBS = $(TOOL_LIBDIR)/libc.a $(TOOL_LIBDIR)/libstdc++.a -static-libgcc -lgcc

VX_MAIN = vx_vec_sgemm_nn

# None of these targets names a file that gets created, so declare them phony.
.PHONY: all HEX DUMP ELF

all: HEX DUMP ELF

DUMP: ELF
	$(DMP) -D $(VX_MAIN).elf > $(VX_MAIN).dump

HEX: ELF
	$(CPY) -O ihex $(VX_MAIN).elf $(VX_MAIN).hex

ELF:
	$(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf

View File

@@ -0,0 +1,38 @@
.type vx_vec_sgemm_nn, @function
.global vx_vec_sgemm_nn
#
# for (int n = 0; n < k; n++) {
# for (int m = 0; m < m; m++) {
# for (int i = 0; i < n;) {
#// d1[n*k+i] += a1[n*k+m]*b1[i*n+m];
# vx_vec_sgemm_nn(i, c, r, a1, b1, c1, ldc);
# i = i + 4;
# }
# }
# }
# a3 = a, a4 = b, a5 = c
# a0 = i, a1 = m, a2 = n
# a6 = ldc
vx_vec_sgemm_nn:
vsetvli t0, a6, e32
mul x1, a6, a2 # n*ldc
add x2, x1, a1 # i + (n*ldc)
add a3, x2, a3 # a[i+ n*ldc]
lw x3, (a3)
mul x4, a1, a6 # m*ldc
add x5, a0, x4 # i + m*ldc
add a4, x5, a4 # b[i + m*ldc]
# lw x6, (a4)
vlw.v v0, (a4)
vmul.vx v2, v1, x3
mul x6, a2, a6 # n*ldc
add x7, a0, x6 # i + n*ldc
add a5, x7, a5 # c[i + m*ldc]
vlw.v v3, (a5) #c
vadd.vv v3, v3, v2
ret

View File

@@ -0,0 +1,119 @@
#include <stdio.h>
#include <stdlib.h>
#include "../../../runtime/intrinsics/vx_intrinsics.h"
#include "vx_vec_sgemm_nn.h"
//---------------------------------------------------------------
/* # void sgemm_nn(size_t n, size_t m, size_t k,
# int *a, // m * k matrix size_t lda,
# int *b, // k * n matrix size_t ldb,
# int *c, // m * n matrix size_t ldc)
# c += a*b (alpha=1, no transpose on input matrices)
# matrices stored in C row-major order */
//---------------------------------------------------------------
/*
 * Test driver for the integer SGEMM (no-transpose) vector kernel.
 *
 * Accumulates into c1 by calling vx_vec_sgemm_nn() once per vsize-wide strip,
 * then recomputes the same accumulation with scalar code into d1 and compares
 * the two result matrices element by element.
 *
 * Returns 0 when c1 and d1 agree, 1 on allocation failure or mismatch.
 */
int main()
{
    vx_tmc(1);

    int m = 64;
    int k = 64;
    int n = 64;

    int* a1 = (int*)malloc(sizeof(int) * m * k);
    int* b1 = (int*)malloc(sizeof(int) * k * n);
    int* c1 = (int*)malloc(sizeof(int) * m * n);
    int* d1 = (int*)malloc(sizeof(int) * m * n); //verification

    // Bail out before touching the buffers if any allocation failed.
    if (a1 == NULL || b1 == NULL || c1 == NULL || d1 == NULL)
    {
        printf("\n<sgemm_nn> FAILED: out of memory! \n");
        free(a1); free(b1); free(c1); free(d1);
        return 1;
    }

    for (int i = 0; i < (m * k); ++i) a1[i] = i;
    for (int i = 0; i < (k * n); ++i) b1[i] = 1;
    for (int i = 0; i < (m * n); ++i) c1[i] = 0;
    for (int i = 0; i < (m * n); ++i) d1[i] = 0;

#if 0
    printf("sgemm_nn\na[%d]:", m*k);
    for (int i = 0; i < m*k; ++i) {
        if(!(i % k)) printf("\n");
        printf("%d ", a1[i]);
    }
    printf("\n\nb[%d]:", k*n);
    for (int i = 0; i < k*n; ++i) {
        if (!(i % n)) printf("\n");
        printf("%d ", b1[i]);
    }
#endif

    int ldc = 64;       // row pitch, in elements
    int vsize = 32;     // elements requested per kernel call

    int startCycles = vx_num_cycles();
    int startInst = vx_num_instrs();
    for (int r = 0; r < m; r++) {
        for (int c = 0; c < n; c++) {
            for (int i = 0; i < k;) {
                // d1[r*k+i] += a1[r*k+c]*b1[i*n+c];
                vx_vec_sgemm_nn(i, r, c, a1, b1, c1, ldc, vsize);
                i = i + vsize;
            }
        }
    }
    int endCycles = vx_num_cycles();
    int endInst = vx_num_instrs();

    int totalInst = (endInst - startInst);
    int totalCycles = (endCycles - startCycles);
    printf("\nCycles = %d, Instructions = %d", totalCycles, totalInst);

    // vx_vec_sgemm_nn(n, a1, b1, c1);
#if 0
    printf("\n\nc[%d]:", m*n);
    for (int i = 0; i < m*n; ++i) {
        if (!(i % n)) printf("\n");
        printf("%d ", c1[i]);
    }
#endif

    // Scalar reference using the same indexing as the kernel.
    for (int r = 0; r < m; r++) {
        for (int c = 0; c < n; c++) {
            for (int i = 0; i < k; i++) {
                d1[c*ldc+i] += a1[c*ldc+r]*b1[i + (r*ldc)];
                //printf("d[%d] += a[%d]*b[%d]\n", c*ldc+i, c*ldc+r , i + (r*ldc));
                //printf("%d %d %d\n", d1[c*ldc+i] , a1[c*ldc+r] , b1[i + (r*ldc)]);
            }
        }
    }

#if 0
    printf("\n\nc[%d]:\n", m*n);
    for(int i = 0; i < m; ++i) {
        for(int j = 0; j < n; ++j) {
            printf("%d ", d1[i*m+j]);
        }
        printf("\n");
    }
#endif

    for(int i = 0; i < m*n; ++i)
    {
        if(c1[i] != d1[i])
        {
            printf("\n<sgemm_nn> FAILED at <index: %d>! \n", i);
            free(a1); free(b1); free(c1); free(d1);
            return 1;
        }
    }

    printf("\nPASS.......................... <sgemm_nn> \n");
    free(a1); free(b1); free(c1); free(d1);
    vx_tmc(0);
    return 0;
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,13 @@
#pragma once
// C declaration of the assembly SGEMM (no-transpose, integer) strip kernel.
#ifdef __cplusplus
extern "C" {
#endif
//void vx_vec_sgemm_nn(int n, int m, int k, int* a1, int lda, int* b1, int ldb, int* c1, int ldc);
// NOTE(review): the test driver passes (i, r, c, a1, b1, c1, ldc, vsize), so
// the first three parameters carry per-call indices rather than matrix
// dimensions despite their names — confirm before relying on the names.
// ldc is the row pitch in elements; vsize is the element count requested
// per call.
void vx_vec_sgemm_nn(int n, int m, int k, int* a1, int* b1, int* c1, int ldc, int vsize);
//void vx_vec_sgemm_nn(int n, int* a1, int* b1, int* c1);
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,42 @@
.type vx_vec_sgemm_nn, @function
.global vx_vec_sgemm_nn
#
# for (int n = 0; n < k; n++) {
# for (int m = 0; m < m; m++) {
# for (int i = 0; i < n;) {
#// d1[n*k+i] += a1[n*k+m]*b1[i*n+m];
# vx_vec_sgemm_nn(i, c, r, a1, b1, c1, ldc);
# i = i + 4;
# }
# }
# }
# a3 = a, a4 = b, a5 = c
# a0 = i, a1 = m, a2 = n
# a6 = ldc
vx_vec_sgemm_nn:
vsetvli t0, a7, e32
mul t1, a6, a2 # n*ldc
add t2, t1, a1 # i + (n*ldc)
slli t2, t2, 2
add a3, t2, a3 # a[i+ n*ldc]
lw t3, (a3)
mul t4, a1, a6 # m*ldc
add t5, a0, t4 # i + m*ldc
slli t5, t5, 2
add a4, t5, a4 # b[i + m*ldc]
# lw x6, (a4)
vlw.v v0, (a4)
vmul.vx v1, v0, t3
mul t6, a2, a6 # n*ldc
add t0, a0, t6 # i + n*ldc
slli t0, t0, 2
add a5, t0, a5 # c[i + m*ldc]
vlw.v v2, (a5) #c
vadd.vv v2, v2, v1
vsw.v v2, (a5)
ret

View File

@@ -0,0 +1,40 @@
# Build rules for the vx_vec_vecadd test: links the runtime sources, one
# assembly kernel and the C driver into an ELF, then derives an Intel HEX
# image and a disassembly listing from it.
#
# The toolchain location is overridable, e.g.
#   make RISCV_TOOLCHAIN_PATH=/opt/riscv
# and defaults to the path this file originally hard-coded.
RISCV_TOOLCHAIN_PATH ?= /nethome/ekim79/riscv-gnu-toolchain/drops
TOOL_PREFIX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-
TOOL_LIBDIR = $(RISCV_TOOLCHAIN_PATH)/riscv32-unknown-elf/lib

LIB_PATH = ../../../runtime

COMP = $(TOOL_PREFIX)gcc
DMP = $(TOOL_PREFIX)objdump
CPY = $(TOOL_PREFIX)objcopy

CC_FLAGS = -ffreestanding -O0 -Wl,--gc-sections -nostartfiles -nostdlib -nodefaultlibs -Wl,-Bstatic,-T,$(LIB_PATH)/startup/vx_link.ld -march=rv32imv -mabi=ilp32

NEWLIB = $(LIB_PATH)/newlib/newlib.c
VX_STR = $(LIB_PATH)/startup/vx_start.S
VX_INT = $(LIB_PATH)/intrinsics/vx_intrinsics.S
VX_IO = $(LIB_PATH)/io/vx_io.S $(LIB_PATH)/io/vx_io.c
VX_API = $(LIB_PATH)/vx_api/vx_api.c
VX_FIO = $(LIB_PATH)/fileio/fileio.S

# Kernel linked into the test. The commented entries are the other kernels
# this Makefile used to be switched between; to build one of them, assign it
# to VX_VEC1.
VX_VEC1 = vx_vec_vvaddint32.s
#VX_VEC2 = vx_vec_saxpy.s #float --> int
#VX_VEC3 = vx_vec_sgemm.s #float --> int
#VX_VEC4 = vx_vec_vsadd.s
#VX_VEC5 = vx_vec_memcpy.s

LIBS = $(TOOL_LIBDIR)/libc.a $(TOOL_LIBDIR)/libstdc++.a -static-libgcc -lgcc

VX_MAIN = vx_vec_vecadd

# None of these targets names a file that gets created, so declare them phony.
.PHONY: all HEX DUMP ELF

all: HEX DUMP ELF

DUMP: ELF
	$(DMP) -D $(VX_MAIN).elf > $(VX_MAIN).dump

HEX: ELF
	$(CPY) -O ihex $(VX_MAIN).elf $(VX_MAIN).hex

ELF:
	$(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC1) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf

View File

@@ -0,0 +1,57 @@
#include <stdio.h>
#include <stdlib.h>
#include "../../../runtime/intrinsics/vx_intrinsics.h"
#include "vx_vec_vecadd.h"
//---------------------------------------------------------------
/* vvaddint32
* # vector-vector add routine of 32-bit integers
* # void vvaddint32(size_t n, const int*x, const int*y, int*z)
* # { for (size_t i=0; i<n; i++) { z[i]=x[i]+y[i]; } } */
//---------------------------------------------------------------
/*
 * Test driver for the 32-bit integer vector-vector add kernel.
 *
 * Fills a[] and b[] with known values, runs vx_vec_vvaddint32(n, a, b, c)
 * and checks that every c[i] equals a[i] + b[i].
 *
 * Returns 0 when every element matches, 1 on allocation failure or mismatch.
 */
int main()
{
    vx_tmc(1);

    int n = 4; //SIZE

    int *a = (int*)malloc(sizeof(int) * n);
    int *b = (int*)malloc(sizeof(int) * n);
    int *c = (int*)malloc(sizeof(int) * n);

    // Bail out before touching the buffers if any allocation failed.
    if (a == NULL || b == NULL || c == NULL)
    {
        printf("\n<vddint32> FAILED: out of memory! \n");
        free(a); free(b); free(c);
        return 1;
    }

    // Initialize values for array members.
    for (int i = 0; i < n; ++i) {
        a[i] = i * 2 + 0;
        b[i] = i * 2 + 1;
        c[i] = 0;
    }

#if 0
    printf("vvaddint...\na[%d]: ", n);
    for(int i = 0; i < n; ++i) printf("%d ", a[i]);
    printf("\nb[%d]: ", n);
    for(int i = 0; i < n; ++i) printf("%d ", b[i]);
    printf("\nc[%d] = a[%d] + b[%d]: ", n, n, n);
    for(int i = 0; i < n; ++i) printf("%d ", c[i]);
#endif

    vx_vec_vvaddint32(n, a, b, c);

    // Compare against the scalar sum; stop at the first mismatch.
    for(int i = 0; i < n; ++i)
    {
        if(c[i] != (a[i]+b[i]))
        {
            printf("\n<vddint32> FAILED at <index: %d>! \n", i);
            free(a); free(b); free(c);
            return 1;
        }
    }

    printf("\nPASSED.......................... <vddint32> \n");
    free(a); free(b); free(c);
    vx_tmc(0);
    return 0;
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,17 @@
#pragma once
// C declarations of the assembly vector kernels used by the vecadd test.
// Only vx_vec_vvaddint32 is active; the commented prototypes belong to
// kernels that are not linked into this test.
#ifdef __cplusplus
extern "C" {
#endif
// Element-wise 32-bit integer add: c[i] = a[i] + b[i] for i in [0, n).
void vx_vec_vvaddint32(int n, int* a, int* b, int *c);
//void vx_vec_vsadd(int n, int* a, int scalar);
//void vx_vec_memcpy(int* a, int* b, int n);
//void vx_vec_saxpy(int n, int scalar, int* a, int* b);
//void vx_vec_sgemm_nn(int n, int m, int k, int* a1, int lda, int* b1, int ldb, int* c1, int ldc);
//void vx_vec_sgemm_nn(int n, int m, int k, int* a1, int* b1, int* c1);
//void vx_vec_sgemm_nn(int n, int* a1, int* b1, int* c1);
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,22 @@
.type vx_vec_vvaddi32, @function
.global vx_vec_vvaddint32
# vector-vector add routine of 32-bit integers
# void vvaddint32(size_t n, const int*x, const int*y, int*z)
# { for (size_t i=0; i<n; i++) { z[i]=x[i]+y[i]; } }
#
# a0 = n, a1 = x, a2 = y, a3 = z
# Non-vector instructions are indented
vx_vec_vvaddint32:
vsetvli t0, a0, e32 # Set vector length based on 32-bit vectors
loop:
vlw.v v0, (a1) # Get first vector
sub a0, a0, t0 # Decrement number done
slli t0, t0, 2 # Multiply number done by 4 bytes
add a1, a1, t0 # Bump pointer
vlw.v v1, (a2) # Get second vector
add a2, a2, t0 # Bump pointer
vadd.vv v2, v0, v1 # Sum vectors
vsw.v v2, (a3) # Store result
add a3, a3, t0 # Bump pointer
bnez a0, loop # Loop back
ret # Finished