Vector evaluations

This commit is contained in:
proshan3
2019-11-25 22:18:12 -05:00
parent 72361b3afe
commit 374d991a20
16 changed files with 104163 additions and 104289 deletions

View File

@@ -1,11 +1,11 @@
LIB_PATH = ../../../runtime LIB_PATH = ../../../runtime
COMP = /nethome/ekim79/riscv-gnu-toolchain/drops/bin/riscv32-unknown-elf-gcc COMP = /home/priya/dev/riscv_vec/riscv-gnu/bin/riscv32-unknown-elf-gcc
CC_FLAGS = -ffreestanding -O0 -Wl,--gc-sections -nostartfiles -nostdlib -nostartfiles -nodefaultlibs -Wl,-Bstatic,-T,$(LIB_PATH)/mains/vortex_link.ld -march=rv32imv -mabi=ilp32 CC_FLAGS = -ffreestanding -O0 -Wl,--gc-sections -nostartfiles -nostdlib -nostartfiles -nodefaultlibs -Wl,-Bstatic,-T,$(LIB_PATH)/mains/vortex_link.ld -march=rv32imv -mabi=ilp32
DMP = /nethome/ekim79/riscv-gnu-toolchain/drops/bin/riscv32-unknown-elf-objdump DMP = /home/priya/dev/riscv_vec/riscv-gnu/bin/riscv32-unknown-elf-objdump
CPY = /nethome/ekim79/riscv-gnu-toolchain/drops/bin/riscv32-unknown-elf-objcopy CPY = /home/priya/dev/riscv_vec/riscv-gnu/bin/riscv32-unknown-elf-objcopy
# VX_STR = ../../startup/vx_start.s # VX_STR = ../../startup/vx_start.s
@@ -17,7 +17,7 @@ VX_API = $(LIB_PATH)/vx_api/vx_api.c
VX_TEST = $(LIB_PATH)/tests/tests.c VX_TEST = $(LIB_PATH)/tests/tests.c
VX_FIO = $(LIB_PATH)/fileio/fileio.s VX_FIO = $(LIB_PATH)/fileio/fileio.s
VX_VEC = vx_vec_saxpy.s #float --> int VX_VEC = vx_vec_saxpy.s #float --> int
LIBS = /nethome/ekim79/riscv-gnu-toolchain/drops/riscv32-unknown-elf/lib/libc.a /nethome/ekim79/riscv-gnu-toolchain/drops/riscv32-unknown-elf/lib/libstdc++.a -static-libgcc -lgcc LIBS = /home/priya/dev/riscv_vec/riscv-gnu/riscv32-unknown-elf/lib/libc.a /home/priya/dev/riscv_vec/riscv-gnu/riscv32-unknown-elf/lib/libstdc++.a -static-libgcc -lgcc
VX_MAIN = vx_vec_saxpy VX_MAIN = vx_vec_saxpy

View File

@@ -16,7 +16,7 @@ int main()
{ {
vx_tmc(1); vx_tmc(1);
int n = 4; //#define NUM_DATA 65536 int n = 64; //#define NUM_DATA 65536
int *a = (int*)malloc(sizeof(int) * n); int *a = (int*)malloc(sizeof(int) * n);
int *b = (int*)malloc(sizeof(int) * n); int *b = (int*)malloc(sizeof(int) * n);
@@ -39,9 +39,18 @@ int main()
// for(int i = 0; i < n; ++i) printf("%d \n", b[i]); // for(int i = 0; i < n; ++i) printf("%d \n", b[i]);
#endif #endif
int startCycles = vx_getCycles();
int startInst = vx_getInst();
vx_vec_saxpy(n, factor, a, b); vx_vec_saxpy(n, factor, a, b);
int endCycles = vx_getCycles();
int endInst = vx_getInst();
#if 1 int totalInst = (endInst - startInst);
int totalCycles = (endCycles - startCycles);
printf("\nCycles = %d, Instructions = %d", totalCycles, totalInst);
#if 0
printf("\nsaxpy\na[%d]: ", n); printf("\nsaxpy\na[%d]: ", n);
for(int i = 0; i < n; ++i) printf("%d ", a[i]); for(int i = 0; i < n; ++i) printf("%d ", a[i]);
printf("\n\nb[%d]: ", n); printf("\n\nb[%d]: ", n);

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -10,8 +10,8 @@
# a2 a # a2 a
# a3 b # a3 b
vx_vec_saxpy: vx_vec_saxpy:
vsetvli a4, a0, e32
loop: loop:
vsetvli a4, a0, e32
vlw.v v0, (a2) vlw.v v0, (a2)
sub a0, a0, a4 sub a0, a0, a4
slli a4, a4, 2 slli a4, a4, 2

View File

@@ -35,12 +35,21 @@ int main()
c[i] = 0; c[i] = 0;
} }
int N = 32; int N = 4;
int startCycles = vx_getCycles();
int startInst = vx_getInst();
for(int y = 1; y < (NUM_DATA-1); y++){ for(int y = 1; y < (NUM_DATA-1); y++){
for(int x = 1; x < (NUM_DATA-1); x = x+N) { for(int x = 1; x < (NUM_DATA-1); x = x+N) {
vx_vec_sfilter(a, b, ldc, m, x, y, N); vx_vec_sfilter(a, b, ldc, m, x, y, N);
} }
} }
int endCycles = vx_getCycles();
int endInst = vx_getInst();
int totalInst = (endInst - startInst);
int totalCycles = (endCycles - startCycles);
printf("\nCycles = %d, Instructions = %d", totalCycles, totalInst);
for(int y = 1; y < (NUM_DATA-1); ++y) for(int y = 1; y < (NUM_DATA-1); ++y)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -16,76 +16,89 @@ int main()
{ {
vx_tmc(1); vx_tmc(1);
int w = 4; int m = 64;
int h = 4; int k = 64;
int d = 4; int n = 64;
int* a1 = (int*)malloc(sizeof(int) * w * h); int* a1 = (int*)malloc(sizeof(int) * m * k);
int* b1 = (int*)malloc(sizeof(int) * h * d); int* b1 = (int*)malloc(sizeof(int) * k * n);
int* c1 = (int*)malloc(sizeof(int) * w * d); int* c1 = (int*)malloc(sizeof(int) * m * n);
int* d1 = (int*)malloc(sizeof(int) * w * d); //verification int* d1 = (int*)malloc(sizeof(int) * m * n); //verification
for (int i = 0; i < (w * h); ++i) a1[i] = i; for (int i = 0; i < (m * k); ++i) a1[i] = i;
for (int i = 0; i < (h * d); ++i) b1[i] = 1; for (int i = 0; i < (k * n); ++i) b1[i] = 1;
for (int i = 0; i < (w * d); ++i) c1[i] = 0; for (int i = 0; i < (m * n); ++i) c1[i] = 0;
for (int i = 0; i < (w * d); ++i) d1[i] = 0; for (int i = 0; i < (m * n); ++i) d1[i] = 0;
#if 0 #if 0
printf("sgemm_nn\na[%d]:", w*h); printf("sgemm_nn\na[%d]:", m*k);
for (int i = 0; i < w*h; ++i) { for (int i = 0; i < m*k; ++i) {
if(!(i % h)) printf("\n"); if(!(i % k)) printf("\n");
printf("%d ", a1[i]); printf("%d ", a1[i]);
} }
printf("\n\nb[%d]:", h*d); printf("\n\nb[%d]:", k*n);
for (int i = 0; i < h*d; ++i) { for (int i = 0; i < k*n; ++i) {
if (!(i % d)) printf("\n"); if (!(i % n)) printf("\n");
printf("%d ", b1[i]); printf("%d ", b1[i]);
} }
#endif #endif
int lda = 4; int lda = 4;
int ldb = 4; int ldb = 4;
int ldc = 4; //64; int ldc = 64; //64;
int vsize = 4; int vsize = 32;
for (int n = 0; n < h; n++) {
for (int i = 0; i < w; i=+4) { int startCycles = vx_getCycles();
for (int m = 0; m < d; m++) { int startInst = vx_getInst();
vx_vec_sgemm_nn(i, m, n, a1, b1, c1, ldc, vsize); for (int r = 0; r < m; r++) {
for (int c = 0; c < n; c++) {
for (int i = 0; i < k;) {
// d1[r*k+i] += a1[r*k+c]*b1[i*n+c];
vx_vec_sgemm_nn(i, r, c, a1, b1, c1, ldc, vsize);
i = i + vsize; i = i + vsize;
} }
} }
} }
int endCycles = vx_getCycles();
int endInst = vx_getInst();
#if 1 int totalInst = (endInst - startInst);
printf("\n\nc[%d]:", d*h); int totalCycles = (endCycles - startCycles);
for (int i = 0; i < d*h; ++i) { printf("\nCycles = %d, Instructions = %d", totalCycles, totalInst);
if (!(i % h)) printf("\n"); // vx_vec_sgemm_nn(n, a1, b1, c1);
#if 0
printf("\n\nc[%d]:", m*n);
for (int i = 0; i < m*n; ++i) {
if (!(i % n)) printf("\n");
printf("%d ", c1[i]); printf("%d ", c1[i]);
} }
#endif #endif
for (int r = 0; r < h; r++) { for (int r = 0; r < m; r++) {
for (int c = 0; c < w; c++) { for (int c = 0; c < n; c++) {
for (int i = 0; i < d; i++) { for (int i = 0; i < k; i++) {
d1[r*h+i] += a1[r*h+c]*b1[i*d+c]; d1[c*ldc+i] += a1[c*ldc+r]*b1[i + (r*ldc)];
//printf("d[%d] += a[%d]*b[%d]\n", c*ldc+i, c*ldc+r , i + (r*ldc));
//printf("%d %d %d\n", d1[c*ldc+i] , a1[c*ldc+r] , b1[i + (r*ldc)]);
} }
} }
} }
#if 1 #if 0
printf("\n\nc[%d]:\n", w*d); printf("\n\nc[%d]:\n", m*n);
for(int i = 0; i < w; ++i) { for(int i = 0; i < m; ++i) {
for(int j = 0; j < d; ++j) { for(int j = 0; j < n; ++j) {
printf("%d ", d1[i*w+j]); printf("%d ", d1[i*m+j]);
} }
printf("\n"); printf("\n");
} }
#endif #endif
for(int i = 0; i < w*d; ++i) for(int i = 0; i < m*n; ++i)
{ {
if(c1[i] != d1[i]) if(c1[i] != d1[i])
{ {

File diff suppressed because it is too large Load Diff

View File

@@ -6,7 +6,7 @@ extern "C" {
#endif #endif
//void vx_vec_sgemm_nn(int n, int m, int k, int* a1, int lda, int* b1, int ldb, int* c1, int ldc); //void vx_vec_sgemm_nn(int n, int m, int k, int* a1, int lda, int* b1, int ldb, int* c1, int ldc);
void vx_vec_sgemm_nn(int i, int m, int n, int* a1, int* b1, int* c1, int ldc, int vsize); void vx_vec_sgemm_nn(int n, int m, int k, int* a1, int* b1, int* c1, int ldc, int vsize);
//void vx_vec_sgemm_nn(int n, int* a1, int* b1, int* c1); //void vx_vec_sgemm_nn(int n, int* a1, int* b1, int* c1);
#ifdef __cplusplus #ifdef __cplusplus
} }

File diff suppressed because it is too large Load Diff

View File

@@ -10,33 +10,33 @@
# } # }
# } # }
# } # }
# a0 = i, a1 = m, a2 = n, a3 = a, a4 = b, a5 = c, a6 = ldc, a7 = vsize # a3 = a, a4 = b, a5 = c
# # a0 = i, a1 = m, a2 = n
# a6 = ldc
vx_vec_sgemm_nn: vx_vec_sgemm_nn:
vsetvli t0, a7, e32 # <--- vsize vsetvli t0, a7, e32
mul x11, a6, a2 # n*ldc mul t1, a6, a2 # n*ldc
add x12, x11, a1 # i + (n*ldc) add t2, t1, a1 # i + (n*ldc)
slli x12, x12, 2 slli t2, t2, 2
add a3, x12, a3 # a[i+ n*ldc] add a3, t2, a3 # a[i+ n*ldc]
lw x13, (a3) lw t3, (a3)
mul x14, a1, a6 # m*ldc mul t4, a1, a6 # m*ldc
add x15, a0, x14 # i + m*ldc add t5, a0, t4 # i + m*ldc
slli x15, x15, 2 slli t5, t5, 2
add a4, x15, a4 # b[i + m*ldc] add a4, t5, a4 # b[i + m*ldc]
vlw.v v0, (a4) # lw x6, (a4)
vmul.vx v2, v1, x13
## lw x6, (a4) vlw.v v0, (a4)
## lw x10, (a4) # b vmul.vx v1, v0, t3
## mul x11, x3, x10
mul t6, a2, a6 # n*ldc
add t0, a0, t6 # i + n*ldc
slli t0, t0, 2
add a5, t0, a5 # c[i + n*ldc]
vlw.v v2, (a5) #c
vadd.vv v2, v2, v1
vsw.v v2, (a5)
mul x6, a2, a6 # n*ldc
add x7, a0, x6 # i + n*ldc
add a5, x7, a5 # c[i + m*ldc]
vlw.v v3, (a5) # c
vadd.vv v3, v3, v2
vsw.v v3, (a5)
## lw x12, (a5)
## add x12, x12, x11
## sw x12, (a5)
ret ret