Merge remote-tracking branch 'refs/remotes/origin/master'

This commit is contained in:
Euna Kim
2019-11-23 22:25:45 -05:00
137 changed files with 2472848 additions and 2586571 deletions

View File

@@ -35,3 +35,8 @@ HEX: ELF
ELF:
$(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf
run:
../../simX/obj_dir/Vcache_simX -E -a rv32i --core vx_vector_main.hex -s -b 1> emulator.debug

View File

@@ -0,0 +1,30 @@
# ----------------------------------------------------------------------
# vx_vec_test: stores the constant 7 into the 32-bit word addressed by a0.
# In    : a0 = address of the word to overwrite
#         (presumably the `int *` argument of the C caller; confirm
#         against the prototype in vx_vec.h)
# Out   : nothing is returned; the word at 0(a0) is 7 afterwards
# Clobb : a1 (used as the scratch register holding the constant)
# ----------------------------------------------------------------------
.type vx_vec_test, @function
.global vx_vec_test
vx_vec_test:
li a1, 7                # a1 = value to store
sw a1, 0(a0)            # word at address a0 = 7
ret
# Everything below is commented out and is never assembled. It is a draft of
# a loop (label stripmine_loop) built from vector loads/stores prefixed with
# !vp0; it is kept only for reference.
# slli a0, a0, 2
# add a0, a0, a3
# vmv.v.x vv0, a2
# # vsplat4 vv0, a2
# stripmine_loop:
# vlb4 vv1, (a1)
# vcmpez4 vp0, vv1
# !vp0 vlw4 vv1, (a3)
# !vp0 vlw4 vv2, (a4)
# !vp0 vfma4 vv1, vv0, vv1, vv2
# !vp0 vsw4 vv1, (a4)
# addi a1, a1, 4
# addi a3, a3, 16
# addi a4, a4, 16
# bleu a3, a0, stripmine_loop
# handle edge cases
# when (n % 4) != 0 ...

View File

@@ -0,0 +1,32 @@
#include "../../runtime/intrinsics/vx_intrinsics.h"
#include "vx_vec.h"
/*
 * Single-word smoke test for vx_vec_test(int *).
 *
 * Allocates one int, sets it to 5, prints it, passes it to vx_vec_test()
 * and prints it again so the effect of the call is visible in the output.
 *
 * Returns 0 on completion, 1 if the allocation fails.
 */
int main()
{
    vx_tmc(1);

    int *a = (int*)malloc(sizeof(int));
    if (!a)
    {
        /* No buffer, nothing to test. Mirror the vx_tmc(0) that the normal
           path ends with before bailing out. */
        printf("malloc failed\n");
        vx_tmc(0);
        return 1;
    }

    *a = 5;
    printf("Value of a: %d\n", *a);
    vx_vec_test(a);
    printf("Value of a: %d\n", *a);

    free(a);
    vx_tmc(0);
    return 0;
}

View File

@@ -0,0 +1,91 @@
#include <stdio.h>
#include <stdlib.h>
#include "../../runtime/intrinsics/vx_intrinsics.h"
#include "vx_vec.h"
/*
 * Driver for the 4-argument vx_vec_test(n, a, b, c).
 *
 * Only the "#if 1" section is compiled. The "#if 0" sections are earlier
 * experiments that are kept, unchanged, for reference.
 *
 * Returns 0 on completion, 1 if an allocation fails.
 */
int main()
{
    vx_tmc(1);

    /*
     * Reference description of the routine under test:
     *   vector-vector add routine of 32-bit integers
     *   void vvaddint32(size_t n, const int*x, const int*y, int*z)
     *   { for (size_t i=0; i<n; i++) { z[i]=x[i]+y[i]; } }
     *
     *   a0 = n, a1 = x, a2 = y, a3 = z
     */

#if 1
    int n = 5;
    int *a = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
    int *b = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
    int *c = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
    /* d is passed to vx_vec_test() as the first n-element input vector, so
       it must point at n valid ints (it used to be an uninitialized
       pointer that was written through). */
    int *d = (int*)malloc(sizeof(int) * n);
    if (!a || !b || !c || !d)
    {
        printf("malloc failed\n");
        free(a);
        free(b);
        free(c);
        free(d);
        vx_tmc(0);
        return 1;
    }

    for(int i = 0; i < n; ++i)
    {
        a[i] = b[i] = c[i] = d[i] = 1;
    }

    for(int i = 0; i < n; ++i) printf("%d, ", a[i]);
    printf("\n");
    // for(int i = 0; i < n; ++i) printf("%d, ", b[i]);
    // printf("\n");
    // for(int i = 0; i < n; ++i) printf("%d, ", c[i]);

    vx_vec_test(n, d, b, c);

    printf("(after: n = %d, %d)\n", n, *d);
    for(int i = 0; i < n; ++i) printf("%d, ", a[i]);
    // printf("\n");
    // for(int i = 0; i < n; ++i) printf("%d, ", b[i]);
    // printf("\n");
    // for(int i = 0; i < n; ++i) printf("%d, ", c[i]);

    free(a);
    free(b);
    free(c);
    free(d);
#endif

#if 0
    int * a = malloc(sizeof(int) * 10);
    for(int i = 0; i < 10; ++i) a[i] = 5;
    for(int i = 0; i < 10; ++i)
        printf("%d, ", a[i]);
    vx_vec_test(a);
    //vx_vec_test(2, a, a, a);
    printf("after--------\n");
    for(int i = 0; i < 10; ++i)
        printf("%d, ", a[i]);
#endif

#if 0
    int n = 5;
    int *a = (int*)malloc(sizeof(int) * 5); //{1, 1, 1, 1, 1};
    int *b = (int*)malloc(sizeof(int) * 5); //{1, 1, 1, 1, 1};
    int *c = (int*)malloc(sizeof(int) * 5); //{1, 1, 1, 1, 1};
    for(int i = 0; i < n; ++i)
    {
        a[i] = 1;
        b[i] = 1;
        c[i] = 0;
    }
    printf("Value of a: %d, b: %d, c: %d, n: %d\n", a[0], b[0], c[0], n);
    vx_vec_test(n, a, b, c);
    printf("Value of a: %d, b: %d, c: %d, n: %d\n", a[0], b[0], c[0], n);
#endif

    // for (int i = 0; i < 4; i++)
    // {
    //     if (c[i] != (a[i] + b[i]))
    //     {
    //         printf("Fail\n");
    //         break;
    //     }
    // }

    vx_tmc(0);
    return 0;
}

View File

@@ -7,7 +7,7 @@
extern "C" {
#endif
void vx_vec_test(int *);
void vx_vec_test(int n, int* a, int* b, int* c); //vvaddint32
#ifdef __cplusplus

View File

@@ -1,30 +1,23 @@
.type vx_vec_test, @function
.global vx_vec_test
vx_vec_test:
li a1, 7
sw a1, 0(a0)
ret
# slli a0, a0, 2
# add a0, a0, a3
# vmv.v.x vv0, a2
# # vsplat4 vv0, a2
# stripmine_loop:
# vlb4 vv1, (a1)
# vcmpez4 vp0, vv1
# !vp0 vlw4 vv1, (a3)
# !vp0 vlw4 vv2, (a4)
# !vp0 vfma4 vv1, vv0, vv1, vv2
# !vp0 vsw4 vv1, (a4)
# addi a1, a1, 4
# addi a3, a3, 16
# addi a4, a4, 16
# bleu a3, a0, stripmine_loop
# handle edge cases
# when (n % 4) != 0 ...
# vector-vector add routine of 32-bit integers
# void vvaddint32(size_t n, const int*x, const int*y, int*z)
# { for (size_t i=0; i<n; i++) { z[i]=x[i]+y[i]; } }
#
# a0 = n, a1 = x, a2 = y, a3 = z
# Non-vector instructions are indented
vsetvli t0, a0, e32 # Set vector length based on 32-bit vectors
vlw.v v0, (a1) # Get first vector
sub a0, a0, t0 # Decrement number done
slli t0, t0, 2 # Multiply number done by 4 bytes
add a1, a1, t0 # Bump pointer
vlw.v v1, (a2) # Get second vector
add a2, a2, t0 # Bump pointer
vadd.vv v2, v0, v1 # Sum vectors
vsw.v v2, (a3) # Store result
add a3, a3, t0 # Bump pointer
bnez a0, vx_vec_test # Loop back
ret # Finished

View File

@@ -0,0 +1,27 @@
#include "../../runtime/intrinsics/vx_intrinsics.h"
#include "vx_vec.h"
/*
 * Eight-element run of vx_vec_test(): fills three buffers with ones, calls
 * the routine on them and prints the contents of the third buffer.
 */
int main()
{
    const int count = 8;
    int *lhs;
    int *rhs;
    int *out;
    int idx;

    vx_tmc(1);
    printf("----------------hello!!! \n");

    lhs = (int*)malloc(sizeof(int) * count);
    rhs = (int*)malloc(sizeof(int) * count);
    out = (int*)malloc(sizeof(int) * count);
    printf("hello!!! \n");

    /* Every element of all three buffers starts out as 1. */
    idx = 0;
    while (idx < count)
    {
        lhs[idx] = 1;
        rhs[idx] = 1;
        out[idx] = 1;
        idx++;
    }

    vx_vec_test(count, lhs, rhs, out);

    /* Dump the third buffer, space separated, on one line. */
    idx = 0;
    while (idx < count)
    {
        printf("%d ", out[idx]);
        idx++;
    }

    vx_tmc(0);
}

View File

@@ -1,32 +1,29 @@
#include "../../runtime/intrinsics/vx_intrinsics.h"
#include "vx_vec.h"
int main()
{
vx_tmc(1);
// int * a = malloc(4);
// int * b = malloc(4);
// int * c = malloc(4);
vx_tmc(1);
printf("Hello\n");
int n = 64;
int *a = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
int *b = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
int *c = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
for(int i = 0; i < n; ++i)
{
a[i] = b[i] = c[i] = 1;
}
vx_vec_test(n, a, b, c);
for (int i = 0; i < n; ++i)
{
printf("a[%d]=%d, b[%d]=%d, c[%d]=%d\n", i, a[i], i, b[i], i, c[i]);
}
int * a = malloc(4);
*a = 5;
printf("Value of a: %d\n", *a);
vx_vec_test(a);
printf("Value of a: %d\n", *a);
// for (int i = 0; i < 4; i++)
// {
// if (c[i] != (a[i] + b[i]))
// {
// printf("Fail\n");
// break;
// }
// }
vx_tmc(0);
vx_tmc(0);
}

File diff suppressed because it is too large Load Diff

166
rvvector/benchmark_temp/1 Normal file
View File

@@ -0,0 +1,166 @@
#include <stdio.h>
#include <stdlib.h>
#include "../../runtime/intrinsics/vx_intrinsics.h"
#include "vx_vec_benchmark.h"
/*
 * Benchmark/self-check driver for the vx_vec_* vector kernels.
 *
 * Exactly one "#if 1" section is meant to be enabled at a time; currently
 * that is the integer saxpy check. Each section re-initialises the shared
 * buffers, runs one kernel and compares the result with a scalar
 * expectation.
 *
 * Returns 0 when the enabled check passes, 1 on the first mismatch or when
 * an allocation fails.
 */
int main()
{
    vx_tmc(1);

    int n = 65536;
    int scalar = 10;

    int *a = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
    int *b = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
    int *c = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
    if (!a || !b || !c)
    {
        /* Three buffers of n ints are requested; do not write through a
           null pointer if any of them could not be obtained. */
        printf("malloc failed\n");
        free(a);
        free(b);
        free(c);
        vx_tmc(0);
        return 1;
    }

    for (int i = 0; i < n; ++i) { a[i] = 1; b[i] = 2; c[i] = 5; }

#if 0
    //---------------------------------------------------------------
    /* vvaddint32
     * # vector-vector add routine of 32-bit integers
     * # void vvaddint32(size_t n, const int*x, const int*y, int*z)
     * # { for (size_t i=0; i<n; i++) { z[i]=x[i]+y[i]; } } */
    printf("vvaddint...\na[%d]: ", n);
    for(int i = 0; i < n; ++i) printf("%d ", a[i]);
    printf("\nb[%d]: ", n);
    for(int i = 0; i < n; ++i) printf("%d ", b[i]);
    printf("\nc[%d] = a[%d] + b[%d]: ", n, n, n);
    for(int i = 0; i < n; ++i) printf("%d ", c[i]);

    vx_vec_vvaddint32(n, a, b, c);

    for(int i = 0; i < n; ++i)
    {
        if(c[i] != (a[i]+b[i]))
        {
            printf("\n<vddint32> failed at <index: %d>! \n", i);
            return 1;
        }
    }
    printf("\nPASSED.......................... <vddint32> \n");
#endif

#if 0
    //---------------------------------------------------------------
    /* # vector-scalar add
       # for (i=0; i<N; i++) { C[i] = A[i] + B; } // 32-bit ints */
    for (int i = 0; i < n; ++i) { a[i] = 1; b[i] = 1;}
    printf("vsadd...scalar:%d\na[%d]: ", scalar, n);
    for(int i = 0; i < n; ++i) printf("%d \n", a[i]);
    printf("\nb: %d", scalar);

    vx_vec_vsadd(n, a, scalar);

    for(int i = 0; i < n; ++i)
    {
        /* b still holds the pre-call value of a, and the operation is an
           add, so the expectation is b[i] + scalar (it was b[i] * scalar).
           NOTE(review): assumes vx_vec_vsadd updates a in place - confirm. */
        if(a[i] != (b[i] + scalar))
        {
            printf("\n<vsadd> failed at <index: %d>! \n", i);
            return 1;
        }
    }
    printf("\nPASSED.......................... <vsadd> \n");
#endif

#if 0
    //---------------------------------------------------------------
    /* # memory copy
       # void *memcpy(void* dest, const void* src, size_t n) */
    for (int i = 0; i < n; ++i) { a[i] = 1; b[i] = 2;}
    printf("memcpy\na[%d]: ", n);
    for(int i = 0; i < n; ++i) printf("%d \n", a[i]);
    printf("\nb[%d]: ", n);
    for(int i = 0; i < n; ++i) printf("%d \n", b[i]);

    vx_vec_memcpy(a, b, n);

    for(int i = 0; i < n; ++i)
    {
        if(a[i] != b[i])
        {
            printf("\n<memcpy> failed at <index: %d>! \n", i);
            return 1;
        }
    }
    printf("\nPASSED.......................... <memcpy> \n");
#endif

#if 1
    //---------------------------------------------------------------
    /* # void saxpy(size_t n, const float a, const float *x, float *y)
       # ==> convert to int!!
       # void saxpy(size_t n, const int a, const int *x, int *y)
       # {
       #   size_t i;
       #   for (i=0; i<n; i++) y[i] = a * x[i] + y[i];
       # } */
    /* c is given the same initial value as b so that it still holds the
       pre-call y values when the result is checked below. */
    for (int i = 0; i < n; ++i) { a[i] = 4; b[i] = 2; c[i] = 2;}

    printf("saxpy\na[%d]: ", n);
    for(int i = 0; i < n; ++i) printf("%d \n", a[i]);
    printf("\nb[%d]: ", n);
    for(int i = 0; i < n; ++i) printf("%d \n", b[i]);

    vx_vec_saxpy(n, scalar, a, b);

    printf("saxpy\na[%d]: ", n);
    for(int i = 0; i < n; ++i) printf("%d \n", a[i]);
    printf("\nb[%d]: ", n);
    for(int i = 0; i < n; ++i) printf("%d \n", b[i]);

    for(int i = 0; i < n; ++i)
    {
        if(b[i] != ((a[i] * scalar) + c[i]))
        {
            printf("\n<saxpy> failed at <index: %d>! \n", i);
            return 1;
        }
    }
    printf("\nPASSED.......................... <saxpy> \n");
#endif

#if 0
    //---------------------------------------------------------------
    /* # void sgemm_nn(size_t n, size_t m, size_t k, const float*a, // m * k matrix
       #               size_t lda, const float*b,                  // k * n matrix
       #               size_t ldb, float*c,                        // m * n matrix
       #               size_t ldc)
       # c += a*b (alpha=1, no transpose on input matrices)
       # matrices stored in C row-major order */
    {
        /* Own scope: these dimensions would otherwise redeclare main's n. */
        int m = 8;
        int k = 8;
        int n = 8;
        /* NOTE(review): 4 is smaller than the 8-column rows allocated below;
           confirm the unit and value vx_vec_sgemm_nn expects for lda/ldb/ldc. */
        int lda = 4;
        int ldb = 4;
        int ldc = 4;

        /* Element count times element size (sizeof(m * k) was one int). */
        int* a1 = (int*)malloc(sizeof(int) * m * k);
        int* b1 = (int*)malloc(sizeof(int) * k * n);
        int* c1 = (int*)malloc(sizeof(int) * m * n);
        if (!a1 || !b1 || !c1)
        {
            printf("malloc failed\n");
            return 1;
        }

        for(int i = 0; i < (m * k); ++i) a1[i] = 1;
        for(int i = 0; i < (k * n); ++i) b1[i] = 1;
        for(int i = 0; i < (m * n); ++i) c1[i] = 1;

        printf("sgemm_nn\na[%d]: ", n);
        for(int i = 0; i < n; ++i) printf("%d \n", a1[i]);
        printf("\nb[%d]: ", n);
        for(int i = 0; i < n; ++i) printf("%d \n", b1[i]);

        vx_vec_sgemm_nn(n, m, k, a1, lda, b1, ldb, c1, ldc);

        //for(int i = 0; i < n; ++i)
        //{
        //    if(b[i] != ((a[i] * scalar) + c[i]))
        //    {
        //        printf("\n<sgemm_nn> failed at <index: %d>! \n", i);
        //        return 1;
        //    }
        //}
        printf("\nNOT TESTED.......................... <sgemm_nn> \n");

        free(a1);
        free(b1);
        free(c1);
    }
    //---------------------------------------------------------------
#endif

    free(a);
    free(b);
    free(c);

    vx_tmc(0);
    return 0;
}

View File

@@ -34,8 +34,7 @@ HEX: ELF
$(CPY) -O ihex $(VX_MAIN).elf $(VX_MAIN).hex
ELF:
$(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC1) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf
# $(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC2) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf
$(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC2) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf
# $(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC3) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf
# $(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC4) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf
# $(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC5) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf~

View File

@@ -6,14 +6,17 @@
int main()
{
vx_tmc(1);
int n = 5;
int scalar = 10;
int *a = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
int *b = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
int *c = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
for (int i = 0; i < n; ++i) { a[i] = 1; b[i] = 2; c[i] = 5; }
#if 1
#if 0
//---------------------------------------------------------------
/* vvaddint32
* # vector-vector add routine of 32-bit integers
@@ -43,7 +46,6 @@ int main()
/* # vector-scalar add
# for (i=0; i<N; i++) { C[i] = A[i] + B; } // 32-bit ints */
for (int i = 0; i < n; ++i) { a[i] = 1; b[i] = 1;}
int scalar = 10;
printf("vsadd...scalar:%d\na[%d]: ", scalar, n);
for(int i = 0; i < n; ++i) printf("%d \n", a[i]);
printf("\nb: %d", scalar);
@@ -78,10 +80,18 @@ int main()
if(a[i] != b[i])
{
printf("\n<memcpy> failed at <index: %d>! \n", i);
<<<<<<< HEAD
return;
}
}
printf("\nPASSED.......................... <memcpy> \n");
=======
return 1;
}
}
printf("\nPASSED.......................... <memcpy> \n");
#endif
#if 1
//---------------------------------------------------------------
/* # void saxpy(size_t n, const float a, const float *x, float *y)
# ==> convert to int!!
@@ -99,6 +109,11 @@ int main()
vx_vec_saxpy(n, scalar, a, b);
printf("saxpy\na[%d]: ", n);
for(int i = 0; i < n; ++i) printf("%d \n", a[i]);
printf("\nb[%d]: ", n);
for(int i = 0; i < n; ++i) printf("%d \n", b[i]);
for(int i = 0; i < n; ++i)
{
if(b[i] != ((a[i] * scalar) + c[i]))
@@ -109,6 +124,12 @@ int main()
}
printf("\nPASSED.......................... <saxpy> \n");
return 1;
}
}
printf("\nPASSED.......................... <saxpy> \n");
#endif
#if 0
//---------------------------------------------------------------
/* # void sgemm_nn(size_t n, size_t m, size_t k, const float*a, // m * k matrix
# size_t lda, const float*b, // k * n matrix

Binary file not shown.

View File

@@ -5,10 +5,10 @@
extern "C" {
#endif
void vx_vec_vvaddint32(int n, int* a, int* b, int *c);
//void vx_vec_vvaddint32(int n, int* a, int* b, int *c);
//void vx_vec_vsadd(int n, int* a, int scalar);
//void vx_vec_memcpy(int* a, int* b, int n);
//void vx_vec_saxpy(int n, int scalar, int* a, int* b);
void vx_vec_saxpy(int n, int scalar, int* a, int* b);
//void vx_vec_sgemm_nn(int n, int m, int k, int* a1, int lda, int* b1, int ldb, int* c1, int ldc);
#ifdef __cplusplus

View File

@@ -25,4 +25,32 @@ saxpy:
vsw.v v8, (a2)
add a2, a2, a4
bnez a0, saxpy
ret
ret
#vx_vec_saxpy:
# vsetvli a4, a0, e32, m8
#saxpy:
# vlw.v v0, (a1)
# sub a0, a0, a4
# slli a4, a4, 2
# add a1, a1, a4
# vlw.v v8, (a2)
# vfmacc.vf v8, fa0, v0
# vsw.v v8, (a2)
# add a2, a2, a4
# bnez a0, saxpy
# ret
# ----------------------------------------------------------------------
# void vx_vec_saxpy(int n, int scalar, int *x, int *y)
#   Integer SAXPY: y[i] = scalar * x[i] + y[i] for i in [0, n)
# ABI   : RISC-V integer calling convention
# In    : a0 = n, a1 = scalar, a2 = x, a3 = y
# Out   : nothing returned; y is updated in place
# Clobb : a0, a2, a3, a4, vector register groups v0 and v8 (LMUL = 8)
#
# The loop re-enters at the function label so that vsetvli runs on every
# pass: vl must be recomputed from the remaining element count, and a4 has
# been turned into a byte offset by the end of the previous pass.
# No separate loop label is defined, so this block no longer redefines the
# `saxpy` label that the earlier loop in this file branches to.
# ----------------------------------------------------------------------
vx_vec_saxpy:
    vsetvli a4, a0, e32, m8     # a4 = vl = elements handled this pass
    vlw.v v0, (a2)              # v0 group = x[0 .. vl)
    sub a0, a0, a4              # a0 = elements still to do
    slli a4, a4, 2              # a4 = bytes consumed this pass (4 per int)
    add a2, a2, a4              # advance x
    vlw.v v8, (a3)              # v8 group = y[0 .. vl); m8 groups must start at v0/v8/v16/v24
    vmacc.vx v8, a1, v0         # y = scalar * x + y (scalar is in a1, not "rs1")
    vsw.v v8, (a3)              # store the updated y
    add a3, a3, a4              # advance y
    bnez a0, vx_vec_saxpy       # more elements: recompute vl and go again
    ret