merging with master

This commit is contained in:
fares
2019-11-23 20:40:19 -05:00
16 changed files with 29131 additions and 415 deletions

View File

@@ -131,6 +131,7 @@ void _clCmdParams(int argc, char* argv[]){
// devices have no relationship with context // devices have no relationship with context
void _clInit() void _clInit()
{ {
printf("_clInit()\n");
int DEVICE_ID_INUSED = device_id_inused; int DEVICE_ID_INUSED = device_id_inused;
cl_int resultCL; cl_int resultCL;
@@ -225,15 +226,18 @@ void _clInit()
throw(string("InitCL()::Creating Command Queue. (clCreateCommandQueue)")); throw(string("InitCL()::Creating Command Queue. (clCreateCommandQueue)"));
//----------------------------------------------- //-----------------------------------------------
//--cambine-5: Load CL file, build CL program object, create CL kernel object //--cambine-5: Load CL file, build CL program object, create CL kernel object
std::string source_str = FileToString(kernel_file); /*std::string source_str = FileToString(kernel_file);
const char * source = source_str.c_str(); const char * source = source_str.c_str();
size_t sourceSize[] = { source_str.length() }; size_t sourceSize[] = { source_str.length() };*/
oclHandles.program = clCreateProgramWithSource(oclHandles.context, oclHandles.program =
clCreateProgramWithBuiltInKernels(oclHandles.context, 1, &oclHandles.devices[DEVICE_ID_INUSED], "BFS_1, BFS_2", &resultCL);
/*oclHandles.program = clCreateProgramWithSource(oclHandles.context,
1, 1,
&source, &source,
sourceSize, sourceSize,
&resultCL); &resultCL);*/
if ((resultCL != CL_SUCCESS) || (oclHandles.program == NULL)) if ((resultCL != CL_SUCCESS) || (oclHandles.program == NULL))
throw(string("InitCL()::Error: Loading Binary into cl_program. (clCreateProgramWithBinary)")); throw(string("InitCL()::Error: Loading Binary into cl_program. (clCreateProgramWithBinary)"));

View File

@@ -1,9 +1,7 @@
RISCV_TOOL_PATH = $(wildcard ~/dev/riscv-gnu-toolchain/drops) RISCV_TOOL_PATH = $(wildcard ~/dev/riscv-gnu-toolchain/drops)
POCL_CC_PATH = $(wildcard ~/dev/pocl/drops_riscv_cc) POCL_CC_PATH = $(wildcard ~/dev/pocl/drops_riscv_cc)
POCL_RT_PATH=$(wildcard ~/dev/pocl/drops_riscv_rt) POCL_INC_PATH = $(wildcard ../include)
POCL_LIB_PATH = $(wildcard ../lib)
VX_RT_PATH = $(wildcard ../../../runtime) VX_RT_PATH = $(wildcard ../../../runtime)
VX_SIMX_PATH = $(wildcard ../../../simX/obj_dir) VX_SIMX_PATH = $(wildcard ../../../simX/obj_dir)
@@ -11,23 +9,27 @@ CC=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++ CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
NEWLIB_PATH=$(RISCV_TOOL_PATH)/riscv32-unknown-elf/lib GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
VX_NEWLIB = $(VX_RT_PATH)/newlib/newlib.c VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
VX_STR = $(VX_RT_PATH)/startup/vx_start.s VX_SRCS += $(VX_RT_PATH)/startup/vx_start.s
VX_INT = $(VX_RT_PATH)/intrinsics/vx_intrinsics.s VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_IO = $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_FIO = $(VX_RT_PATH)/fileio/fileio.s VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
VX_API = $(VX_RT_PATH)/vx_api/vx_api.c VX_SRCS += $(VX_RT_PATH)/tests/tests.c
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c
VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST)
VX_SRCS = $(VX_STR) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld
CXXFLAGS = -g -O0 -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld -march=rv32im -mabi=ilp32 CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32
CXXFLAGS += -ffreestanding # program may not begin at main() CXXFLAGS += -ffreestanding # program may not begin at main()
CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections
CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
CXXFLAGS += -I$(POCL_INC_PATH)
LIBS = -lOpenCL VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
PROJECT=bfs PROJECT=bfs
@@ -37,7 +39,10 @@ lib$(PROJECT).a: kernel.cl
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
$(PROJECT).elf: main.cc lib$(PROJECT).a $(PROJECT).elf: main.cc lib$(PROJECT).a
$(CXX) $(CXXFLAGS) -I$(POCL_RT_PATH)/include -L$(POCL_RT_PATH)/lib/static -L. $(VX_SRCS) main.cc timer.cc -Wl,--whole-archive -l$(PROJECT) -Wl,--no-whole-archive $(LIBS) -o $(PROJECT).elf $(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) main.cc $(VX_LIBS) -o $(PROJECT).elf
$(PROJECT).qemu: main.cc lib$(PROJECT).a
$(CXX) $(CXXFLAGS) main.cc $(QEMU_LIBS) -o $(PROJECT).qemu
$(PROJECT).hex: $(PROJECT).elf $(PROJECT).hex: $(PROJECT).elf
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex $(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
@@ -45,8 +50,17 @@ $(PROJECT).hex: $(PROJECT).elf
$(PROJECT).dump: $(PROJECT).elf $(PROJECT).dump: $(PROJECT).elf
$(DMP) -D $(PROJECT).elf > $(PROJECT).dump $(DMP) -D $(PROJECT).elf > $(PROJECT).dump
run: run: $(PROJECT).hex
$(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug
qemu: $(PROJECT).qemu
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -strace -d in_asm -D debug.log $(PROJECT).qemu
gdb-s: $(PROJECT).qemu
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu
gdb-c: $(PROJECT).qemu
$(GDB) $(PROJECT).qemu
clean: clean:
rm -rf *.elf *.dump *.hex *.a *.pocl rm -rf *.o *.elf *.dump *.hex *.a *.pocl *.qemu

28677
benchmarks/opencl/bfs/graph4096.txt Executable file

File diff suppressed because it is too large Load Diff

Binary file not shown.

View File

@@ -1,10 +1,12 @@
//--by Jianbin Fang //--by Jianbin Fang
#define __CL_ENABLE_EXCEPTIONS
#include <cstdlib> #include <cstdlib>
#include <cstring>
#include <iostream> #include <iostream>
#include <string> #include <string>
#include <cstring> #include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#ifdef PROFILING #ifdef PROFILING
#include "timer.h" #include "timer.h"
@@ -16,34 +18,36 @@
#define MAX_THREADS_PER_BLOCK 256 #define MAX_THREADS_PER_BLOCK 256
// Structure to hold a node information // Structure to hold a node information
struct Node struct Node {
{
int starting; int starting;
int no_of_edges; int no_of_edges;
}; };
//---------------------------------------------------------- //----------------------------------------------------------
//--bfs on cpu //--bfs on cpu
//--programmer: jianbin //--programmer: jianbin
//--date: 26/01/2011 //--date: 26/01/2011
//--note: width is changed to the new_width //--note: width is changed to the new_width
//---------------------------------------------------------- //----------------------------------------------------------
void run_bfs_cpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \ void run_bfs_cpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size,
int *h_graph_edges, char *h_graph_mask, char *h_updating_graph_mask, \ int *h_graph_edges, char *h_graph_mask,
char *h_graph_visited, int *h_cost_ref){ char *h_updating_graph_mask, char *h_graph_visited,
int *h_cost_ref) {
char stop; char stop;
int k = 0; int k = 0;
do { do {
// if no thread changes this value then the loop stops // if no thread changes this value then the loop stops
stop = false; stop = false;
for(int tid = 0; tid < no_of_nodes; tid++ ) for (int tid = 0; tid < no_of_nodes; tid++) {
{
if (h_graph_mask[tid] == true) { if (h_graph_mask[tid] == true) {
h_graph_mask[tid] = false; h_graph_mask[tid] = false;
for(int i=h_graph_nodes[tid].starting; i<(h_graph_nodes[tid].no_of_edges + h_graph_nodes[tid].starting); i++){ for (int i = h_graph_nodes[tid].starting;
int id = h_graph_edges[i]; //--cambine: node id is connected with node tid i < (h_graph_nodes[tid].no_of_edges + h_graph_nodes[tid].starting);
if(!h_graph_visited[id]){ //--cambine: if node id has not been visited, enter the body below i++) {
int id =
h_graph_edges[i]; //--cambine: node id is connected with node tid
if (!h_graph_visited[id]) { //--cambine: if node id has not been
//visited, enter the body below
h_cost_ref[id] = h_cost_ref[tid] + 1; h_cost_ref[id] = h_cost_ref[tid] + 1;
h_updating_graph_mask[id] = true; h_updating_graph_mask[id] = true;
} }
@@ -51,8 +55,7 @@ void run_bfs_cpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
} }
} }
for(int tid=0; tid< no_of_nodes ; tid++ ) for (int tid = 0; tid < no_of_nodes; tid++) {
{
if (h_updating_graph_mask[tid] == true) { if (h_updating_graph_mask[tid] == true) {
h_graph_mask[tid] = true; h_graph_mask[tid] = true;
h_graph_visited[tid] = true; h_graph_visited[tid] = true;
@@ -61,20 +64,19 @@ void run_bfs_cpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
} }
} }
k++; k++;
} } while (stop);
while(stop);
} }
//---------------------------------------------------------- //----------------------------------------------------------
//--breadth first search on GPUs //--breadth first search on GPUs
//---------------------------------------------------------- //----------------------------------------------------------
void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \ void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size,
int *h_graph_edges, char *h_graph_mask, char *h_updating_graph_mask, \ int *h_graph_edges, char *h_graph_mask,
char *h_graph_visited, int *h_cost) char *h_updating_graph_mask, char *h_graph_visited,
throw(std::string){ int *h_cost) throw(std::string) {
// int number_elements = height*width; // int number_elements = height*width;
char h_over; char h_over;
cl_mem d_graph_nodes, d_graph_edges, d_graph_mask, d_updating_graph_mask, \ cl_mem d_graph_nodes, d_graph_edges, d_graph_mask, d_updating_graph_mask,
d_graph_visited, d_cost, d_over; d_graph_visited, d_cost, d_over;
try { try {
//--1 transfer data from host to device //--1 transfer data from host to device
@@ -82,17 +84,18 @@ void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
d_graph_nodes = _clMalloc(no_of_nodes * sizeof(Node), h_graph_nodes); d_graph_nodes = _clMalloc(no_of_nodes * sizeof(Node), h_graph_nodes);
d_graph_edges = _clMalloc(edge_list_size * sizeof(int), h_graph_edges); d_graph_edges = _clMalloc(edge_list_size * sizeof(int), h_graph_edges);
d_graph_mask = _clMallocRW(no_of_nodes * sizeof(char), h_graph_mask); d_graph_mask = _clMallocRW(no_of_nodes * sizeof(char), h_graph_mask);
d_updating_graph_mask = _clMallocRW(no_of_nodes*sizeof(char), h_updating_graph_mask); d_updating_graph_mask =
_clMallocRW(no_of_nodes * sizeof(char), h_updating_graph_mask);
d_graph_visited = _clMallocRW(no_of_nodes * sizeof(char), h_graph_visited); d_graph_visited = _clMallocRW(no_of_nodes * sizeof(char), h_graph_visited);
d_cost = _clMallocRW(no_of_nodes * sizeof(int), h_cost); d_cost = _clMallocRW(no_of_nodes * sizeof(int), h_cost);
d_over = _clMallocRW(sizeof(char), &h_over); d_over = _clMallocRW(sizeof(char), &h_over);
_clMemcpyH2D(d_graph_nodes, no_of_nodes * sizeof(Node), h_graph_nodes); _clMemcpyH2D(d_graph_nodes, no_of_nodes * sizeof(Node), h_graph_nodes);
_clMemcpyH2D(d_graph_edges, edge_list_size * sizeof(int), h_graph_edges); _clMemcpyH2D(d_graph_edges, edge_list_size * sizeof(int), h_graph_edges);
_clMemcpyH2D(d_graph_mask, no_of_nodes * sizeof(char), h_graph_mask); _clMemcpyH2D(d_graph_mask, no_of_nodes * sizeof(char), h_graph_mask);
_clMemcpyH2D(d_updating_graph_mask, no_of_nodes*sizeof(char), h_updating_graph_mask); _clMemcpyH2D(d_updating_graph_mask, no_of_nodes * sizeof(char),
h_updating_graph_mask);
_clMemcpyH2D(d_graph_visited, no_of_nodes * sizeof(char), h_graph_visited); _clMemcpyH2D(d_graph_visited, no_of_nodes * sizeof(char), h_graph_visited);
_clMemcpyH2D(d_cost, no_of_nodes * sizeof(int), h_cost); _clMemcpyH2D(d_cost, no_of_nodes * sizeof(int), h_cost);
@@ -155,8 +158,7 @@ void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
_clFree(d_cost); _clFree(d_cost);
_clFree(d_over); _clFree(d_over);
_clRelease(); _clRelease();
} } catch (std::string msg) {
catch(std::string msg){
_clFree(d_graph_nodes); _clFree(d_graph_nodes);
_clFree(d_graph_edges); _clFree(d_graph_edges);
_clFree(d_graph_mask); _clFree(d_graph_mask);
@@ -171,31 +173,23 @@ void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
} }
return; return;
} }
void Usage(int argc, char**argv){
fprintf(stderr,"Usage: %s <input_file>\n", argv[0]);
}
//---------------------------------------------------------- //----------------------------------------------------------
//--cambine: main function //--cambine: main function
//--author: created by Jianbin Fang //--author: created by Jianbin Fang
//--date: 25/01/2011 //--date: 25/01/2011
//---------------------------------------------------------- //----------------------------------------------------------
int main(int argc, char * argv[]) int main(int argc, char *argv[]) {
{ printf("enter demo main\n");
int no_of_nodes; int no_of_nodes;
int edge_list_size; int edge_list_size;
FILE *fp; FILE *fp;
Node *h_graph_nodes; Node *h_graph_nodes;
char *h_graph_mask, *h_updating_graph_mask, *h_graph_visited; char *h_graph_mask, *h_updating_graph_mask, *h_graph_visited;
try{
char *input_f;
if(argc!=2){
Usage(argc, argv);
exit(0);
}
input_f = argv[1]; try {
char *input_f = "graph4096.txt";
printf("Reading File\n"); printf("Reading File\n");
// Read in Graph from a file // Read in Graph from a file
fp = fopen(input_f, "r"); fp = fopen(input_f, "r");
@@ -204,6 +198,8 @@ int main(int argc, char * argv[])
return 0; return 0;
} }
printf("Reading File completed!\n");
int source = 0; int source = 0;
fscanf(fp, "%d", &no_of_nodes); fscanf(fp, "%d", &no_of_nodes);
@@ -262,7 +258,8 @@ int main(int argc, char * argv[])
h_cost_ref[source] = 0; h_cost_ref[source] = 0;
//--------------------------------------------------------- //---------------------------------------------------------
//--gpu entry //--gpu entry
run_bfs_gpu(no_of_nodes,h_graph_nodes,edge_list_size,h_graph_edges, h_graph_mask, h_updating_graph_mask, h_graph_visited, h_cost); run_bfs_gpu(no_of_nodes, h_graph_nodes, edge_list_size, h_graph_edges,
h_graph_mask, h_updating_graph_mask, h_graph_visited, h_cost);
//--------------------------------------------------------- //---------------------------------------------------------
//--cpu entry //--cpu entry
// initalize the memory again // initalize the memory again
@@ -275,7 +272,9 @@ int main(int argc, char * argv[])
source = 0; source = 0;
h_graph_mask[source] = true; h_graph_mask[source] = true;
h_graph_visited[source] = true; h_graph_visited[source] = true;
run_bfs_cpu(no_of_nodes,h_graph_nodes,edge_list_size,h_graph_edges, h_graph_mask, h_updating_graph_mask, h_graph_visited, h_cost_ref); run_bfs_cpu(no_of_nodes, h_graph_nodes, edge_list_size, h_graph_edges,
h_graph_mask, h_updating_graph_mask, h_graph_visited,
h_cost_ref);
//--------------------------------------------------------- //---------------------------------------------------------
//--result varification //--result varification
compare_results<int>(h_cost_ref, h_cost, no_of_nodes); compare_results<int>(h_cost_ref, h_cost, no_of_nodes);
@@ -285,8 +284,7 @@ int main(int argc, char * argv[])
free(h_updating_graph_mask); free(h_updating_graph_mask);
free(h_graph_visited); free(h_graph_visited);
} } catch (std::string msg) {
catch(std::string msg){
std::cout << "--cambine: exception in main ->" << msg << std::endl; std::cout << "--cambine: exception in main ->" << msg << std::endl;
// release host memory // release host memory
free(h_graph_nodes); free(h_graph_nodes);

View File

@@ -3,7 +3,6 @@
#include <iostream> #include <iostream>
class timer { class timer {
public: public:
timer(const char *name = 0); timer(const char *name = 0);
@@ -38,88 +37,62 @@ class timer {
static double CPU_speed_in_MHz, get_CPU_speed_in_MHz(); static double CPU_speed_in_MHz, get_CPU_speed_in_MHz();
}; };
std::ostream &operator<<(std::ostream &, class timer &); std::ostream &operator<<(std::ostream &, class timer &);
inline void timer::reset() {
inline void timer::reset()
{
total_time = 0; total_time = 0;
count = 0; count = 0;
} }
inline timer::timer(const char *name) : name(name), write_on_exit(0) {
inline timer::timer(const char *name)
:
name(name),
write_on_exit(0)
{
reset(); reset();
} }
inline timer::timer(const char *name, std::ostream &write_on_exit) inline timer::timer(const char *name, std::ostream &write_on_exit)
: : name(name), write_on_exit(&write_on_exit) {
name(name),
write_on_exit(&write_on_exit)
{
reset(); reset();
} }
inline timer::~timer() {
inline timer::~timer()
{
if (write_on_exit != 0) if (write_on_exit != 0)
print(*write_on_exit); print(*write_on_exit);
} }
inline void timer::start() {
inline void timer::start()
{
#if (defined __PATHSCALE__) && (defined __i386 || defined __x86_64) #if (defined __PATHSCALE__) && (defined __i386 || defined __x86_64)
unsigned eax, edx; unsigned eax, edx;
asm volatile("rdtsc" : "=a"(eax), "=d"(edx)); asm volatile("rdtsc" : "=a"(eax), "=d"(edx));
total_time -= ((unsigned long long)edx << 32) + eax; total_time -= ((unsigned long long)edx << 32) + eax;
#elif (defined __GNUC__ || defined __INTEL_COMPILER) && (defined __i386 || defined __x86_64) #elif (defined __GNUC__ || defined __INTEL_COMPILER) && \
asm volatile (defined __i386 || defined __x86_64)
( asm volatile("rdtsc\n\t"
"rdtsc\n\t"
"subl %%eax, %0\n\t" "subl %%eax, %0\n\t"
"sbbl %%edx, %1" "sbbl %%edx, %1"
: "+m"(low), "+m"(high)
: :
"+m" (low), "+m" (high) : "eax", "edx");
:
:
"eax", "edx"
);
#else #else
#error Compiler/Architecture not recognized #error Compiler/Architecture not recognized
#endif #endif
} }
inline void timer::stop() {
inline void timer::stop()
{
#if (defined __PATHSCALE__) && (defined __i386 || defined __x86_64) #if (defined __PATHSCALE__) && (defined __i386 || defined __x86_64)
unsigned eax, edx; unsigned eax, edx;
asm volatile("rdtsc" : "=a"(eax), "=d"(edx)); asm volatile("rdtsc" : "=a"(eax), "=d"(edx));
total_time += ((unsigned long long)edx << 32) + eax; total_time += ((unsigned long long)edx << 32) + eax;
#elif (defined __GNUC__ || defined __INTEL_COMPILER) && (defined __i386 || defined __x86_64) #elif (defined __GNUC__ || defined __INTEL_COMPILER) && \
asm volatile (defined __i386 || defined __x86_64)
( asm volatile("rdtsc\n\t"
"rdtsc\n\t"
"addl %%eax, %0\n\t" "addl %%eax, %0\n\t"
"adcl %%edx, %1" "adcl %%edx, %1"
: "+m"(low), "+m"(high)
: :
"+m" (low), "+m" (high) : "eax", "edx");
:
:
"eax", "edx"
);
#endif #endif
++count; ++count;

View File

@@ -1,9 +1,7 @@
RISCV_TOOL_PATH = $(wildcard ~/dev/riscv-gnu-toolchain/drops) RISCV_TOOL_PATH = $(wildcard ~/dev/riscv-gnu-toolchain/drops)
POCL_CC_PATH = $(wildcard ~/dev/pocl/drops_riscv_cc) POCL_CC_PATH = $(wildcard ~/dev/pocl/drops_riscv_cc)
POCL_RT_PATH=$(wildcard ~/dev/pocl/drops_riscv_rt) POCL_INC_PATH = $(wildcard ../include)
POCL_LIB_PATH = $(wildcard ../lib)
VX_RT_PATH = $(wildcard ../../../runtime) VX_RT_PATH = $(wildcard ../../../runtime)
VX_SIMX_PATH = $(wildcard ../../../simX/obj_dir) VX_SIMX_PATH = $(wildcard ../../../simX/obj_dir)
@@ -11,34 +9,52 @@ CC=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++ CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
NEWLIB_PATH=$(RISCV_TOOL_PATH)/riscv32-unknown-elf/lib GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
VX_NEWLIB = $(VX_RT_PATH)/newlib/newlib.c VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
VX_STR = $(VX_RT_PATH)/startup/vx_start.s VX_SRCS += $(VX_RT_PATH)/startup/vx_start.s
VX_INT = $(VX_RT_PATH)/intrinsics/vx_intrinsics.s VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_IO = $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_FIO = $(VX_RT_PATH)/fileio/fileio.s VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
VX_API = $(VX_RT_PATH)/vx_api/vx_api.c VX_SRCS += $(VX_RT_PATH)/tests/tests.c
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c
VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST)
VX_SRCS = $(VX_STR) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld
CXXFLAGS = -g -O0 -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld -march=rv32im -mabi=ilp32 CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32
CXXFLAGS += -ffreestanding # program may not begin at main() CXXFLAGS += -ffreestanding # program may not begin at main()
CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections
CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
CXXFLAGS += -I$(POCL_INC_PATH)
LIBS = -lOpenCL VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
PROJECT=kmeans PROJECT=kmeans
PROJECT=saxpy
all: $(PROJECT).dump $(PROJECT).hex all: $(PROJECT).dump $(PROJECT).hex
lib$(PROJECT).a: kernel.cl lib$(PROJECT).a: kernel.cl
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
$(PROJECT).elf: main.cc lib$(PROJECT).a kmeans_clustering.o: kmeans_clustering.c
$(CXX) $(CXXFLAGS) -I$(POCL_RT_PATH)/include -L$(POCL_RT_PATH)/lib/static -L. $(VX_SRCS) main.cc rmse.c read_input.c cluster.c kmeans_clustering.c -Wl,--whole-archive -l$(PROJECT) -Wl,--no-whole-archive $(LIBS) -o $(PROJECT).elf $(CC) $(CXXFLAGS) -c kmeans_clustering.c
cluster.o: cluster.c
$(CC) $(CXXFLAGS) -c cluster.c
read_input.o: read_input.c
$(CC) $(CXXFLAGS) -c read_input.c
rmse.o: rmse.c
$(CC) $(CXXFLAGS) -c rmse.c
$(PROJECT).elf: main.cc lib$(PROJECT).a read_input.o rmse.o cluster.o kmeans_clustering.o
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) main.cc read_input.o rmse.o cluster.o kmeans_clustering.o $(VX_LIBS) -o $(PROJECT).elf
$(PROJECT).qemu: main.cc lib$(PROJECT).a read_input.o rmse.o cluster.o kmeans_clustering.o
$(CXX) $(CXXFLAGS) main.cc read_input.o rmse.o cluster.o kmeans_clustering.o $(QEMU_LIBS) -o $(PROJECT).qemu
$(PROJECT).hex: $(PROJECT).elf $(PROJECT).hex: $(PROJECT).elf
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex $(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
@@ -46,8 +62,17 @@ $(PROJECT).hex: $(PROJECT).elf
$(PROJECT).dump: $(PROJECT).elf $(PROJECT).dump: $(PROJECT).elf
$(DMP) -D $(PROJECT).elf > $(PROJECT).dump $(DMP) -D $(PROJECT).elf > $(PROJECT).dump
run: run: $(PROJECT).hex
$(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug
qemu: $(PROJECT).qemu
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -strace -d in_asm -D debug.log $(PROJECT).qemu
gdb-s: $(PROJECT).qemu
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu
gdb-c: $(PROJECT).qemu
$(GDB) $(PROJECT).qemu
clean: clean:
rm -rf *.elf *.dump *.hex *.a *.pocl rm -rf *.o *.elf *.dump *.hex *.a *.pocl *.qemu

View File

@@ -63,4 +63,4 @@ gdb-c: $(PROJECT).qemu
$(GDB) $(PROJECT).qemu $(GDB) $(PROJECT).qemu
clean: clean:
rm -rf *.elf *.dump *.hex *.a *.pocl *.qemu rm -rf *.o *.elf *.dump *.hex *.a *.pocl *.qemu

View File

@@ -63,4 +63,4 @@ gdb-c: $(PROJECT).qemu
$(GDB) $(PROJECT).qemu $(GDB) $(PROJECT).qemu
clean: clean:
rm -rf *.elf *.dump *.hex *.a *.pocl *.qemu rm -rf *.o *.elf *.dump *.hex *.a *.pocl *.qemu

View File

@@ -63,4 +63,4 @@ gdb-c: $(PROJECT).qemu
$(GDB) $(PROJECT).qemu $(GDB) $(PROJECT).qemu
clean: clean:
rm -rf *.elf *.dump *.hex *.a *.pocl *.qemu rm -rf *.o *.elf *.dump *.hex *.a *.pocl *.qemu

View File

@@ -34,8 +34,8 @@ HEX: ELF
$(CPY) -O ihex $(VX_MAIN).elf $(VX_MAIN).hex $(CPY) -O ihex $(VX_MAIN).elf $(VX_MAIN).hex
ELF: ELF:
$(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC1) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf # $(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC1) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf
# $(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC2) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf $(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC2) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf
# $(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC3) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf # $(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC3) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf
# $(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC4) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf # $(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC4) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf
# $(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC5) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf~ # $(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC5) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf~

View File

@@ -6,14 +6,17 @@
int main() int main()
{ {
vx_tmc(1); vx_tmc(1);
int n = 5; int n = 5;
int scalar = 10;
int *a = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1}; int *a = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
int *b = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1}; int *b = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
int *c = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1}; int *c = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
for (int i = 0; i < n; ++i) { a[i] = 1; b[i] = 2; c[i] = 5; } for (int i = 0; i < n; ++i) { a[i] = 1; b[i] = 2; c[i] = 5; }
#if 1 #if 0
//--------------------------------------------------------------- //---------------------------------------------------------------
/* vvaddint32 /* vvaddint32
* # vector-vector add routine of 32-bit integers * # vector-vector add routine of 32-bit integers
@@ -43,7 +46,6 @@ int main()
/* # vector-scalar add /* # vector-scalar add
# for (i=0; i<N; i++) { C[i] = A[i] + B; } // 32-bit ints */ # for (i=0; i<N; i++) { C[i] = A[i] + B; } // 32-bit ints */
for (int i = 0; i < n; ++i) { a[i] = 1; b[i] = 1;} for (int i = 0; i < n; ++i) { a[i] = 1; b[i] = 1;}
int scalar = 10;
printf("vsadd...scalar:%d\na[%d]: ", scalar, n); printf("vsadd...scalar:%d\na[%d]: ", scalar, n);
for(int i = 0; i < n; ++i) printf("%d \n", a[i]); for(int i = 0; i < n; ++i) printf("%d \n", a[i]);
printf("\nb: %d", scalar); printf("\nb: %d", scalar);
@@ -78,10 +80,12 @@ int main()
if(a[i] != b[i]) if(a[i] != b[i])
{ {
printf("\n<memcpy> failed at <index: %d>! \n", i); printf("\n<memcpy> failed at <index: %d>! \n", i);
return; return 1;
} }
} }
printf("\nPASSED.......................... <memcpy> \n"); printf("\nPASSED.......................... <memcpy> \n");
#endif
#if 1
//--------------------------------------------------------------- //---------------------------------------------------------------
/* # void saxpy(size_t n, const float a, const float *x, float *y) /* # void saxpy(size_t n, const float a, const float *x, float *y)
# ==> convert to int!! # ==> convert to int!!
@@ -99,16 +103,22 @@ int main()
vx_vec_saxpy(n, scalar, a, b); vx_vec_saxpy(n, scalar, a, b);
printf("saxpy\na[%d]: ", n);
for(int i = 0; i < n; ++i) printf("%d \n", a[i]);
printf("\nb[%d]: ", n);
for(int i = 0; i < n; ++i) printf("%d \n", b[i]);
for(int i = 0; i < n; ++i) for(int i = 0; i < n; ++i)
{ {
if(b[i] != ((a[i] * scalar) + c[i])) if(b[i] != ((a[i] * scalar) + c[i]))
{ {
printf("\n<saxpy> failed at <index: %d>! \n", i); printf("\n<saxpy> failed at <index: %d>! \n", i);
return; return 1;
} }
} }
printf("\nPASSED.......................... <saxpy> \n"); printf("\nPASSED.......................... <saxpy> \n");
#endif
#if 0
//--------------------------------------------------------------- //---------------------------------------------------------------
/* # void sgemm_nn(size_t n, size_t m, size_t k, const float*a, // m * k matrix /* # void sgemm_nn(size_t n, size_t m, size_t k, const float*a, // m * k matrix
# size_t lda, const float*b, // k * n matrix # size_t lda, const float*b, // k * n matrix

View File

@@ -5,10 +5,10 @@
extern "C" { extern "C" {
#endif #endif
void vx_vec_vvaddint32(int n, int* a, int* b, int *c); //void vx_vec_vvaddint32(int n, int* a, int* b, int *c);
//void vx_vec_vsadd(int n, int* a, int scalar); //void vx_vec_vsadd(int n, int* a, int scalar);
//void vx_vec_memcpy(int* a, int* b, int n); //void vx_vec_memcpy(int* a, int* b, int n);
//void vx_vec_saxpy(int n, int scalar, int* a, int* b); void vx_vec_saxpy(int n, int scalar, int* a, int* b);
//void vx_vec_sgemm_nn(int n, int m, int k, int* a1, int lda, int* b1, int ldb, int* c1, int ldc); //void vx_vec_sgemm_nn(int n, int m, int k, int* a1, int lda, int* b1, int ldb, int* c1, int ldc);
#ifdef __cplusplus #ifdef __cplusplus

View File

@@ -13,16 +13,31 @@
# fa0 a # fa0 a
# a1 x # a1 x
# a2 y # a2 y
#vx_vec_saxpy:
# vsetvli a4, a0, e32, m8
#saxpy:
# vlw.v v0, (a1)
# sub a0, a0, a4
# slli a4, a4, 2
# add a1, a1, a4
# vlw.v v8, (a2)
# vfmacc.vf v8, fa0, v0
# vsw.v v8, (a2)
# add a2, a2, a4
# bnez a0, saxpy
# ret
# a0 n, rs1 a, a2 x, a3 y
vx_vec_saxpy: vx_vec_saxpy:
vsetvli a4, a0, e32, m8 vsetvli a4, a0, e32, m8
saxpy: saxpy:
vlw.v v0, (a1) vlw.v v0, (a2)
sub a0, a0, a4 sub a0, a0, a4
slli a4, a4, 2 slli a4, a4, 2
add a1, a1, a4
vlw.v v8, (a2)
vfmacc.vf v8, fa0, v0
vsw.v v8, (a2)
add a2, a2, a4 add a2, a2, a4
vlw.v v1, (a3)
vmacc.vx v1, rs1, v0
vsw.v v1, (a3)
add a3, a3, a4
bnez a0, saxpy bnez a0, saxpy
ret ret