update
This commit is contained in:
@@ -1,9 +1,7 @@
|
|||||||
|
|
||||||
RISCV_TOOL_PATH = $(wildcard ~/dev/riscv-gnu-toolchain/drops)
|
RISCV_TOOL_PATH = $(wildcard ~/dev/riscv-gnu-toolchain/drops)
|
||||||
|
|
||||||
POCL_CC_PATH = $(wildcard ~/dev/pocl/drops_riscv_cc)
|
POCL_CC_PATH = $(wildcard ~/dev/pocl/drops_riscv_cc)
|
||||||
POCL_RT_PATH=$(wildcard ~/dev/pocl/drops_riscv_rt)
|
POCL_INC_PATH = $(wildcard ../include)
|
||||||
|
POCL_LIB_PATH = $(wildcard ../lib)
|
||||||
VX_RT_PATH = $(wildcard ../../../runtime)
|
VX_RT_PATH = $(wildcard ../../../runtime)
|
||||||
VX_SIMX_PATH = $(wildcard ../../../simX/obj_dir)
|
VX_SIMX_PATH = $(wildcard ../../../simX/obj_dir)
|
||||||
|
|
||||||
@@ -11,23 +9,27 @@ CC=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
|
|||||||
CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
|
CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
|
||||||
DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
|
DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
|
||||||
HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
|
HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
|
||||||
NEWLIB_PATH=$(RISCV_TOOL_PATH)/riscv32-unknown-elf/lib
|
GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
|
||||||
|
|
||||||
VX_NEWLIB = $(VX_RT_PATH)/newlib/newlib.c
|
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
|
||||||
VX_STR = $(VX_RT_PATH)/startup/vx_start.s
|
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.s
|
||||||
VX_INT = $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
|
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
|
||||||
VX_IO = $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
|
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
|
||||||
VX_FIO = $(VX_RT_PATH)/fileio/fileio.s
|
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
|
||||||
VX_API = $(VX_RT_PATH)/vx_api/vx_api.c
|
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
|
||||||
|
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c
|
||||||
|
VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST)
|
||||||
|
|
||||||
VX_SRCS = $(VX_STR) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST)
|
VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld
|
||||||
|
|
||||||
CXXFLAGS = -g -O0 -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld -march=rv32im -mabi=ilp32
|
CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32
|
||||||
CXXFLAGS += -ffreestanding # program may not begin at main()
|
CXXFLAGS += -ffreestanding # program may not begin at main()
|
||||||
CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
|
|
||||||
CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections
|
CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections
|
||||||
|
CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
|
||||||
|
CXXFLAGS += -I$(POCL_INC_PATH)
|
||||||
|
|
||||||
LIBS = -lOpenCL
|
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
||||||
|
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
|
||||||
|
|
||||||
PROJECT=bfs
|
PROJECT=bfs
|
||||||
|
|
||||||
@@ -37,7 +39,10 @@ lib$(PROJECT).a: kernel.cl
|
|||||||
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
|
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
|
||||||
|
|
||||||
$(PROJECT).elf: main.cc lib$(PROJECT).a
|
$(PROJECT).elf: main.cc lib$(PROJECT).a
|
||||||
$(CXX) $(CXXFLAGS) -I$(POCL_RT_PATH)/include -L$(POCL_RT_PATH)/lib/static -L. $(VX_SRCS) main.cc timer.cc -Wl,--whole-archive -l$(PROJECT) -Wl,--no-whole-archive $(LIBS) -o $(PROJECT).elf
|
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) main.cc $(VX_LIBS) -o $(PROJECT).elf
|
||||||
|
|
||||||
|
$(PROJECT).qemu: main.cc lib$(PROJECT).a
|
||||||
|
$(CXX) $(CXXFLAGS) main.cc $(QEMU_LIBS) -o $(PROJECT).qemu
|
||||||
|
|
||||||
$(PROJECT).hex: $(PROJECT).elf
|
$(PROJECT).hex: $(PROJECT).elf
|
||||||
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
|
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
|
||||||
@@ -45,8 +50,17 @@ $(PROJECT).hex: $(PROJECT).elf
|
|||||||
$(PROJECT).dump: $(PROJECT).elf
|
$(PROJECT).dump: $(PROJECT).elf
|
||||||
$(DMP) -D $(PROJECT).elf > $(PROJECT).dump
|
$(DMP) -D $(PROJECT).elf > $(PROJECT).dump
|
||||||
|
|
||||||
run:
|
run: $(PROJECT).hex
|
||||||
$(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug
|
POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug
|
||||||
|
|
||||||
|
qemu: $(PROJECT).qemu
|
||||||
|
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -strace -d in_asm -D debug.log $(PROJECT).qemu
|
||||||
|
|
||||||
|
gdb-s: $(PROJECT).qemu
|
||||||
|
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu
|
||||||
|
|
||||||
|
gdb-c: $(PROJECT).qemu
|
||||||
|
$(GDB) $(PROJECT).qemu
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf *.elf *.dump *.hex *.a *.pocl
|
rm -rf *.o *.elf *.dump *.hex *.a *.pocl *.qemu
|
||||||
Binary file not shown.
@@ -1,10 +1,12 @@
|
|||||||
//--by Jianbin Fang
|
//--by Jianbin Fang
|
||||||
|
|
||||||
#define __CL_ENABLE_EXCEPTIONS
|
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
|
#include <cstring>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <cstring>
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
|
||||||
#ifdef PROFILING
|
#ifdef PROFILING
|
||||||
#include "timer.h"
|
#include "timer.h"
|
||||||
@@ -16,34 +18,36 @@
|
|||||||
#define MAX_THREADS_PER_BLOCK 256
|
#define MAX_THREADS_PER_BLOCK 256
|
||||||
|
|
||||||
// Structure to hold a node information
|
// Structure to hold a node information
|
||||||
struct Node
|
struct Node {
|
||||||
{
|
|
||||||
int starting;
|
int starting;
|
||||||
int no_of_edges;
|
int no_of_edges;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
//----------------------------------------------------------
|
//----------------------------------------------------------
|
||||||
//--bfs on cpu
|
//--bfs on cpu
|
||||||
//--programmer: jianbin
|
//--programmer: jianbin
|
||||||
//--date: 26/01/2011
|
//--date: 26/01/2011
|
||||||
//--note: width is changed to the new_width
|
//--note: width is changed to the new_width
|
||||||
//----------------------------------------------------------
|
//----------------------------------------------------------
|
||||||
void run_bfs_cpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
|
void run_bfs_cpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size,
|
||||||
int *h_graph_edges, char *h_graph_mask, char *h_updating_graph_mask, \
|
int *h_graph_edges, char *h_graph_mask,
|
||||||
char *h_graph_visited, int *h_cost_ref){
|
char *h_updating_graph_mask, char *h_graph_visited,
|
||||||
|
int *h_cost_ref) {
|
||||||
char stop;
|
char stop;
|
||||||
int k = 0;
|
int k = 0;
|
||||||
do {
|
do {
|
||||||
// if no thread changes this value then the loop stops
|
// if no thread changes this value then the loop stops
|
||||||
stop = false;
|
stop = false;
|
||||||
for(int tid = 0; tid < no_of_nodes; tid++ )
|
for (int tid = 0; tid < no_of_nodes; tid++) {
|
||||||
{
|
|
||||||
if (h_graph_mask[tid] == true) {
|
if (h_graph_mask[tid] == true) {
|
||||||
h_graph_mask[tid] = false;
|
h_graph_mask[tid] = false;
|
||||||
for(int i=h_graph_nodes[tid].starting; i<(h_graph_nodes[tid].no_of_edges + h_graph_nodes[tid].starting); i++){
|
for (int i = h_graph_nodes[tid].starting;
|
||||||
int id = h_graph_edges[i]; //--cambine: node id is connected with node tid
|
i < (h_graph_nodes[tid].no_of_edges + h_graph_nodes[tid].starting);
|
||||||
if(!h_graph_visited[id]){ //--cambine: if node id has not been visited, enter the body below
|
i++) {
|
||||||
|
int id =
|
||||||
|
h_graph_edges[i]; //--cambine: node id is connected with node tid
|
||||||
|
if (!h_graph_visited[id]) { //--cambine: if node id has not been
|
||||||
|
//visited, enter the body below
|
||||||
h_cost_ref[id] = h_cost_ref[tid] + 1;
|
h_cost_ref[id] = h_cost_ref[tid] + 1;
|
||||||
h_updating_graph_mask[id] = true;
|
h_updating_graph_mask[id] = true;
|
||||||
}
|
}
|
||||||
@@ -51,8 +55,7 @@ void run_bfs_cpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for(int tid=0; tid< no_of_nodes ; tid++ )
|
for (int tid = 0; tid < no_of_nodes; tid++) {
|
||||||
{
|
|
||||||
if (h_updating_graph_mask[tid] == true) {
|
if (h_updating_graph_mask[tid] == true) {
|
||||||
h_graph_mask[tid] = true;
|
h_graph_mask[tid] = true;
|
||||||
h_graph_visited[tid] = true;
|
h_graph_visited[tid] = true;
|
||||||
@@ -61,20 +64,19 @@ void run_bfs_cpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
k++;
|
k++;
|
||||||
}
|
} while (stop);
|
||||||
while(stop);
|
|
||||||
}
|
}
|
||||||
//----------------------------------------------------------
|
//----------------------------------------------------------
|
||||||
//--breadth first search on GPUs
|
//--breadth first search on GPUs
|
||||||
//----------------------------------------------------------
|
//----------------------------------------------------------
|
||||||
void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
|
void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size,
|
||||||
int *h_graph_edges, char *h_graph_mask, char *h_updating_graph_mask, \
|
int *h_graph_edges, char *h_graph_mask,
|
||||||
char *h_graph_visited, int *h_cost)
|
char *h_updating_graph_mask, char *h_graph_visited,
|
||||||
throw(std::string){
|
int *h_cost) throw(std::string) {
|
||||||
|
|
||||||
// int number_elements = height*width;
|
// int number_elements = height*width;
|
||||||
char h_over;
|
char h_over;
|
||||||
cl_mem d_graph_nodes, d_graph_edges, d_graph_mask, d_updating_graph_mask, \
|
cl_mem d_graph_nodes, d_graph_edges, d_graph_mask, d_updating_graph_mask,
|
||||||
d_graph_visited, d_cost, d_over;
|
d_graph_visited, d_cost, d_over;
|
||||||
try {
|
try {
|
||||||
//--1 transfer data from host to device
|
//--1 transfer data from host to device
|
||||||
@@ -82,17 +84,18 @@ void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
|
|||||||
d_graph_nodes = _clMalloc(no_of_nodes * sizeof(Node), h_graph_nodes);
|
d_graph_nodes = _clMalloc(no_of_nodes * sizeof(Node), h_graph_nodes);
|
||||||
d_graph_edges = _clMalloc(edge_list_size * sizeof(int), h_graph_edges);
|
d_graph_edges = _clMalloc(edge_list_size * sizeof(int), h_graph_edges);
|
||||||
d_graph_mask = _clMallocRW(no_of_nodes * sizeof(char), h_graph_mask);
|
d_graph_mask = _clMallocRW(no_of_nodes * sizeof(char), h_graph_mask);
|
||||||
d_updating_graph_mask = _clMallocRW(no_of_nodes*sizeof(char), h_updating_graph_mask);
|
d_updating_graph_mask =
|
||||||
|
_clMallocRW(no_of_nodes * sizeof(char), h_updating_graph_mask);
|
||||||
d_graph_visited = _clMallocRW(no_of_nodes * sizeof(char), h_graph_visited);
|
d_graph_visited = _clMallocRW(no_of_nodes * sizeof(char), h_graph_visited);
|
||||||
|
|
||||||
|
|
||||||
d_cost = _clMallocRW(no_of_nodes * sizeof(int), h_cost);
|
d_cost = _clMallocRW(no_of_nodes * sizeof(int), h_cost);
|
||||||
d_over = _clMallocRW(sizeof(char), &h_over);
|
d_over = _clMallocRW(sizeof(char), &h_over);
|
||||||
|
|
||||||
_clMemcpyH2D(d_graph_nodes, no_of_nodes * sizeof(Node), h_graph_nodes);
|
_clMemcpyH2D(d_graph_nodes, no_of_nodes * sizeof(Node), h_graph_nodes);
|
||||||
_clMemcpyH2D(d_graph_edges, edge_list_size * sizeof(int), h_graph_edges);
|
_clMemcpyH2D(d_graph_edges, edge_list_size * sizeof(int), h_graph_edges);
|
||||||
_clMemcpyH2D(d_graph_mask, no_of_nodes * sizeof(char), h_graph_mask);
|
_clMemcpyH2D(d_graph_mask, no_of_nodes * sizeof(char), h_graph_mask);
|
||||||
_clMemcpyH2D(d_updating_graph_mask, no_of_nodes*sizeof(char), h_updating_graph_mask);
|
_clMemcpyH2D(d_updating_graph_mask, no_of_nodes * sizeof(char),
|
||||||
|
h_updating_graph_mask);
|
||||||
_clMemcpyH2D(d_graph_visited, no_of_nodes * sizeof(char), h_graph_visited);
|
_clMemcpyH2D(d_graph_visited, no_of_nodes * sizeof(char), h_graph_visited);
|
||||||
_clMemcpyH2D(d_cost, no_of_nodes * sizeof(int), h_cost);
|
_clMemcpyH2D(d_cost, no_of_nodes * sizeof(int), h_cost);
|
||||||
|
|
||||||
@@ -155,8 +158,7 @@ void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
|
|||||||
_clFree(d_cost);
|
_clFree(d_cost);
|
||||||
_clFree(d_over);
|
_clFree(d_over);
|
||||||
_clRelease();
|
_clRelease();
|
||||||
}
|
} catch (std::string msg) {
|
||||||
catch(std::string msg){
|
|
||||||
_clFree(d_graph_nodes);
|
_clFree(d_graph_nodes);
|
||||||
_clFree(d_graph_edges);
|
_clFree(d_graph_edges);
|
||||||
_clFree(d_graph_mask);
|
_clFree(d_graph_mask);
|
||||||
@@ -171,31 +173,23 @@ void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
|
|||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
void Usage(int argc, char**argv){
|
|
||||||
|
|
||||||
fprintf(stderr,"Usage: %s <input_file>\n", argv[0]);
|
|
||||||
|
|
||||||
}
|
|
||||||
//----------------------------------------------------------
|
//----------------------------------------------------------
|
||||||
//--cambine: main function
|
//--cambine: main function
|
||||||
//--author: created by Jianbin Fang
|
//--author: created by Jianbin Fang
|
||||||
//--date: 25/01/2011
|
//--date: 25/01/2011
|
||||||
//----------------------------------------------------------
|
//----------------------------------------------------------
|
||||||
int main(int argc, char * argv[])
|
int main(int argc, char *argv[]) {
|
||||||
{
|
printf("enter demo main\n");
|
||||||
|
|
||||||
int no_of_nodes;
|
int no_of_nodes;
|
||||||
int edge_list_size;
|
int edge_list_size;
|
||||||
FILE *fp;
|
FILE *fp;
|
||||||
Node *h_graph_nodes;
|
Node *h_graph_nodes;
|
||||||
char *h_graph_mask, *h_updating_graph_mask, *h_graph_visited;
|
char *h_graph_mask, *h_updating_graph_mask, *h_graph_visited;
|
||||||
try{
|
|
||||||
char *input_f;
|
|
||||||
if(argc!=2){
|
|
||||||
Usage(argc, argv);
|
|
||||||
exit(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
input_f = argv[1];
|
try {
|
||||||
|
char *input_f = "../data/bfs/graph1MW_6.txt";
|
||||||
printf("Reading File\n");
|
printf("Reading File\n");
|
||||||
// Read in Graph from a file
|
// Read in Graph from a file
|
||||||
fp = fopen(input_f, "r");
|
fp = fopen(input_f, "r");
|
||||||
@@ -262,7 +256,8 @@ int main(int argc, char * argv[])
|
|||||||
h_cost_ref[source] = 0;
|
h_cost_ref[source] = 0;
|
||||||
//---------------------------------------------------------
|
//---------------------------------------------------------
|
||||||
//--gpu entry
|
//--gpu entry
|
||||||
run_bfs_gpu(no_of_nodes,h_graph_nodes,edge_list_size,h_graph_edges, h_graph_mask, h_updating_graph_mask, h_graph_visited, h_cost);
|
run_bfs_gpu(no_of_nodes, h_graph_nodes, edge_list_size, h_graph_edges,
|
||||||
|
h_graph_mask, h_updating_graph_mask, h_graph_visited, h_cost);
|
||||||
//---------------------------------------------------------
|
//---------------------------------------------------------
|
||||||
//--cpu entry
|
//--cpu entry
|
||||||
// initalize the memory again
|
// initalize the memory again
|
||||||
@@ -275,7 +270,9 @@ int main(int argc, char * argv[])
|
|||||||
source = 0;
|
source = 0;
|
||||||
h_graph_mask[source] = true;
|
h_graph_mask[source] = true;
|
||||||
h_graph_visited[source] = true;
|
h_graph_visited[source] = true;
|
||||||
run_bfs_cpu(no_of_nodes,h_graph_nodes,edge_list_size,h_graph_edges, h_graph_mask, h_updating_graph_mask, h_graph_visited, h_cost_ref);
|
run_bfs_cpu(no_of_nodes, h_graph_nodes, edge_list_size, h_graph_edges,
|
||||||
|
h_graph_mask, h_updating_graph_mask, h_graph_visited,
|
||||||
|
h_cost_ref);
|
||||||
//---------------------------------------------------------
|
//---------------------------------------------------------
|
||||||
//--result varification
|
//--result varification
|
||||||
compare_results<int>(h_cost_ref, h_cost, no_of_nodes);
|
compare_results<int>(h_cost_ref, h_cost, no_of_nodes);
|
||||||
@@ -285,8 +282,7 @@ int main(int argc, char * argv[])
|
|||||||
free(h_updating_graph_mask);
|
free(h_updating_graph_mask);
|
||||||
free(h_graph_visited);
|
free(h_graph_visited);
|
||||||
|
|
||||||
}
|
} catch (std::string msg) {
|
||||||
catch(std::string msg){
|
|
||||||
std::cout << "--cambine: exception in main ->" << msg << std::endl;
|
std::cout << "--cambine: exception in main ->" << msg << std::endl;
|
||||||
// release host memory
|
// release host memory
|
||||||
free(h_graph_nodes);
|
free(h_graph_nodes);
|
||||||
|
|||||||
@@ -3,7 +3,6 @@
|
|||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
|
|
||||||
class timer {
|
class timer {
|
||||||
public:
|
public:
|
||||||
timer(const char *name = 0);
|
timer(const char *name = 0);
|
||||||
@@ -38,88 +37,62 @@ class timer {
|
|||||||
static double CPU_speed_in_MHz, get_CPU_speed_in_MHz();
|
static double CPU_speed_in_MHz, get_CPU_speed_in_MHz();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
std::ostream &operator<<(std::ostream &, class timer &);
|
std::ostream &operator<<(std::ostream &, class timer &);
|
||||||
|
|
||||||
|
inline void timer::reset() {
|
||||||
inline void timer::reset()
|
|
||||||
{
|
|
||||||
total_time = 0;
|
total_time = 0;
|
||||||
count = 0;
|
count = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline timer::timer(const char *name) : name(name), write_on_exit(0) {
|
||||||
inline timer::timer(const char *name)
|
|
||||||
:
|
|
||||||
name(name),
|
|
||||||
write_on_exit(0)
|
|
||||||
{
|
|
||||||
reset();
|
reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
inline timer::timer(const char *name, std::ostream &write_on_exit)
|
inline timer::timer(const char *name, std::ostream &write_on_exit)
|
||||||
:
|
: name(name), write_on_exit(&write_on_exit) {
|
||||||
name(name),
|
|
||||||
write_on_exit(&write_on_exit)
|
|
||||||
{
|
|
||||||
reset();
|
reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline timer::~timer() {
|
||||||
inline timer::~timer()
|
|
||||||
{
|
|
||||||
if (write_on_exit != 0)
|
if (write_on_exit != 0)
|
||||||
print(*write_on_exit);
|
print(*write_on_exit);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline void timer::start() {
|
||||||
inline void timer::start()
|
|
||||||
{
|
|
||||||
#if (defined __PATHSCALE__) && (defined __i386 || defined __x86_64)
|
#if (defined __PATHSCALE__) && (defined __i386 || defined __x86_64)
|
||||||
unsigned eax, edx;
|
unsigned eax, edx;
|
||||||
|
|
||||||
asm volatile("rdtsc" : "=a"(eax), "=d"(edx));
|
asm volatile("rdtsc" : "=a"(eax), "=d"(edx));
|
||||||
|
|
||||||
total_time -= ((unsigned long long)edx << 32) + eax;
|
total_time -= ((unsigned long long)edx << 32) + eax;
|
||||||
#elif (defined __GNUC__ || defined __INTEL_COMPILER) && (defined __i386 || defined __x86_64)
|
#elif (defined __GNUC__ || defined __INTEL_COMPILER) && \
|
||||||
asm volatile
|
(defined __i386 || defined __x86_64)
|
||||||
(
|
asm volatile("rdtsc\n\t"
|
||||||
"rdtsc\n\t"
|
|
||||||
"subl %%eax, %0\n\t"
|
"subl %%eax, %0\n\t"
|
||||||
"sbbl %%edx, %1"
|
"sbbl %%edx, %1"
|
||||||
|
: "+m"(low), "+m"(high)
|
||||||
:
|
:
|
||||||
"+m" (low), "+m" (high)
|
: "eax", "edx");
|
||||||
:
|
|
||||||
:
|
|
||||||
"eax", "edx"
|
|
||||||
);
|
|
||||||
#else
|
#else
|
||||||
#error Compiler/Architecture not recognized
|
#error Compiler/Architecture not recognized
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline void timer::stop() {
|
||||||
inline void timer::stop()
|
|
||||||
{
|
|
||||||
#if (defined __PATHSCALE__) && (defined __i386 || defined __x86_64)
|
#if (defined __PATHSCALE__) && (defined __i386 || defined __x86_64)
|
||||||
unsigned eax, edx;
|
unsigned eax, edx;
|
||||||
|
|
||||||
asm volatile("rdtsc" : "=a"(eax), "=d"(edx));
|
asm volatile("rdtsc" : "=a"(eax), "=d"(edx));
|
||||||
|
|
||||||
total_time += ((unsigned long long)edx << 32) + eax;
|
total_time += ((unsigned long long)edx << 32) + eax;
|
||||||
#elif (defined __GNUC__ || defined __INTEL_COMPILER) && (defined __i386 || defined __x86_64)
|
#elif (defined __GNUC__ || defined __INTEL_COMPILER) && \
|
||||||
asm volatile
|
(defined __i386 || defined __x86_64)
|
||||||
(
|
asm volatile("rdtsc\n\t"
|
||||||
"rdtsc\n\t"
|
|
||||||
"addl %%eax, %0\n\t"
|
"addl %%eax, %0\n\t"
|
||||||
"adcl %%edx, %1"
|
"adcl %%edx, %1"
|
||||||
|
: "+m"(low), "+m"(high)
|
||||||
:
|
:
|
||||||
"+m" (low), "+m" (high)
|
: "eax", "edx");
|
||||||
:
|
|
||||||
:
|
|
||||||
"eax", "edx"
|
|
||||||
);
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
++count;
|
++count;
|
||||||
|
|||||||
@@ -1,9 +1,7 @@
|
|||||||
|
|
||||||
RISCV_TOOL_PATH = $(wildcard ~/dev/riscv-gnu-toolchain/drops)
|
RISCV_TOOL_PATH = $(wildcard ~/dev/riscv-gnu-toolchain/drops)
|
||||||
|
|
||||||
POCL_CC_PATH = $(wildcard ~/dev/pocl/drops_riscv_cc)
|
POCL_CC_PATH = $(wildcard ~/dev/pocl/drops_riscv_cc)
|
||||||
POCL_RT_PATH=$(wildcard ~/dev/pocl/drops_riscv_rt)
|
POCL_INC_PATH = $(wildcard ../include)
|
||||||
|
POCL_LIB_PATH = $(wildcard ../lib)
|
||||||
VX_RT_PATH = $(wildcard ../../../runtime)
|
VX_RT_PATH = $(wildcard ../../../runtime)
|
||||||
VX_SIMX_PATH = $(wildcard ../../../simX/obj_dir)
|
VX_SIMX_PATH = $(wildcard ../../../simX/obj_dir)
|
||||||
|
|
||||||
@@ -11,34 +9,52 @@ CC=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
|
|||||||
CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
|
CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
|
||||||
DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
|
DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
|
||||||
HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
|
HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
|
||||||
NEWLIB_PATH=$(RISCV_TOOL_PATH)/riscv32-unknown-elf/lib
|
GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
|
||||||
|
|
||||||
VX_NEWLIB = $(VX_RT_PATH)/newlib/newlib.c
|
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
|
||||||
VX_STR = $(VX_RT_PATH)/startup/vx_start.s
|
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.s
|
||||||
VX_INT = $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
|
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
|
||||||
VX_IO = $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
|
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
|
||||||
VX_FIO = $(VX_RT_PATH)/fileio/fileio.s
|
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
|
||||||
VX_API = $(VX_RT_PATH)/vx_api/vx_api.c
|
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
|
||||||
|
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c
|
||||||
|
VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST)
|
||||||
|
|
||||||
VX_SRCS = $(VX_STR) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST)
|
VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld
|
||||||
|
|
||||||
CXXFLAGS = -g -O0 -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld -march=rv32im -mabi=ilp32
|
CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32
|
||||||
CXXFLAGS += -ffreestanding # program may not begin at main()
|
CXXFLAGS += -ffreestanding # program may not begin at main()
|
||||||
CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
|
|
||||||
CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections
|
CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections
|
||||||
|
CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
|
||||||
|
CXXFLAGS += -I$(POCL_INC_PATH)
|
||||||
|
|
||||||
LIBS = -lOpenCL
|
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
||||||
|
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
|
||||||
|
|
||||||
PROJECT=kmeans
|
PROJECT=kmeans
|
||||||
PROJECT=saxpy
|
|
||||||
|
|
||||||
all: $(PROJECT).dump $(PROJECT).hex
|
all: $(PROJECT).dump $(PROJECT).hex
|
||||||
|
|
||||||
lib$(PROJECT).a: kernel.cl
|
lib$(PROJECT).a: kernel.cl
|
||||||
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
|
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
|
||||||
|
|
||||||
$(PROJECT).elf: main.cc lib$(PROJECT).a
|
kmeans_clustering.o: kmeans_clustering.c
|
||||||
$(CXX) $(CXXFLAGS) -I$(POCL_RT_PATH)/include -L$(POCL_RT_PATH)/lib/static -L. $(VX_SRCS) main.cc rmse.c read_input.c cluster.c kmeans_clustering.c -Wl,--whole-archive -l$(PROJECT) -Wl,--no-whole-archive $(LIBS) -o $(PROJECT).elf
|
$(CC) $(CXXFLAGS) -c kmeans_clustering.c
|
||||||
|
|
||||||
|
cluster.o: cluster.c
|
||||||
|
$(CC) $(CXXFLAGS) -c cluster.c
|
||||||
|
|
||||||
|
read_input.o: read_input.c
|
||||||
|
$(CC) $(CXXFLAGS) -c read_input.c
|
||||||
|
|
||||||
|
rmse.o: rmse.c
|
||||||
|
$(CC) $(CXXFLAGS) -c rmse.c
|
||||||
|
|
||||||
|
$(PROJECT).elf: main.cc lib$(PROJECT).a read_input.o rmse.o cluster.o kmeans_clustering.o
|
||||||
|
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) main.cc read_input.o rmse.o cluster.o kmeans_clustering.o $(VX_LIBS) -o $(PROJECT).elf
|
||||||
|
|
||||||
|
$(PROJECT).qemu: main.cc lib$(PROJECT).a read_input.o rmse.o cluster.o kmeans_clustering.o
|
||||||
|
$(CXX) $(CXXFLAGS) main.cc read_input.o rmse.o cluster.o kmeans_clustering.o $(QEMU_LIBS) -o $(PROJECT).qemu
|
||||||
|
|
||||||
$(PROJECT).hex: $(PROJECT).elf
|
$(PROJECT).hex: $(PROJECT).elf
|
||||||
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
|
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
|
||||||
@@ -46,8 +62,17 @@ $(PROJECT).hex: $(PROJECT).elf
|
|||||||
$(PROJECT).dump: $(PROJECT).elf
|
$(PROJECT).dump: $(PROJECT).elf
|
||||||
$(DMP) -D $(PROJECT).elf > $(PROJECT).dump
|
$(DMP) -D $(PROJECT).elf > $(PROJECT).dump
|
||||||
|
|
||||||
run:
|
run: $(PROJECT).hex
|
||||||
$(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug
|
POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug
|
||||||
|
|
||||||
|
qemu: $(PROJECT).qemu
|
||||||
|
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -strace -d in_asm -D debug.log $(PROJECT).qemu
|
||||||
|
|
||||||
|
gdb-s: $(PROJECT).qemu
|
||||||
|
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu
|
||||||
|
|
||||||
|
gdb-c: $(PROJECT).qemu
|
||||||
|
$(GDB) $(PROJECT).qemu
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf *.elf *.dump *.hex *.a *.pocl
|
rm -rf *.o *.elf *.dump *.hex *.a *.pocl *.qemu
|
||||||
@@ -63,4 +63,4 @@ gdb-c: $(PROJECT).qemu
|
|||||||
$(GDB) $(PROJECT).qemu
|
$(GDB) $(PROJECT).qemu
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf *.elf *.dump *.hex *.a *.pocl *.qemu
|
rm -rf *.o *.elf *.dump *.hex *.a *.pocl *.qemu
|
||||||
@@ -63,4 +63,4 @@ gdb-c: $(PROJECT).qemu
|
|||||||
$(GDB) $(PROJECT).qemu
|
$(GDB) $(PROJECT).qemu
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf *.elf *.dump *.hex *.a *.pocl *.qemu
|
rm -rf *.o *.elf *.dump *.hex *.a *.pocl *.qemu
|
||||||
@@ -63,4 +63,4 @@ gdb-c: $(PROJECT).qemu
|
|||||||
$(GDB) $(PROJECT).qemu
|
$(GDB) $(PROJECT).qemu
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf *.elf *.dump *.hex *.a *.pocl *.qemu
|
rm -rf *.o *.elf *.dump *.hex *.a *.pocl *.qemu
|
||||||
@@ -63,4 +63,4 @@ gdb-c: $(PROJECT).qemu
|
|||||||
$(GDB) $(PROJECT).qemu
|
$(GDB) $(PROJECT).qemu
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf *.elf *.dump *.hex *.a *.pocl *.qemu
|
rm -rf *.o *.elf *.dump *.hex *.a *.pocl *.qemu
|
||||||
Reference in New Issue
Block a user