Merge remote-tracking branch 'refs/remotes/origin/master'
This commit is contained in:
@@ -131,6 +131,7 @@ void _clCmdParams(int argc, char* argv[]){
|
|||||||
// devices have no relationship with context
|
// devices have no relationship with context
|
||||||
void _clInit()
|
void _clInit()
|
||||||
{
|
{
|
||||||
|
printf("_clInit()\n");
|
||||||
int DEVICE_ID_INUSED = device_id_inused;
|
int DEVICE_ID_INUSED = device_id_inused;
|
||||||
cl_int resultCL;
|
cl_int resultCL;
|
||||||
|
|
||||||
@@ -225,15 +226,18 @@ void _clInit()
|
|||||||
throw(string("InitCL()::Creating Command Queue. (clCreateCommandQueue)"));
|
throw(string("InitCL()::Creating Command Queue. (clCreateCommandQueue)"));
|
||||||
//-----------------------------------------------
|
//-----------------------------------------------
|
||||||
//--cambine-5: Load CL file, build CL program object, create CL kernel object
|
//--cambine-5: Load CL file, build CL program object, create CL kernel object
|
||||||
std::string source_str = FileToString(kernel_file);
|
/*std::string source_str = FileToString(kernel_file);
|
||||||
const char * source = source_str.c_str();
|
const char * source = source_str.c_str();
|
||||||
size_t sourceSize[] = { source_str.length() };
|
size_t sourceSize[] = { source_str.length() };*/
|
||||||
|
|
||||||
oclHandles.program = clCreateProgramWithSource(oclHandles.context,
|
oclHandles.program =
|
||||||
|
clCreateProgramWithBuiltInKernels(oclHandles.context, 1, &oclHandles.devices[DEVICE_ID_INUSED], "BFS_1, BFS_2", &resultCL);
|
||||||
|
|
||||||
|
/*oclHandles.program = clCreateProgramWithSource(oclHandles.context,
|
||||||
1,
|
1,
|
||||||
&source,
|
&source,
|
||||||
sourceSize,
|
sourceSize,
|
||||||
&resultCL);
|
&resultCL);*/
|
||||||
|
|
||||||
if ((resultCL != CL_SUCCESS) || (oclHandles.program == NULL))
|
if ((resultCL != CL_SUCCESS) || (oclHandles.program == NULL))
|
||||||
throw(string("InitCL()::Error: Loading Binary into cl_program. (clCreateProgramWithBinary)"));
|
throw(string("InitCL()::Error: Loading Binary into cl_program. (clCreateProgramWithBinary)"));
|
||||||
|
|||||||
@@ -1,33 +1,35 @@
|
|||||||
|
RISCV_TOOL_PATH = $(wildcard ~/dev/riscv-gnu-toolchain/drops)
|
||||||
|
POCL_CC_PATH = $(wildcard ~/dev/pocl/drops_riscv_cc)
|
||||||
|
POCL_INC_PATH = $(wildcard ../include)
|
||||||
|
POCL_LIB_PATH = $(wildcard ../lib)
|
||||||
|
VX_RT_PATH = $(wildcard ../../../runtime)
|
||||||
|
VX_SIMX_PATH = $(wildcard ../../../simX/obj_dir)
|
||||||
|
|
||||||
RISCV_TOOL_PATH=$(wildcard ~/dev/riscv-gnu-toolchain/drops)
|
CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
|
||||||
|
CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
|
||||||
|
DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
|
||||||
|
HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
|
||||||
|
GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
|
||||||
|
|
||||||
POCL_CC_PATH=$(wildcard ~/dev/pocl/drops_riscv_cc)
|
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
|
||||||
POCL_RT_PATH=$(wildcard ~/dev/pocl/drops_riscv_rt)
|
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.s
|
||||||
|
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
|
||||||
|
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
|
||||||
|
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
|
||||||
|
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
|
||||||
|
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c
|
||||||
|
VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST)
|
||||||
|
|
||||||
VX_RT_PATH=$(wildcard ../../../runtime)
|
VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld
|
||||||
VX_SIMX_PATH=$(wildcard ../../../simX/obj_dir)
|
|
||||||
|
|
||||||
CC=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
|
CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32
|
||||||
CXX=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
|
|
||||||
DMP=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
|
|
||||||
HEX=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
|
|
||||||
NEWLIB_PATH=$(RISCV_TOOL_PATH)/riscv32-unknown-elf/lib
|
|
||||||
|
|
||||||
VX_NEWLIB = $(VX_RT_PATH)/newlib/newlib.c
|
|
||||||
VX_STR = $(VX_RT_PATH)/startup/vx_start.s
|
|
||||||
VX_INT = $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
|
|
||||||
VX_IO = $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
|
|
||||||
VX_FIO = $(VX_RT_PATH)/fileio/fileio.s
|
|
||||||
VX_API = $(VX_RT_PATH)/vx_api/vx_api.c
|
|
||||||
|
|
||||||
VX_SRCS = $(VX_STR) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST)
|
|
||||||
|
|
||||||
CXXFLAGS = -g -O0 -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld -march=rv32im -mabi=ilp32
|
|
||||||
CXXFLAGS += -ffreestanding # program may not begin at main()
|
CXXFLAGS += -ffreestanding # program may not begin at main()
|
||||||
CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
|
|
||||||
CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections
|
CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections
|
||||||
|
CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
|
||||||
|
CXXFLAGS += -I$(POCL_INC_PATH)
|
||||||
|
|
||||||
LIBS = -lOpenCL
|
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
||||||
|
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
|
||||||
|
|
||||||
PROJECT=bfs
|
PROJECT=bfs
|
||||||
|
|
||||||
@@ -37,7 +39,10 @@ lib$(PROJECT).a: kernel.cl
|
|||||||
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
|
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
|
||||||
|
|
||||||
$(PROJECT).elf: main.cc lib$(PROJECT).a
|
$(PROJECT).elf: main.cc lib$(PROJECT).a
|
||||||
$(CXX) $(CXXFLAGS) -I$(POCL_RT_PATH)/include -L$(POCL_RT_PATH)/lib/static -L. $(VX_SRCS) main.cc timer.cc -Wl,--whole-archive -l$(PROJECT) -Wl,--no-whole-archive $(LIBS) -o $(PROJECT).elf
|
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) main.cc $(VX_LIBS) -o $(PROJECT).elf
|
||||||
|
|
||||||
|
$(PROJECT).qemu: main.cc lib$(PROJECT).a
|
||||||
|
$(CXX) $(CXXFLAGS) main.cc $(QEMU_LIBS) -o $(PROJECT).qemu
|
||||||
|
|
||||||
$(PROJECT).hex: $(PROJECT).elf
|
$(PROJECT).hex: $(PROJECT).elf
|
||||||
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
|
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
|
||||||
@@ -45,8 +50,17 @@ $(PROJECT).hex: $(PROJECT).elf
|
|||||||
$(PROJECT).dump: $(PROJECT).elf
|
$(PROJECT).dump: $(PROJECT).elf
|
||||||
$(DMP) -D $(PROJECT).elf > $(PROJECT).dump
|
$(DMP) -D $(PROJECT).elf > $(PROJECT).dump
|
||||||
|
|
||||||
run:
|
run: $(PROJECT).hex
|
||||||
$(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug
|
POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug
|
||||||
|
|
||||||
|
qemu: $(PROJECT).qemu
|
||||||
|
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -strace -d in_asm -D debug.log $(PROJECT).qemu
|
||||||
|
|
||||||
|
gdb-s: $(PROJECT).qemu
|
||||||
|
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu
|
||||||
|
|
||||||
|
gdb-c: $(PROJECT).qemu
|
||||||
|
$(GDB) $(PROJECT).qemu
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf *.elf *.dump *.hex *.a *.pocl
|
rm -rf *.o *.elf *.dump *.hex *.a *.pocl *.qemu
|
||||||
28677
benchmarks/opencl/bfs/graph4096.txt
Executable file
28677
benchmarks/opencl/bfs/graph4096.txt
Executable file
File diff suppressed because it is too large
Load Diff
Binary file not shown.
@@ -1,12 +1,14 @@
|
|||||||
//--by Jianbin Fang
|
//--by Jianbin Fang
|
||||||
|
|
||||||
#define __CL_ENABLE_EXCEPTIONS
|
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
|
#include <cstring>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <cstring>
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
|
||||||
#ifdef PROFILING
|
#ifdef PROFILING
|
||||||
#include "timer.h"
|
#include "timer.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -15,285 +17,281 @@
|
|||||||
|
|
||||||
#define MAX_THREADS_PER_BLOCK 256
|
#define MAX_THREADS_PER_BLOCK 256
|
||||||
|
|
||||||
//Structure to hold a node's information
|
// Structure to hold a node's information
|
||||||
struct Node
|
struct Node {
|
||||||
{
|
int starting;
|
||||||
int starting;
|
int no_of_edges;
|
||||||
int no_of_edges;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
//----------------------------------------------------------
|
//----------------------------------------------------------
|
||||||
//--bfs on cpu
|
//--bfs on cpu
|
||||||
//--programmer: jianbin
|
//--programmer: jianbin
|
||||||
//--date: 26/01/2011
|
//--date: 26/01/2011
|
||||||
//--note: width is changed to the new_width
|
//--note: width is changed to the new_width
|
||||||
//----------------------------------------------------------
|
//----------------------------------------------------------
|
||||||
void run_bfs_cpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
|
void run_bfs_cpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size,
|
||||||
int *h_graph_edges, char *h_graph_mask, char *h_updating_graph_mask, \
|
int *h_graph_edges, char *h_graph_mask,
|
||||||
char *h_graph_visited, int *h_cost_ref){
|
char *h_updating_graph_mask, char *h_graph_visited,
|
||||||
char stop;
|
int *h_cost_ref) {
|
||||||
int k = 0;
|
char stop;
|
||||||
do{
|
int k = 0;
|
||||||
//if no thread changes this value then the loop stops
|
do {
|
||||||
stop=false;
|
// if no thread changes this value then the loop stops
|
||||||
for(int tid = 0; tid < no_of_nodes; tid++ )
|
stop = false;
|
||||||
{
|
for (int tid = 0; tid < no_of_nodes; tid++) {
|
||||||
if (h_graph_mask[tid] == true){
|
if (h_graph_mask[tid] == true) {
|
||||||
h_graph_mask[tid]=false;
|
h_graph_mask[tid] = false;
|
||||||
for(int i=h_graph_nodes[tid].starting; i<(h_graph_nodes[tid].no_of_edges + h_graph_nodes[tid].starting); i++){
|
for (int i = h_graph_nodes[tid].starting;
|
||||||
int id = h_graph_edges[i]; //--cambine: node id is connected with node tid
|
i < (h_graph_nodes[tid].no_of_edges + h_graph_nodes[tid].starting);
|
||||||
if(!h_graph_visited[id]){ //--cambine: if node id has not been visited, enter the body below
|
i++) {
|
||||||
h_cost_ref[id]=h_cost_ref[tid]+1;
|
int id =
|
||||||
h_updating_graph_mask[id]=true;
|
h_graph_edges[i]; //--cambine: node id is connected with node tid
|
||||||
}
|
if (!h_graph_visited[id]) { //--cambine: if node id has not been
|
||||||
}
|
//visited, enter the body below
|
||||||
}
|
h_cost_ref[id] = h_cost_ref[tid] + 1;
|
||||||
}
|
h_updating_graph_mask[id] = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for(int tid=0; tid< no_of_nodes ; tid++ )
|
for (int tid = 0; tid < no_of_nodes; tid++) {
|
||||||
{
|
if (h_updating_graph_mask[tid] == true) {
|
||||||
if (h_updating_graph_mask[tid] == true){
|
h_graph_mask[tid] = true;
|
||||||
h_graph_mask[tid]=true;
|
h_graph_visited[tid] = true;
|
||||||
h_graph_visited[tid]=true;
|
stop = true;
|
||||||
stop=true;
|
h_updating_graph_mask[tid] = false;
|
||||||
h_updating_graph_mask[tid]=false;
|
}
|
||||||
}
|
}
|
||||||
}
|
k++;
|
||||||
k++;
|
} while (stop);
|
||||||
}
|
|
||||||
while(stop);
|
|
||||||
}
|
}
|
||||||
//----------------------------------------------------------
|
//----------------------------------------------------------
|
||||||
//--breadth first search on GPUs
|
//--breadth first search on GPUs
|
||||||
//----------------------------------------------------------
|
//----------------------------------------------------------
|
||||||
void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
|
void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size,
|
||||||
int *h_graph_edges, char *h_graph_mask, char *h_updating_graph_mask, \
|
int *h_graph_edges, char *h_graph_mask,
|
||||||
char *h_graph_visited, int *h_cost)
|
char *h_updating_graph_mask, char *h_graph_visited,
|
||||||
throw(std::string){
|
int *h_cost) throw(std::string) {
|
||||||
|
|
||||||
//int number_elements = height*width;
|
// int number_elements = height*width;
|
||||||
char h_over;
|
char h_over;
|
||||||
cl_mem d_graph_nodes, d_graph_edges, d_graph_mask, d_updating_graph_mask, \
|
cl_mem d_graph_nodes, d_graph_edges, d_graph_mask, d_updating_graph_mask,
|
||||||
d_graph_visited, d_cost, d_over;
|
d_graph_visited, d_cost, d_over;
|
||||||
try{
|
try {
|
||||||
//--1 transfer data from host to device
|
//--1 transfer data from host to device
|
||||||
_clInit();
|
_clInit();
|
||||||
d_graph_nodes = _clMalloc(no_of_nodes*sizeof(Node), h_graph_nodes);
|
d_graph_nodes = _clMalloc(no_of_nodes * sizeof(Node), h_graph_nodes);
|
||||||
d_graph_edges = _clMalloc(edge_list_size*sizeof(int), h_graph_edges);
|
d_graph_edges = _clMalloc(edge_list_size * sizeof(int), h_graph_edges);
|
||||||
d_graph_mask = _clMallocRW(no_of_nodes*sizeof(char), h_graph_mask);
|
d_graph_mask = _clMallocRW(no_of_nodes * sizeof(char), h_graph_mask);
|
||||||
d_updating_graph_mask = _clMallocRW(no_of_nodes*sizeof(char), h_updating_graph_mask);
|
d_updating_graph_mask =
|
||||||
d_graph_visited = _clMallocRW(no_of_nodes*sizeof(char), h_graph_visited);
|
_clMallocRW(no_of_nodes * sizeof(char), h_updating_graph_mask);
|
||||||
|
d_graph_visited = _clMallocRW(no_of_nodes * sizeof(char), h_graph_visited);
|
||||||
|
|
||||||
|
d_cost = _clMallocRW(no_of_nodes * sizeof(int), h_cost);
|
||||||
|
d_over = _clMallocRW(sizeof(char), &h_over);
|
||||||
|
|
||||||
d_cost = _clMallocRW(no_of_nodes*sizeof(int), h_cost);
|
_clMemcpyH2D(d_graph_nodes, no_of_nodes * sizeof(Node), h_graph_nodes);
|
||||||
d_over = _clMallocRW(sizeof(char), &h_over);
|
_clMemcpyH2D(d_graph_edges, edge_list_size * sizeof(int), h_graph_edges);
|
||||||
|
_clMemcpyH2D(d_graph_mask, no_of_nodes * sizeof(char), h_graph_mask);
|
||||||
|
_clMemcpyH2D(d_updating_graph_mask, no_of_nodes * sizeof(char),
|
||||||
|
h_updating_graph_mask);
|
||||||
|
_clMemcpyH2D(d_graph_visited, no_of_nodes * sizeof(char), h_graph_visited);
|
||||||
|
_clMemcpyH2D(d_cost, no_of_nodes * sizeof(int), h_cost);
|
||||||
|
|
||||||
_clMemcpyH2D(d_graph_nodes, no_of_nodes*sizeof(Node), h_graph_nodes);
|
//--2 invoke kernel
|
||||||
_clMemcpyH2D(d_graph_edges, edge_list_size*sizeof(int), h_graph_edges);
|
#ifdef PROFILING
|
||||||
_clMemcpyH2D(d_graph_mask, no_of_nodes*sizeof(char), h_graph_mask);
|
timer kernel_timer;
|
||||||
_clMemcpyH2D(d_updating_graph_mask, no_of_nodes*sizeof(char), h_updating_graph_mask);
|
double kernel_time = 0.0;
|
||||||
_clMemcpyH2D(d_graph_visited, no_of_nodes*sizeof(char), h_graph_visited);
|
kernel_timer.reset();
|
||||||
_clMemcpyH2D(d_cost, no_of_nodes*sizeof(int), h_cost);
|
kernel_timer.start();
|
||||||
|
|
||||||
//--2 invoke kernel
|
|
||||||
#ifdef PROFILING
|
|
||||||
timer kernel_timer;
|
|
||||||
double kernel_time = 0.0;
|
|
||||||
kernel_timer.reset();
|
|
||||||
kernel_timer.start();
|
|
||||||
#endif
|
#endif
|
||||||
do{
|
do {
|
||||||
h_over = false;
|
h_over = false;
|
||||||
_clMemcpyH2D(d_over, sizeof(char), &h_over);
|
_clMemcpyH2D(d_over, sizeof(char), &h_over);
|
||||||
//--kernel 0
|
//--kernel 0
|
||||||
int kernel_id = 0;
|
int kernel_id = 0;
|
||||||
int kernel_idx = 0;
|
int kernel_idx = 0;
|
||||||
_clSetArgs(kernel_id, kernel_idx++, d_graph_nodes);
|
_clSetArgs(kernel_id, kernel_idx++, d_graph_nodes);
|
||||||
_clSetArgs(kernel_id, kernel_idx++, d_graph_edges);
|
_clSetArgs(kernel_id, kernel_idx++, d_graph_edges);
|
||||||
_clSetArgs(kernel_id, kernel_idx++, d_graph_mask);
|
_clSetArgs(kernel_id, kernel_idx++, d_graph_mask);
|
||||||
_clSetArgs(kernel_id, kernel_idx++, d_updating_graph_mask);
|
_clSetArgs(kernel_id, kernel_idx++, d_updating_graph_mask);
|
||||||
_clSetArgs(kernel_id, kernel_idx++, d_graph_visited);
|
_clSetArgs(kernel_id, kernel_idx++, d_graph_visited);
|
||||||
_clSetArgs(kernel_id, kernel_idx++, d_cost);
|
_clSetArgs(kernel_id, kernel_idx++, d_cost);
|
||||||
_clSetArgs(kernel_id, kernel_idx++, &no_of_nodes, sizeof(int));
|
_clSetArgs(kernel_id, kernel_idx++, &no_of_nodes, sizeof(int));
|
||||||
|
|
||||||
//int work_items = no_of_nodes;
|
// int work_items = no_of_nodes;
|
||||||
_clInvokeKernel(kernel_id, no_of_nodes, work_group_size);
|
_clInvokeKernel(kernel_id, no_of_nodes, work_group_size);
|
||||||
|
|
||||||
//--kernel 1
|
//--kernel 1
|
||||||
kernel_id = 1;
|
kernel_id = 1;
|
||||||
kernel_idx = 0;
|
kernel_idx = 0;
|
||||||
_clSetArgs(kernel_id, kernel_idx++, d_graph_mask);
|
_clSetArgs(kernel_id, kernel_idx++, d_graph_mask);
|
||||||
_clSetArgs(kernel_id, kernel_idx++, d_updating_graph_mask);
|
_clSetArgs(kernel_id, kernel_idx++, d_updating_graph_mask);
|
||||||
_clSetArgs(kernel_id, kernel_idx++, d_graph_visited);
|
_clSetArgs(kernel_id, kernel_idx++, d_graph_visited);
|
||||||
_clSetArgs(kernel_id, kernel_idx++, d_over);
|
_clSetArgs(kernel_id, kernel_idx++, d_over);
|
||||||
_clSetArgs(kernel_id, kernel_idx++, &no_of_nodes, sizeof(int));
|
_clSetArgs(kernel_id, kernel_idx++, &no_of_nodes, sizeof(int));
|
||||||
|
|
||||||
//work_items = no_of_nodes;
|
// work_items = no_of_nodes;
|
||||||
_clInvokeKernel(kernel_id, no_of_nodes, work_group_size);
|
_clInvokeKernel(kernel_id, no_of_nodes, work_group_size);
|
||||||
|
|
||||||
_clMemcpyD2H(d_over,sizeof(char), &h_over);
|
_clMemcpyD2H(d_over, sizeof(char), &h_over);
|
||||||
}while(h_over);
|
} while (h_over);
|
||||||
|
|
||||||
_clFinish();
|
_clFinish();
|
||||||
#ifdef PROFILING
|
#ifdef PROFILING
|
||||||
kernel_timer.stop();
|
kernel_timer.stop();
|
||||||
kernel_time = kernel_timer.getTimeInSeconds();
|
kernel_time = kernel_timer.getTimeInSeconds();
|
||||||
#endif
|
#endif
|
||||||
//--3 transfer data from device to host
|
//--3 transfer data from device to host
|
||||||
_clMemcpyD2H(d_cost,no_of_nodes*sizeof(int), h_cost);
|
_clMemcpyD2H(d_cost, no_of_nodes * sizeof(int), h_cost);
|
||||||
//--statistics
|
//--statistics
|
||||||
#ifdef PROFILING
|
#ifdef PROFILING
|
||||||
std::cout<<"kernel time(s):"<<kernel_time<<std::endl;
|
std::cout << "kernel time(s):" << kernel_time << std::endl;
|
||||||
#endif
|
#endif
|
||||||
//--4 release cl resources.
|
//--4 release cl resources.
|
||||||
_clFree(d_graph_nodes);
|
_clFree(d_graph_nodes);
|
||||||
_clFree(d_graph_edges);
|
_clFree(d_graph_edges);
|
||||||
_clFree(d_graph_mask);
|
_clFree(d_graph_mask);
|
||||||
_clFree(d_updating_graph_mask);
|
_clFree(d_updating_graph_mask);
|
||||||
_clFree(d_graph_visited);
|
_clFree(d_graph_visited);
|
||||||
_clFree(d_cost);
|
_clFree(d_cost);
|
||||||
_clFree(d_over);
|
_clFree(d_over);
|
||||||
_clRelease();
|
_clRelease();
|
||||||
}
|
} catch (std::string msg) {
|
||||||
catch(std::string msg){
|
_clFree(d_graph_nodes);
|
||||||
_clFree(d_graph_nodes);
|
_clFree(d_graph_edges);
|
||||||
_clFree(d_graph_edges);
|
_clFree(d_graph_mask);
|
||||||
_clFree(d_graph_mask);
|
_clFree(d_updating_graph_mask);
|
||||||
_clFree(d_updating_graph_mask);
|
_clFree(d_graph_visited);
|
||||||
_clFree(d_graph_visited);
|
_clFree(d_cost);
|
||||||
_clFree(d_cost);
|
_clFree(d_over);
|
||||||
_clFree(d_over);
|
_clRelease();
|
||||||
_clRelease();
|
std::string e_str = "in run_transpose_gpu -> ";
|
||||||
std::string e_str = "in run_transpose_gpu -> ";
|
e_str += msg;
|
||||||
e_str += msg;
|
throw(e_str);
|
||||||
throw(e_str);
|
}
|
||||||
}
|
return;
|
||||||
return ;
|
|
||||||
}
|
}
|
||||||
void Usage(int argc, char**argv){
|
|
||||||
|
|
||||||
fprintf(stderr,"Usage: %s <input_file>\n", argv[0]);
|
|
||||||
|
|
||||||
}
|
|
||||||
//----------------------------------------------------------
|
//----------------------------------------------------------
|
||||||
//--cambine: main function
|
//--cambine: main function
|
||||||
//--author: created by Jianbin Fang
|
//--author: created by Jianbin Fang
|
||||||
//--date: 25/01/2011
|
//--date: 25/01/2011
|
||||||
//----------------------------------------------------------
|
//----------------------------------------------------------
|
||||||
int main(int argc, char * argv[])
|
int main(int argc, char *argv[]) {
|
||||||
{
|
printf("enter demo main\n");
|
||||||
int no_of_nodes;
|
|
||||||
int edge_list_size;
|
|
||||||
FILE *fp;
|
|
||||||
Node* h_graph_nodes;
|
|
||||||
char *h_graph_mask, *h_updating_graph_mask, *h_graph_visited;
|
|
||||||
try{
|
|
||||||
char *input_f;
|
|
||||||
if(argc!=2){
|
|
||||||
Usage(argc, argv);
|
|
||||||
exit(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
input_f = argv[1];
|
int no_of_nodes;
|
||||||
printf("Reading File\n");
|
int edge_list_size;
|
||||||
//Read in Graph from a file
|
FILE *fp;
|
||||||
fp = fopen(input_f,"r");
|
Node *h_graph_nodes;
|
||||||
if(!fp){
|
char *h_graph_mask, *h_updating_graph_mask, *h_graph_visited;
|
||||||
printf("Error Reading graph file\n");
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int source = 0;
|
try {
|
||||||
|
char *input_f = "graph4096.txt";
|
||||||
|
printf("Reading File\n");
|
||||||
|
// Read in Graph from a file
|
||||||
|
fp = fopen(input_f, "r");
|
||||||
|
if (!fp) {
|
||||||
|
printf("Error Reading graph file\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
fscanf(fp,"%d",&no_of_nodes);
|
printf("Reading File completed!\n");
|
||||||
|
|
||||||
int num_of_blocks = 1;
|
int source = 0;
|
||||||
int num_of_threads_per_block = no_of_nodes;
|
|
||||||
|
|
||||||
//Make execution Parameters according to the number of nodes
|
fscanf(fp, "%d", &no_of_nodes);
|
||||||
//Distribute threads across multiple Blocks if necessary
|
|
||||||
if(no_of_nodes>MAX_THREADS_PER_BLOCK){
|
|
||||||
num_of_blocks = (int)ceil(no_of_nodes/(double)MAX_THREADS_PER_BLOCK);
|
|
||||||
num_of_threads_per_block = MAX_THREADS_PER_BLOCK;
|
|
||||||
}
|
|
||||||
work_group_size = num_of_threads_per_block;
|
|
||||||
// allocate host memory
|
|
||||||
h_graph_nodes = (Node*) malloc(sizeof(Node)*no_of_nodes);
|
|
||||||
h_graph_mask = (char*) malloc(sizeof(char)*no_of_nodes);
|
|
||||||
h_updating_graph_mask = (char*) malloc(sizeof(char)*no_of_nodes);
|
|
||||||
h_graph_visited = (char*) malloc(sizeof(char)*no_of_nodes);
|
|
||||||
|
|
||||||
int start, edgeno;
|
int num_of_blocks = 1;
|
||||||
// initialize the memory
|
int num_of_threads_per_block = no_of_nodes;
|
||||||
for(int i = 0; i < no_of_nodes; i++){
|
|
||||||
fscanf(fp,"%d %d",&start,&edgeno);
|
|
||||||
h_graph_nodes[i].starting = start;
|
|
||||||
h_graph_nodes[i].no_of_edges = edgeno;
|
|
||||||
h_graph_mask[i]=false;
|
|
||||||
h_updating_graph_mask[i]=false;
|
|
||||||
h_graph_visited[i]=false;
|
|
||||||
}
|
|
||||||
//read the source node from the file
|
|
||||||
fscanf(fp,"%d",&source);
|
|
||||||
source=0;
|
|
||||||
//set the source node as true in the mask
|
|
||||||
h_graph_mask[source]=true;
|
|
||||||
h_graph_visited[source]=true;
|
|
||||||
fscanf(fp,"%d",&edge_list_size);
|
|
||||||
int id,cost;
|
|
||||||
int* h_graph_edges = (int*) malloc(sizeof(int)*edge_list_size);
|
|
||||||
for(int i=0; i < edge_list_size ; i++){
|
|
||||||
fscanf(fp,"%d",&id);
|
|
||||||
fscanf(fp,"%d",&cost);
|
|
||||||
h_graph_edges[i] = id;
|
|
||||||
}
|
|
||||||
|
|
||||||
if(fp)
|
// Make execution Parameters according to the number of nodes
|
||||||
fclose(fp);
|
// Distribute threads across multiple Blocks if necessary
|
||||||
// allocate mem for the result on host side
|
if (no_of_nodes > MAX_THREADS_PER_BLOCK) {
|
||||||
int *h_cost = (int*) malloc(sizeof(int)*no_of_nodes);
|
num_of_blocks = (int)ceil(no_of_nodes / (double)MAX_THREADS_PER_BLOCK);
|
||||||
int *h_cost_ref = (int*)malloc(sizeof(int)*no_of_nodes);
|
num_of_threads_per_block = MAX_THREADS_PER_BLOCK;
|
||||||
for(int i=0;i<no_of_nodes;i++){
|
}
|
||||||
h_cost[i]=-1;
|
work_group_size = num_of_threads_per_block;
|
||||||
h_cost_ref[i] = -1;
|
// allocate host memory
|
||||||
}
|
h_graph_nodes = (Node *)malloc(sizeof(Node) * no_of_nodes);
|
||||||
h_cost[source]=0;
|
h_graph_mask = (char *)malloc(sizeof(char) * no_of_nodes);
|
||||||
h_cost_ref[source]=0;
|
h_updating_graph_mask = (char *)malloc(sizeof(char) * no_of_nodes);
|
||||||
//---------------------------------------------------------
|
h_graph_visited = (char *)malloc(sizeof(char) * no_of_nodes);
|
||||||
//--gpu entry
|
|
||||||
run_bfs_gpu(no_of_nodes,h_graph_nodes,edge_list_size,h_graph_edges, h_graph_mask, h_updating_graph_mask, h_graph_visited, h_cost);
|
|
||||||
//---------------------------------------------------------
|
|
||||||
//--cpu entry
|
|
||||||
// initialize the memory again
|
|
||||||
for(int i = 0; i < no_of_nodes; i++){
|
|
||||||
h_graph_mask[i]=false;
|
|
||||||
h_updating_graph_mask[i]=false;
|
|
||||||
h_graph_visited[i]=false;
|
|
||||||
}
|
|
||||||
//set the source node as true in the mask
|
|
||||||
source=0;
|
|
||||||
h_graph_mask[source]=true;
|
|
||||||
h_graph_visited[source]=true;
|
|
||||||
run_bfs_cpu(no_of_nodes,h_graph_nodes,edge_list_size,h_graph_edges, h_graph_mask, h_updating_graph_mask, h_graph_visited, h_cost_ref);
|
|
||||||
//---------------------------------------------------------
|
|
||||||
//--result verification
|
|
||||||
compare_results<int>(h_cost_ref, h_cost, no_of_nodes);
|
|
||||||
//release host memory
|
|
||||||
free(h_graph_nodes);
|
|
||||||
free(h_graph_mask);
|
|
||||||
free(h_updating_graph_mask);
|
|
||||||
free(h_graph_visited);
|
|
||||||
|
|
||||||
}
|
int start, edgeno;
|
||||||
catch(std::string msg){
|
// initialize the memory
|
||||||
std::cout<<"--cambine: exception in main ->"<<msg<<std::endl;
|
for (int i = 0; i < no_of_nodes; i++) {
|
||||||
//release host memory
|
fscanf(fp, "%d %d", &start, &edgeno);
|
||||||
free(h_graph_nodes);
|
h_graph_nodes[i].starting = start;
|
||||||
free(h_graph_mask);
|
h_graph_nodes[i].no_of_edges = edgeno;
|
||||||
free(h_updating_graph_mask);
|
h_graph_mask[i] = false;
|
||||||
free(h_graph_visited);
|
h_updating_graph_mask[i] = false;
|
||||||
}
|
h_graph_visited[i] = false;
|
||||||
|
}
|
||||||
|
// read the source node from the file
|
||||||
|
fscanf(fp, "%d", &source);
|
||||||
|
source = 0;
|
||||||
|
// set the source node as true in the mask
|
||||||
|
h_graph_mask[source] = true;
|
||||||
|
h_graph_visited[source] = true;
|
||||||
|
fscanf(fp, "%d", &edge_list_size);
|
||||||
|
int id, cost;
|
||||||
|
int *h_graph_edges = (int *)malloc(sizeof(int) * edge_list_size);
|
||||||
|
for (int i = 0; i < edge_list_size; i++) {
|
||||||
|
fscanf(fp, "%d", &id);
|
||||||
|
fscanf(fp, "%d", &cost);
|
||||||
|
h_graph_edges[i] = id;
|
||||||
|
}
|
||||||
|
|
||||||
return 0;
|
if (fp)
|
||||||
|
fclose(fp);
|
||||||
|
// allocate mem for the result on host side
|
||||||
|
int *h_cost = (int *)malloc(sizeof(int) * no_of_nodes);
|
||||||
|
int *h_cost_ref = (int *)malloc(sizeof(int) * no_of_nodes);
|
||||||
|
for (int i = 0; i < no_of_nodes; i++) {
|
||||||
|
h_cost[i] = -1;
|
||||||
|
h_cost_ref[i] = -1;
|
||||||
|
}
|
||||||
|
h_cost[source] = 0;
|
||||||
|
h_cost_ref[source] = 0;
|
||||||
|
//---------------------------------------------------------
|
||||||
|
//--gpu entry
|
||||||
|
run_bfs_gpu(no_of_nodes, h_graph_nodes, edge_list_size, h_graph_edges,
|
||||||
|
h_graph_mask, h_updating_graph_mask, h_graph_visited, h_cost);
|
||||||
|
//---------------------------------------------------------
|
||||||
|
//--cpu entry
|
||||||
|
// initialize the memory again
|
||||||
|
for (int i = 0; i < no_of_nodes; i++) {
|
||||||
|
h_graph_mask[i] = false;
|
||||||
|
h_updating_graph_mask[i] = false;
|
||||||
|
h_graph_visited[i] = false;
|
||||||
|
}
|
||||||
|
// set the source node as true in the mask
|
||||||
|
source = 0;
|
||||||
|
h_graph_mask[source] = true;
|
||||||
|
h_graph_visited[source] = true;
|
||||||
|
run_bfs_cpu(no_of_nodes, h_graph_nodes, edge_list_size, h_graph_edges,
|
||||||
|
h_graph_mask, h_updating_graph_mask, h_graph_visited,
|
||||||
|
h_cost_ref);
|
||||||
|
//---------------------------------------------------------
|
||||||
|
//--result verification
|
||||||
|
compare_results<int>(h_cost_ref, h_cost, no_of_nodes);
|
||||||
|
// release host memory
|
||||||
|
free(h_graph_nodes);
|
||||||
|
free(h_graph_mask);
|
||||||
|
free(h_updating_graph_mask);
|
||||||
|
free(h_graph_visited);
|
||||||
|
|
||||||
|
} catch (std::string msg) {
|
||||||
|
std::cout << "--cambine: exception in main ->" << msg << std::endl;
|
||||||
|
// release host memory
|
||||||
|
free(h_graph_nodes);
|
||||||
|
free(h_graph_mask);
|
||||||
|
free(h_updating_graph_mask);
|
||||||
|
free(h_graph_visited);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,126 +3,99 @@
|
|||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
|
|
||||||
class timer {
|
class timer {
|
||||||
public:
|
public:
|
||||||
timer(const char *name = 0);
|
timer(const char *name = 0);
|
||||||
timer(const char *name, std::ostream &write_on_exit);
|
timer(const char *name, std::ostream &write_on_exit);
|
||||||
|
|
||||||
~timer();
|
~timer();
|
||||||
|
|
||||||
void start(), stop();
|
void start(), stop();
|
||||||
void reset();
|
void reset();
|
||||||
std::ostream &print(std::ostream &);
|
std::ostream &print(std::ostream &);
|
||||||
|
|
||||||
double getTimeInSeconds();
|
double getTimeInSeconds();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void print_time(std::ostream &, const char *which, double time) const;
|
void print_time(std::ostream &, const char *which, double time) const;
|
||||||
|
|
||||||
union {
|
union {
|
||||||
long long total_time;
|
long long total_time;
|
||||||
struct {
|
struct {
|
||||||
#if defined __PPC__
|
#if defined __PPC__
|
||||||
int high, low;
|
int high, low;
|
||||||
#else
|
#else
|
||||||
int low, high;
|
int low, high;
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
unsigned long long count;
|
unsigned long long count;
|
||||||
const char *const name;
|
const char *const name;
|
||||||
std::ostream *const write_on_exit;
|
std::ostream *const write_on_exit;
|
||||||
|
|
||||||
static double CPU_speed_in_MHz, get_CPU_speed_in_MHz();
|
static double CPU_speed_in_MHz, get_CPU_speed_in_MHz();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
std::ostream &operator<<(std::ostream &, class timer &);
|
||||||
|
|
||||||
std::ostream &operator << (std::ostream &, class timer &);
|
inline void timer::reset() {
|
||||||
|
total_time = 0;
|
||||||
|
count = 0;
|
||||||
inline void timer::reset()
|
|
||||||
{
|
|
||||||
total_time = 0;
|
|
||||||
count = 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline timer::timer(const char *name) : name(name), write_on_exit(0) {
|
||||||
inline timer::timer(const char *name)
|
reset();
|
||||||
:
|
|
||||||
name(name),
|
|
||||||
write_on_exit(0)
|
|
||||||
{
|
|
||||||
reset();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
inline timer::timer(const char *name, std::ostream &write_on_exit)
|
inline timer::timer(const char *name, std::ostream &write_on_exit)
|
||||||
:
|
: name(name), write_on_exit(&write_on_exit) {
|
||||||
name(name),
|
reset();
|
||||||
write_on_exit(&write_on_exit)
|
|
||||||
{
|
|
||||||
reset();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline timer::~timer() {
|
||||||
inline timer::~timer()
|
if (write_on_exit != 0)
|
||||||
{
|
print(*write_on_exit);
|
||||||
if (write_on_exit != 0)
|
|
||||||
print(*write_on_exit);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// timer::start -- subtracts the current rdtsc reading from the accumulator.
//  * PathScale on i386/x86_64: rdtsc is read into eax/edx and the combined
//    value ((edx << 32) + eax) is subtracted from total_time.
//  * GCC or Intel compiler on i386/x86_64: a single asm block runs rdtsc and
//    subtracts eax from `low` and, with borrow (sbbl), edx from `high`,
//    directly in memory; eax and edx are listed as clobbered.
//    NOTE(review): low/high are presumably the two 32-bit halves of
//    total_time; their declaration is outside this span -- confirm.
//  * Any other compiler/architecture stops the build with #error.
// NOTE: this span holds two renderings of the same function (original and
// reformatted) interleaved line by line.
inline void timer::start() {
|
||||||
inline void timer::start()
|
|
||||||
{
|
|
||||||
#if (defined __PATHSCALE__) && (defined __i386 || defined __x86_64)
|
#if (defined __PATHSCALE__) && (defined __i386 || defined __x86_64)
|
||||||
unsigned eax, edx;
|
unsigned eax, edx;
|
||||||
|
|
||||||
asm volatile ("rdtsc" : "=a" (eax), "=d" (edx));
|
asm volatile("rdtsc" : "=a"(eax), "=d"(edx));
|
||||||
|
|
||||||
total_time -= ((unsigned long long) edx << 32) + eax;
|
total_time -= ((unsigned long long)edx << 32) + eax;
|
||||||
#elif (defined __GNUC__ || defined __INTEL_COMPILER) && (defined __i386 || defined __x86_64)
|
#elif (defined __GNUC__ || defined __INTEL_COMPILER) && \
|
||||||
asm volatile
|
(defined __i386 || defined __x86_64)
|
||||||
(
|
asm volatile("rdtsc\n\t"
|
||||||
"rdtsc\n\t"
|
"subl %%eax, %0\n\t"
|
||||||
"subl %%eax, %0\n\t"
|
"sbbl %%edx, %1"
|
||||||
"sbbl %%edx, %1"
|
: "+m"(low), "+m"(high)
|
||||||
:
|
:
|
||||||
"+m" (low), "+m" (high)
|
: "eax", "edx");
|
||||||
:
|
|
||||||
:
|
|
||||||
"eax", "edx"
|
|
||||||
);
|
|
||||||
#else
|
#else
|
||||||
#error Compiler/Architecture not recognized
|
#error Compiler/Architecture not recognized
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// timer::stop -- adds the current rdtsc reading to the accumulator and then
// increments count.
//  * PathScale on i386/x86_64: rdtsc is read into eax/edx and the combined
//    value ((edx << 32) + eax) is added to total_time.
//  * GCC or Intel compiler on i386/x86_64: a single asm block runs rdtsc and
//    adds eax to `low` and, with carry (adcl), edx to `high`, directly in
//    memory; eax and edx are listed as clobbered.
//    NOTE(review): low/high are presumably the two 32-bit halves of
//    total_time; their declaration is outside this span -- confirm.
//  * Unlike start(), there is no #else/#error branch here: on an unrecognized
//    compiler this function itself would only perform ++count.
// NOTE: this span holds two renderings of the same function (original and
// reformatted) interleaved line by line.
inline void timer::stop() {
|
||||||
inline void timer::stop()
|
|
||||||
{
|
|
||||||
#if (defined __PATHSCALE__) && (defined __i386 || defined __x86_64)
|
#if (defined __PATHSCALE__) && (defined __i386 || defined __x86_64)
|
||||||
unsigned eax, edx;
|
unsigned eax, edx;
|
||||||
|
|
||||||
asm volatile ("rdtsc" : "=a" (eax), "=d" (edx));
|
asm volatile("rdtsc" : "=a"(eax), "=d"(edx));
|
||||||
|
|
||||||
total_time += ((unsigned long long) edx << 32) + eax;
|
total_time += ((unsigned long long)edx << 32) + eax;
|
||||||
#elif (defined __GNUC__ || defined __INTEL_COMPILER) && (defined __i386 || defined __x86_64)
|
#elif (defined __GNUC__ || defined __INTEL_COMPILER) && \
|
||||||
asm volatile
|
(defined __i386 || defined __x86_64)
|
||||||
(
|
asm volatile("rdtsc\n\t"
|
||||||
"rdtsc\n\t"
|
"addl %%eax, %0\n\t"
|
||||||
"addl %%eax, %0\n\t"
|
"adcl %%edx, %1"
|
||||||
"adcl %%edx, %1"
|
: "+m"(low), "+m"(high)
|
||||||
:
|
:
|
||||||
"+m" (low), "+m" (high)
|
: "eax", "edx");
|
||||||
:
|
|
||||||
:
|
|
||||||
"eax", "edx"
|
|
||||||
);
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
++ count;
|
++count;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
1804
benchmarks/opencl/include/CL/cl.h
Normal file
1804
benchmarks/opencl/include/CL/cl.h
Normal file
File diff suppressed because it is too large
Load Diff
12459
benchmarks/opencl/include/CL/cl.hpp
Normal file
12459
benchmarks/opencl/include/CL/cl.hpp
Normal file
File diff suppressed because it is too large
Load Diff
10119
benchmarks/opencl/include/CL/cl2.hpp
Normal file
10119
benchmarks/opencl/include/CL/cl2.hpp
Normal file
File diff suppressed because it is too large
Load Diff
131
benchmarks/opencl/include/CL/cl_d3d10.h
Normal file
131
benchmarks/opencl/include/CL/cl_d3d10.h
Normal file
@@ -0,0 +1,131 @@
|
|||||||
|
/**********************************************************************************
|
||||||
|
* Copyright (c) 2008-2015 The Khronos Group Inc.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and/or associated documentation files (the
|
||||||
|
* "Materials"), to deal in the Materials without restriction, including
|
||||||
|
* without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||||
|
* permit persons to whom the Materials are furnished to do so, subject to
|
||||||
|
* the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included
|
||||||
|
* in all copies or substantial portions of the Materials.
|
||||||
|
*
|
||||||
|
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||||
|
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||||
|
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||||
|
* https://www.khronos.org/registry/
|
||||||
|
*
|
||||||
|
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||||
|
**********************************************************************************/
|
||||||
|
|
||||||
|
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
|
||||||
|
|
||||||
|
#ifndef __OPENCL_CL_D3D10_H
|
||||||
|
#define __OPENCL_CL_D3D10_H
|
||||||
|
|
||||||
|
#include <d3d10.h>
|
||||||
|
#include <CL/cl.h>
|
||||||
|
#include <CL/cl_platform.h>
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/******************************************************************************
|
||||||
|
* cl_khr_d3d10_sharing */
|
||||||
|
#define cl_khr_d3d10_sharing 1
|
||||||
|
|
||||||
|
typedef cl_uint cl_d3d10_device_source_khr;
|
||||||
|
typedef cl_uint cl_d3d10_device_set_khr;
|
||||||
|
|
||||||
|
/******************************************************************************/
|
||||||
|
|
||||||
|
/* Error Codes */
|
||||||
|
#define CL_INVALID_D3D10_DEVICE_KHR -1002
|
||||||
|
#define CL_INVALID_D3D10_RESOURCE_KHR -1003
|
||||||
|
#define CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR -1004
|
||||||
|
#define CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR -1005
|
||||||
|
|
||||||
|
/* cl_d3d10_device_source_khr */
|
||||||
|
#define CL_D3D10_DEVICE_KHR 0x4010
|
||||||
|
#define CL_D3D10_DXGI_ADAPTER_KHR 0x4011
|
||||||
|
|
||||||
|
/* cl_d3d10_device_set_khr */
|
||||||
|
#define CL_PREFERRED_DEVICES_FOR_D3D10_KHR 0x4012
|
||||||
|
#define CL_ALL_DEVICES_FOR_D3D10_KHR 0x4013
|
||||||
|
|
||||||
|
/* cl_context_info */
|
||||||
|
#define CL_CONTEXT_D3D10_DEVICE_KHR 0x4014
|
||||||
|
#define CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR 0x402C
|
||||||
|
|
||||||
|
/* cl_mem_info */
|
||||||
|
#define CL_MEM_D3D10_RESOURCE_KHR 0x4015
|
||||||
|
|
||||||
|
/* cl_image_info */
|
||||||
|
#define CL_IMAGE_D3D10_SUBRESOURCE_KHR 0x4016
|
||||||
|
|
||||||
|
/* cl_command_type */
|
||||||
|
#define CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR 0x4017
|
||||||
|
#define CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR 0x4018
|
||||||
|
|
||||||
|
/******************************************************************************/
|
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D10KHR_fn)(
|
||||||
|
cl_platform_id platform,
|
||||||
|
cl_d3d10_device_source_khr d3d_device_source,
|
||||||
|
void * d3d_object,
|
||||||
|
cl_d3d10_device_set_khr d3d_device_set,
|
||||||
|
cl_uint num_entries,
|
||||||
|
cl_device_id * devices,
|
||||||
|
cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_0;
|
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10BufferKHR_fn)(
|
||||||
|
cl_context context,
|
||||||
|
cl_mem_flags flags,
|
||||||
|
ID3D10Buffer * resource,
|
||||||
|
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture2DKHR_fn)(
|
||||||
|
cl_context context,
|
||||||
|
cl_mem_flags flags,
|
||||||
|
ID3D10Texture2D * resource,
|
||||||
|
UINT subresource,
|
||||||
|
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture3DKHR_fn)(
|
||||||
|
cl_context context,
|
||||||
|
cl_mem_flags flags,
|
||||||
|
ID3D10Texture3D * resource,
|
||||||
|
UINT subresource,
|
||||||
|
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D10ObjectsKHR_fn)(
|
||||||
|
cl_command_queue command_queue,
|
||||||
|
cl_uint num_objects,
|
||||||
|
const cl_mem * mem_objects,
|
||||||
|
cl_uint num_events_in_wait_list,
|
||||||
|
const cl_event * event_wait_list,
|
||||||
|
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D10ObjectsKHR_fn)(
|
||||||
|
cl_command_queue command_queue,
|
||||||
|
cl_uint num_objects,
|
||||||
|
const cl_mem * mem_objects,
|
||||||
|
cl_uint num_events_in_wait_list,
|
||||||
|
const cl_event * event_wait_list,
|
||||||
|
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* __OPENCL_CL_D3D10_H */
|
||||||
|
|
||||||
131
benchmarks/opencl/include/CL/cl_d3d11.h
Normal file
131
benchmarks/opencl/include/CL/cl_d3d11.h
Normal file
@@ -0,0 +1,131 @@
|
|||||||
|
/**********************************************************************************
|
||||||
|
* Copyright (c) 2008-2015 The Khronos Group Inc.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and/or associated documentation files (the
|
||||||
|
* "Materials"), to deal in the Materials without restriction, including
|
||||||
|
* without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||||
|
* permit persons to whom the Materials are furnished to do so, subject to
|
||||||
|
* the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included
|
||||||
|
* in all copies or substantial portions of the Materials.
|
||||||
|
*
|
||||||
|
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||||
|
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||||
|
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||||
|
* https://www.khronos.org/registry/
|
||||||
|
*
|
||||||
|
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||||
|
**********************************************************************************/
|
||||||
|
|
||||||
|
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
|
||||||
|
|
||||||
|
#ifndef __OPENCL_CL_D3D11_H
|
||||||
|
#define __OPENCL_CL_D3D11_H
|
||||||
|
|
||||||
|
#include <d3d11.h>
|
||||||
|
#include <CL/cl.h>
|
||||||
|
#include <CL/cl_platform.h>
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/******************************************************************************
|
||||||
|
* cl_khr_d3d11_sharing */
|
||||||
|
#define cl_khr_d3d11_sharing 1
|
||||||
|
|
||||||
|
typedef cl_uint cl_d3d11_device_source_khr;
|
||||||
|
typedef cl_uint cl_d3d11_device_set_khr;
|
||||||
|
|
||||||
|
/******************************************************************************/
|
||||||
|
|
||||||
|
/* Error Codes */
|
||||||
|
#define CL_INVALID_D3D11_DEVICE_KHR -1006
|
||||||
|
#define CL_INVALID_D3D11_RESOURCE_KHR -1007
|
||||||
|
#define CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR -1008
|
||||||
|
#define CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR -1009
|
||||||
|
|
||||||
|
/* cl_d3d11_device_source_khr */
|
||||||
|
#define CL_D3D11_DEVICE_KHR 0x4019
|
||||||
|
#define CL_D3D11_DXGI_ADAPTER_KHR 0x401A
|
||||||
|
|
||||||
|
/* cl_d3d11_device_set_khr */
|
||||||
|
#define CL_PREFERRED_DEVICES_FOR_D3D11_KHR 0x401B
|
||||||
|
#define CL_ALL_DEVICES_FOR_D3D11_KHR 0x401C
|
||||||
|
|
||||||
|
/* cl_context_info */
|
||||||
|
#define CL_CONTEXT_D3D11_DEVICE_KHR 0x401D
|
||||||
|
#define CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR 0x402D
|
||||||
|
|
||||||
|
/* cl_mem_info */
|
||||||
|
#define CL_MEM_D3D11_RESOURCE_KHR 0x401E
|
||||||
|
|
||||||
|
/* cl_image_info */
|
||||||
|
#define CL_IMAGE_D3D11_SUBRESOURCE_KHR 0x401F
|
||||||
|
|
||||||
|
/* cl_command_type */
|
||||||
|
#define CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR 0x4020
|
||||||
|
#define CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR 0x4021
|
||||||
|
|
||||||
|
/******************************************************************************/
|
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D11KHR_fn)(
|
||||||
|
cl_platform_id platform,
|
||||||
|
cl_d3d11_device_source_khr d3d_device_source,
|
||||||
|
void * d3d_object,
|
||||||
|
cl_d3d11_device_set_khr d3d_device_set,
|
||||||
|
cl_uint num_entries,
|
||||||
|
cl_device_id * devices,
|
||||||
|
cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2;
|
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11BufferKHR_fn)(
|
||||||
|
cl_context context,
|
||||||
|
cl_mem_flags flags,
|
||||||
|
ID3D11Buffer * resource,
|
||||||
|
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
|
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture2DKHR_fn)(
|
||||||
|
cl_context context,
|
||||||
|
cl_mem_flags flags,
|
||||||
|
ID3D11Texture2D * resource,
|
||||||
|
UINT subresource,
|
||||||
|
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
|
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture3DKHR_fn)(
|
||||||
|
cl_context context,
|
||||||
|
cl_mem_flags flags,
|
||||||
|
ID3D11Texture3D * resource,
|
||||||
|
UINT subresource,
|
||||||
|
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
|
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D11ObjectsKHR_fn)(
|
||||||
|
cl_command_queue command_queue,
|
||||||
|
cl_uint num_objects,
|
||||||
|
const cl_mem * mem_objects,
|
||||||
|
cl_uint num_events_in_wait_list,
|
||||||
|
const cl_event * event_wait_list,
|
||||||
|
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
|
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D11ObjectsKHR_fn)(
|
||||||
|
cl_command_queue command_queue,
|
||||||
|
cl_uint num_objects,
|
||||||
|
const cl_mem * mem_objects,
|
||||||
|
cl_uint num_events_in_wait_list,
|
||||||
|
const cl_event * event_wait_list,
|
||||||
|
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* __OPENCL_CL_D3D11_H */
|
||||||
|
|
||||||
132
benchmarks/opencl/include/CL/cl_dx9_media_sharing.h
Normal file
132
benchmarks/opencl/include/CL/cl_dx9_media_sharing.h
Normal file
@@ -0,0 +1,132 @@
|
|||||||
|
/**********************************************************************************
|
||||||
|
* Copyright (c) 2008-2015 The Khronos Group Inc.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and/or associated documentation files (the
|
||||||
|
* "Materials"), to deal in the Materials without restriction, including
|
||||||
|
* without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||||
|
* permit persons to whom the Materials are furnished to do so, subject to
|
||||||
|
* the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included
|
||||||
|
* in all copies or substantial portions of the Materials.
|
||||||
|
*
|
||||||
|
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||||
|
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||||
|
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||||
|
* https://www.khronos.org/registry/
|
||||||
|
*
|
||||||
|
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||||
|
**********************************************************************************/
|
||||||
|
|
||||||
|
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
|
||||||
|
|
||||||
|
#ifndef __OPENCL_CL_DX9_MEDIA_SHARING_H
|
||||||
|
#define __OPENCL_CL_DX9_MEDIA_SHARING_H
|
||||||
|
|
||||||
|
#include <CL/cl.h>
|
||||||
|
#include <CL/cl_platform.h>
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/******************************************************************************/
|
||||||
|
/* cl_khr_dx9_media_sharing */
|
||||||
|
#define cl_khr_dx9_media_sharing 1
|
||||||
|
|
||||||
|
typedef cl_uint cl_dx9_media_adapter_type_khr;
|
||||||
|
typedef cl_uint cl_dx9_media_adapter_set_khr;
|
||||||
|
|
||||||
|
#if defined(_WIN32)
|
||||||
|
#include <d3d9.h>
|
||||||
|
/* Surface descriptor for the cl_khr_dx9_media_sharing extension: pairs a
 * Direct3D 9 surface pointer (resource) with a HANDLE (shared_handle).
 * Declared only when _WIN32 is defined, after <d3d9.h> is included.
 * NOTE(review): presumably this is what callers pass through the
 * void *surface_info parameter of clCreateFromDX9MediaSurfaceKHR_fn --
 * confirm against the extension specification. */
typedef struct _cl_dx9_surface_info_khr
|
||||||
|
{
|
||||||
|
IDirect3DSurface9 *resource;
|
||||||
|
HANDLE shared_handle;
|
||||||
|
} cl_dx9_surface_info_khr;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
/******************************************************************************/
|
||||||
|
|
||||||
|
/* Error Codes */
|
||||||
|
#define CL_INVALID_DX9_MEDIA_ADAPTER_KHR -1010
|
||||||
|
#define CL_INVALID_DX9_MEDIA_SURFACE_KHR -1011
|
||||||
|
#define CL_DX9_MEDIA_SURFACE_ALREADY_ACQUIRED_KHR -1012
|
||||||
|
#define CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR -1013
|
||||||
|
|
||||||
|
/* cl_dx9_media_adapter_type_khr */
|
||||||
|
#define CL_ADAPTER_D3D9_KHR 0x2020
|
||||||
|
#define CL_ADAPTER_D3D9EX_KHR 0x2021
|
||||||
|
#define CL_ADAPTER_DXVA_KHR 0x2022
|
||||||
|
|
||||||
|
/* cl_dx9_media_adapter_set_khr */
|
||||||
|
#define CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2023
|
||||||
|
#define CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2024
|
||||||
|
|
||||||
|
/* cl_context_info */
|
||||||
|
#define CL_CONTEXT_ADAPTER_D3D9_KHR 0x2025
|
||||||
|
#define CL_CONTEXT_ADAPTER_D3D9EX_KHR 0x2026
|
||||||
|
#define CL_CONTEXT_ADAPTER_DXVA_KHR 0x2027
|
||||||
|
|
||||||
|
/* cl_mem_info */
|
||||||
|
#define CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR 0x2028
|
||||||
|
#define CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR 0x2029
|
||||||
|
|
||||||
|
/* cl_image_info */
|
||||||
|
#define CL_IMAGE_DX9_MEDIA_PLANE_KHR 0x202A
|
||||||
|
|
||||||
|
/* cl_command_type */
|
||||||
|
#define CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR 0x202B
|
||||||
|
#define CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR 0x202C
|
||||||
|
|
||||||
|
/******************************************************************************/
|
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromDX9MediaAdapterKHR_fn)(
|
||||||
|
cl_platform_id platform,
|
||||||
|
cl_uint num_media_adapters,
|
||||||
|
cl_dx9_media_adapter_type_khr * media_adapter_type,
|
||||||
|
void * media_adapters,
|
||||||
|
cl_dx9_media_adapter_set_khr media_adapter_set,
|
||||||
|
cl_uint num_entries,
|
||||||
|
cl_device_id * devices,
|
||||||
|
cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2;
|
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceKHR_fn)(
|
||||||
|
cl_context context,
|
||||||
|
cl_mem_flags flags,
|
||||||
|
cl_dx9_media_adapter_type_khr adapter_type,
|
||||||
|
void * surface_info,
|
||||||
|
cl_uint plane,
|
||||||
|
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
|
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireDX9MediaSurfacesKHR_fn)(
|
||||||
|
cl_command_queue command_queue,
|
||||||
|
cl_uint num_objects,
|
||||||
|
const cl_mem * mem_objects,
|
||||||
|
cl_uint num_events_in_wait_list,
|
||||||
|
const cl_event * event_wait_list,
|
||||||
|
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
|
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseDX9MediaSurfacesKHR_fn)(
|
||||||
|
cl_command_queue command_queue,
|
||||||
|
cl_uint num_objects,
|
||||||
|
const cl_mem * mem_objects,
|
||||||
|
cl_uint num_events_in_wait_list,
|
||||||
|
const cl_event * event_wait_list,
|
||||||
|
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* __OPENCL_CL_DX9_MEDIA_SHARING_H */
|
||||||
|
|
||||||
182
benchmarks/opencl/include/CL/cl_dx9_media_sharing_intel.h
Normal file
182
benchmarks/opencl/include/CL/cl_dx9_media_sharing_intel.h
Normal file
@@ -0,0 +1,182 @@
|
|||||||
|
/**********************************************************************************
|
||||||
|
* Copyright (c) 2008-2019 The Khronos Group Inc.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and/or associated documentation files (the
|
||||||
|
* "Materials"), to deal in the Materials without restriction, including
|
||||||
|
* without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||||
|
* permit persons to whom the Materials are furnished to do so, subject to
|
||||||
|
* the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included
|
||||||
|
* in all copies or substantial portions of the Materials.
|
||||||
|
*
|
||||||
|
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||||
|
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||||
|
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||||
|
* https://www.khronos.org/registry/
|
||||||
|
*
|
||||||
|
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||||
|
**********************************************************************************/
|
||||||
|
/*****************************************************************************\
|
||||||
|
|
||||||
|
Copyright (c) 2013-2019 Intel Corporation All Rights Reserved.
|
||||||
|
|
||||||
|
THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
|
||||||
|
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||||
|
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
|
||||||
|
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE
|
||||||
|
MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
File Name: cl_dx9_media_sharing_intel.h
|
||||||
|
|
||||||
|
Abstract:
|
||||||
|
|
||||||
|
Notes:
|
||||||
|
|
||||||
|
\*****************************************************************************/
|
||||||
|
|
||||||
|
#ifndef __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H
|
||||||
|
#define __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H
|
||||||
|
|
||||||
|
#include <CL/cl.h>
|
||||||
|
#include <CL/cl_platform.h>
|
||||||
|
#include <d3d9.h>
|
||||||
|
#include <dxvahd.h>
|
||||||
|
#include <wtypes.h>
|
||||||
|
#include <d3d9types.h>
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/***************************************
|
||||||
|
* cl_intel_dx9_media_sharing extension *
|
||||||
|
****************************************/
|
||||||
|
|
||||||
|
#define cl_intel_dx9_media_sharing 1
|
||||||
|
|
||||||
|
typedef cl_uint cl_dx9_device_source_intel;
|
||||||
|
typedef cl_uint cl_dx9_device_set_intel;
|
||||||
|
|
||||||
|
/* error codes */
|
||||||
|
#define CL_INVALID_DX9_DEVICE_INTEL -1010
|
||||||
|
#define CL_INVALID_DX9_RESOURCE_INTEL -1011
|
||||||
|
#define CL_DX9_RESOURCE_ALREADY_ACQUIRED_INTEL -1012
|
||||||
|
#define CL_DX9_RESOURCE_NOT_ACQUIRED_INTEL -1013
|
||||||
|
|
||||||
|
/* cl_dx9_device_source_intel */
|
||||||
|
#define CL_D3D9_DEVICE_INTEL 0x4022
|
||||||
|
#define CL_D3D9EX_DEVICE_INTEL 0x4070
|
||||||
|
#define CL_DXVA_DEVICE_INTEL 0x4071
|
||||||
|
|
||||||
|
/* cl_dx9_device_set_intel */
|
||||||
|
#define CL_PREFERRED_DEVICES_FOR_DX9_INTEL 0x4024
|
||||||
|
#define CL_ALL_DEVICES_FOR_DX9_INTEL 0x4025
|
||||||
|
|
||||||
|
/* cl_context_info */
|
||||||
|
#define CL_CONTEXT_D3D9_DEVICE_INTEL 0x4026
|
||||||
|
#define CL_CONTEXT_D3D9EX_DEVICE_INTEL 0x4072
|
||||||
|
#define CL_CONTEXT_DXVA_DEVICE_INTEL 0x4073
|
||||||
|
|
||||||
|
/* cl_mem_info */
|
||||||
|
#define CL_MEM_DX9_RESOURCE_INTEL 0x4027
|
||||||
|
#define CL_MEM_DX9_SHARED_HANDLE_INTEL 0x4074
|
||||||
|
|
||||||
|
/* cl_image_info */
|
||||||
|
#define CL_IMAGE_DX9_PLANE_INTEL 0x4075
|
||||||
|
|
||||||
|
/* cl_command_type */
|
||||||
|
#define CL_COMMAND_ACQUIRE_DX9_OBJECTS_INTEL 0x402A
|
||||||
|
#define CL_COMMAND_RELEASE_DX9_OBJECTS_INTEL 0x402B
|
||||||
|
/******************************************************************************/
|
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||||
|
clGetDeviceIDsFromDX9INTEL(
|
||||||
|
cl_platform_id platform,
|
||||||
|
cl_dx9_device_source_intel dx9_device_source,
|
||||||
|
void* dx9_object,
|
||||||
|
cl_dx9_device_set_intel dx9_device_set,
|
||||||
|
cl_uint num_entries,
|
||||||
|
cl_device_id* devices,
|
||||||
|
cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_1;
|
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_int (CL_API_CALL* clGetDeviceIDsFromDX9INTEL_fn)(
|
||||||
|
cl_platform_id platform,
|
||||||
|
cl_dx9_device_source_intel dx9_device_source,
|
||||||
|
void* dx9_object,
|
||||||
|
cl_dx9_device_set_intel dx9_device_set,
|
||||||
|
cl_uint num_entries,
|
||||||
|
cl_device_id* devices,
|
||||||
|
cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_1;
|
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||||
|
clCreateFromDX9MediaSurfaceINTEL(
|
||||||
|
cl_context context,
|
||||||
|
cl_mem_flags flags,
|
||||||
|
IDirect3DSurface9* resource,
|
||||||
|
HANDLE sharedHandle,
|
||||||
|
UINT plane,
|
||||||
|
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_1;
|
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceINTEL_fn)(
|
||||||
|
cl_context context,
|
||||||
|
cl_mem_flags flags,
|
||||||
|
IDirect3DSurface9* resource,
|
||||||
|
HANDLE sharedHandle,
|
||||||
|
UINT plane,
|
||||||
|
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_1;
|
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||||
|
clEnqueueAcquireDX9ObjectsINTEL(
|
||||||
|
cl_command_queue command_queue,
|
||||||
|
cl_uint num_objects,
|
||||||
|
const cl_mem* mem_objects,
|
||||||
|
cl_uint num_events_in_wait_list,
|
||||||
|
const cl_event* event_wait_list,
|
||||||
|
cl_event* event) CL_EXT_SUFFIX__VERSION_1_1;
|
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireDX9ObjectsINTEL_fn)(
|
||||||
|
cl_command_queue command_queue,
|
||||||
|
cl_uint num_objects,
|
||||||
|
const cl_mem* mem_objects,
|
||||||
|
cl_uint num_events_in_wait_list,
|
||||||
|
const cl_event* event_wait_list,
|
||||||
|
cl_event* event) CL_EXT_SUFFIX__VERSION_1_1;
|
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||||
|
clEnqueueReleaseDX9ObjectsINTEL(
|
||||||
|
cl_command_queue command_queue,
|
||||||
|
cl_uint num_objects,
|
||||||
|
cl_mem* mem_objects,
|
||||||
|
cl_uint num_events_in_wait_list,
|
||||||
|
const cl_event* event_wait_list,
|
||||||
|
cl_event* event) CL_EXT_SUFFIX__VERSION_1_1;
|
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseDX9ObjectsINTEL_fn)(
|
||||||
|
cl_command_queue command_queue,
|
||||||
|
cl_uint num_objects,
|
||||||
|
cl_mem* mem_objects,
|
||||||
|
cl_uint num_events_in_wait_list,
|
||||||
|
const cl_event* event_wait_list,
|
||||||
|
cl_event* event) CL_EXT_SUFFIX__VERSION_1_1;
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H */
|
||||||
|
|
||||||
132
benchmarks/opencl/include/CL/cl_egl.h
Normal file
132
benchmarks/opencl/include/CL/cl_egl.h
Normal file
@@ -0,0 +1,132 @@
|
|||||||
|
/*******************************************************************************
|
||||||
|
* Copyright (c) 2008-2019 The Khronos Group Inc.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and/or associated documentation files (the
|
||||||
|
* "Materials"), to deal in the Materials without restriction, including
|
||||||
|
* without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||||
|
* permit persons to whom the Materials are furnished to do so, subject to
|
||||||
|
* the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included
|
||||||
|
* in all copies or substantial portions of the Materials.
|
||||||
|
*
|
||||||
|
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||||
|
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||||
|
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||||
|
* https://www.khronos.org/registry/
|
||||||
|
*
|
||||||
|
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||||
|
******************************************************************************/
|
||||||
|
|
||||||
|
#ifndef __OPENCL_CL_EGL_H
|
||||||
|
#define __OPENCL_CL_EGL_H
|
||||||
|
|
||||||
|
#include <CL/cl.h>
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
/* Command type for events created with clEnqueueAcquireEGLObjectsKHR */
|
||||||
|
#define CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR 0x202F
|
||||||
|
#define CL_COMMAND_ACQUIRE_EGL_OBJECTS_KHR 0x202D
|
||||||
|
#define CL_COMMAND_RELEASE_EGL_OBJECTS_KHR 0x202E
|
||||||
|
|
||||||
|
/* Error type for clCreateFromEGLImageKHR */
|
||||||
|
#define CL_INVALID_EGL_OBJECT_KHR -1093
|
||||||
|
#define CL_EGL_RESOURCE_NOT_ACQUIRED_KHR -1092
|
||||||
|
|
||||||
|
/* CLeglImageKHR is an opaque handle to an EGLImage */
|
||||||
|
typedef void* CLeglImageKHR;
|
||||||
|
|
||||||
|
/* CLeglDisplayKHR is an opaque handle to an EGLDisplay */
|
||||||
|
typedef void* CLeglDisplayKHR;
|
||||||
|
|
||||||
|
/* CLeglSyncKHR is an opaque handle to an EGLSync object */
|
||||||
|
typedef void* CLeglSyncKHR;
|
||||||
|
|
||||||
|
/* properties passed to clCreateFromEGLImageKHR */
|
||||||
|
typedef intptr_t cl_egl_image_properties_khr;
|
||||||
|
|
||||||
|
|
||||||
|
#define cl_khr_egl_image 1
|
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||||
|
clCreateFromEGLImageKHR(cl_context context,
|
||||||
|
CLeglDisplayKHR egldisplay,
|
||||||
|
CLeglImageKHR eglimage,
|
||||||
|
cl_mem_flags flags,
|
||||||
|
const cl_egl_image_properties_khr * properties,
|
||||||
|
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromEGLImageKHR_fn)(
|
||||||
|
cl_context context,
|
||||||
|
CLeglDisplayKHR egldisplay,
|
||||||
|
CLeglImageKHR eglimage,
|
||||||
|
cl_mem_flags flags,
|
||||||
|
const cl_egl_image_properties_khr * properties,
|
||||||
|
cl_int * errcode_ret);
|
||||||
|
|
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||||
|
clEnqueueAcquireEGLObjectsKHR(cl_command_queue command_queue,
|
||||||
|
cl_uint num_objects,
|
||||||
|
const cl_mem * mem_objects,
|
||||||
|
cl_uint num_events_in_wait_list,
|
||||||
|
const cl_event * event_wait_list,
|
||||||
|
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireEGLObjectsKHR_fn)(
|
||||||
|
cl_command_queue command_queue,
|
||||||
|
cl_uint num_objects,
|
||||||
|
const cl_mem * mem_objects,
|
||||||
|
cl_uint num_events_in_wait_list,
|
||||||
|
const cl_event * event_wait_list,
|
||||||
|
cl_event * event);
|
||||||
|
|
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||||
|
clEnqueueReleaseEGLObjectsKHR(cl_command_queue command_queue,
|
||||||
|
cl_uint num_objects,
|
||||||
|
const cl_mem * mem_objects,
|
||||||
|
cl_uint num_events_in_wait_list,
|
||||||
|
const cl_event * event_wait_list,
|
||||||
|
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseEGLObjectsKHR_fn)(
|
||||||
|
cl_command_queue command_queue,
|
||||||
|
cl_uint num_objects,
|
||||||
|
const cl_mem * mem_objects,
|
||||||
|
cl_uint num_events_in_wait_list,
|
||||||
|
const cl_event * event_wait_list,
|
||||||
|
cl_event * event);
|
||||||
|
|
||||||
|
|
||||||
|
#define cl_khr_egl_event 1
|
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_event CL_API_CALL
|
||||||
|
clCreateEventFromEGLSyncKHR(cl_context context,
|
||||||
|
CLeglSyncKHR sync,
|
||||||
|
CLeglDisplayKHR display,
|
||||||
|
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_event (CL_API_CALL *clCreateEventFromEGLSyncKHR_fn)(
|
||||||
|
cl_context context,
|
||||||
|
CLeglSyncKHR sync,
|
||||||
|
CLeglDisplayKHR display,
|
||||||
|
cl_int * errcode_ret);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* __OPENCL_CL_EGL_H */
|
||||||
762
benchmarks/opencl/include/CL/cl_ext.h
Normal file
762
benchmarks/opencl/include/CL/cl_ext.h
Normal file
@@ -0,0 +1,762 @@
|
|||||||
|
/*******************************************************************************
|
||||||
|
* Copyright (c) 2008-2019 The Khronos Group Inc.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and/or associated documentation files (the
|
||||||
|
* "Materials"), to deal in the Materials without restriction, including
|
||||||
|
* without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||||
|
* permit persons to whom the Materials are furnished to do so, subject to
|
||||||
|
* the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included
|
||||||
|
* in all copies or substantial portions of the Materials.
|
||||||
|
*
|
||||||
|
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||||
|
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||||
|
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||||
|
* https://www.khronos.org/registry/
|
||||||
|
*
|
||||||
|
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||||
|
******************************************************************************/
|
||||||
|
|
||||||
|
/* cl_ext.h contains OpenCL extensions which don't have external */
|
||||||
|
/* (OpenGL, D3D) dependencies. */
|
||||||
|
|
||||||
|
#ifndef __CL_EXT_H
|
||||||
|
#define __CL_EXT_H
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <CL/cl.h>
|
||||||
|
|
||||||
|
/* cl_khr_fp64 extension - no extension #define since it has no functions */
|
||||||
|
/* CL_DEVICE_DOUBLE_FP_CONFIG is defined in CL.h for OpenCL >= 120 */
|
||||||
|
|
||||||
|
#if CL_TARGET_OPENCL_VERSION <= 110
|
||||||
|
#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* cl_khr_fp16 extension - no extension #define since it has no functions */
|
||||||
|
#define CL_DEVICE_HALF_FP_CONFIG 0x1033
|
||||||
|
|
||||||
|
/* Memory object destruction
|
||||||
|
*
|
||||||
|
* Apple extension for managing externally allocated buffers used with cl_mem objects created with CL_MEM_USE_HOST_PTR
|
||||||
|
*
|
||||||
|
* Registers a user callback function that will be called when the memory object is deleted and its resources
|
||||||
|
* freed. Each call to clSetMemObjectCallbackFn registers the specified user callback function on a callback
|
||||||
|
* stack associated with memobj. The registered user callback functions are called in the reverse order in
|
||||||
|
* which they were registered. The user callback functions are called and then the memory object is deleted
|
||||||
|
* and its resources freed. This provides a mechanism for the application (and libraries) using memobj to be
|
||||||
|
* notified when the memory referenced by host_ptr, specified when the memory object is created and used as
|
||||||
|
* the storage bits for the memory object, can be reused or freed.
|
||||||
|
*
|
||||||
|
* The application may not call CL APIs with the cl_mem object passed to pfn_notify.
|
||||||
|
*
|
||||||
|
* Please check for the "cl_APPLE_SetMemObjectDestructor" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
|
||||||
|
* before using.
|
||||||
|
*/
|
||||||
|
#define cl_APPLE_SetMemObjectDestructor 1
|
||||||
|
cl_int CL_API_ENTRY clSetMemObjectDestructorAPPLE( cl_mem memobj,
|
||||||
|
void (* pfn_notify)(cl_mem memobj, void * user_data),
|
||||||
|
void * user_data) CL_EXT_SUFFIX__VERSION_1_0;
|
||||||
|
|
||||||
|
|
||||||
|
/* Context Logging Functions
|
||||||
|
*
|
||||||
|
* The next three convenience functions are intended to be used as the pfn_notify parameter to clCreateContext().
|
||||||
|
* Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
|
||||||
|
* before using.
|
||||||
|
*
|
||||||
|
* clLogMessagesToSystemLogAPPLE forwards all log messages to the Apple System Logger
|
||||||
|
*/
|
||||||
|
#define cl_APPLE_ContextLoggingFunctions 1
|
||||||
|
extern void CL_API_ENTRY clLogMessagesToSystemLogAPPLE( const char * errstr,
|
||||||
|
const void * private_info,
|
||||||
|
size_t cb,
|
||||||
|
void * user_data) CL_EXT_SUFFIX__VERSION_1_0;
|
||||||
|
|
||||||
|
/* clLogMessagesToStdoutAPPLE sends all log messages to the file descriptor stdout */
|
||||||
|
extern void CL_API_ENTRY clLogMessagesToStdoutAPPLE( const char * errstr,
|
||||||
|
const void * private_info,
|
||||||
|
size_t cb,
|
||||||
|
void * user_data) CL_EXT_SUFFIX__VERSION_1_0;
|
||||||
|
|
||||||
|
/* clLogMessagesToStderrAPPLE sends all log messages to the file descriptor stderr */
|
||||||
|
extern void CL_API_ENTRY clLogMessagesToStderrAPPLE( const char * errstr,
|
||||||
|
const void * private_info,
|
||||||
|
size_t cb,
|
||||||
|
void * user_data) CL_EXT_SUFFIX__VERSION_1_0;
|
||||||
|
|
||||||
|
|
||||||
|
/************************
|
||||||
|
* cl_khr_icd extension *
|
||||||
|
************************/
|
||||||
|
#define cl_khr_icd 1
|
||||||
|
|
||||||
|
/* cl_platform_info */
|
||||||
|
#define CL_PLATFORM_ICD_SUFFIX_KHR 0x0920
|
||||||
|
|
||||||
|
/* Additional Error Codes */
|
||||||
|
#define CL_PLATFORM_NOT_FOUND_KHR -1001
|
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||||
|
clIcdGetPlatformIDsKHR(cl_uint num_entries,
|
||||||
|
cl_platform_id * platforms,
|
||||||
|
cl_uint * num_platforms);
|
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_int
|
||||||
|
(CL_API_CALL *clIcdGetPlatformIDsKHR_fn)(cl_uint num_entries,
|
||||||
|
cl_platform_id * platforms,
|
||||||
|
cl_uint * num_platforms);
|
||||||
|
|
||||||
|
|
||||||
|
/*******************************
|
||||||
|
* cl_khr_il_program extension *
|
||||||
|
*******************************/
|
||||||
|
#define cl_khr_il_program 1
|
||||||
|
|
||||||
|
/* New property to clGetDeviceInfo for retrieving supported intermediate
|
||||||
|
* languages
|
||||||
|
*/
|
||||||
|
#define CL_DEVICE_IL_VERSION_KHR 0x105B
|
||||||
|
|
||||||
|
/* New property to clGetProgramInfo for retrieving the IL of a
|
||||||
|
* program
|
||||||
|
*/
|
||||||
|
#define CL_PROGRAM_IL_KHR 0x1169
|
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_program CL_API_CALL
|
||||||
|
clCreateProgramWithILKHR(cl_context context,
|
||||||
|
const void * il,
|
||||||
|
size_t length,
|
||||||
|
cl_int * errcode_ret);
|
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_program
|
||||||
|
(CL_API_CALL *clCreateProgramWithILKHR_fn)(cl_context context,
|
||||||
|
const void * il,
|
||||||
|
size_t length,
|
||||||
|
cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
|
||||||
|
|
||||||
|
/* Extension: cl_khr_image2d_from_buffer
|
||||||
|
*
|
||||||
|
* This extension allows a 2D image to be created from a cl_mem buffer without
|
||||||
|
* a copy. The type associated with a 2D image created from a buffer in an
|
||||||
|
* OpenCL program is image2d_t. Both the sampler and sampler-less read_image
|
||||||
|
* built-in functions are supported for 2D images and 2D images created from
|
||||||
|
* a buffer. Similarly, the write_image built-ins are also supported for 2D
|
||||||
|
* images created from a buffer.
|
||||||
|
*
|
||||||
|
* When the 2D image from buffer is created, the client must specify the
|
||||||
|
* width, height, image format (i.e. channel order and channel data type)
|
||||||
|
* and optionally the row pitch.
|
||||||
|
*
|
||||||
|
* The pitch specified must be a multiple of
|
||||||
|
* CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR pixels.
|
||||||
|
* The base address of the buffer must be aligned to
|
||||||
|
* CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR pixels.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR 0x104A
|
||||||
|
#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR 0x104B
|
||||||
|
|
||||||
|
|
||||||
|
/**************************************
|
||||||
|
* cl_khr_initialize_memory extension *
|
||||||
|
**************************************/
|
||||||
|
|
||||||
|
#define CL_CONTEXT_MEMORY_INITIALIZE_KHR 0x2030
|
||||||
|
|
||||||
|
|
||||||
|
/**************************************
|
||||||
|
* cl_khr_terminate_context extension *
|
||||||
|
**************************************/
|
||||||
|
|
||||||
|
#define CL_DEVICE_TERMINATE_CAPABILITY_KHR 0x2031
|
||||||
|
#define CL_CONTEXT_TERMINATE_KHR 0x2032
|
||||||
|
|
||||||
|
#define cl_khr_terminate_context 1
|
||||||
|
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||||
|
clTerminateContextKHR(cl_context context) CL_EXT_SUFFIX__VERSION_1_2;
|
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_int
|
||||||
|
(CL_API_CALL *clTerminateContextKHR_fn)(cl_context context) CL_EXT_SUFFIX__VERSION_1_2;
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Extension: cl_khr_spir
|
||||||
|
*
|
||||||
|
* This extension adds support to create an OpenCL program object from a
|
||||||
|
* Standard Portable Intermediate Representation (SPIR) instance
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define CL_DEVICE_SPIR_VERSIONS 0x40E0
|
||||||
|
#define CL_PROGRAM_BINARY_TYPE_INTERMEDIATE 0x40E1
|
||||||
|
|
||||||
|
|
||||||
|
/*****************************************
|
||||||
|
* cl_khr_create_command_queue extension *
|
||||||
|
*****************************************/
|
||||||
|
#define cl_khr_create_command_queue 1
|
||||||
|
|
||||||
|
typedef cl_bitfield cl_queue_properties_khr;
|
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_command_queue CL_API_CALL
|
||||||
|
clCreateCommandQueueWithPropertiesKHR(cl_context context,
|
||||||
|
cl_device_id device,
|
||||||
|
const cl_queue_properties_khr* properties,
|
||||||
|
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
|
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_command_queue
|
||||||
|
(CL_API_CALL *clCreateCommandQueueWithPropertiesKHR_fn)(cl_context context,
|
||||||
|
cl_device_id device,
|
||||||
|
const cl_queue_properties_khr* properties,
|
||||||
|
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
|
||||||
|
|
||||||
|
|
||||||
|
/******************************************
|
||||||
|
* cl_nv_device_attribute_query extension *
|
||||||
|
******************************************/
|
||||||
|
|
||||||
|
/* cl_nv_device_attribute_query extension - no extension #define since it has no functions */
|
||||||
|
#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000
|
||||||
|
#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001
|
||||||
|
#define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002
|
||||||
|
#define CL_DEVICE_WARP_SIZE_NV 0x4003
|
||||||
|
#define CL_DEVICE_GPU_OVERLAP_NV 0x4004
|
||||||
|
#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005
|
||||||
|
#define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006
|
||||||
|
|
||||||
|
|
||||||
|
/*********************************
|
||||||
|
* cl_amd_device_attribute_query *
|
||||||
|
*********************************/
|
||||||
|
|
||||||
|
#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD 0x4036
|
||||||
|
|
||||||
|
|
||||||
|
/*********************************
|
||||||
|
* cl_arm_printf extension
|
||||||
|
*********************************/
|
||||||
|
|
||||||
|
#define CL_PRINTF_CALLBACK_ARM 0x40B0
|
||||||
|
#define CL_PRINTF_BUFFERSIZE_ARM 0x40B1
|
||||||
|
|
||||||
|
|
||||||
|
/***********************************
|
||||||
|
* cl_ext_device_fission extension
|
||||||
|
***********************************/
|
||||||
|
#define cl_ext_device_fission 1
|
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||||
|
clReleaseDeviceEXT(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1;
|
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_int
|
||||||
|
(CL_API_CALL *clReleaseDeviceEXT_fn)(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1;
|
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||||
|
clRetainDeviceEXT(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1;
|
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_int
|
||||||
|
(CL_API_CALL *clRetainDeviceEXT_fn)(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1;
|
||||||
|
|
||||||
|
typedef cl_ulong cl_device_partition_property_ext;
|
||||||
|
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||||
|
clCreateSubDevicesEXT(cl_device_id in_device,
|
||||||
|
const cl_device_partition_property_ext * properties,
|
||||||
|
cl_uint num_entries,
|
||||||
|
cl_device_id * out_devices,
|
||||||
|
cl_uint * num_devices) CL_EXT_SUFFIX__VERSION_1_1;
|
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_int
|
||||||
|
(CL_API_CALL * clCreateSubDevicesEXT_fn)(cl_device_id in_device,
|
||||||
|
const cl_device_partition_property_ext * properties,
|
||||||
|
cl_uint num_entries,
|
||||||
|
cl_device_id * out_devices,
|
||||||
|
cl_uint * num_devices) CL_EXT_SUFFIX__VERSION_1_1;
|
||||||
|
|
||||||
|
/* cl_device_partition_property_ext */
|
||||||
|
#define CL_DEVICE_PARTITION_EQUALLY_EXT 0x4050
|
||||||
|
#define CL_DEVICE_PARTITION_BY_COUNTS_EXT 0x4051
|
||||||
|
#define CL_DEVICE_PARTITION_BY_NAMES_EXT 0x4052
|
||||||
|
#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT 0x4053
|
||||||
|
|
||||||
|
/* clGetDeviceInfo selectors */
|
||||||
|
#define CL_DEVICE_PARENT_DEVICE_EXT 0x4054
|
||||||
|
#define CL_DEVICE_PARTITION_TYPES_EXT 0x4055
|
||||||
|
#define CL_DEVICE_AFFINITY_DOMAINS_EXT 0x4056
|
||||||
|
#define CL_DEVICE_REFERENCE_COUNT_EXT 0x4057
|
||||||
|
#define CL_DEVICE_PARTITION_STYLE_EXT 0x4058
|
||||||
|
|
||||||
|
/* error codes */
|
||||||
|
#define CL_DEVICE_PARTITION_FAILED_EXT -1057
|
||||||
|
#define CL_INVALID_PARTITION_COUNT_EXT -1058
|
||||||
|
#define CL_INVALID_PARTITION_NAME_EXT -1059
|
||||||
|
|
||||||
|
/* CL_AFFINITY_DOMAINs */
|
||||||
|
#define CL_AFFINITY_DOMAIN_L1_CACHE_EXT 0x1
|
||||||
|
#define CL_AFFINITY_DOMAIN_L2_CACHE_EXT 0x2
|
||||||
|
#define CL_AFFINITY_DOMAIN_L3_CACHE_EXT 0x3
|
||||||
|
#define CL_AFFINITY_DOMAIN_L4_CACHE_EXT 0x4
|
||||||
|
#define CL_AFFINITY_DOMAIN_NUMA_EXT 0x10
|
||||||
|
#define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT 0x100
|
||||||
|
|
||||||
|
/* cl_device_partition_property_ext list terminators */
|
||||||
|
#define CL_PROPERTIES_LIST_END_EXT ((cl_device_partition_property_ext) 0)
|
||||||
|
#define CL_PARTITION_BY_COUNTS_LIST_END_EXT ((cl_device_partition_property_ext) 0)
|
||||||
|
/* Terminator for CL_DEVICE_PARTITION_BY_NAMES_EXT lists. The cast applies to
 * the literal 0 before the subtraction, i.e. this is
 * (cl_device_partition_property_ext)0 - 1, not a cast of -1. Since the type is
 * a typedef of cl_ulong, the result is presumably the all-ones maximum value
 * (assumes cl_ulong is unsigned -- confirm in cl_platform.h), which keeps it
 * distinct from the 0-valued terminators defined above. */
#define CL_PARTITION_BY_NAMES_LIST_END_EXT ((cl_device_partition_property_ext) 0 - 1)
|
||||||
|
|
||||||
|
|
||||||
|
/***********************************
|
||||||
|
* cl_ext_migrate_memobject extension definitions
|
||||||
|
***********************************/
|
||||||
|
#define cl_ext_migrate_memobject 1
|
||||||
|
|
||||||
|
typedef cl_bitfield cl_mem_migration_flags_ext;
|
||||||
|
|
||||||
|
#define CL_MIGRATE_MEM_OBJECT_HOST_EXT 0x1
|
||||||
|
|
||||||
|
#define CL_COMMAND_MIGRATE_MEM_OBJECT_EXT 0x4040
|
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||||
|
clEnqueueMigrateMemObjectEXT(cl_command_queue command_queue,
|
||||||
|
cl_uint num_mem_objects,
|
||||||
|
const cl_mem * mem_objects,
|
||||||
|
cl_mem_migration_flags_ext flags,
|
||||||
|
cl_uint num_events_in_wait_list,
|
||||||
|
const cl_event * event_wait_list,
|
||||||
|
cl_event * event);
|
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_int
|
||||||
|
(CL_API_CALL *clEnqueueMigrateMemObjectEXT_fn)(cl_command_queue command_queue,
|
||||||
|
cl_uint num_mem_objects,
|
||||||
|
const cl_mem * mem_objects,
|
||||||
|
cl_mem_migration_flags_ext flags,
|
||||||
|
cl_uint num_events_in_wait_list,
|
||||||
|
const cl_event * event_wait_list,
|
||||||
|
cl_event * event);
|
||||||
|
|
||||||
|
|
||||||
|
/*********************************
|
||||||
|
* cl_qcom_ext_host_ptr extension
|
||||||
|
*********************************/
|
||||||
|
#define cl_qcom_ext_host_ptr 1
|
||||||
|
|
||||||
|
#define CL_MEM_EXT_HOST_PTR_QCOM (1 << 29)
|
||||||
|
|
||||||
|
#define CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM 0x40A0
|
||||||
|
#define CL_DEVICE_PAGE_SIZE_QCOM 0x40A1
|
||||||
|
#define CL_IMAGE_ROW_ALIGNMENT_QCOM 0x40A2
|
||||||
|
#define CL_IMAGE_SLICE_ALIGNMENT_QCOM 0x40A3
|
||||||
|
#define CL_MEM_HOST_UNCACHED_QCOM 0x40A4
|
||||||
|
#define CL_MEM_HOST_WRITEBACK_QCOM 0x40A5
|
||||||
|
#define CL_MEM_HOST_WRITETHROUGH_QCOM 0x40A6
|
||||||
|
#define CL_MEM_HOST_WRITE_COMBINING_QCOM 0x40A7
|
||||||
|
|
||||||
|
typedef cl_uint cl_image_pitch_info_qcom;
|
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||||
|
clGetDeviceImageInfoQCOM(cl_device_id device,
|
||||||
|
size_t image_width,
|
||||||
|
size_t image_height,
|
||||||
|
const cl_image_format *image_format,
|
||||||
|
cl_image_pitch_info_qcom param_name,
|
||||||
|
size_t param_value_size,
|
||||||
|
void *param_value,
|
||||||
|
size_t *param_value_size_ret);
|
||||||
|
|
||||||
|
typedef struct _cl_mem_ext_host_ptr
|
||||||
|
{
|
||||||
|
/* Type of external memory allocation. */
|
||||||
|
/* Legal values will be defined in layered extensions. */
|
||||||
|
cl_uint allocation_type;
|
||||||
|
|
||||||
|
/* Host cache policy for this external memory allocation. */
|
||||||
|
cl_uint host_cache_policy;
|
||||||
|
|
||||||
|
} cl_mem_ext_host_ptr;
|
||||||
|
|
||||||
|
|
||||||
|
/*******************************************
|
||||||
|
* cl_qcom_ext_host_ptr_iocoherent extension
|
||||||
|
********************************************/
|
||||||
|
|
||||||
|
/* Cache policy specifying io-coherence */
|
||||||
|
#define CL_MEM_HOST_IOCOHERENT_QCOM 0x40A9
|
||||||
|
|
||||||
|
|
||||||
|
/*********************************
|
||||||
|
* cl_qcom_ion_host_ptr extension
|
||||||
|
*********************************/
|
||||||
|
|
||||||
|
#define CL_MEM_ION_HOST_PTR_QCOM 0x40A8
|
||||||
|
|
||||||
|
typedef struct _cl_mem_ion_host_ptr
|
||||||
|
{
|
||||||
|
/* Type of external memory allocation. */
|
||||||
|
/* Must be CL_MEM_ION_HOST_PTR_QCOM for ION allocations. */
|
||||||
|
cl_mem_ext_host_ptr ext_host_ptr;
|
||||||
|
|
||||||
|
/* ION file descriptor */
|
||||||
|
int ion_filedesc;
|
||||||
|
|
||||||
|
/* Host pointer to the ION allocated memory */
|
||||||
|
void* ion_hostptr;
|
||||||
|
|
||||||
|
} cl_mem_ion_host_ptr;
|
||||||
|
|
||||||
|
|
||||||
|
/*********************************
|
||||||
|
* cl_qcom_android_native_buffer_host_ptr extension
|
||||||
|
*********************************/
|
||||||
|
|
||||||
|
#define CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM 0x40C6
|
||||||
|
|
||||||
|
typedef struct _cl_mem_android_native_buffer_host_ptr
|
||||||
|
{
|
||||||
|
/* Type of external memory allocation. */
|
||||||
|
/* Must be CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM for Android native buffers. */
|
||||||
|
cl_mem_ext_host_ptr ext_host_ptr;
|
||||||
|
|
||||||
|
/* Virtual pointer to the android native buffer */
|
||||||
|
void* anb_ptr;
|
||||||
|
|
||||||
|
} cl_mem_android_native_buffer_host_ptr;
|
||||||
|
|
||||||
|
|
||||||
|
/******************************************
|
||||||
|
* cl_img_yuv_image extension *
|
||||||
|
******************************************/
|
||||||
|
|
||||||
|
/* Image formats used in clCreateImage */
|
||||||
|
#define CL_NV21_IMG 0x40D0
|
||||||
|
#define CL_YV12_IMG 0x40D1
|
||||||
|
|
||||||
|
|
||||||
|
/******************************************
|
||||||
|
* cl_img_cached_allocations extension *
|
||||||
|
******************************************/
|
||||||
|
|
||||||
|
/* Flag values used by clCreateBuffer */
|
||||||
|
#define CL_MEM_USE_UNCACHED_CPU_MEMORY_IMG (1 << 26)
|
||||||
|
#define CL_MEM_USE_CACHED_CPU_MEMORY_IMG (1 << 27)
|
||||||
|
|
||||||
|
|
||||||
|
/******************************************
|
||||||
|
* cl_img_use_gralloc_ptr extension *
|
||||||
|
******************************************/
|
||||||
|
#define cl_img_use_gralloc_ptr 1
|
||||||
|
|
||||||
|
/* Flag values used by clCreateBuffer */
|
||||||
|
#define CL_MEM_USE_GRALLOC_PTR_IMG (1 << 28)
|
||||||
|
|
||||||
|
/* To be used by clGetEventInfo: */
|
||||||
|
#define CL_COMMAND_ACQUIRE_GRALLOC_OBJECTS_IMG 0x40D2
|
||||||
|
#define CL_COMMAND_RELEASE_GRALLOC_OBJECTS_IMG 0x40D3
|
||||||
|
|
||||||
|
/* Error code from clEnqueueReleaseGrallocObjectsIMG */
|
||||||
|
#define CL_GRALLOC_RESOURCE_NOT_ACQUIRED_IMG 0x40D4
|
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||||
|
clEnqueueAcquireGrallocObjectsIMG(cl_command_queue command_queue,
|
||||||
|
cl_uint num_objects,
|
||||||
|
const cl_mem * mem_objects,
|
||||||
|
cl_uint num_events_in_wait_list,
|
||||||
|
const cl_event * event_wait_list,
|
||||||
|
cl_event * event) CL_EXT_SUFFIX__VERSION_1_2;
|
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||||
|
clEnqueueReleaseGrallocObjectsIMG(cl_command_queue command_queue,
|
||||||
|
cl_uint num_objects,
|
||||||
|
const cl_mem * mem_objects,
|
||||||
|
cl_uint num_events_in_wait_list,
|
||||||
|
const cl_event * event_wait_list,
|
||||||
|
cl_event * event) CL_EXT_SUFFIX__VERSION_1_2;
|
||||||
|
|
||||||
|
|
||||||
|
/*********************************
|
||||||
|
* cl_khr_subgroups extension
|
||||||
|
*********************************/
|
||||||
|
#define cl_khr_subgroups 1
|
||||||
|
|
||||||
|
#if !defined(CL_VERSION_2_1)
|
||||||
|
/* For OpenCL 2.1 and newer, cl_kernel_sub_group_info is declared in CL.h.
|
||||||
|
In hindsight, there should have been a khr suffix on this type for
|
||||||
|
the extension, but keeping it un-suffixed to maintain backwards
|
||||||
|
compatibility. */
|
||||||
|
typedef cl_uint cl_kernel_sub_group_info;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* cl_kernel_sub_group_info */
|
||||||
|
#define CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR 0x2033
|
||||||
|
#define CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR 0x2034
|
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||||
|
clGetKernelSubGroupInfoKHR(cl_kernel in_kernel,
|
||||||
|
cl_device_id in_device,
|
||||||
|
cl_kernel_sub_group_info param_name,
|
||||||
|
size_t input_value_size,
|
||||||
|
const void * input_value,
|
||||||
|
size_t param_value_size,
|
||||||
|
void * param_value,
|
||||||
|
size_t * param_value_size_ret) CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED;
|
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_int
|
||||||
|
(CL_API_CALL * clGetKernelSubGroupInfoKHR_fn)(cl_kernel in_kernel,
|
||||||
|
cl_device_id in_device,
|
||||||
|
cl_kernel_sub_group_info param_name,
|
||||||
|
size_t input_value_size,
|
||||||
|
const void * input_value,
|
||||||
|
size_t param_value_size,
|
||||||
|
void * param_value,
|
||||||
|
size_t * param_value_size_ret) CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED;
|
||||||
|
|
||||||
|
|
||||||
|
/*********************************
|
||||||
|
* cl_khr_mipmap_image extension
|
||||||
|
*********************************/
|
||||||
|
|
||||||
|
/* cl_sampler_properties */
|
||||||
|
#define CL_SAMPLER_MIP_FILTER_MODE_KHR 0x1155
|
||||||
|
#define CL_SAMPLER_LOD_MIN_KHR 0x1156
|
||||||
|
#define CL_SAMPLER_LOD_MAX_KHR 0x1157
|
||||||
|
|
||||||
|
|
||||||
|
/*********************************
|
||||||
|
* cl_khr_priority_hints extension
|
||||||
|
*********************************/
|
||||||
|
/* This extension define is for backwards compatibility.
|
||||||
|
It shouldn't be required since this extension has no new functions. */
|
||||||
|
#define cl_khr_priority_hints 1
|
||||||
|
|
||||||
|
typedef cl_uint cl_queue_priority_khr;
|
||||||
|
|
||||||
|
/* cl_command_queue_properties */
|
||||||
|
#define CL_QUEUE_PRIORITY_KHR 0x1096
|
||||||
|
|
||||||
|
/* cl_queue_priority_khr */
|
||||||
|
#define CL_QUEUE_PRIORITY_HIGH_KHR (1<<0)
|
||||||
|
#define CL_QUEUE_PRIORITY_MED_KHR (1<<1)
|
||||||
|
#define CL_QUEUE_PRIORITY_LOW_KHR (1<<2)
|
||||||
|
|
||||||
|
|
||||||
|
/*********************************
|
||||||
|
* cl_khr_throttle_hints extension
|
||||||
|
*********************************/
|
||||||
|
/* This extension define is for backwards compatibility.
|
||||||
|
It shouldn't be required since this extension has no new functions. */
|
||||||
|
#define cl_khr_throttle_hints 1
|
||||||
|
|
||||||
|
typedef cl_uint cl_queue_throttle_khr;
|
||||||
|
|
||||||
|
/* cl_command_queue_properties */
|
||||||
|
#define CL_QUEUE_THROTTLE_KHR 0x1097
|
||||||
|
|
||||||
|
/* cl_queue_throttle_khr */
|
||||||
|
#define CL_QUEUE_THROTTLE_HIGH_KHR (1<<0)
|
||||||
|
#define CL_QUEUE_THROTTLE_MED_KHR (1<<1)
|
||||||
|
#define CL_QUEUE_THROTTLE_LOW_KHR (1<<2)
|
||||||
|
|
||||||
|
|
||||||
|
/*********************************
|
||||||
|
* cl_khr_subgroup_named_barrier
|
||||||
|
*********************************/
|
||||||
|
/* This extension define is for backwards compatibility.
|
||||||
|
It shouldn't be required since this extension has no new functions. */
|
||||||
|
#define cl_khr_subgroup_named_barrier 1
|
||||||
|
|
||||||
|
/* cl_device_info */
|
||||||
|
#define CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR 0x2035
|
||||||
|
|
||||||
|
|
||||||
|
/**********************************
|
||||||
|
* cl_arm_import_memory extension *
|
||||||
|
**********************************/
|
||||||
|
#define cl_arm_import_memory 1
|
||||||
|
|
||||||
|
typedef intptr_t cl_import_properties_arm;
|
||||||
|
|
||||||
|
/* Default and valid property name for cl_arm_import_memory */
|
||||||
|
#define CL_IMPORT_TYPE_ARM 0x40B2
|
||||||
|
|
||||||
|
/* Host process memory type default value for CL_IMPORT_TYPE_ARM property */
|
||||||
|
#define CL_IMPORT_TYPE_HOST_ARM 0x40B3
|
||||||
|
|
||||||
|
/* DMA BUF memory type value for CL_IMPORT_TYPE_ARM property */
|
||||||
|
#define CL_IMPORT_TYPE_DMA_BUF_ARM 0x40B4
|
||||||
|
|
||||||
|
/* Protected DMA BUF memory type value for CL_IMPORT_TYPE_ARM property */
|
||||||
|
#define CL_IMPORT_TYPE_PROTECTED_ARM 0x40B5
|
||||||
|
|
||||||
|
/* This extension adds a new function that allows for direct memory import into
|
||||||
|
* OpenCL via the clImportMemoryARM function.
|
||||||
|
*
|
||||||
|
* Memory imported through this interface will be mapped into the device's page
|
||||||
|
* tables directly, providing zero copy access. It will never fall back to copy
|
||||||
|
* operations and aliased buffers.
|
||||||
|
*
|
||||||
|
* Types of memory supported for import are specified as additional extension
|
||||||
|
* strings.
|
||||||
|
*
|
||||||
|
* This extension produces cl_mem allocations which are compatible with all other
|
||||||
|
* users of cl_mem in the standard API.
|
||||||
|
*
|
||||||
|
* This extension maps pages with the same properties as the normal buffer creation
|
||||||
|
* function clCreateBuffer.
|
||||||
|
*/
|
||||||
|
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||||
|
clImportMemoryARM( cl_context context,
|
||||||
|
cl_mem_flags flags,
|
||||||
|
const cl_import_properties_arm *properties,
|
||||||
|
void *memory,
|
||||||
|
size_t size,
|
||||||
|
cl_int *errcode_ret) CL_EXT_SUFFIX__VERSION_1_0;
|
||||||
|
|
||||||
|
|
||||||
|
/******************************************
|
||||||
|
* cl_arm_shared_virtual_memory extension *
|
||||||
|
******************************************/
|
||||||
|
#define cl_arm_shared_virtual_memory 1
|
||||||
|
|
||||||
|
/* Used by clGetDeviceInfo */
|
||||||
|
#define CL_DEVICE_SVM_CAPABILITIES_ARM 0x40B6
|
||||||
|
|
||||||
|
/* Used by clGetMemObjectInfo */
|
||||||
|
#define CL_MEM_USES_SVM_POINTER_ARM 0x40B7
|
||||||
|
|
||||||
|
/* Used by clSetKernelExecInfoARM: */
|
||||||
|
#define CL_KERNEL_EXEC_INFO_SVM_PTRS_ARM 0x40B8
|
||||||
|
#define CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_ARM 0x40B9
|
||||||
|
|
||||||
|
/* To be used by clGetEventInfo: */
|
||||||
|
#define CL_COMMAND_SVM_FREE_ARM 0x40BA
|
||||||
|
#define CL_COMMAND_SVM_MEMCPY_ARM 0x40BB
|
||||||
|
#define CL_COMMAND_SVM_MEMFILL_ARM 0x40BC
|
||||||
|
#define CL_COMMAND_SVM_MAP_ARM 0x40BD
|
||||||
|
#define CL_COMMAND_SVM_UNMAP_ARM 0x40BE
|
||||||
|
|
||||||
|
/* Flag values returned by clGetDeviceInfo with CL_DEVICE_SVM_CAPABILITIES_ARM as the param_name. */
|
||||||
|
#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_ARM (1 << 0)
|
||||||
|
#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER_ARM (1 << 1)
|
||||||
|
#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM_ARM (1 << 2)
|
||||||
|
#define CL_DEVICE_SVM_ATOMICS_ARM (1 << 3)
|
||||||
|
|
||||||
|
/* Flag values used by clSVMAllocARM: */
|
||||||
|
#define CL_MEM_SVM_FINE_GRAIN_BUFFER_ARM (1 << 10)
|
||||||
|
#define CL_MEM_SVM_ATOMICS_ARM (1 << 11)
|
||||||
|
|
||||||
|
typedef cl_bitfield cl_svm_mem_flags_arm;
|
||||||
|
typedef cl_uint cl_kernel_exec_info_arm;
|
||||||
|
typedef cl_bitfield cl_device_svm_capabilities_arm;
|
||||||
|
|
||||||
|
extern CL_API_ENTRY void * CL_API_CALL
|
||||||
|
clSVMAllocARM(cl_context context,
|
||||||
|
cl_svm_mem_flags_arm flags,
|
||||||
|
size_t size,
|
||||||
|
cl_uint alignment) CL_EXT_SUFFIX__VERSION_1_2;
|
||||||
|
|
||||||
|
extern CL_API_ENTRY void CL_API_CALL
|
||||||
|
clSVMFreeARM(cl_context context,
|
||||||
|
void * svm_pointer) CL_EXT_SUFFIX__VERSION_1_2;
|
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||||
|
clEnqueueSVMFreeARM(cl_command_queue command_queue,
|
||||||
|
cl_uint num_svm_pointers,
|
||||||
|
void * svm_pointers[],
|
||||||
|
void (CL_CALLBACK * pfn_free_func)(cl_command_queue queue,
|
||||||
|
cl_uint num_svm_pointers,
|
||||||
|
void * svm_pointers[],
|
||||||
|
void * user_data),
|
||||||
|
void * user_data,
|
||||||
|
cl_uint num_events_in_wait_list,
|
||||||
|
const cl_event * event_wait_list,
|
||||||
|
cl_event * event) CL_EXT_SUFFIX__VERSION_1_2;
|
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||||
|
clEnqueueSVMMemcpyARM(cl_command_queue command_queue,
|
||||||
|
cl_bool blocking_copy,
|
||||||
|
void * dst_ptr,
|
||||||
|
const void * src_ptr,
|
||||||
|
size_t size,
|
||||||
|
cl_uint num_events_in_wait_list,
|
||||||
|
const cl_event * event_wait_list,
|
||||||
|
cl_event * event) CL_EXT_SUFFIX__VERSION_1_2;
|
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||||
|
clEnqueueSVMMemFillARM(cl_command_queue command_queue,
|
||||||
|
void * svm_ptr,
|
||||||
|
const void * pattern,
|
||||||
|
size_t pattern_size,
|
||||||
|
size_t size,
|
||||||
|
cl_uint num_events_in_wait_list,
|
||||||
|
const cl_event * event_wait_list,
|
||||||
|
cl_event * event) CL_EXT_SUFFIX__VERSION_1_2;
|
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||||
|
clEnqueueSVMMapARM(cl_command_queue command_queue,
|
||||||
|
cl_bool blocking_map,
|
||||||
|
cl_map_flags flags,
|
||||||
|
void * svm_ptr,
|
||||||
|
size_t size,
|
||||||
|
cl_uint num_events_in_wait_list,
|
||||||
|
const cl_event * event_wait_list,
|
||||||
|
cl_event * event) CL_EXT_SUFFIX__VERSION_1_2;
|
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||||
|
clEnqueueSVMUnmapARM(cl_command_queue command_queue,
|
||||||
|
void * svm_ptr,
|
||||||
|
cl_uint num_events_in_wait_list,
|
||||||
|
const cl_event * event_wait_list,
|
||||||
|
cl_event * event) CL_EXT_SUFFIX__VERSION_1_2;
|
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||||
|
clSetKernelArgSVMPointerARM(cl_kernel kernel,
|
||||||
|
cl_uint arg_index,
|
||||||
|
const void * arg_value) CL_EXT_SUFFIX__VERSION_1_2;
|
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||||
|
clSetKernelExecInfoARM(cl_kernel kernel,
|
||||||
|
cl_kernel_exec_info_arm param_name,
|
||||||
|
size_t param_value_size,
|
||||||
|
const void * param_value) CL_EXT_SUFFIX__VERSION_1_2;
|
||||||
|
|
||||||
|
/********************************
|
||||||
|
* cl_arm_get_core_id extension *
|
||||||
|
********************************/
|
||||||
|
|
||||||
|
#ifdef CL_VERSION_1_2
|
||||||
|
|
||||||
|
#define cl_arm_get_core_id 1
|
||||||
|
|
||||||
|
/* Device info property for bitfield of cores present */
|
||||||
|
#define CL_DEVICE_COMPUTE_UNITS_BITFIELD_ARM 0x40BF
|
||||||
|
|
||||||
|
#endif /* CL_VERSION_1_2 */
|
||||||
|
|
||||||
|
/*********************************
|
||||||
|
* cl_arm_job_slot_selection
|
||||||
|
*********************************/
|
||||||
|
|
||||||
|
#define cl_arm_job_slot_selection 1
|
||||||
|
|
||||||
|
/* cl_device_info */
|
||||||
|
#define CL_DEVICE_JOB_SLOTS_ARM 0x41E0
|
||||||
|
|
||||||
|
/* cl_command_queue_properties */
|
||||||
|
#define CL_QUEUE_JOB_SLOT_ARM 0x41E1
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#endif /* __CL_EXT_H */
|
||||||
423
benchmarks/opencl/include/CL/cl_ext_intel.h
Normal file
423
benchmarks/opencl/include/CL/cl_ext_intel.h
Normal file
@@ -0,0 +1,423 @@
|
|||||||
|
/*******************************************************************************
|
||||||
|
* Copyright (c) 2008-2019 The Khronos Group Inc.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and/or associated documentation files (the
|
||||||
|
* "Materials"), to deal in the Materials without restriction, including
|
||||||
|
* without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||||
|
* permit persons to whom the Materials are furnished to do so, subject to
|
||||||
|
* the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included
|
||||||
|
* in all copies or substantial portions of the Materials.
|
||||||
|
*
|
||||||
|
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||||
|
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||||
|
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||||
|
* https://www.khronos.org/registry/
|
||||||
|
*
|
||||||
|
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||||
|
******************************************************************************/
|
||||||
|
/*****************************************************************************\
|
||||||
|
|
||||||
|
Copyright (c) 2013-2019 Intel Corporation All Rights Reserved.
|
||||||
|
|
||||||
|
THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
|
||||||
|
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||||
|
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
|
||||||
|
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE
|
||||||
|
MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
File Name: cl_ext_intel.h
|
||||||
|
|
||||||
|
Abstract:
|
||||||
|
|
||||||
|
Notes:
|
||||||
|
|
||||||
|
\*****************************************************************************/
|
||||||
|
|
||||||
|
#ifndef __CL_EXT_INTEL_H
|
||||||
|
#define __CL_EXT_INTEL_H
|
||||||
|
|
||||||
|
#include <CL/cl.h>
|
||||||
|
#include <CL/cl_platform.h>
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/***************************************
|
||||||
|
* cl_intel_thread_local_exec extension *
|
||||||
|
****************************************/
|
||||||
|
|
||||||
|
#define cl_intel_thread_local_exec 1
|
||||||
|
|
||||||
|
#define CL_QUEUE_THREAD_LOCAL_EXEC_ENABLE_INTEL (((cl_bitfield)1) << 31)
|
||||||
|
|
||||||
|
/***********************************************
|
||||||
|
* cl_intel_device_partition_by_names extension *
|
||||||
|
************************************************/
|
||||||
|
|
||||||
|
#define cl_intel_device_partition_by_names 1
|
||||||
|
|
||||||
|
#define CL_DEVICE_PARTITION_BY_NAMES_INTEL 0x4052
|
||||||
|
#define CL_PARTITION_BY_NAMES_LIST_END_INTEL -1
|
||||||
|
|
||||||
|
/************************************************
|
||||||
|
* cl_intel_accelerator extension *
|
||||||
|
* cl_intel_motion_estimation extension *
|
||||||
|
* cl_intel_advanced_motion_estimation extension *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
#define cl_intel_accelerator 1
|
||||||
|
#define cl_intel_motion_estimation 1
|
||||||
|
#define cl_intel_advanced_motion_estimation 1
|
||||||
|
|
||||||
|
typedef struct _cl_accelerator_intel* cl_accelerator_intel;
|
||||||
|
typedef cl_uint cl_accelerator_type_intel;
|
||||||
|
typedef cl_uint cl_accelerator_info_intel;
|
||||||
|
|
||||||
|
typedef struct _cl_motion_estimation_desc_intel { /* descriptor made of four cl_uint selector fields */
|
||||||
|
cl_uint mb_block_type; /* presumably one of CL_ME_MB_TYPE_*_INTEL -- TODO confirm against the extension spec */
|
||||||
|
cl_uint subpixel_mode; /* presumably one of CL_ME_SUBPIXEL_MODE_*_INTEL -- TODO confirm */
|
||||||
|
cl_uint sad_adjust_mode; /* presumably one of CL_ME_SAD_ADJUST_MODE_*_INTEL -- TODO confirm */
|
||||||
|
cl_uint search_path_type; /* presumably one of CL_ME_SEARCH_PATH_RADIUS_*_INTEL -- TODO confirm */
|
||||||
|
} cl_motion_estimation_desc_intel;
|
||||||
|
|
||||||
|
/* error codes */
|
||||||
|
#define CL_INVALID_ACCELERATOR_INTEL -1094
|
||||||
|
#define CL_INVALID_ACCELERATOR_TYPE_INTEL -1095
|
||||||
|
#define CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL -1096
|
||||||
|
#define CL_ACCELERATOR_TYPE_NOT_SUPPORTED_INTEL -1097
|
||||||
|
|
||||||
|
/* cl_accelerator_type_intel */
|
||||||
|
#define CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL 0x0
|
||||||
|
|
||||||
|
/* cl_accelerator_info_intel */
|
||||||
|
#define CL_ACCELERATOR_DESCRIPTOR_INTEL 0x4090
|
||||||
|
#define CL_ACCELERATOR_REFERENCE_COUNT_INTEL 0x4091
|
||||||
|
#define CL_ACCELERATOR_CONTEXT_INTEL 0x4092
|
||||||
|
#define CL_ACCELERATOR_TYPE_INTEL 0x4093
|
||||||
|
|
||||||
|
/* cl_motion_estimation_desc_intel flags */
|
||||||
|
#define CL_ME_MB_TYPE_16x16_INTEL 0x0
|
||||||
|
#define CL_ME_MB_TYPE_8x8_INTEL 0x1
|
||||||
|
#define CL_ME_MB_TYPE_4x4_INTEL 0x2
|
||||||
|
|
||||||
|
#define CL_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0
|
||||||
|
#define CL_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1
|
||||||
|
#define CL_ME_SUBPIXEL_MODE_QPEL_INTEL 0x2
|
||||||
|
|
||||||
|
#define CL_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0
|
||||||
|
#define CL_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x1
|
||||||
|
|
||||||
|
#define CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL 0x0
|
||||||
|
#define CL_ME_SEARCH_PATH_RADIUS_4_4_INTEL 0x1
|
||||||
|
#define CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL 0x5
|
||||||
|
|
||||||
|
#define CL_ME_SKIP_BLOCK_TYPE_16x16_INTEL 0x0
|
||||||
|
#define CL_ME_CHROMA_INTRA_PREDICT_ENABLED_INTEL 0x1
|
||||||
|
#define CL_ME_LUMA_INTRA_PREDICT_ENABLED_INTEL 0x2
|
||||||
|
#define CL_ME_SKIP_BLOCK_TYPE_8x8_INTEL 0x4
|
||||||
|
|
||||||
|
#define CL_ME_FORWARD_INPUT_MODE_INTEL 0x1
|
||||||
|
#define CL_ME_BACKWARD_INPUT_MODE_INTEL 0x2
|
||||||
|
#define CL_ME_BIDIRECTION_INPUT_MODE_INTEL 0x3
|
||||||
|
|
||||||
|
#define CL_ME_BIDIR_WEIGHT_QUARTER_INTEL 16
|
||||||
|
#define CL_ME_BIDIR_WEIGHT_THIRD_INTEL 21
|
||||||
|
#define CL_ME_BIDIR_WEIGHT_HALF_INTEL 32
|
||||||
|
#define CL_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 43
|
||||||
|
#define CL_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 48
|
||||||
|
|
||||||
|
#define CL_ME_COST_PENALTY_NONE_INTEL 0x0
|
||||||
|
#define CL_ME_COST_PENALTY_LOW_INTEL 0x1
|
||||||
|
#define CL_ME_COST_PENALTY_NORMAL_INTEL 0x2
|
||||||
|
#define CL_ME_COST_PENALTY_HIGH_INTEL 0x3
|
||||||
|
|
||||||
|
#define CL_ME_COST_PRECISION_QPEL_INTEL 0x0
|
||||||
|
#define CL_ME_COST_PRECISION_HPEL_INTEL 0x1
|
||||||
|
#define CL_ME_COST_PRECISION_PEL_INTEL 0x2
|
||||||
|
#define CL_ME_COST_PRECISION_DPEL_INTEL 0x3
|
||||||
|
|
||||||
|
#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0
|
||||||
|
#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
|
||||||
|
#define CL_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2
|
||||||
|
#define CL_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3
|
||||||
|
|
||||||
|
#define CL_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4
|
||||||
|
#define CL_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4 /* NOTE(review): same value (0x4) as CL_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL -- confirm against the extension spec */
|
||||||
|
#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5
|
||||||
|
#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6
|
||||||
|
#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7
|
||||||
|
#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8
|
||||||
|
|
||||||
|
#define CL_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0
|
||||||
|
#define CL_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
|
||||||
|
#define CL_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2
|
||||||
|
#define CL_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3
|
||||||
|
|
||||||
|
/* cl_device_info */
|
||||||
|
#define CL_DEVICE_ME_VERSION_INTEL 0x407E
|
||||||
|
|
||||||
|
#define CL_ME_VERSION_LEGACY_INTEL 0x0
|
||||||
|
#define CL_ME_VERSION_ADVANCED_VER_1_INTEL 0x1
|
||||||
|
#define CL_ME_VERSION_ADVANCED_VER_2_INTEL 0x2
|
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_accelerator_intel CL_API_CALL
|
||||||
|
clCreateAcceleratorINTEL(
|
||||||
|
cl_context context,
|
||||||
|
cl_accelerator_type_intel accelerator_type,
|
||||||
|
size_t descriptor_size,
|
||||||
|
const void* descriptor,
|
||||||
|
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
|
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_accelerator_intel (CL_API_CALL *clCreateAcceleratorINTEL_fn)(
|
||||||
|
cl_context context,
|
||||||
|
cl_accelerator_type_intel accelerator_type,
|
||||||
|
size_t descriptor_size,
|
||||||
|
const void* descriptor,
|
||||||
|
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
|
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||||
|
clGetAcceleratorInfoINTEL(
|
||||||
|
cl_accelerator_intel accelerator,
|
||||||
|
cl_accelerator_info_intel param_name,
|
||||||
|
size_t param_value_size,
|
||||||
|
void* param_value,
|
||||||
|
size_t* param_value_size_ret) CL_EXT_SUFFIX__VERSION_1_2;
|
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetAcceleratorInfoINTEL_fn)(
|
||||||
|
cl_accelerator_intel accelerator,
|
||||||
|
cl_accelerator_info_intel param_name,
|
||||||
|
size_t param_value_size,
|
||||||
|
void* param_value,
|
||||||
|
size_t* param_value_size_ret) CL_EXT_SUFFIX__VERSION_1_2;
|
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||||
|
clRetainAcceleratorINTEL(
|
||||||
|
cl_accelerator_intel accelerator) CL_EXT_SUFFIX__VERSION_1_2;
|
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_int (CL_API_CALL *clRetainAcceleratorINTEL_fn)(
|
||||||
|
cl_accelerator_intel accelerator) CL_EXT_SUFFIX__VERSION_1_2;
|
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||||
|
clReleaseAcceleratorINTEL(
|
||||||
|
cl_accelerator_intel accelerator) CL_EXT_SUFFIX__VERSION_1_2;
|
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_int (CL_API_CALL *clReleaseAcceleratorINTEL_fn)(
|
||||||
|
cl_accelerator_intel accelerator) CL_EXT_SUFFIX__VERSION_1_2;
|
||||||
|
|
||||||
|
/******************************************
|
||||||
|
* cl_intel_simultaneous_sharing extension *
|
||||||
|
*******************************************/
|
||||||
|
|
||||||
|
#define cl_intel_simultaneous_sharing 1
|
||||||
|
|
||||||
|
#define CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL 0x4104
|
||||||
|
#define CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL 0x4105
|
||||||
|
|
||||||
|
/***********************************
|
||||||
|
* cl_intel_egl_image_yuv extension *
|
||||||
|
************************************/
|
||||||
|
|
||||||
|
#define cl_intel_egl_image_yuv 1
|
||||||
|
|
||||||
|
#define CL_EGL_YUV_PLANE_INTEL 0x4107
|
||||||
|
|
||||||
|
/********************************
|
||||||
|
* cl_intel_packed_yuv extension *
|
||||||
|
*********************************/
|
||||||
|
|
||||||
|
#define cl_intel_packed_yuv 1
|
||||||
|
|
||||||
|
#define CL_YUYV_INTEL 0x4076
|
||||||
|
#define CL_UYVY_INTEL 0x4077
|
||||||
|
#define CL_YVYU_INTEL 0x4078
|
||||||
|
#define CL_VYUY_INTEL 0x4079
|
||||||
|
|
||||||
|
/********************************************
|
||||||
|
* cl_intel_required_subgroup_size extension *
|
||||||
|
*********************************************/
|
||||||
|
|
||||||
|
#define cl_intel_required_subgroup_size 1
|
||||||
|
|
||||||
|
#define CL_DEVICE_SUB_GROUP_SIZES_INTEL 0x4108
|
||||||
|
#define CL_KERNEL_SPILL_MEM_SIZE_INTEL 0x4109
|
||||||
|
#define CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL 0x410A
|
||||||
|
|
||||||
|
/****************************************
|
||||||
|
* cl_intel_driver_diagnostics extension *
|
||||||
|
*****************************************/
|
||||||
|
|
||||||
|
#define cl_intel_driver_diagnostics 1
|
||||||
|
|
||||||
|
typedef cl_uint cl_diagnostics_verbose_level;
|
||||||
|
|
||||||
|
#define CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL 0x4106
|
||||||
|
|
||||||
|
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL ( 0xff )
|
||||||
|
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL ( 1 )
|
||||||
|
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL ( 1 << 1 )
|
||||||
|
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL ( 1 << 2 )
|
||||||
|
|
||||||
|
/********************************
|
||||||
|
* cl_intel_planar_yuv extension *
|
||||||
|
*********************************/
|
||||||
|
|
||||||
|
#define CL_NV12_INTEL 0x410E
|
||||||
|
|
||||||
|
#define CL_MEM_NO_ACCESS_INTEL ( 1 << 24 )
|
||||||
|
#define CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL ( 1 << 25 )
|
||||||
|
|
||||||
|
#define CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL 0x417E
|
||||||
|
#define CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL 0x417F
|
||||||
|
|
||||||
|
/*******************************************************
|
||||||
|
* cl_intel_device_side_avc_motion_estimation extension *
|
||||||
|
********************************************************/
|
||||||
|
|
||||||
|
#define CL_DEVICE_AVC_ME_VERSION_INTEL 0x410B
|
||||||
|
#define CL_DEVICE_AVC_ME_SUPPORTS_TEXTURE_SAMPLER_USE_INTEL 0x410C
|
||||||
|
#define CL_DEVICE_AVC_ME_SUPPORTS_PREEMPTION_INTEL 0x410D
|
||||||
|
|
||||||
|
#define CL_AVC_ME_VERSION_0_INTEL 0x0 /* No support. (no trailing ';' so the macro is usable inside expressions) */
|
||||||
|
#define CL_AVC_ME_VERSION_1_INTEL 0x1 /* First supported version. (no trailing ';' so the macro is usable inside expressions) */
|
||||||
|
|
||||||
|
#define CL_AVC_ME_MAJOR_16x16_INTEL 0x0
|
||||||
|
#define CL_AVC_ME_MAJOR_16x8_INTEL 0x1
|
||||||
|
#define CL_AVC_ME_MAJOR_8x16_INTEL 0x2
|
||||||
|
#define CL_AVC_ME_MAJOR_8x8_INTEL 0x3
|
||||||
|
|
||||||
|
#define CL_AVC_ME_MINOR_8x8_INTEL 0x0
|
||||||
|
#define CL_AVC_ME_MINOR_8x4_INTEL 0x1
|
||||||
|
#define CL_AVC_ME_MINOR_4x8_INTEL 0x2
|
||||||
|
#define CL_AVC_ME_MINOR_4x4_INTEL 0x3
|
||||||
|
|
||||||
|
#define CL_AVC_ME_MAJOR_FORWARD_INTEL 0x0
|
||||||
|
#define CL_AVC_ME_MAJOR_BACKWARD_INTEL 0x1
|
||||||
|
#define CL_AVC_ME_MAJOR_BIDIRECTIONAL_INTEL 0x2
|
||||||
|
|
||||||
|
#define CL_AVC_ME_PARTITION_MASK_ALL_INTEL 0x0
|
||||||
|
#define CL_AVC_ME_PARTITION_MASK_16x16_INTEL 0x7E
|
||||||
|
#define CL_AVC_ME_PARTITION_MASK_16x8_INTEL 0x7D
|
||||||
|
#define CL_AVC_ME_PARTITION_MASK_8x16_INTEL 0x7B
|
||||||
|
#define CL_AVC_ME_PARTITION_MASK_8x8_INTEL 0x77
|
||||||
|
#define CL_AVC_ME_PARTITION_MASK_8x4_INTEL 0x6F
|
||||||
|
#define CL_AVC_ME_PARTITION_MASK_4x8_INTEL 0x5F
|
||||||
|
#define CL_AVC_ME_PARTITION_MASK_4x4_INTEL 0x3F
|
||||||
|
|
||||||
|
#define CL_AVC_ME_SEARCH_WINDOW_EXHAUSTIVE_INTEL 0x0
|
||||||
|
#define CL_AVC_ME_SEARCH_WINDOW_SMALL_INTEL 0x1
|
||||||
|
#define CL_AVC_ME_SEARCH_WINDOW_TINY_INTEL 0x2
|
||||||
|
#define CL_AVC_ME_SEARCH_WINDOW_EXTRA_TINY_INTEL 0x3
|
||||||
|
#define CL_AVC_ME_SEARCH_WINDOW_DIAMOND_INTEL 0x4
|
||||||
|
#define CL_AVC_ME_SEARCH_WINDOW_LARGE_DIAMOND_INTEL 0x5
|
||||||
|
#define CL_AVC_ME_SEARCH_WINDOW_RESERVED0_INTEL 0x6
|
||||||
|
#define CL_AVC_ME_SEARCH_WINDOW_RESERVED1_INTEL 0x7
|
||||||
|
#define CL_AVC_ME_SEARCH_WINDOW_CUSTOM_INTEL 0x8
|
||||||
|
#define CL_AVC_ME_SEARCH_WINDOW_16x12_RADIUS_INTEL 0x9
|
||||||
|
#define CL_AVC_ME_SEARCH_WINDOW_4x4_RADIUS_INTEL 0x2
|
||||||
|
#define CL_AVC_ME_SEARCH_WINDOW_2x2_RADIUS_INTEL 0xa
|
||||||
|
|
||||||
|
#define CL_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0
|
||||||
|
#define CL_AVC_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x2
|
||||||
|
|
||||||
|
#define CL_AVC_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0
|
||||||
|
#define CL_AVC_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1
|
||||||
|
#define CL_AVC_ME_SUBPIXEL_MODE_QPEL_INTEL 0x3
|
||||||
|
|
||||||
|
#define CL_AVC_ME_COST_PRECISION_QPEL_INTEL 0x0
|
||||||
|
#define CL_AVC_ME_COST_PRECISION_HPEL_INTEL 0x1
|
||||||
|
#define CL_AVC_ME_COST_PRECISION_PEL_INTEL 0x2
|
||||||
|
#define CL_AVC_ME_COST_PRECISION_DPEL_INTEL 0x3
|
||||||
|
|
||||||
|
#define CL_AVC_ME_BIDIR_WEIGHT_QUARTER_INTEL 0x10
|
||||||
|
#define CL_AVC_ME_BIDIR_WEIGHT_THIRD_INTEL 0x15
|
||||||
|
#define CL_AVC_ME_BIDIR_WEIGHT_HALF_INTEL 0x20
|
||||||
|
#define CL_AVC_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 0x2B
|
||||||
|
#define CL_AVC_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 0x30
|
||||||
|
|
||||||
|
#define CL_AVC_ME_BORDER_REACHED_LEFT_INTEL 0x0
|
||||||
|
#define CL_AVC_ME_BORDER_REACHED_RIGHT_INTEL 0x2
|
||||||
|
#define CL_AVC_ME_BORDER_REACHED_TOP_INTEL 0x4
|
||||||
|
#define CL_AVC_ME_BORDER_REACHED_BOTTOM_INTEL 0x8
|
||||||
|
|
||||||
|
#define CL_AVC_ME_SKIP_BLOCK_PARTITION_16x16_INTEL 0x0
|
||||||
|
#define CL_AVC_ME_SKIP_BLOCK_PARTITION_8x8_INTEL 0x4000
|
||||||
|
|
||||||
|
#define CL_AVC_ME_SKIP_BLOCK_16x16_FORWARD_ENABLE_INTEL ( 0x1 << 24 )
|
||||||
|
#define CL_AVC_ME_SKIP_BLOCK_16x16_BACKWARD_ENABLE_INTEL ( 0x2 << 24 )
|
||||||
|
#define CL_AVC_ME_SKIP_BLOCK_16x16_DUAL_ENABLE_INTEL ( 0x3 << 24 )
|
||||||
|
#define CL_AVC_ME_SKIP_BLOCK_8x8_FORWARD_ENABLE_INTEL ( 0x55 << 24 )
|
||||||
|
#define CL_AVC_ME_SKIP_BLOCK_8x8_BACKWARD_ENABLE_INTEL ( 0xAAu << 24 ) /* unsigned literal: 0xAA << 24 does not fit in a 32-bit signed int */
|
||||||
|
#define CL_AVC_ME_SKIP_BLOCK_8x8_DUAL_ENABLE_INTEL ( 0xFFu << 24 ) /* unsigned literal: 0xFF << 24 does not fit in a 32-bit signed int */
|
||||||
|
#define CL_AVC_ME_SKIP_BLOCK_8x8_0_FORWARD_ENABLE_INTEL ( 0x1 << 24 )
|
||||||
|
#define CL_AVC_ME_SKIP_BLOCK_8x8_0_BACKWARD_ENABLE_INTEL ( 0x2 << 24 )
|
||||||
|
#define CL_AVC_ME_SKIP_BLOCK_8x8_1_FORWARD_ENABLE_INTEL ( 0x1 << 26 )
|
||||||
|
#define CL_AVC_ME_SKIP_BLOCK_8x8_1_BACKWARD_ENABLE_INTEL ( 0x2 << 26 )
|
||||||
|
#define CL_AVC_ME_SKIP_BLOCK_8x8_2_FORWARD_ENABLE_INTEL ( 0x1 << 28 )
|
||||||
|
#define CL_AVC_ME_SKIP_BLOCK_8x8_2_BACKWARD_ENABLE_INTEL ( 0x2 << 28 )
|
||||||
|
#define CL_AVC_ME_SKIP_BLOCK_8x8_3_FORWARD_ENABLE_INTEL ( 0x1 << 30 )
|
||||||
|
#define CL_AVC_ME_SKIP_BLOCK_8x8_3_BACKWARD_ENABLE_INTEL ( 0x2u << 30 ) /* unsigned literal: 0x2 << 30 does not fit in a 32-bit signed int */
|
||||||
|
|
||||||
|
#define CL_AVC_ME_BLOCK_BASED_SKIP_4x4_INTEL 0x00
|
||||||
|
#define CL_AVC_ME_BLOCK_BASED_SKIP_8x8_INTEL 0x80
|
||||||
|
|
||||||
|
#define CL_AVC_ME_INTRA_16x16_INTEL 0x0
|
||||||
|
#define CL_AVC_ME_INTRA_8x8_INTEL 0x1
|
||||||
|
#define CL_AVC_ME_INTRA_4x4_INTEL 0x2
|
||||||
|
|
||||||
|
#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_16x16_INTEL 0x6
|
||||||
|
#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_8x8_INTEL 0x5
|
||||||
|
#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_4x4_INTEL 0x3
|
||||||
|
|
||||||
|
#define CL_AVC_ME_INTRA_NEIGHBOR_LEFT_MASK_ENABLE_INTEL 0x60
|
||||||
|
#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_MASK_ENABLE_INTEL 0x10
|
||||||
|
#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL 0x8
|
||||||
|
#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL 0x4
|
||||||
|
|
||||||
|
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0
|
||||||
|
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
|
||||||
|
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2
|
||||||
|
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3
|
||||||
|
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4
|
||||||
|
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4 /* NOTE(review): same value (0x4) as CL_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL -- confirm against the extension spec */
|
||||||
|
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5
|
||||||
|
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6
|
||||||
|
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7
|
||||||
|
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8
|
||||||
|
#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0
|
||||||
|
#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
|
||||||
|
#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2
|
||||||
|
#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3
|
||||||
|
|
||||||
|
#define CL_AVC_ME_FRAME_FORWARD_INTEL 0x1
|
||||||
|
#define CL_AVC_ME_FRAME_BACKWARD_INTEL 0x2
|
||||||
|
#define CL_AVC_ME_FRAME_DUAL_INTEL 0x3
|
||||||
|
|
||||||
|
#define CL_AVC_ME_SLICE_TYPE_PRED_INTEL 0x0
|
||||||
|
#define CL_AVC_ME_SLICE_TYPE_BPRED_INTEL 0x1
|
||||||
|
#define CL_AVC_ME_SLICE_TYPE_INTRA_INTEL 0x2
|
||||||
|
|
||||||
|
#define CL_AVC_ME_INTERLACED_SCAN_TOP_FIELD_INTEL 0x0
|
||||||
|
#define CL_AVC_ME_INTERLACED_SCAN_BOTTOM_FIELD_INTEL 0x1
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* __CL_EXT_INTEL_H */
|
||||||
171
benchmarks/opencl/include/CL/cl_gl.h
Normal file
171
benchmarks/opencl/include/CL/cl_gl.h
Normal file
@@ -0,0 +1,171 @@
|
|||||||
|
/**********************************************************************************
|
||||||
|
* Copyright (c) 2008-2019 The Khronos Group Inc.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and/or associated documentation files (the
|
||||||
|
* "Materials"), to deal in the Materials without restriction, including
|
||||||
|
* without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||||
|
* permit persons to whom the Materials are furnished to do so, subject to
|
||||||
|
* the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included
|
||||||
|
* in all copies or substantial portions of the Materials.
|
||||||
|
*
|
||||||
|
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||||
|
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||||
|
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||||
|
* https://www.khronos.org/registry/
|
||||||
|
*
|
||||||
|
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||||
|
**********************************************************************************/
|
||||||
|
|
||||||
|
#ifndef __OPENCL_CL_GL_H
|
||||||
|
#define __OPENCL_CL_GL_H
|
||||||
|
|
||||||
|
#include <CL/cl.h>
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
typedef cl_uint cl_gl_object_type;
|
||||||
|
typedef cl_uint cl_gl_texture_info;
|
||||||
|
typedef cl_uint cl_gl_platform_info;
|
||||||
|
typedef struct __GLsync *cl_GLsync;
|
||||||
|
|
||||||
|
/* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken */
|
||||||
|
#define CL_GL_OBJECT_BUFFER 0x2000
|
||||||
|
#define CL_GL_OBJECT_TEXTURE2D 0x2001
|
||||||
|
#define CL_GL_OBJECT_TEXTURE3D 0x2002
|
||||||
|
#define CL_GL_OBJECT_RENDERBUFFER 0x2003
|
||||||
|
#ifdef CL_VERSION_1_2
|
||||||
|
#define CL_GL_OBJECT_TEXTURE2D_ARRAY 0x200E
|
||||||
|
#define CL_GL_OBJECT_TEXTURE1D 0x200F
|
||||||
|
#define CL_GL_OBJECT_TEXTURE1D_ARRAY 0x2010
|
||||||
|
#define CL_GL_OBJECT_TEXTURE_BUFFER 0x2011
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* cl_gl_texture_info */
|
||||||
|
#define CL_GL_TEXTURE_TARGET 0x2004
|
||||||
|
#define CL_GL_MIPMAP_LEVEL 0x2005
|
||||||
|
#ifdef CL_VERSION_1_2
|
||||||
|
#define CL_GL_NUM_SAMPLES 0x2012
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||||
|
clCreateFromGLBuffer(cl_context context,
|
||||||
|
cl_mem_flags flags,
|
||||||
|
cl_GLuint bufobj,
|
||||||
|
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||||
|
|
||||||
|
#ifdef CL_VERSION_1_2
|
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||||
|
clCreateFromGLTexture(cl_context context,
|
||||||
|
cl_mem_flags flags,
|
||||||
|
cl_GLenum target,
|
||||||
|
cl_GLint miplevel,
|
||||||
|
cl_GLuint texture,
|
||||||
|
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||||
|
clCreateFromGLRenderbuffer(cl_context context,
|
||||||
|
cl_mem_flags flags,
|
||||||
|
cl_GLuint renderbuffer,
|
||||||
|
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||||
|
clGetGLObjectInfo(cl_mem memobj,
|
||||||
|
cl_gl_object_type * gl_object_type,
|
||||||
|
cl_GLuint * gl_object_name) CL_API_SUFFIX__VERSION_1_0;
|
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||||
|
clGetGLTextureInfo(cl_mem memobj,
|
||||||
|
cl_gl_texture_info param_name,
|
||||||
|
size_t param_value_size,
|
||||||
|
void * param_value,
|
||||||
|
size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||||
|
clEnqueueAcquireGLObjects(cl_command_queue command_queue,
|
||||||
|
cl_uint num_objects,
|
||||||
|
const cl_mem * mem_objects,
|
||||||
|
cl_uint num_events_in_wait_list,
|
||||||
|
const cl_event * event_wait_list,
|
||||||
|
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||||
|
clEnqueueReleaseGLObjects(cl_command_queue command_queue,
|
||||||
|
cl_uint num_objects,
|
||||||
|
const cl_mem * mem_objects,
|
||||||
|
cl_uint num_events_in_wait_list,
|
||||||
|
const cl_event * event_wait_list,
|
||||||
|
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
||||||
|
|
||||||
|
|
||||||
|
/* Deprecated OpenCL 1.1 APIs */
|
||||||
|
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
|
||||||
|
clCreateFromGLTexture2D(cl_context context,
|
||||||
|
cl_mem_flags flags,
|
||||||
|
cl_GLenum target,
|
||||||
|
cl_GLint miplevel,
|
||||||
|
cl_GLuint texture,
|
||||||
|
cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
|
||||||
|
|
||||||
|
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
|
||||||
|
clCreateFromGLTexture3D(cl_context context,
|
||||||
|
cl_mem_flags flags,
|
||||||
|
cl_GLenum target,
|
||||||
|
cl_GLint miplevel,
|
||||||
|
cl_GLuint texture,
|
||||||
|
cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
|
||||||
|
|
||||||
|
/* cl_khr_gl_sharing extension */
|
||||||
|
|
||||||
|
#define cl_khr_gl_sharing 1
|
||||||
|
|
||||||
|
typedef cl_uint cl_gl_context_info;
|
||||||
|
|
||||||
|
/* Additional Error Codes */
|
||||||
|
#define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR -1000
|
||||||
|
|
||||||
|
/* cl_gl_context_info */
|
||||||
|
#define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006
|
||||||
|
#define CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007
|
||||||
|
|
||||||
|
/* Additional cl_context_properties */
|
||||||
|
#define CL_GL_CONTEXT_KHR 0x2008
|
||||||
|
#define CL_EGL_DISPLAY_KHR 0x2009
|
||||||
|
#define CL_GLX_DISPLAY_KHR 0x200A
|
||||||
|
#define CL_WGL_HDC_KHR 0x200B
|
||||||
|
#define CL_CGL_SHAREGROUP_KHR 0x200C
|
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||||
|
clGetGLContextInfoKHR(const cl_context_properties * properties,
|
||||||
|
cl_gl_context_info param_name,
|
||||||
|
size_t param_value_size,
|
||||||
|
void * param_value,
|
||||||
|
size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
|
||||||
|
const cl_context_properties * properties,
|
||||||
|
cl_gl_context_info param_name,
|
||||||
|
size_t param_value_size,
|
||||||
|
void * param_value,
|
||||||
|
size_t * param_value_size_ret);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* __OPENCL_CL_GL_H */
|
||||||
52
benchmarks/opencl/include/CL/cl_gl_ext.h
Normal file
52
benchmarks/opencl/include/CL/cl_gl_ext.h
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
/**********************************************************************************
|
||||||
|
* Copyright (c) 2008-2019 The Khronos Group Inc.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and/or associated documentation files (the
|
||||||
|
* "Materials"), to deal in the Materials without restriction, including
|
||||||
|
* without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||||
|
* permit persons to whom the Materials are furnished to do so, subject to
|
||||||
|
* the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included
|
||||||
|
* in all copies or substantial portions of the Materials.
|
||||||
|
*
|
||||||
|
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||||
|
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||||
|
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||||
|
* https://www.khronos.org/registry/
|
||||||
|
*
|
||||||
|
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||||
|
**********************************************************************************/
|
||||||
|
|
||||||
|
#ifndef __OPENCL_CL_GL_EXT_H
|
||||||
|
#define __OPENCL_CL_GL_EXT_H
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <CL/cl_gl.h>
|
||||||
|
|
||||||
|
/*
|
||||||
|
* cl_khr_gl_event extension
|
||||||
|
*/
|
||||||
|
#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D
|
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_event CL_API_CALL
|
||||||
|
clCreateEventFromGLsyncKHR(cl_context context,
|
||||||
|
cl_GLsync cl_GLsync,
|
||||||
|
cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1;
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* __OPENCL_CL_GL_EXT_H */
|
||||||
1384
benchmarks/opencl/include/CL/cl_platform.h
Normal file
1384
benchmarks/opencl/include/CL/cl_platform.h
Normal file
File diff suppressed because it is too large
Load Diff
172
benchmarks/opencl/include/CL/cl_va_api_media_sharing_intel.h
Normal file
172
benchmarks/opencl/include/CL/cl_va_api_media_sharing_intel.h
Normal file
@@ -0,0 +1,172 @@
|
|||||||
|
/**********************************************************************************
|
||||||
|
* Copyright (c) 2008-2019 The Khronos Group Inc.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and/or associated documentation files (the
|
||||||
|
* "Materials"), to deal in the Materials without restriction, including
|
||||||
|
* without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||||
|
* permit persons to whom the Materials are furnished to do so, subject to
|
||||||
|
* the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included
|
||||||
|
* in all copies or substantial portions of the Materials.
|
||||||
|
*
|
||||||
|
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||||
|
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||||
|
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||||
|
* https://www.khronos.org/registry/
|
||||||
|
*
|
||||||
|
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||||
|
**********************************************************************************/
|
||||||
|
/*****************************************************************************\
|
||||||
|
|
||||||
|
Copyright (c) 2013-2019 Intel Corporation All Rights Reserved.
|
||||||
|
|
||||||
|
THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
|
||||||
|
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||||
|
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
|
||||||
|
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE
|
||||||
|
MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
File Name: cl_va_api_media_sharing_intel.h
|
||||||
|
|
||||||
|
Abstract:
|
||||||
|
|
||||||
|
Notes:
|
||||||
|
|
||||||
|
\*****************************************************************************/
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H
|
||||||
|
#define __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H
|
||||||
|
|
||||||
|
#include <CL/cl.h>
|
||||||
|
#include <CL/cl_platform.h>
|
||||||
|
#include <va/va.h>
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/******************************************
|
||||||
|
* cl_intel_va_api_media_sharing extension *
|
||||||
|
*******************************************/
|
||||||
|
|
||||||
|
#define cl_intel_va_api_media_sharing 1
|
||||||
|
|
||||||
|
/* error codes */
|
||||||
|
#define CL_INVALID_VA_API_MEDIA_ADAPTER_INTEL -1098
|
||||||
|
#define CL_INVALID_VA_API_MEDIA_SURFACE_INTEL -1099
|
||||||
|
#define CL_VA_API_MEDIA_SURFACE_ALREADY_ACQUIRED_INTEL -1100
|
||||||
|
#define CL_VA_API_MEDIA_SURFACE_NOT_ACQUIRED_INTEL -1101
|
||||||
|
|
||||||
|
/* cl_va_api_device_source_intel */
|
||||||
|
#define CL_VA_API_DISPLAY_INTEL 0x4094
|
||||||
|
|
||||||
|
/* cl_va_api_device_set_intel */
|
||||||
|
#define CL_PREFERRED_DEVICES_FOR_VA_API_INTEL 0x4095
|
||||||
|
#define CL_ALL_DEVICES_FOR_VA_API_INTEL 0x4096
|
||||||
|
|
||||||
|
/* cl_context_info */
|
||||||
|
#define CL_CONTEXT_VA_API_DISPLAY_INTEL 0x4097
|
||||||
|
|
||||||
|
/* cl_mem_info */
|
||||||
|
#define CL_MEM_VA_API_MEDIA_SURFACE_INTEL 0x4098
|
||||||
|
|
||||||
|
/* cl_image_info */
|
||||||
|
#define CL_IMAGE_VA_API_PLANE_INTEL 0x4099
|
||||||
|
|
||||||
|
/* cl_command_type */
|
||||||
|
#define CL_COMMAND_ACQUIRE_VA_API_MEDIA_SURFACES_INTEL 0x409A
|
||||||
|
#define CL_COMMAND_RELEASE_VA_API_MEDIA_SURFACES_INTEL 0x409B
|
||||||
|
|
||||||
|
typedef cl_uint cl_va_api_device_source_intel;
|
||||||
|
typedef cl_uint cl_va_api_device_set_intel;
|
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||||
|
clGetDeviceIDsFromVA_APIMediaAdapterINTEL(
|
||||||
|
cl_platform_id platform,
|
||||||
|
cl_va_api_device_source_intel media_adapter_type,
|
||||||
|
void* media_adapter,
|
||||||
|
cl_va_api_device_set_intel media_adapter_set,
|
||||||
|
cl_uint num_entries,
|
||||||
|
cl_device_id* devices,
|
||||||
|
cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_2;
|
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_int (CL_API_CALL * clGetDeviceIDsFromVA_APIMediaAdapterINTEL_fn)(
|
||||||
|
cl_platform_id platform,
|
||||||
|
cl_va_api_device_source_intel media_adapter_type,
|
||||||
|
void* media_adapter,
|
||||||
|
cl_va_api_device_set_intel media_adapter_set,
|
||||||
|
cl_uint num_entries,
|
||||||
|
cl_device_id* devices,
|
||||||
|
cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_2;
|
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||||
|
clCreateFromVA_APIMediaSurfaceINTEL(
|
||||||
|
cl_context context,
|
||||||
|
cl_mem_flags flags,
|
||||||
|
VASurfaceID* surface,
|
||||||
|
cl_uint plane,
|
||||||
|
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
|
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_mem (CL_API_CALL * clCreateFromVA_APIMediaSurfaceINTEL_fn)(
|
||||||
|
cl_context context,
|
||||||
|
cl_mem_flags flags,
|
||||||
|
VASurfaceID* surface,
|
||||||
|
cl_uint plane,
|
||||||
|
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
|
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||||
|
clEnqueueAcquireVA_APIMediaSurfacesINTEL(
|
||||||
|
cl_command_queue command_queue,
|
||||||
|
cl_uint num_objects,
|
||||||
|
const cl_mem* mem_objects,
|
||||||
|
cl_uint num_events_in_wait_list,
|
||||||
|
const cl_event* event_wait_list,
|
||||||
|
cl_event* event) CL_EXT_SUFFIX__VERSION_1_2;
|
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireVA_APIMediaSurfacesINTEL_fn)(
|
||||||
|
cl_command_queue command_queue,
|
||||||
|
cl_uint num_objects,
|
||||||
|
const cl_mem* mem_objects,
|
||||||
|
cl_uint num_events_in_wait_list,
|
||||||
|
const cl_event* event_wait_list,
|
||||||
|
cl_event* event) CL_EXT_SUFFIX__VERSION_1_2;
|
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||||
|
clEnqueueReleaseVA_APIMediaSurfacesINTEL(
|
||||||
|
cl_command_queue command_queue,
|
||||||
|
cl_uint num_objects,
|
||||||
|
const cl_mem* mem_objects,
|
||||||
|
cl_uint num_events_in_wait_list,
|
||||||
|
const cl_event* event_wait_list,
|
||||||
|
cl_event* event) CL_EXT_SUFFIX__VERSION_1_2;
|
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseVA_APIMediaSurfacesINTEL_fn)(
|
||||||
|
cl_command_queue command_queue,
|
||||||
|
cl_uint num_objects,
|
||||||
|
const cl_mem* mem_objects,
|
||||||
|
cl_uint num_events_in_wait_list,
|
||||||
|
const cl_event* event_wait_list,
|
||||||
|
cl_event* event) CL_EXT_SUFFIX__VERSION_1_2;
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H */
|
||||||
|
|
||||||
86
benchmarks/opencl/include/CL/cl_version.h
Normal file
86
benchmarks/opencl/include/CL/cl_version.h
Normal file
@@ -0,0 +1,86 @@
|
|||||||
|
/*******************************************************************************
|
||||||
|
* Copyright (c) 2018 The Khronos Group Inc.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and/or associated documentation files (the
|
||||||
|
* "Materials"), to deal in the Materials without restriction, including
|
||||||
|
* without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||||
|
* permit persons to whom the Materials are furnished to do so, subject to
|
||||||
|
* the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included
|
||||||
|
* in all copies or substantial portions of the Materials.
|
||||||
|
*
|
||||||
|
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||||
|
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||||
|
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||||
|
* https://www.khronos.org/registry/
|
||||||
|
*
|
||||||
|
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||||
|
******************************************************************************/
|
||||||
|
|
||||||
|
#ifndef __CL_VERSION_H
|
||||||
|
#define __CL_VERSION_H
|
||||||
|
|
||||||
|
/* Detect which version to target */
|
||||||
|
#if !defined(CL_TARGET_OPENCL_VERSION)
|
||||||
|
#pragma message("cl_version.h: CL_TARGET_OPENCL_VERSION is not defined. Defaulting to 220 (OpenCL 2.2)")
|
||||||
|
#define CL_TARGET_OPENCL_VERSION 220
|
||||||
|
#endif
|
||||||
|
#if CL_TARGET_OPENCL_VERSION != 100 && \
|
||||||
|
CL_TARGET_OPENCL_VERSION != 110 && \
|
||||||
|
CL_TARGET_OPENCL_VERSION != 120 && \
|
||||||
|
CL_TARGET_OPENCL_VERSION != 200 && \
|
||||||
|
CL_TARGET_OPENCL_VERSION != 210 && \
|
||||||
|
CL_TARGET_OPENCL_VERSION != 220
|
||||||
|
#pragma message("cl_version: CL_TARGET_OPENCL_VERSION is not a valid value (100, 110, 120, 200, 210, 220). Defaulting to 220 (OpenCL 2.2)")
|
||||||
|
#undef CL_TARGET_OPENCL_VERSION
|
||||||
|
#define CL_TARGET_OPENCL_VERSION 220
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
/* OpenCL Version */
|
||||||
|
#if CL_TARGET_OPENCL_VERSION >= 220 && !defined(CL_VERSION_2_2)
|
||||||
|
#define CL_VERSION_2_2 1
|
||||||
|
#endif
|
||||||
|
#if CL_TARGET_OPENCL_VERSION >= 210 && !defined(CL_VERSION_2_1)
|
||||||
|
#define CL_VERSION_2_1 1
|
||||||
|
#endif
|
||||||
|
#if CL_TARGET_OPENCL_VERSION >= 200 && !defined(CL_VERSION_2_0)
|
||||||
|
#define CL_VERSION_2_0 1
|
||||||
|
#endif
|
||||||
|
#if CL_TARGET_OPENCL_VERSION >= 120 && !defined(CL_VERSION_1_2)
|
||||||
|
#define CL_VERSION_1_2 1
|
||||||
|
#endif
|
||||||
|
#if CL_TARGET_OPENCL_VERSION >= 110 && !defined(CL_VERSION_1_1)
|
||||||
|
#define CL_VERSION_1_1 1
|
||||||
|
#endif
|
||||||
|
#if CL_TARGET_OPENCL_VERSION >= 100 && !defined(CL_VERSION_1_0)
|
||||||
|
#define CL_VERSION_1_0 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Allow deprecated APIs for older OpenCL versions. */
|
||||||
|
#if CL_TARGET_OPENCL_VERSION <= 210 && !defined(CL_USE_DEPRECATED_OPENCL_2_1_APIS)
|
||||||
|
#define CL_USE_DEPRECATED_OPENCL_2_1_APIS
|
||||||
|
#endif
|
||||||
|
#if CL_TARGET_OPENCL_VERSION <= 200 && !defined(CL_USE_DEPRECATED_OPENCL_2_0_APIS)
|
||||||
|
#define CL_USE_DEPRECATED_OPENCL_2_0_APIS
|
||||||
|
#endif
|
||||||
|
#if CL_TARGET_OPENCL_VERSION <= 120 && !defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS)
|
||||||
|
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
|
||||||
|
#endif
|
||||||
|
#if CL_TARGET_OPENCL_VERSION <= 110 && !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS)
|
||||||
|
#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
|
||||||
|
#endif
|
||||||
|
#if CL_TARGET_OPENCL_VERSION <= 100 && !defined(CL_USE_DEPRECATED_OPENCL_1_0_APIS)
|
||||||
|
#define CL_USE_DEPRECATED_OPENCL_1_0_APIS
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* __CL_VERSION_H */
|
||||||
47
benchmarks/opencl/include/CL/opencl.h
Normal file
47
benchmarks/opencl/include/CL/opencl.h
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
/*******************************************************************************
|
||||||
|
* Copyright (c) 2008-2015 The Khronos Group Inc.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and/or associated documentation files (the
|
||||||
|
* "Materials"), to deal in the Materials without restriction, including
|
||||||
|
* without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||||
|
* permit persons to whom the Materials are furnished to do so, subject to
|
||||||
|
* the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included
|
||||||
|
* in all copies or substantial portions of the Materials.
|
||||||
|
*
|
||||||
|
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||||
|
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||||
|
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||||
|
* https://www.khronos.org/registry/
|
||||||
|
*
|
||||||
|
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||||
|
******************************************************************************/
|
||||||
|
|
||||||
|
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
|
||||||
|
|
||||||
|
#ifndef __OPENCL_H
|
||||||
|
#define __OPENCL_H
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <CL/cl.h>
|
||||||
|
#include <CL/cl_gl.h>
|
||||||
|
#include <CL/cl_gl_ext.h>
|
||||||
|
#include <CL/cl_ext.h>
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* __OPENCL_H */
|
||||||
@@ -1,44 +1,60 @@
|
|||||||
|
RISCV_TOOL_PATH = $(wildcard ~/dev/riscv-gnu-toolchain/drops)
|
||||||
|
POCL_CC_PATH = $(wildcard ~/dev/pocl/drops_riscv_cc)
|
||||||
|
POCL_INC_PATH = $(wildcard ../include)
|
||||||
|
POCL_LIB_PATH = $(wildcard ../lib)
|
||||||
|
VX_RT_PATH = $(wildcard ../../../runtime)
|
||||||
|
VX_SIMX_PATH = $(wildcard ../../../simX/obj_dir)
|
||||||
|
|
||||||
RISCV_TOOL_PATH=$(wildcard ~/dev/riscv-gnu-toolchain/drops)
|
CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
|
||||||
|
CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
|
||||||
|
DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
|
||||||
|
HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
|
||||||
|
GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
|
||||||
|
|
||||||
POCL_CC_PATH=$(wildcard ~/dev/pocl/drops_riscv_cc)
|
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
|
||||||
POCL_RT_PATH=$(wildcard ~/dev/pocl/drops_riscv_rt)
|
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.s
|
||||||
|
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
|
||||||
|
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
|
||||||
|
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
|
||||||
|
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
|
||||||
|
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c
|
||||||
|
VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST)
|
||||||
|
|
||||||
VX_RT_PATH=$(wildcard ../../../runtime)
|
VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld
|
||||||
VX_SIMX_PATH=$(wildcard ../../../simX/obj_dir)
|
|
||||||
|
|
||||||
CC=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
|
CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32
|
||||||
CXX=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
|
|
||||||
DMP=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
|
|
||||||
HEX=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
|
|
||||||
NEWLIB_PATH=$(RISCV_TOOL_PATH)/riscv32-unknown-elf/lib
|
|
||||||
|
|
||||||
VX_NEWLIB = $(VX_RT_PATH)/newlib/newlib.c
|
|
||||||
VX_STR = $(VX_RT_PATH)/startup/vx_start.s
|
|
||||||
VX_INT = $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
|
|
||||||
VX_IO = $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
|
|
||||||
VX_FIO = $(VX_RT_PATH)/fileio/fileio.s
|
|
||||||
VX_API = $(VX_RT_PATH)/vx_api/vx_api.c
|
|
||||||
|
|
||||||
VX_SRCS = $(VX_STR) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST)
|
|
||||||
|
|
||||||
CXXFLAGS = -g -O0 -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld -march=rv32im -mabi=ilp32
|
|
||||||
CXXFLAGS += -ffreestanding # program may not begin at main()
|
CXXFLAGS += -ffreestanding # program may not begin at main()
|
||||||
CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
|
|
||||||
CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections
|
CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections
|
||||||
|
CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
|
||||||
|
CXXFLAGS += -I$(POCL_INC_PATH)
|
||||||
|
|
||||||
LIBS = -lOpenCL
|
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
||||||
|
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
|
||||||
|
|
||||||
PROJECT=kmeans
|
PROJECT=kmeans
|
||||||
PROJECT=saxpy
|
|
||||||
|
|
||||||
all: $(PROJECT).dump $(PROJECT).hex
|
all: $(PROJECT).dump $(PROJECT).hex
|
||||||
|
|
||||||
lib$(PROJECT).a: kernel.cl
|
lib$(PROJECT).a: kernel.cl
|
||||||
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
|
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
|
||||||
|
|
||||||
$(PROJECT).elf: main.cc lib$(PROJECT).a
|
kmeans_clustering.o: kmeans_clustering.c
|
||||||
$(CXX) $(CXXFLAGS) -I$(POCL_RT_PATH)/include -L$(POCL_RT_PATH)/lib/static -L. $(VX_SRCS) main.cc rmse.c read_input.c cluster.c kmeans_clustering.c -Wl,--whole-archive -l$(PROJECT) -Wl,--no-whole-archive $(LIBS) -o $(PROJECT).elf
|
$(CC) $(CXXFLAGS) -c kmeans_clustering.c
|
||||||
|
|
||||||
|
cluster.o: cluster.c
|
||||||
|
$(CC) $(CXXFLAGS) -c cluster.c
|
||||||
|
|
||||||
|
read_input.o: read_input.c
|
||||||
|
$(CC) $(CXXFLAGS) -c read_input.c
|
||||||
|
|
||||||
|
rmse.o: rmse.c
|
||||||
|
$(CC) $(CXXFLAGS) -c rmse.c
|
||||||
|
|
||||||
|
$(PROJECT).elf: main.cc lib$(PROJECT).a read_input.o rmse.o cluster.o kmeans_clustering.o
|
||||||
|
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) main.cc read_input.o rmse.o cluster.o kmeans_clustering.o $(VX_LIBS) -o $(PROJECT).elf
|
||||||
|
|
||||||
|
$(PROJECT).qemu: main.cc lib$(PROJECT).a read_input.o rmse.o cluster.o kmeans_clustering.o
|
||||||
|
$(CXX) $(CXXFLAGS) main.cc read_input.o rmse.o cluster.o kmeans_clustering.o $(QEMU_LIBS) -o $(PROJECT).qemu
|
||||||
|
|
||||||
$(PROJECT).hex: $(PROJECT).elf
|
$(PROJECT).hex: $(PROJECT).elf
|
||||||
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
|
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
|
||||||
@@ -46,8 +62,17 @@ $(PROJECT).hex: $(PROJECT).elf
|
|||||||
$(PROJECT).dump: $(PROJECT).elf
|
$(PROJECT).dump: $(PROJECT).elf
|
||||||
$(DMP) -D $(PROJECT).elf > $(PROJECT).dump
|
$(DMP) -D $(PROJECT).elf > $(PROJECT).dump
|
||||||
|
|
||||||
run:
|
run: $(PROJECT).hex
|
||||||
$(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug
|
POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug
|
||||||
|
|
||||||
|
qemu: $(PROJECT).qemu
|
||||||
|
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -strace -d in_asm -D debug.log $(PROJECT).qemu
|
||||||
|
|
||||||
|
gdb-s: $(PROJECT).qemu
|
||||||
|
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu
|
||||||
|
|
||||||
|
gdb-c: $(PROJECT).qemu
|
||||||
|
$(GDB) $(PROJECT).qemu
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf *.elf *.dump *.hex *.a *.pocl
|
rm -rf *.o *.elf *.dump *.hex *.a *.pocl *.qemu
|
||||||
BIN
benchmarks/opencl/lib/libOpenCL.a
Normal file
BIN
benchmarks/opencl/lib/libOpenCL.a
Normal file
Binary file not shown.
@@ -1,33 +1,35 @@
|
|||||||
|
RISCV_TOOL_PATH = $(wildcard ~/dev/riscv-gnu-toolchain/drops)
|
||||||
|
POCL_CC_PATH = $(wildcard ~/dev/pocl/drops_riscv_cc)
|
||||||
|
POCL_INC_PATH = $(wildcard ../include)
|
||||||
|
POCL_LIB_PATH = $(wildcard ../lib)
|
||||||
|
VX_RT_PATH = $(wildcard ../../../runtime)
|
||||||
|
VX_SIMX_PATH = $(wildcard ../../../simX/obj_dir)
|
||||||
|
|
||||||
RISCV_TOOL_PATH=$(wildcard ~/dev/riscv-gnu-toolchain/drops)
|
CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
|
||||||
|
CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
|
||||||
|
DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
|
||||||
|
HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
|
||||||
|
GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
|
||||||
|
|
||||||
POCL_CC_PATH=$(wildcard ~/dev/pocl/drops_riscv_cc)
|
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
|
||||||
POCL_RT_PATH=$(wildcard ~/dev/pocl/drops_riscv_rt)
|
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.s
|
||||||
|
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
|
||||||
|
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
|
||||||
|
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
|
||||||
|
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
|
||||||
|
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c
|
||||||
|
VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST)
|
||||||
|
|
||||||
VX_RT_PATH=$(wildcard ../../../runtime)
|
VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld
|
||||||
VX_SIMX_PATH=$(wildcard ../../../simX/obj_dir)
|
|
||||||
|
|
||||||
CC=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
|
CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32
|
||||||
CXX=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
|
|
||||||
DMP=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
|
|
||||||
HEX=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
|
|
||||||
NEWLIB_PATH=$(RISCV_TOOL_PATH)/riscv32-unknown-elf/lib
|
|
||||||
|
|
||||||
VX_NEWLIB = $(VX_RT_PATH)/newlib/newlib.c
|
|
||||||
VX_STR = $(VX_RT_PATH)/startup/vx_start.s
|
|
||||||
VX_INT = $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
|
|
||||||
VX_IO = $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
|
|
||||||
VX_FIO = $(VX_RT_PATH)/fileio/fileio.s
|
|
||||||
VX_API = $(VX_RT_PATH)/vx_api/vx_api.c
|
|
||||||
|
|
||||||
VX_SRCS = $(VX_STR) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST)
|
|
||||||
|
|
||||||
CXXFLAGS = -g -O0 -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld -march=rv32im -mabi=ilp32
|
|
||||||
CXXFLAGS += -ffreestanding # program may not begin at main()
|
CXXFLAGS += -ffreestanding # program may not begin at main()
|
||||||
CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
|
|
||||||
CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections
|
CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections
|
||||||
|
CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
|
||||||
|
CXXFLAGS += -I$(POCL_INC_PATH)
|
||||||
|
|
||||||
LIBS = -lOpenCL
|
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
||||||
|
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
|
||||||
|
|
||||||
PROJECT=saxpy
|
PROJECT=saxpy
|
||||||
|
|
||||||
@@ -37,7 +39,10 @@ lib$(PROJECT).a: kernel.cl
|
|||||||
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
|
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
|
||||||
|
|
||||||
$(PROJECT).elf: main.cc lib$(PROJECT).a
|
$(PROJECT).elf: main.cc lib$(PROJECT).a
|
||||||
$(CXX) $(CXXFLAGS) -I$(POCL_RT_PATH)/include -L$(POCL_RT_PATH)/lib/static -L. $(VX_SRCS) main.cc -Wl,--whole-archive -l$(PROJECT) -Wl,--no-whole-archive $(LIBS) -o $(PROJECT).elf
|
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) main.cc $(VX_LIBS) -o $(PROJECT).elf
|
||||||
|
|
||||||
|
$(PROJECT).qemu: main.cc lib$(PROJECT).a
|
||||||
|
$(CXX) $(CXXFLAGS) main.cc $(QEMU_LIBS) -o $(PROJECT).qemu
|
||||||
|
|
||||||
$(PROJECT).hex: $(PROJECT).elf
|
$(PROJECT).hex: $(PROJECT).elf
|
||||||
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
|
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
|
||||||
@@ -45,8 +50,17 @@ $(PROJECT).hex: $(PROJECT).elf
|
|||||||
$(PROJECT).dump: $(PROJECT).elf
|
$(PROJECT).dump: $(PROJECT).elf
|
||||||
$(DMP) -D $(PROJECT).elf > $(PROJECT).dump
|
$(DMP) -D $(PROJECT).elf > $(PROJECT).dump
|
||||||
|
|
||||||
run:
|
run: $(PROJECT).hex
|
||||||
$(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug
|
POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug
|
||||||
|
|
||||||
|
qemu: $(PROJECT).qemu
|
||||||
|
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -d in_asm -D debug.log $(PROJECT).qemu
|
||||||
|
|
||||||
|
gdb-s: $(PROJECT).qemu
|
||||||
|
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu
|
||||||
|
|
||||||
|
gdb-c: $(PROJECT).qemu
|
||||||
|
$(GDB) $(PROJECT).qemu
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf *.elf *.dump *.hex *.a *.pocl
|
rm -rf *.o *.elf *.dump *.hex *.a *.pocl *.qemu
|
||||||
Binary file not shown.
@@ -17,481 +17,175 @@
|
|||||||
* along with this program; if not, write to the Free Software
|
* along with this program; if not, write to the Free Software
|
||||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||||
*
|
*
|
||||||
* gcc -o cldemo -std=gnu99 -Wall -I/usr/include/nvidia-current cldemo.c -lOpenCL
|
* gcc -o cldemo -std=gnu99 -Wall -I/usr/include/nvidia-current cldemo.c
|
||||||
|
* -lOpenCL
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <CL/cl.h>
|
#include <CL/cl.h>
|
||||||
|
|
||||||
#include <iostream>
|
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
|
#include <iostream>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <errno.h>
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <unistd.h>
|
|
||||||
|
|
||||||
#define NUM_DATA 65536
|
//#define NUM_DATA 65536
|
||||||
|
#define NUM_DATA 4096
|
||||||
|
|
||||||
#define CL_CHECK(_expr) \
|
#define CL_CHECK(_expr) \
|
||||||
do { \
|
do { \
|
||||||
cl_int _err = _expr; \
|
cl_int _err = _expr; \
|
||||||
if (_err == CL_SUCCESS) \
|
if (_err == CL_SUCCESS) \
|
||||||
break; \
|
break; \
|
||||||
fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
|
fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
|
||||||
abort(); \
|
abort(); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define CL_CHECK_ERR(_expr) \
|
#define CL_CHECK_ERR(_expr) \
|
||||||
({ \
|
({ \
|
||||||
cl_int _err = CL_INVALID_VALUE; \
|
cl_int _err = CL_INVALID_VALUE; \
|
||||||
typeof(_expr) _ret = _expr; \
|
typeof(_expr) _ret = _expr; \
|
||||||
if (_err != CL_SUCCESS) { \
|
if (_err != CL_SUCCESS) { \
|
||||||
fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
|
fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
|
||||||
abort(); \
|
abort(); \
|
||||||
} \
|
} \
|
||||||
_ret; \
|
_ret; \
|
||||||
})
|
})
|
||||||
|
|
||||||
void pfn_notify(const char *errinfo, const void *private_info, size_t cb, void *user_data)
|
void pfn_notify(const char *errinfo, const void *private_info, size_t cb,
|
||||||
{
|
void *user_data) {
|
||||||
fprintf(stderr, "OpenCL Error (via pfn_notify): %s\n", errinfo);
|
fprintf(stderr, "OpenCL Error (via pfn_notify): %s\n", errinfo);
|
||||||
}
|
|
||||||
|
|
||||||
///
|
|
||||||
// Create an OpenCL program from the kernel source file
|
|
||||||
//
|
|
||||||
cl_program CreateProgram(cl_context context, cl_device_id device, const char* fileName)
|
|
||||||
{
|
|
||||||
cl_int errNum;
|
|
||||||
cl_program program;
|
|
||||||
|
|
||||||
std::ifstream kernelFile(fileName, std::ios::in);
|
|
||||||
if (!kernelFile.is_open())
|
|
||||||
{
|
|
||||||
std::cerr << "Failed to open file for reading: " << fileName << std::endl;
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::ostringstream oss;
|
|
||||||
oss << kernelFile.rdbuf();
|
|
||||||
|
|
||||||
std::string srcStdStr = oss.str();
|
|
||||||
const char *srcStr = srcStdStr.c_str();
|
|
||||||
program = clCreateProgramWithSource(context, 1,
|
|
||||||
(const char**)&srcStr,
|
|
||||||
NULL, NULL);
|
|
||||||
if (program == NULL)
|
|
||||||
{
|
|
||||||
std::cerr << "Failed to create CL program from source." << std::endl;
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
errNum = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
|
|
||||||
if (errNum != CL_SUCCESS)
|
|
||||||
{
|
|
||||||
// Determine the reason for the error
|
|
||||||
char buildLog[16384];
|
|
||||||
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
|
|
||||||
sizeof(buildLog), buildLog, NULL);
|
|
||||||
|
|
||||||
std::cerr << "Error in kernel: " << std::endl;
|
|
||||||
std::cerr << buildLog;
|
|
||||||
clReleaseProgram(program);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
return program;
|
|
||||||
}
|
|
||||||
|
|
||||||
//
|
|
||||||
///
|
|
||||||
// Retrieve program binary for all of the devices attached to the
|
|
||||||
// program and store the one for the device passed in
|
|
||||||
//
|
|
||||||
bool SaveProgramBinary(cl_program program, cl_device_id device, const char* fileName)
|
|
||||||
{
|
|
||||||
//cl_uint numDevices = malloc(sizeof(cl_uint));
|
|
||||||
//cl_uint* numDevices = malloc(sizeof(cl_uint));
|
|
||||||
cl_int errNum;
|
|
||||||
|
|
||||||
printf("try getting program info\n");
|
|
||||||
// 1 - Query for number of devices attached to program
|
|
||||||
/*errNum = clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint),
|
|
||||||
&numDevices, NULL);
|
|
||||||
printf("Got program_num_devices\n");
|
|
||||||
if (errNum != CL_SUCCESS)
|
|
||||||
{
|
|
||||||
std::cerr << "Error querying for number of devices." << std::endl;
|
|
||||||
return false;
|
|
||||||
}*/
|
|
||||||
|
|
||||||
// 2 - Get all of the Device IDs
|
|
||||||
cl_device_id *devices = new cl_device_id[1];
|
|
||||||
errNum = clGetProgramInfo(program, CL_PROGRAM_DEVICES,
|
|
||||||
sizeof(cl_device_id) * 1,
|
|
||||||
devices, NULL);
|
|
||||||
printf("Got program_devices\n");
|
|
||||||
if (errNum != CL_SUCCESS)
|
|
||||||
{
|
|
||||||
std::cerr << "Error querying for devices." << std::endl;
|
|
||||||
delete [] devices;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// 3 - Determine the size of each program binary
|
|
||||||
size_t *programBinarySizes = new size_t [1];
|
|
||||||
errNum = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES,
|
|
||||||
sizeof(size_t) * 1,
|
|
||||||
programBinarySizes, NULL);
|
|
||||||
printf("Got program_binary_sizes\n");
|
|
||||||
if (errNum != CL_SUCCESS)
|
|
||||||
{
|
|
||||||
std::cerr << "Error querying for program binary sizes." << std::endl;
|
|
||||||
delete [] devices;
|
|
||||||
delete [] programBinarySizes;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned char **programBinaries = new unsigned char*[1];
|
|
||||||
for (cl_uint i = 0; i < 1; i++)
|
|
||||||
{
|
|
||||||
programBinaries[i] = new unsigned char[programBinarySizes[i]];
|
|
||||||
}
|
|
||||||
|
|
||||||
// 4 - Get all of the program binaries
|
|
||||||
errNum = clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(unsigned char*) * 1,
|
|
||||||
programBinaries, NULL);
|
|
||||||
printf("Got program_binarys\n");
|
|
||||||
if (errNum != CL_SUCCESS)
|
|
||||||
{
|
|
||||||
std::cerr << "Error querying for program binaries" << std::endl;
|
|
||||||
|
|
||||||
delete [] devices;
|
|
||||||
delete [] programBinarySizes;
|
|
||||||
for (cl_uint i = 0; i < 1; i++)
|
|
||||||
{
|
|
||||||
delete [] programBinaries[i];
|
|
||||||
}
|
|
||||||
delete [] programBinaries;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// 5 - Finally store the binaries for the device requested out to disk for future reading.
|
|
||||||
for (cl_uint i = 0; i < 1; i++)
|
|
||||||
{
|
|
||||||
// Store the binary just for the device requested. In a scenario where
|
|
||||||
// multiple devices were being used you would save all of the binaries out here.
|
|
||||||
if (devices[i] == device)
|
|
||||||
{
|
|
||||||
FILE *fp = fopen(fileName, "wb");
|
|
||||||
if(fp ==NULL){
|
|
||||||
delete [] devices;
|
|
||||||
delete [] programBinarySizes;
|
|
||||||
for (cl_uint i = 0; i < 1; i++)
|
|
||||||
{
|
|
||||||
delete [] programBinaries[i];
|
|
||||||
}
|
|
||||||
delete [] programBinaries;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
printf("Opened file\n");
|
|
||||||
fwrite(programBinaries[i], 1, programBinarySizes[i], fp);
|
|
||||||
printf("wrote file\n");
|
|
||||||
fclose(fp);
|
|
||||||
printf("close file\n");
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Cleanup
|
|
||||||
delete [] devices;
|
|
||||||
delete [] programBinarySizes;
|
|
||||||
for (cl_uint i = 0; i < 1; i++)
|
|
||||||
{
|
|
||||||
delete [] programBinaries[i];
|
|
||||||
}
|
|
||||||
delete [] programBinaries;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
///
|
|
||||||
// Attempt to create the program object from a cached binary. Note that
|
|
||||||
// on first run this will fail because the binary has not yet been created.
|
|
||||||
//
|
|
||||||
cl_program CreateProgramFromBinary(cl_context context, cl_device_id device, const char* fileName)
|
|
||||||
{
|
|
||||||
FILE *fp = fopen(fileName, "rb");
|
|
||||||
if (fp == NULL)
|
|
||||||
{
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Determine the size of the binary
|
|
||||||
size_t binarySize;
|
|
||||||
fseek(fp, 0, SEEK_END);
|
|
||||||
binarySize = ftell(fp);
|
|
||||||
rewind(fp);
|
|
||||||
|
|
||||||
unsigned char *programBinary = new unsigned char[binarySize];
|
|
||||||
fread(programBinary, 1, binarySize, fp);
|
|
||||||
fclose(fp);
|
|
||||||
|
|
||||||
cl_int errNum = 0;
|
|
||||||
cl_program program;
|
|
||||||
cl_int binaryStatus;
|
|
||||||
|
|
||||||
program = clCreateProgramWithBinary(context,
|
|
||||||
1,
|
|
||||||
&device,
|
|
||||||
&binarySize,
|
|
||||||
(const unsigned char**)&programBinary,
|
|
||||||
&binaryStatus,
|
|
||||||
&errNum);
|
|
||||||
delete [] programBinary;
|
|
||||||
if (errNum != CL_SUCCESS)
|
|
||||||
{
|
|
||||||
std::cerr << "Error loading program binary." << std::endl;
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (binaryStatus != CL_SUCCESS)
|
|
||||||
{
|
|
||||||
std::cerr << "Invalid binary for device" << std::endl;
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
errNum = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
|
|
||||||
if (errNum != CL_SUCCESS)
|
|
||||||
{
|
|
||||||
printf("build errNum:%d\n", errNum);
|
|
||||||
// Determine the reason for the error
|
|
||||||
char buildLog[16384];
|
|
||||||
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
|
|
||||||
sizeof(buildLog), buildLog, NULL);
|
|
||||||
|
|
||||||
std::cerr << "Error in program: " << std::endl;
|
|
||||||
std::cerr << buildLog << std::endl;
|
|
||||||
clReleaseProgram(program);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
return program;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
///
|
///
|
||||||
// Cleanup any created OpenCL resources
|
// Cleanup any created OpenCL resources
|
||||||
//
|
//
|
||||||
void Cleanup(cl_context context, cl_command_queue commandQueue,
|
void Cleanup(cl_context context, cl_command_queue commandQueue,
|
||||||
cl_program program, cl_kernel kernel, cl_mem memObjects[3])
|
cl_program program, cl_kernel kernel, cl_mem memObjects[3]) {
|
||||||
{
|
for (int i = 0; i < 3; i++) {
|
||||||
for (int i = 0; i < 3; i++)
|
if (memObjects[i] != 0)
|
||||||
{
|
clReleaseMemObject(memObjects[i]);
|
||||||
if (memObjects[i] != 0)
|
}
|
||||||
clReleaseMemObject(memObjects[i]);
|
if (commandQueue != 0)
|
||||||
}
|
clReleaseCommandQueue(commandQueue);
|
||||||
if (commandQueue != 0)
|
|
||||||
clReleaseCommandQueue(commandQueue);
|
|
||||||
|
|
||||||
if (kernel != 0)
|
if (kernel != 0)
|
||||||
clReleaseKernel(kernel);
|
clReleaseKernel(kernel);
|
||||||
|
|
||||||
if (program != 0)
|
if (program != 0)
|
||||||
clReleaseProgram(program);
|
clReleaseProgram(program);
|
||||||
|
|
||||||
if (context != 0)
|
|
||||||
clReleaseContext(context);
|
|
||||||
|
|
||||||
|
if (context != 0)
|
||||||
|
clReleaseContext(context);
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char **argv)
|
int main(int argc, char **argv) {
|
||||||
{
|
|
||||||
printf("enter demo main\n");
|
printf("enter demo main\n");
|
||||||
fflush(stdout);
|
|
||||||
putenv("POCL_VERBOSE=1");
|
|
||||||
putenv("POCL_DEVICES=basic");
|
|
||||||
putenv("POCL_LEAVE_TEMP_DIRS=1");
|
|
||||||
putenv("POCL_LEAVE_KERNEL_COMPILER_TEMP_FILES=1");
|
|
||||||
putenv("POCL_TEMP_DIR=pocl");
|
|
||||||
putenv("POCL_CACHE_DIR=pocl");
|
|
||||||
putenv("POCL_WORK_GROUP_METHOD=spmd");
|
|
||||||
if(argc >= 2){
|
|
||||||
printf("argv[1]:%s:\n",argv[1]);
|
|
||||||
if(!strcmp(argv[1], "h"))
|
|
||||||
putenv("POCL_WORK_GROUP_METHOD=spmd");
|
|
||||||
if(!strcmp(argv[1], "c"))
|
|
||||||
putenv("POCL_CROSS_COMPILE=1");
|
|
||||||
}
|
|
||||||
if(argc >= 3){
|
|
||||||
printf("argv[2]:%s:\n",argv[2]);
|
|
||||||
if(!strcmp(argv[2], "h"))
|
|
||||||
putenv("POCL_WORK_GROUP_METHOD=spmd");
|
|
||||||
if(!strcmp(argv[2], "c"))
|
|
||||||
putenv("POCL_CROSS_COMPILE=1");
|
|
||||||
}
|
|
||||||
|
|
||||||
//putenv("LD_LIBRARY_PATH=/scratch/colins/build/linux/fs/lib");
|
cl_platform_id platform_id;
|
||||||
//putenv("LTDL_LIBRARY_PATH=/scratch/colins/build/linux/fs/lib");
|
cl_device_id device_id;
|
||||||
//lt_dlsetsearchpath("/scratch/colins/build/linux/fs/lib");
|
size_t binary_size;
|
||||||
//printf("SEARCH_PATH:%s\n",lt_dlgetsearchpath());
|
int i;
|
||||||
cl_platform_id platforms[100];
|
|
||||||
cl_uint platforms_n = 0;
|
|
||||||
CL_CHECK(clGetPlatformIDs(100, platforms, &platforms_n));
|
|
||||||
|
|
||||||
printf("=== %d OpenCL platform(s) found: ===\n", platforms_n);
|
// Getting platform and device information
|
||||||
for (int i=0; i<platforms_n; i++)
|
CL_CHECK(clGetPlatformIDs(1, &platform_id, NULL));
|
||||||
{
|
CL_CHECK(clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, NULL));
|
||||||
char buffer[10240];
|
|
||||||
printf(" -- %d --\n", i);
|
|
||||||
CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_PROFILE, 10240, buffer, NULL));
|
|
||||||
printf(" PROFILE = %s\n", buffer);
|
|
||||||
CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_VERSION, 10240, buffer, NULL));
|
|
||||||
printf(" VERSION = %s\n", buffer);
|
|
||||||
CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, 10240, buffer, NULL));
|
|
||||||
printf(" NAME = %s\n", buffer);
|
|
||||||
CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, 10240, buffer, NULL));
|
|
||||||
printf(" VENDOR = %s\n", buffer);
|
|
||||||
CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, 10240, buffer, NULL));
|
|
||||||
printf(" EXTENSIONS = %s\n", buffer);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (platforms_n == 0)
|
cl_context context;
|
||||||
return 1;
|
context = CL_CHECK_ERR(clCreateContext(NULL, 1, &device_id, &pfn_notify, NULL, &_err));
|
||||||
|
|
||||||
cl_device_id devices[100];
|
cl_command_queue queue;
|
||||||
cl_uint devices_n = 0;
|
queue = CL_CHECK_ERR(clCreateCommandQueue(context, device_id, CL_QUEUE_PROFILING_ENABLE, &_err));
|
||||||
// CL_CHECK(clGetDeviceIDs(NULL, CL_DEVICE_TYPE_ALL, 100, devices, &devices_n));
|
|
||||||
CL_CHECK(clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_GPU, 100, devices, &devices_n));
|
|
||||||
|
|
||||||
printf("=== %d OpenCL device(s) found on platform:\n", platforms_n);
|
|
||||||
for (int i=0; i<devices_n; i++)
|
|
||||||
{
|
|
||||||
char buffer[10240];
|
|
||||||
cl_uint buf_uint;
|
|
||||||
cl_ulong buf_ulong;
|
|
||||||
size_t wi_size[3];
|
|
||||||
printf(" -- %d --\n", i);
|
|
||||||
CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(buffer), buffer, NULL));
|
|
||||||
printf(" DEVICE_NAME = %s\n", buffer);
|
|
||||||
CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_VENDOR, sizeof(buffer), buffer, NULL));
|
|
||||||
printf(" DEVICE_VENDOR = %s\n", buffer);
|
|
||||||
CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_VERSION, sizeof(buffer), buffer, NULL));
|
|
||||||
printf(" DEVICE_VERSION = %s\n", buffer);
|
|
||||||
CL_CHECK(clGetDeviceInfo(devices[i], CL_DRIVER_VERSION, sizeof(buffer), buffer, NULL));
|
|
||||||
printf(" DRIVER_VERSION = %s\n", buffer);
|
|
||||||
CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(buf_uint), &buf_uint, NULL));
|
|
||||||
printf(" DEVICE_MAX_COMPUTE_UNITS = %u\n", (unsigned int)buf_uint);
|
|
||||||
CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(buf_uint), &buf_uint, NULL));
|
|
||||||
printf(" DEVICE_MAX_CLOCK_FREQUENCY = %u\n", (unsigned int)buf_uint);
|
|
||||||
CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(buf_ulong), &buf_ulong, NULL));
|
|
||||||
printf(" DEVICE_GLOBAL_MEM_SIZE = %llu\n", (unsigned long long)buf_ulong);
|
|
||||||
CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(wi_size), &wi_size, NULL));
|
|
||||||
printf(" DEVICE_MAX_WG_SIZE X=%ld,Y=%ld,Z=%ld\n", wi_size[0], wi_size[1], wi_size[2]);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (devices_n == 0)
|
|
||||||
return 1;
|
|
||||||
|
|
||||||
cl_context context;
|
|
||||||
context = CL_CHECK_ERR(clCreateContext(NULL, 1, devices+1, &pfn_notify, NULL, &_err));
|
|
||||||
|
|
||||||
cl_command_queue queue;
|
|
||||||
queue = CL_CHECK_ERR(clCreateCommandQueue(context, devices[1], CL_QUEUE_PROFILING_ENABLE, &_err));
|
|
||||||
|
|
||||||
cl_kernel kernel = 0;
|
|
||||||
cl_mem memObjects[2] = {0,0};
|
|
||||||
|
|
||||||
|
cl_kernel kernel = 0;
|
||||||
|
cl_mem memObjects[2] = {0, 0};
|
||||||
|
|
||||||
// Create OpenCL program - first attempt to load cached binary.
|
// Create OpenCL program - first attempt to load cached binary.
|
||||||
// If that is not available, then create the program from source
|
// If that is not available, then create the program from source
|
||||||
// and store the binary for future use.
|
// and store the binary for future use.
|
||||||
std::cout << "Attempting to create program from binary..." << std::endl;
|
std::cout << "Attempting to create program from binary..." << std::endl;
|
||||||
cl_program program = CreateProgramFromBinary(context, devices[1], "kernel.cl.bin");
|
cl_program program =
|
||||||
if (program == NULL)
|
clCreateProgramWithBuiltInKernels(context, 1, &device_id, "saxpy", NULL);
|
||||||
{
|
if (program == NULL) {
|
||||||
std::cout << "Binary not loaded, create from source..." << std::endl;
|
std::cerr << "Failed to write program binary" << std::endl;
|
||||||
program = CreateProgram(context, devices[1], "kernel.cl");
|
Cleanup(context, queue, program, kernel, memObjects);
|
||||||
if (program == NULL)
|
return 1;
|
||||||
{
|
} else {
|
||||||
Cleanup(context, queue, program, kernel, memObjects);
|
std::cout << "Read program from binary." << std::endl;
|
||||||
return 1;
|
}
|
||||||
}
|
|
||||||
|
|
||||||
std::cout << "Save program binary for future run..." << std::endl;
|
// Build program
|
||||||
if (SaveProgramBinary(program, devices[1], "kernel.cl.bin") == false)
|
CL_CHECK(clBuildProgram(program, 1, &device_id, NULL, NULL, NULL));
|
||||||
{
|
|
||||||
std::cerr << "Failed to write program binary" << std::endl;
|
|
||||||
Cleanup(context, queue, program, kernel, memObjects);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
std::cout << "Read program from binary." << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
printf("attempting to create input buffer\n");
|
printf("attempting to create input buffer\n");
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
cl_mem input_buffer;
|
cl_mem input_buffer;
|
||||||
input_buffer = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float)*NUM_DATA, NULL, &_err));
|
input_buffer = CL_CHECK_ERR(clCreateBuffer(
|
||||||
|
context, CL_MEM_READ_ONLY, sizeof(float) * NUM_DATA, NULL, &_err));
|
||||||
|
|
||||||
printf("attempting to create output buffer\n");
|
printf("attempting to create output buffer\n");
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
cl_mem output_buffer;
|
cl_mem output_buffer;
|
||||||
output_buffer = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float)*NUM_DATA, NULL, &_err));
|
output_buffer = CL_CHECK_ERR(clCreateBuffer(
|
||||||
|
context, CL_MEM_WRITE_ONLY, sizeof(float) * NUM_DATA, NULL, &_err));
|
||||||
|
|
||||||
memObjects[0] = input_buffer;
|
memObjects[0] = input_buffer;
|
||||||
memObjects[1] = output_buffer;
|
memObjects[1] = output_buffer;
|
||||||
|
|
||||||
float factor = ((float)rand()/(float)(RAND_MAX)) * 100.0;
|
float factor = ((float)rand() / (float)(RAND_MAX)) * 100.0;
|
||||||
|
|
||||||
printf("attempting to create kernel\n");
|
printf("attempting to create kernel\n");
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
kernel = CL_CHECK_ERR(clCreateKernel(program, "saxpy", &_err));
|
kernel = CL_CHECK_ERR(clCreateKernel(program, "saxpy", &_err));
|
||||||
printf("setting up kernel args cl_mem:%lx \n",input_buffer);
|
printf("setting up kernel args cl_mem:%lx \n", input_buffer);
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
CL_CHECK(clSetKernelArg(kernel, 0, sizeof(input_buffer), &input_buffer));
|
CL_CHECK(clSetKernelArg(kernel, 0, sizeof(input_buffer), &input_buffer));
|
||||||
CL_CHECK(clSetKernelArg(kernel, 1, sizeof(output_buffer), &output_buffer));
|
CL_CHECK(clSetKernelArg(kernel, 1, sizeof(output_buffer), &output_buffer));
|
||||||
CL_CHECK(clSetKernelArg(kernel, 2, sizeof(factor), &factor));
|
CL_CHECK(clSetKernelArg(kernel, 2, sizeof(factor), &factor));
|
||||||
|
|
||||||
printf("attempting to enqueue write buffer\n");
|
printf("attempting to enqueue write buffer\n");
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
for (int i=0; i<NUM_DATA; i++) {
|
for (int i = 0; i < NUM_DATA; i++) {
|
||||||
float in = ((float)rand()/(float)(RAND_MAX)) * 100.0;
|
float in = ((float)rand() / (float)(RAND_MAX)) * 100.0;
|
||||||
CL_CHECK(clEnqueueWriteBuffer(queue, input_buffer, CL_TRUE, i*sizeof(float), 4, &in, 0, NULL, NULL));
|
CL_CHECK(clEnqueueWriteBuffer(queue, input_buffer, CL_TRUE,
|
||||||
}
|
i * sizeof(float), 4, &in, 0, NULL, NULL));
|
||||||
|
}
|
||||||
|
|
||||||
cl_event kernel_completion;
|
cl_event kernel_completion;
|
||||||
size_t global_work_size[1] = { NUM_DATA };
|
size_t global_work_size[1] = {NUM_DATA};
|
||||||
printf("attempting to enqueue kernel\n");
|
printf("attempting to enqueue kernel\n");
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size, NULL, 0, NULL, &kernel_completion));
|
CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size,
|
||||||
|
NULL, 0, NULL, &kernel_completion));
|
||||||
printf("Enqueue'd kerenel\n");
|
printf("Enqueue'd kerenel\n");
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
cl_ulong time_start, time_end;
|
cl_ulong time_start, time_end;
|
||||||
CL_CHECK(clWaitForEvents(1, &kernel_completion));
|
CL_CHECK(clWaitForEvents(1, &kernel_completion));
|
||||||
CL_CHECK(clGetEventProfilingInfo(kernel_completion, CL_PROFILING_COMMAND_START, sizeof(time_start), &time_start, NULL));
|
CL_CHECK(clGetEventProfilingInfo(kernel_completion,
|
||||||
CL_CHECK(clGetEventProfilingInfo(kernel_completion, CL_PROFILING_COMMAND_END, sizeof(time_end), &time_end, NULL));
|
CL_PROFILING_COMMAND_START,
|
||||||
|
sizeof(time_start), &time_start, NULL));
|
||||||
|
CL_CHECK(clGetEventProfilingInfo(kernel_completion, CL_PROFILING_COMMAND_END,
|
||||||
|
sizeof(time_end), &time_end, NULL));
|
||||||
double elapsed = time_end - time_start;
|
double elapsed = time_end - time_start;
|
||||||
printf("time(ns):%lg\n",elapsed);
|
printf("time(ns):%lg\n", elapsed);
|
||||||
CL_CHECK(clReleaseEvent(kernel_completion));
|
CL_CHECK(clReleaseEvent(kernel_completion));
|
||||||
|
|
||||||
printf("Result:");
|
printf("Result:");
|
||||||
for (int i=0; i<NUM_DATA; i++) {
|
for (int i = 0; i < NUM_DATA; i++) {
|
||||||
float data;
|
float data;
|
||||||
CL_CHECK(clEnqueueReadBuffer(queue, output_buffer, CL_TRUE, i*sizeof(float), 4, &data, 0, NULL, NULL));
|
CL_CHECK(clEnqueueReadBuffer(queue, output_buffer, CL_TRUE,
|
||||||
//printf(" %f", data);
|
i * sizeof(float), 4, &data, 0, NULL, NULL));
|
||||||
}
|
// printf(" %f", data);
|
||||||
printf("\n");
|
}
|
||||||
|
printf("\n");
|
||||||
|
|
||||||
CL_CHECK(clReleaseMemObject(memObjects[0]));
|
CL_CHECK(clReleaseMemObject(memObjects[0]));
|
||||||
CL_CHECK(clReleaseMemObject(memObjects[1]));
|
CL_CHECK(clReleaseMemObject(memObjects[1]));
|
||||||
|
|
||||||
CL_CHECK(clReleaseKernel(kernel));
|
CL_CHECK(clReleaseKernel(kernel));
|
||||||
CL_CHECK(clReleaseProgram(program));
|
CL_CHECK(clReleaseProgram(program));
|
||||||
CL_CHECK(clReleaseContext(context));
|
CL_CHECK(clReleaseContext(context));
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@@ -1,33 +1,35 @@
|
|||||||
|
RISCV_TOOL_PATH = $(wildcard ~/dev/riscv-gnu-toolchain/drops)
|
||||||
|
POCL_CC_PATH = $(wildcard ~/dev/pocl/drops_riscv_cc)
|
||||||
|
POCL_INC_PATH = $(wildcard ../include)
|
||||||
|
POCL_LIB_PATH = $(wildcard ../lib)
|
||||||
|
VX_RT_PATH = $(wildcard ../../../runtime)
|
||||||
|
VX_SIMX_PATH = $(wildcard ../../../simX/obj_dir)
|
||||||
|
|
||||||
RISCV_TOOL_PATH=$(wildcard ~/dev/riscv-gnu-toolchain/drops)
|
CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
|
||||||
|
CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
|
||||||
|
DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
|
||||||
|
HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
|
||||||
|
GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
|
||||||
|
|
||||||
POCL_CC_PATH=$(wildcard ~/dev/pocl/drops_riscv_cc)
|
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
|
||||||
POCL_RT_PATH=$(wildcard ~/dev/pocl/drops_riscv_rt)
|
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.s
|
||||||
|
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
|
||||||
|
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
|
||||||
|
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
|
||||||
|
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
|
||||||
|
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c
|
||||||
|
VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST)
|
||||||
|
|
||||||
VX_RT_PATH=$(wildcard ../../../runtime)
|
VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld
|
||||||
VX_SIMX_PATH=$(wildcard ../../../simX/obj_dir)
|
|
||||||
|
|
||||||
CC=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
|
CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32
|
||||||
CXX=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
|
|
||||||
DMP=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
|
|
||||||
HEX=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
|
|
||||||
NEWLIB_PATH=$(RISCV_TOOL_PATH)/riscv32-unknown-elf/lib
|
|
||||||
|
|
||||||
VX_NEWLIB = $(VX_RT_PATH)/newlib/newlib.c
|
|
||||||
VX_STR = $(VX_RT_PATH)/startup/vx_start.s
|
|
||||||
VX_INT = $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
|
|
||||||
VX_IO = $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
|
|
||||||
VX_FIO = $(VX_RT_PATH)/fileio/fileio.s
|
|
||||||
VX_API = $(VX_RT_PATH)/vx_api/vx_api.c
|
|
||||||
|
|
||||||
VX_SRCS = $(VX_STR) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST)
|
|
||||||
|
|
||||||
CXXFLAGS = -g -O0 -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld -march=rv32im -mabi=ilp32
|
|
||||||
CXXFLAGS += -ffreestanding # program may not begin at main()
|
CXXFLAGS += -ffreestanding # program may not begin at main()
|
||||||
CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
|
|
||||||
CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections
|
CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections
|
||||||
|
CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
|
||||||
|
CXXFLAGS += -I$(POCL_INC_PATH)
|
||||||
|
|
||||||
LIBS = -lOpenCL
|
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
||||||
|
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
|
||||||
|
|
||||||
PROJECT=sfilter
|
PROJECT=sfilter
|
||||||
|
|
||||||
@@ -37,7 +39,10 @@ lib$(PROJECT).a: kernel.cl
|
|||||||
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
|
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
|
||||||
|
|
||||||
$(PROJECT).elf: main.cc lib$(PROJECT).a
|
$(PROJECT).elf: main.cc lib$(PROJECT).a
|
||||||
$(CXX) $(CXXFLAGS) -I$(POCL_RT_PATH)/include -L$(POCL_RT_PATH)/lib/static -L. $(VX_SRCS) main.cc -Wl,--whole-archive -l$(PROJECT) -Wl,--no-whole-archive $(LIBS) -o $(PROJECT).elf
|
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) main.cc $(VX_LIBS) -o $(PROJECT).elf
|
||||||
|
|
||||||
|
$(PROJECT).qemu: main.cc lib$(PROJECT).a
|
||||||
|
$(CXX) $(CXXFLAGS) main.cc $(QEMU_LIBS) -o $(PROJECT).qemu
|
||||||
|
|
||||||
$(PROJECT).hex: $(PROJECT).elf
|
$(PROJECT).hex: $(PROJECT).elf
|
||||||
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
|
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
|
||||||
@@ -45,8 +50,17 @@ $(PROJECT).hex: $(PROJECT).elf
|
|||||||
$(PROJECT).dump: $(PROJECT).elf
|
$(PROJECT).dump: $(PROJECT).elf
|
||||||
$(DMP) -D $(PROJECT).elf > $(PROJECT).dump
|
$(DMP) -D $(PROJECT).elf > $(PROJECT).dump
|
||||||
|
|
||||||
run:
|
run: $(PROJECT).hex
|
||||||
$(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug
|
POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug
|
||||||
|
|
||||||
|
qemu: $(PROJECT).qemu
|
||||||
|
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -d in_asm -D debug.log $(PROJECT).qemu
|
||||||
|
|
||||||
|
gdb-s: $(PROJECT).qemu
|
||||||
|
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu
|
||||||
|
|
||||||
|
gdb-c: $(PROJECT).qemu
|
||||||
|
$(GDB) $(PROJECT).qemu
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf *.elf *.dump *.hex *.a *.pocl
|
rm -rf *.o *.elf *.dump *.hex *.a *.pocl *.qemu
|
||||||
Binary file not shown.
@@ -17,95 +17,95 @@
|
|||||||
* along with this program; if not, write to the Free Software
|
* along with this program; if not, write to the Free Software
|
||||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||||
*
|
*
|
||||||
* gcc -o cldemo -std=gnu99 -Wall -I/usr/include/nvidia-current cldemo.c -lOpenCL
|
* gcc -o cldemo -std=gnu99 -Wall -I/usr/include/nvidia-current cldemo.c
|
||||||
|
* -lOpenCL
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <CL/cl.h>
|
#include <CL/cl.h>
|
||||||
|
|
||||||
#include <iostream>
|
|
||||||
#include <fstream>
|
|
||||||
#include <sstream>
|
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
|
#include <fstream>
|
||||||
|
#include <iostream>
|
||||||
|
#include <math.h>
|
||||||
|
#include <sstream>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <math.h>
|
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
|
||||||
#define NUM_DATA 66
|
#define NUM_DATA 66
|
||||||
|
|
||||||
#define CL_CHECK(_expr) \
|
#define CL_CHECK(_expr) \
|
||||||
do { \
|
do { \
|
||||||
cl_int _err = _expr; \
|
cl_int _err = _expr; \
|
||||||
if (_err == CL_SUCCESS) \
|
if (_err == CL_SUCCESS) \
|
||||||
break; \
|
break; \
|
||||||
fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
|
fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
|
||||||
abort(); \
|
abort(); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define CL_CHECK_ERR(_expr) \
|
#define CL_CHECK_ERR(_expr) \
|
||||||
({ \
|
({ \
|
||||||
cl_int _err = CL_INVALID_VALUE; \
|
cl_int _err = CL_INVALID_VALUE; \
|
||||||
typeof(_expr) _ret = _expr; \
|
typeof(_expr) _ret = _expr; \
|
||||||
if (_err != CL_SUCCESS) { \
|
if (_err != CL_SUCCESS) { \
|
||||||
fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
|
fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
|
||||||
abort(); \
|
abort(); \
|
||||||
} \
|
} \
|
||||||
_ret; \
|
_ret; \
|
||||||
})
|
})
|
||||||
|
|
||||||
void pfn_notify(const char *errinfo, const void *private_info, size_t cb, void *user_data)
|
void pfn_notify(const char *errinfo, const void *private_info, size_t cb,
|
||||||
{
|
void *user_data) {
|
||||||
fprintf(stderr, "OpenCL Error (via pfn_notify): %s\n", errinfo);
|
fprintf(stderr, "OpenCL Error (via pfn_notify): %s\n", errinfo);
|
||||||
}
|
}
|
||||||
// include pocl float to half conversions
|
// include pocl float to half conversions
|
||||||
typedef union
|
typedef union {
|
||||||
{
|
|
||||||
int32_t i;
|
int32_t i;
|
||||||
float f;
|
float f;
|
||||||
} FloatConvUnion;
|
} FloatConvUnion;
|
||||||
cl_half
|
cl_half poclu_float_to_cl_half(float value) {
|
||||||
poclu_float_to_cl_half(float value)
|
|
||||||
{
|
|
||||||
FloatConvUnion u;
|
FloatConvUnion u;
|
||||||
u.f = value;
|
u.f = value;
|
||||||
cl_half half = (u.i >> 16) & 0x8000; // sign
|
cl_half half = (u.i >> 16) & 0x8000; // sign
|
||||||
cl_half fraction = (u.i >> 12) & 0x007ff; // fraction with extra bit for rounding
|
cl_half fraction =
|
||||||
cl_half exponent = (u.i >> 23) & 0xff; // exponent
|
(u.i >> 12) & 0x007ff; // fraction with extra bit for rounding
|
||||||
|
cl_half exponent = (u.i >> 23) & 0xff; // exponent
|
||||||
|
|
||||||
if(exponent < 0x0067) // Return signed zero if zero or value is too small for denormal half
|
if (exponent < 0x0067) // Return signed zero if zero or value is too small for
|
||||||
|
// denormal half
|
||||||
return half;
|
return half;
|
||||||
|
|
||||||
if(exponent > 0x008e){// value was NaN or Inf
|
if (exponent > 0x008e) { // value was NaN or Inf
|
||||||
half |= 0x7c00u; // Make into inf
|
half |= 0x7c00u; // Make into inf
|
||||||
half |= exponent == 255 && (u.i & 0x007fffffu); // If value was NaN make this into NaN
|
half |= exponent == 255 &&
|
||||||
|
(u.i & 0x007fffffu); // If value was NaN make this into NaN
|
||||||
return half;
|
return half;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(exponent < 0x0071){// Denormal
|
if (exponent < 0x0071) { // Denormal
|
||||||
fraction |= 0x0800u;
|
fraction |= 0x0800u;
|
||||||
|
|
||||||
// rounding
|
// rounding
|
||||||
half |= (fraction >> (0x0072 - exponent)) + ((fraction >> (0x0071 - exponent)) & 1);
|
half |= (fraction >> (0x0072 - exponent)) +
|
||||||
|
((fraction >> (0x0071 - exponent)) & 1);
|
||||||
return half;
|
return half;
|
||||||
}
|
}
|
||||||
|
|
||||||
half |= ((exponent - 0x0070) << 10) | (fraction >> 1);
|
half |= ((exponent - 0x0070) << 10) | (fraction >> 1);
|
||||||
half += fraction & 1;// rounding
|
half += fraction & 1; // rounding
|
||||||
return half;
|
return half;
|
||||||
}
|
}
|
||||||
#ifndef INFINITY
|
#ifndef INFINITY
|
||||||
#define INFINITY 1.0/0.0
|
#define INFINITY 1.0 / 0.0
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef NAN
|
#ifndef NAN
|
||||||
#define NAN 0.0/0.0
|
#define NAN 0.0 / 0.0
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
float
|
float poclu_cl_half_to_float(cl_half value) {
|
||||||
poclu_cl_half_to_float(cl_half value)
|
|
||||||
{
|
|
||||||
if (value == 0xFC00) {
|
if (value == 0xFC00) {
|
||||||
return -INFINITY;
|
return -INFINITY;
|
||||||
}
|
}
|
||||||
@@ -131,384 +131,78 @@ poclu_cl_half_to_float(cl_half value)
|
|||||||
return v;
|
return v;
|
||||||
}
|
}
|
||||||
|
|
||||||
///
|
|
||||||
// Create an OpenCL program from the kernel source file
|
|
||||||
//
|
|
||||||
cl_program CreateProgram(cl_context context, cl_device_id device, const char* fileName)
|
|
||||||
{
|
|
||||||
cl_int errNum;
|
|
||||||
cl_program program;
|
|
||||||
|
|
||||||
std::ifstream kernelFile(fileName, std::ios::in);
|
|
||||||
if (!kernelFile.is_open())
|
|
||||||
{
|
|
||||||
std::cerr << "Failed to open file for reading: " << fileName << std::endl;
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::ostringstream oss;
|
|
||||||
oss << kernelFile.rdbuf();
|
|
||||||
|
|
||||||
std::string srcStdStr = oss.str();
|
|
||||||
const char *srcStr = srcStdStr.c_str();
|
|
||||||
program = clCreateProgramWithSource(context, 1,
|
|
||||||
(const char**)&srcStr,
|
|
||||||
NULL, NULL);
|
|
||||||
if (program == NULL)
|
|
||||||
{
|
|
||||||
std::cerr << "Failed to create CL program from source." << std::endl;
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
errNum = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
|
|
||||||
if (errNum != CL_SUCCESS)
|
|
||||||
{
|
|
||||||
// Determine the reason for the error
|
|
||||||
char buildLog[16384];
|
|
||||||
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
|
|
||||||
sizeof(buildLog), buildLog, NULL);
|
|
||||||
|
|
||||||
std::cerr << "Error in kernel: " << std::endl;
|
|
||||||
std::cerr << buildLog;
|
|
||||||
clReleaseProgram(program);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
return program;
|
|
||||||
}
|
|
||||||
|
|
||||||
//
|
|
||||||
///
|
|
||||||
// Retrieve program binary for all of the devices attached to the
|
|
||||||
// program and store the one for the device passed in
|
|
||||||
//
|
|
||||||
bool SaveProgramBinary(cl_program program, cl_device_id device, const char* fileName)
|
|
||||||
{
|
|
||||||
//cl_uint numDevices = malloc(sizeof(cl_uint));
|
|
||||||
//cl_uint* numDevices = malloc(sizeof(cl_uint));
|
|
||||||
cl_int errNum;
|
|
||||||
|
|
||||||
printf("try getting program info\n");
|
|
||||||
// 1 - Query for number of devices attached to program
|
|
||||||
/*errNum = clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint),
|
|
||||||
&numDevices, NULL);
|
|
||||||
printf("Got program_num_devices\n");
|
|
||||||
if (errNum != CL_SUCCESS)
|
|
||||||
{
|
|
||||||
std::cerr << "Error querying for number of devices." << std::endl;
|
|
||||||
return false;
|
|
||||||
}*/
|
|
||||||
|
|
||||||
// 2 - Get all of the Device IDs
|
|
||||||
cl_device_id *devices = new cl_device_id[1];
|
|
||||||
errNum = clGetProgramInfo(program, CL_PROGRAM_DEVICES,
|
|
||||||
sizeof(cl_device_id) * 1,
|
|
||||||
devices, NULL);
|
|
||||||
printf("Got program_devices\n");
|
|
||||||
if (errNum != CL_SUCCESS)
|
|
||||||
{
|
|
||||||
std::cerr << "Error querying for devices." << std::endl;
|
|
||||||
delete [] devices;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// 3 - Determine the size of each program binary
|
|
||||||
size_t *programBinarySizes = new size_t [1];
|
|
||||||
errNum = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES,
|
|
||||||
sizeof(size_t) * 1,
|
|
||||||
programBinarySizes, NULL);
|
|
||||||
printf("Got program_binary_sizes\n");
|
|
||||||
if (errNum != CL_SUCCESS)
|
|
||||||
{
|
|
||||||
std::cerr << "Error querying for program binary sizes." << std::endl;
|
|
||||||
delete [] devices;
|
|
||||||
delete [] programBinarySizes;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned char **programBinaries = new unsigned char*[1];
|
|
||||||
for (cl_uint i = 0; i < 1; i++)
|
|
||||||
{
|
|
||||||
programBinaries[i] = new unsigned char[programBinarySizes[i]];
|
|
||||||
}
|
|
||||||
|
|
||||||
// 4 - Get all of the program binaries
|
|
||||||
errNum = clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(unsigned char*) * 1,
|
|
||||||
programBinaries, NULL);
|
|
||||||
printf("Got program_binarys\n");
|
|
||||||
if (errNum != CL_SUCCESS)
|
|
||||||
{
|
|
||||||
std::cerr << "Error querying for program binaries" << std::endl;
|
|
||||||
|
|
||||||
delete [] devices;
|
|
||||||
delete [] programBinarySizes;
|
|
||||||
for (cl_uint i = 0; i < 1; i++)
|
|
||||||
{
|
|
||||||
delete [] programBinaries[i];
|
|
||||||
}
|
|
||||||
delete [] programBinaries;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// 5 - Finally store the binaries for the device requested out to disk for future reading.
|
|
||||||
for (cl_uint i = 0; i < 1; i++)
|
|
||||||
{
|
|
||||||
// Store the binary just for the device requested. In a scenario where
|
|
||||||
// multiple devices were being used you would save all of the binaries out here.
|
|
||||||
if (devices[i] == device)
|
|
||||||
{
|
|
||||||
FILE *fp = fopen(fileName, "wb");
|
|
||||||
if(fp ==NULL){
|
|
||||||
delete [] devices;
|
|
||||||
delete [] programBinarySizes;
|
|
||||||
for (cl_uint i = 0; i < 1; i++)
|
|
||||||
{
|
|
||||||
delete [] programBinaries[i];
|
|
||||||
}
|
|
||||||
delete [] programBinaries;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
printf("Opened file\n");
|
|
||||||
fwrite(programBinaries[i], 1, programBinarySizes[i], fp);
|
|
||||||
printf("wrote file\n");
|
|
||||||
fclose(fp);
|
|
||||||
printf("close file\n");
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Cleanup
|
|
||||||
delete [] devices;
|
|
||||||
delete [] programBinarySizes;
|
|
||||||
for (cl_uint i = 0; i < 1; i++)
|
|
||||||
{
|
|
||||||
delete [] programBinaries[i];
|
|
||||||
}
|
|
||||||
delete [] programBinaries;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
///
|
|
||||||
// Attempt to create the program object from a cached binary. Note that
|
|
||||||
// on first run this will fail because the binary has not yet been created.
|
|
||||||
//
|
|
||||||
cl_program CreateProgramFromBinary(cl_context context, cl_device_id device, const char* fileName)
|
|
||||||
{
|
|
||||||
FILE *fp = fopen(fileName, "rb");
|
|
||||||
if (fp == NULL)
|
|
||||||
{
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Determine the size of the binary
|
|
||||||
size_t binarySize;
|
|
||||||
fseek(fp, 0, SEEK_END);
|
|
||||||
binarySize = ftell(fp);
|
|
||||||
rewind(fp);
|
|
||||||
|
|
||||||
unsigned char *programBinary = new unsigned char[binarySize];
|
|
||||||
fread(programBinary, 1, binarySize, fp);
|
|
||||||
fclose(fp);
|
|
||||||
|
|
||||||
cl_int errNum = 0;
|
|
||||||
cl_program program;
|
|
||||||
cl_int binaryStatus;
|
|
||||||
|
|
||||||
program = clCreateProgramWithBinary(context,
|
|
||||||
1,
|
|
||||||
&device,
|
|
||||||
&binarySize,
|
|
||||||
(const unsigned char**)&programBinary,
|
|
||||||
&binaryStatus,
|
|
||||||
&errNum);
|
|
||||||
delete [] programBinary;
|
|
||||||
if (errNum != CL_SUCCESS)
|
|
||||||
{
|
|
||||||
std::cerr << "Error loading program binary." << std::endl;
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (binaryStatus != CL_SUCCESS)
|
|
||||||
{
|
|
||||||
std::cerr << "Invalid binary for device" << std::endl;
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
errNum = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
|
|
||||||
if (errNum != CL_SUCCESS)
|
|
||||||
{
|
|
||||||
printf("build errNum:%d\n", errNum);
|
|
||||||
// Determine the reason for the error
|
|
||||||
char buildLog[16384];
|
|
||||||
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
|
|
||||||
sizeof(buildLog), buildLog, NULL);
|
|
||||||
|
|
||||||
std::cerr << "Error in program: " << std::endl;
|
|
||||||
std::cerr << buildLog << std::endl;
|
|
||||||
clReleaseProgram(program);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
return program;
|
|
||||||
}
|
|
||||||
|
|
||||||
///
|
///
|
||||||
// Cleanup any created OpenCL resources
|
// Cleanup any created OpenCL resources
|
||||||
//
|
//
|
||||||
void Cleanup(cl_context context, cl_command_queue commandQueue,
|
void Cleanup(cl_context context, cl_command_queue commandQueue,
|
||||||
cl_program program, cl_kernel kernel, cl_mem memObjects[3])
|
cl_program program, cl_kernel kernel, cl_mem memObjects[3]) {
|
||||||
{
|
for (int i = 0; i < 3; i++) {
|
||||||
for (int i = 0; i < 3; i++)
|
if (memObjects[i] != 0)
|
||||||
{
|
clReleaseMemObject(memObjects[i]);
|
||||||
if (memObjects[i] != 0)
|
}
|
||||||
clReleaseMemObject(memObjects[i]);
|
if (commandQueue != 0)
|
||||||
}
|
clReleaseCommandQueue(commandQueue);
|
||||||
if (commandQueue != 0)
|
|
||||||
clReleaseCommandQueue(commandQueue);
|
|
||||||
|
|
||||||
if (kernel != 0)
|
if (kernel != 0)
|
||||||
clReleaseKernel(kernel);
|
clReleaseKernel(kernel);
|
||||||
|
|
||||||
if (program != 0)
|
if (program != 0)
|
||||||
clReleaseProgram(program);
|
clReleaseProgram(program);
|
||||||
|
|
||||||
if (context != 0)
|
|
||||||
clReleaseContext(context);
|
|
||||||
|
|
||||||
|
if (context != 0)
|
||||||
|
clReleaseContext(context);
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char **argv)
|
int main(int argc, char **argv) {
|
||||||
{
|
|
||||||
printf("enter demo main\n");
|
printf("enter demo main\n");
|
||||||
fflush(stdout);
|
|
||||||
putenv("POCL_VERBOSE=1");
|
|
||||||
putenv("POCL_DEVICES=basic");
|
|
||||||
putenv("POCL_LEAVE_TEMP_DIRS=1");
|
|
||||||
putenv("POCL_LEAVE_KERNEL_COMPILER_TEMP_FILES=1");
|
|
||||||
putenv("POCL_TEMP_DIR=pocl");
|
|
||||||
putenv("POCL_CACHE_DIR=pocl");
|
|
||||||
putenv("POCL_WORK_GROUP_METHOD=spmd");
|
|
||||||
if(argc >= 2){
|
|
||||||
printf("argv[1]:%s:\n",argv[1]);
|
|
||||||
if(!strcmp(argv[1], "h"))
|
|
||||||
putenv("POCL_WORK_GROUP_METHOD=spmd");
|
|
||||||
if(!strcmp(argv[1], "c"))
|
|
||||||
putenv("POCL_CROSS_COMPILE=1");
|
|
||||||
}
|
|
||||||
if(argc >= 3){
|
|
||||||
printf("argv[2]:%s:\n",argv[2]);
|
|
||||||
if(!strcmp(argv[2], "h"))
|
|
||||||
putenv("POCL_WORK_GROUP_METHOD=spmd");
|
|
||||||
if(!strcmp(argv[2], "c"))
|
|
||||||
putenv("POCL_CROSS_COMPILE=1");
|
|
||||||
}
|
|
||||||
|
|
||||||
//putenv("LD_LIBRARY_PATH=/scratch/colins/build/linux/fs/lib");
|
cl_platform_id platform_id;
|
||||||
//putenv("LTDL_LIBRARY_PATH=/scratch/colins/build/linux/fs/lib");
|
cl_device_id device_id;
|
||||||
//lt_dlsetsearchpath("/scratch/colins/build/linux/fs/lib");
|
size_t binary_size;
|
||||||
//printf("SEARCH_PATH:%s\n",lt_dlgetsearchpath());
|
int i;
|
||||||
cl_platform_id platforms[100];
|
|
||||||
cl_uint platforms_n = 0;
|
|
||||||
CL_CHECK(clGetPlatformIDs(100, platforms, &platforms_n));
|
|
||||||
|
|
||||||
printf("=== %d OpenCL platform(s) found: ===\n", platforms_n);
|
// Getting platform and device information
|
||||||
for (int i=0; i<platforms_n; i++)
|
CL_CHECK(clGetPlatformIDs(1, &platform_id, NULL));
|
||||||
{
|
CL_CHECK(clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, NULL));
|
||||||
char buffer[10240];
|
|
||||||
printf(" -- %d --\n", i);
|
|
||||||
CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_PROFILE, 10240, buffer, NULL));
|
|
||||||
printf(" PROFILE = %s\n", buffer);
|
|
||||||
CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_VERSION, 10240, buffer, NULL));
|
|
||||||
printf(" VERSION = %s\n", buffer);
|
|
||||||
CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, 10240, buffer, NULL));
|
|
||||||
printf(" NAME = %s\n", buffer);
|
|
||||||
CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, 10240, buffer, NULL));
|
|
||||||
printf(" VENDOR = %s\n", buffer);
|
|
||||||
CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, 10240, buffer, NULL));
|
|
||||||
printf(" EXTENSIONS = %s\n", buffer);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (platforms_n == 0)
|
cl_context context;
|
||||||
return 1;
|
context = CL_CHECK_ERR(clCreateContext(NULL, 1, &device_id, &pfn_notify, NULL, &_err));
|
||||||
|
|
||||||
cl_device_id devices[100];
|
cl_command_queue queue;
|
||||||
cl_uint devices_n = 0;
|
queue = CL_CHECK_ERR(clCreateCommandQueue(context, device_id, CL_QUEUE_PROFILING_ENABLE, &_err));
|
||||||
// CL_CHECK(clGetDeviceIDs(NULL, CL_DEVICE_TYPE_ALL, 100, devices, &devices_n));
|
|
||||||
CL_CHECK(clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_GPU, 100, devices, &devices_n));
|
|
||||||
|
|
||||||
printf("=== %d OpenCL device(s) found on platform:\n", platforms_n);
|
|
||||||
for (int i=0; i<devices_n; i++)
|
|
||||||
{
|
|
||||||
char buffer[10240];
|
|
||||||
cl_uint buf_uint;
|
|
||||||
cl_ulong buf_ulong;
|
|
||||||
printf(" -- %d --\n", i);
|
|
||||||
CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(buffer), buffer, NULL));
|
|
||||||
printf(" DEVICE_NAME = %s\n", buffer);
|
|
||||||
CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_VENDOR, sizeof(buffer), buffer, NULL));
|
|
||||||
printf(" DEVICE_VENDOR = %s\n", buffer);
|
|
||||||
CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_VERSION, sizeof(buffer), buffer, NULL));
|
|
||||||
printf(" DEVICE_VERSION = %s\n", buffer);
|
|
||||||
CL_CHECK(clGetDeviceInfo(devices[i], CL_DRIVER_VERSION, sizeof(buffer), buffer, NULL));
|
|
||||||
printf(" DRIVER_VERSION = %s\n", buffer);
|
|
||||||
CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(buf_uint), &buf_uint, NULL));
|
|
||||||
printf(" DEVICE_MAX_COMPUTE_UNITS = %u\n", (unsigned int)buf_uint);
|
|
||||||
CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(buf_uint), &buf_uint, NULL));
|
|
||||||
printf(" DEVICE_MAX_CLOCK_FREQUENCY = %u\n", (unsigned int)buf_uint);
|
|
||||||
CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(buf_ulong), &buf_ulong, NULL));
|
|
||||||
printf(" DEVICE_GLOBAL_MEM_SIZE = %llu\n", (unsigned long long)buf_ulong);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (devices_n == 0)
|
|
||||||
return 1;
|
|
||||||
|
|
||||||
cl_context context;
|
|
||||||
context = CL_CHECK_ERR(clCreateContext(NULL, 1, devices+1, &pfn_notify, NULL, &_err));
|
|
||||||
|
|
||||||
cl_command_queue queue;
|
|
||||||
queue = CL_CHECK_ERR(clCreateCommandQueue(context, devices[1], CL_QUEUE_PROFILING_ENABLE, &_err));
|
|
||||||
|
|
||||||
cl_kernel kernel = 0;
|
|
||||||
cl_mem memObjects[2] = {0,0};
|
|
||||||
|
|
||||||
|
cl_kernel kernel = 0;
|
||||||
|
cl_mem memObjects[2] = {0, 0};
|
||||||
|
|
||||||
// Create OpenCL program - first attempt to load cached binary.
|
// Create OpenCL program - first attempt to load cached binary.
|
||||||
// If that is not available, then create the program from source
|
// If that is not available, then create the program from source
|
||||||
// and store the binary for future use.
|
// and store the binary for future use.
|
||||||
std::cout << "Attempting to create program from binary..." << std::endl;
|
std::cout << "Attempting to create program from binary..." << std::endl;
|
||||||
cl_program program = CreateProgramFromBinary(context, devices[1], "kernel.cl.bin");
|
cl_program program = clCreateProgramWithBuiltInKernels(context, 1, &device_id, "sfilter", NULL);
|
||||||
if (program == NULL)
|
if (program == NULL) {
|
||||||
{
|
std::cerr << "Failed to write program binary" << std::endl;
|
||||||
std::cout << "Binary not loaded, create from source..." << std::endl;
|
Cleanup(context, queue, program, kernel, memObjects);
|
||||||
program = CreateProgram(context, devices[1], "kernel.cl");
|
return 1;
|
||||||
if (program == NULL)
|
} else {
|
||||||
{
|
std::cout << "Read program from binary." << std::endl;
|
||||||
Cleanup(context, queue, program, kernel, memObjects);
|
}
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::cout << "Save program binary for future run..." << std::endl;
|
// Build program
|
||||||
if (SaveProgramBinary(program, devices[1], "kernel.cl.bin") == false)
|
CL_CHECK(clBuildProgram(program, 1, &device_id, NULL, NULL, NULL));
|
||||||
{
|
|
||||||
std::cerr << "Failed to write program binary" << std::endl;
|
|
||||||
Cleanup(context, queue, program, kernel, memObjects);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
std::cout << "Read program from binary." << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
printf("attempting to create input buffer\n");
|
printf("attempting to create input buffer\n");
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
cl_mem input_buffer;
|
cl_mem input_buffer;
|
||||||
input_buffer = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float)*NUM_DATA*NUM_DATA, NULL, &_err));
|
input_buffer = CL_CHECK_ERR(
|
||||||
|
clCreateBuffer(context, CL_MEM_READ_ONLY,
|
||||||
|
sizeof(float) * NUM_DATA * NUM_DATA, NULL, &_err));
|
||||||
|
|
||||||
printf("attempting to create output buffer\n");
|
printf("attempting to create output buffer\n");
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
cl_mem output_buffer;
|
cl_mem output_buffer;
|
||||||
output_buffer = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float)*NUM_DATA*NUM_DATA, NULL, &_err));
|
output_buffer = CL_CHECK_ERR(
|
||||||
|
clCreateBuffer(context, CL_MEM_WRITE_ONLY,
|
||||||
|
sizeof(float) * NUM_DATA * NUM_DATA, NULL, &_err));
|
||||||
|
|
||||||
memObjects[0] = input_buffer;
|
memObjects[0] = input_buffer;
|
||||||
memObjects[1] = output_buffer;
|
memObjects[1] = output_buffer;
|
||||||
@@ -527,61 +221,67 @@ int main(int argc, char **argv)
|
|||||||
|
|
||||||
printf("attempting to create kernel\n");
|
printf("attempting to create kernel\n");
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
kernel = CL_CHECK_ERR(clCreateKernel(program, "sfilter", &_err));
|
kernel = CL_CHECK_ERR(clCreateKernel(program, "sfilter", &_err));
|
||||||
printf("setting up kernel args cl_mem:%lx \n",input_buffer);
|
printf("setting up kernel args cl_mem:%lx \n", input_buffer);
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
CL_CHECK(clSetKernelArg(kernel, 0, sizeof(input_buffer), &input_buffer));
|
CL_CHECK(clSetKernelArg(kernel, 0, sizeof(input_buffer), &input_buffer));
|
||||||
CL_CHECK(clSetKernelArg(kernel, 1, sizeof(output_buffer), &output_buffer));
|
CL_CHECK(clSetKernelArg(kernel, 1, sizeof(output_buffer), &output_buffer));
|
||||||
CL_CHECK(clSetKernelArg(kernel, 2, sizeof(ldc), (&ldc)));
|
CL_CHECK(clSetKernelArg(kernel, 2, sizeof(ldc), (&ldc)));
|
||||||
CL_CHECK(clSetKernelArg(kernel, 3, sizeof(m0), (&m0)));
|
CL_CHECK(clSetKernelArg(kernel, 3, sizeof(m0), (&m0)));
|
||||||
CL_CHECK(clSetKernelArg(kernel, 4, sizeof(m1), (&m1)));
|
CL_CHECK(clSetKernelArg(kernel, 4, sizeof(m1), (&m1)));
|
||||||
CL_CHECK(clSetKernelArg(kernel, 5, sizeof(m2), (&m2)));
|
CL_CHECK(clSetKernelArg(kernel, 5, sizeof(m2), (&m2)));
|
||||||
CL_CHECK(clSetKernelArg(kernel, 6, sizeof(m3), (&m3)));
|
CL_CHECK(clSetKernelArg(kernel, 6, sizeof(m3), (&m3)));
|
||||||
CL_CHECK(clSetKernelArg(kernel, 7, sizeof(m4), (&m4)));
|
CL_CHECK(clSetKernelArg(kernel, 7, sizeof(m4), (&m4)));
|
||||||
CL_CHECK(clSetKernelArg(kernel, 8, sizeof(m5), (&m5)));
|
CL_CHECK(clSetKernelArg(kernel, 8, sizeof(m5), (&m5)));
|
||||||
CL_CHECK(clSetKernelArg(kernel, 9, sizeof(m6), (&m6)));
|
CL_CHECK(clSetKernelArg(kernel, 9, sizeof(m6), (&m6)));
|
||||||
CL_CHECK(clSetKernelArg(kernel, 10, sizeof(m7), (&m7)));
|
CL_CHECK(clSetKernelArg(kernel, 10, sizeof(m7), (&m7)));
|
||||||
CL_CHECK(clSetKernelArg(kernel, 11, sizeof(m8), (&m8)));
|
CL_CHECK(clSetKernelArg(kernel, 11, sizeof(m8), (&m8)));
|
||||||
|
|
||||||
printf("attempting to enqueue write buffer\n");
|
printf("attempting to enqueue write buffer\n");
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
for (int i=0; i<NUM_DATA*NUM_DATA; i++) {
|
for (int i = 0; i < NUM_DATA * NUM_DATA; i++) {
|
||||||
float in = ((float)rand()/(float)(RAND_MAX)) * 100.0;
|
float in = ((float)rand() / (float)(RAND_MAX)) * 100.0;
|
||||||
CL_CHECK(clEnqueueWriteBuffer(queue, input_buffer, CL_TRUE, i*sizeof(float), 4, &in, 0, NULL, NULL));
|
CL_CHECK(clEnqueueWriteBuffer(queue, input_buffer, CL_TRUE,
|
||||||
}
|
i * sizeof(float), 4, &in, 0, NULL, NULL));
|
||||||
|
}
|
||||||
|
|
||||||
cl_event kernel_completion;
|
cl_event kernel_completion;
|
||||||
size_t global_offset[2] = { 1, 1};
|
size_t global_offset[2] = {1, 1};
|
||||||
size_t global_work_size[2] = { NUM_DATA - 2, NUM_DATA - 2};//avoid the edges
|
size_t global_work_size[2] = {NUM_DATA - 2, NUM_DATA - 2}; // avoid the edges
|
||||||
const size_t local_work_size[2] = { 64, 1 };
|
const size_t local_work_size[2] = {64, 1};
|
||||||
printf("attempting to enqueue kernel\n");
|
printf("attempting to enqueue kernel\n");
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 2, global_offset, global_work_size, local_work_size, 0, NULL, &kernel_completion));
|
CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 2, global_offset,
|
||||||
|
global_work_size, local_work_size, 0, NULL,
|
||||||
|
&kernel_completion));
|
||||||
printf("Enqueue'd kerenel\n");
|
printf("Enqueue'd kerenel\n");
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
cl_ulong time_start, time_end;
|
cl_ulong time_start, time_end;
|
||||||
CL_CHECK(clWaitForEvents(1, &kernel_completion));
|
CL_CHECK(clWaitForEvents(1, &kernel_completion));
|
||||||
CL_CHECK(clGetEventProfilingInfo(kernel_completion, CL_PROFILING_COMMAND_START, sizeof(time_start), &time_start, NULL));
|
CL_CHECK(clGetEventProfilingInfo(kernel_completion,
|
||||||
CL_CHECK(clGetEventProfilingInfo(kernel_completion, CL_PROFILING_COMMAND_END, sizeof(time_end), &time_end, NULL));
|
CL_PROFILING_COMMAND_START,
|
||||||
|
sizeof(time_start), &time_start, NULL));
|
||||||
|
CL_CHECK(clGetEventProfilingInfo(kernel_completion, CL_PROFILING_COMMAND_END,
|
||||||
|
sizeof(time_end), &time_end, NULL));
|
||||||
double elapsed = time_end - time_start;
|
double elapsed = time_end - time_start;
|
||||||
printf("time(ns):%lg\n",elapsed);
|
printf("time(ns):%lg\n", elapsed);
|
||||||
CL_CHECK(clReleaseEvent(kernel_completion));
|
CL_CHECK(clReleaseEvent(kernel_completion));
|
||||||
|
|
||||||
printf("Result:");
|
printf("Result:");
|
||||||
for (int i=0; i<NUM_DATA*NUM_DATA; i++) {
|
for (int i = 0; i < NUM_DATA * NUM_DATA; i++) {
|
||||||
float data;
|
float data;
|
||||||
CL_CHECK(clEnqueueReadBuffer(queue, output_buffer, CL_TRUE, i*sizeof(float), 4, &data, 0, NULL, NULL));
|
CL_CHECK(clEnqueueReadBuffer(queue, output_buffer, CL_TRUE,
|
||||||
//printf(" %f", data);
|
i * sizeof(float), 4, &data, 0, NULL, NULL));
|
||||||
}
|
// printf(" %f", data);
|
||||||
printf("\n");
|
}
|
||||||
|
printf("\n");
|
||||||
|
|
||||||
CL_CHECK(clReleaseMemObject(memObjects[0]));
|
CL_CHECK(clReleaseMemObject(memObjects[0]));
|
||||||
CL_CHECK(clReleaseMemObject(memObjects[1]));
|
CL_CHECK(clReleaseMemObject(memObjects[1]));
|
||||||
|
|
||||||
CL_CHECK(clReleaseKernel(kernel));
|
CL_CHECK(clReleaseKernel(kernel));
|
||||||
CL_CHECK(clReleaseProgram(program));
|
CL_CHECK(clReleaseProgram(program));
|
||||||
CL_CHECK(clReleaseContext(context));
|
CL_CHECK(clReleaseContext(context));
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@@ -1,36 +1,35 @@
|
|||||||
|
RISCV_TOOL_PATH = $(wildcard ~/dev/riscv-gnu-toolchain/drops)
|
||||||
|
POCL_CC_PATH = $(wildcard ~/dev/pocl/drops_riscv_cc)
|
||||||
|
POCL_INC_PATH = $(wildcard ../include)
|
||||||
|
POCL_LIB_PATH = $(wildcard ../lib)
|
||||||
|
VX_RT_PATH = $(wildcard ../../../runtime)
|
||||||
|
VX_SIMX_PATH = $(wildcard ../../../simX/obj_dir)
|
||||||
|
|
||||||
RISCV_TOOL_PATH=$(wildcard ~/dev/riscv-gnu-toolchain/drops)
|
CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
|
||||||
|
CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
|
||||||
|
DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
|
||||||
|
HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
|
||||||
|
GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
|
||||||
|
|
||||||
POCL_CC_PATH=$(wildcard ~/dev/pocl/drops_riscv_cc)
|
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
|
||||||
POCL_RT_PATH=$(wildcard ~/dev/pocl/drops_riscv_rt)
|
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.s
|
||||||
POCL_RT0_PATH=$(wildcard ~/dev/pocl/drops_riscv_rt0)
|
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
|
||||||
|
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
|
||||||
|
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
|
||||||
|
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
|
||||||
|
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c
|
||||||
|
VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST)
|
||||||
|
|
||||||
VX_RT_PATH=$(wildcard ../../../runtime)
|
VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld
|
||||||
VX_SIMX_PATH=$(wildcard ../../../simX/obj_dir)
|
|
||||||
|
|
||||||
CC=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
|
CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32
|
||||||
CXX=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
|
|
||||||
DMP=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
|
|
||||||
HEX=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
|
|
||||||
NEWLIB_PATH=$(RISCV_TOOL_PATH)/riscv32-unknown-elf/lib
|
|
||||||
|
|
||||||
VX_NEWLIB = $(VX_RT_PATH)/newlib/newlib.c
|
|
||||||
VX_STR = $(VX_RT_PATH)/startup/vx_start.s
|
|
||||||
VX_INT = $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
|
|
||||||
VX_IO = $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
|
|
||||||
VX_FIO = $(VX_RT_PATH)/fileio/fileio.s
|
|
||||||
VX_API = $(VX_RT_PATH)/vx_api/vx_api.c
|
|
||||||
|
|
||||||
VX_SRCS = $(VX_STR) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST)
|
|
||||||
|
|
||||||
#CXXFLAGS = -g -O0 -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld -march=rv32im -mabi=ilp32
|
|
||||||
|
|
||||||
CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32
|
|
||||||
CXXFLAGS += -ffreestanding # program may not begin at main()
|
CXXFLAGS += -ffreestanding # program may not begin at main()
|
||||||
CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
|
|
||||||
CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections
|
CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections
|
||||||
|
CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
|
||||||
|
CXXFLAGS += -I$(POCL_INC_PATH)
|
||||||
|
|
||||||
LIBS = -lOpenCL
|
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
||||||
|
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
|
||||||
|
|
||||||
PROJECT=sgemm
|
PROJECT=sgemm
|
||||||
|
|
||||||
@@ -40,10 +39,10 @@ lib$(PROJECT).a: kernel.cl
|
|||||||
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
|
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
|
||||||
|
|
||||||
$(PROJECT).elf: main.cc lib$(PROJECT).a
|
$(PROJECT).elf: main.cc lib$(PROJECT).a
|
||||||
$(CXX) $(CXXFLAGS) -I$(POCL_RT_PATH)/include -L$(POCL_RT_PATH)/lib/static -L. $(VX_SRCS) main.cc -Wl,--whole-archive -l$(PROJECT) -Wl,--no-whole-archive $(LIBS) -o $(PROJECT).elf
|
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) main.cc $(VX_LIBS) -o $(PROJECT).elf
|
||||||
|
|
||||||
$(PROJECT).qemu: main.cc lib$(PROJECT).a
|
$(PROJECT).qemu: main.cc lib$(PROJECT).a
|
||||||
$(CXX) $(CXXFLAGS) -I$(POCL_RT0_PATH)/include -L$(POCL_RT0_PATH)/lib/static -L. main.cc -Wl,--whole-archive -l$(PROJECT) -Wl,--no-whole-archive $(LIBS) -o $(PROJECT).qemu
|
$(CXX) $(CXXFLAGS) main.cc $(QEMU_LIBS) -o $(PROJECT).qemu
|
||||||
|
|
||||||
$(PROJECT).hex: $(PROJECT).elf
|
$(PROJECT).hex: $(PROJECT).elf
|
||||||
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
|
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
|
||||||
@@ -52,10 +51,16 @@ $(PROJECT).dump: $(PROJECT).elf
|
|||||||
$(DMP) -D $(PROJECT).elf > $(PROJECT).dump
|
$(DMP) -D $(PROJECT).elf > $(PROJECT).dump
|
||||||
|
|
||||||
run: $(PROJECT).hex
|
run: $(PROJECT).hex
|
||||||
$(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug
|
POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug
|
||||||
|
|
||||||
qemu: $(PROJECT).qemu
|
qemu: $(PROJECT).qemu
|
||||||
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -d in_asm -D debug.log $(PROJECT).qemu
|
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -d in_asm -D debug.log $(PROJECT).qemu
|
||||||
|
|
||||||
|
gdb-s: $(PROJECT).qemu
|
||||||
|
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu
|
||||||
|
|
||||||
|
gdb-c: $(PROJECT).qemu
|
||||||
|
$(GDB) $(PROJECT).qemu
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf *.elf *.dump *.hex *.a *.pocl
|
rm -rf *.elf *.dump *.hex
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
Binary file not shown.
@@ -17,15 +17,16 @@
|
|||||||
* along with this program; if not, write to the Free Software
|
* along with this program; if not, write to the Free Software
|
||||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||||
*
|
*
|
||||||
* gcc -o cldemo -std=gnu99 -Wall -I/usr/include/nvidia-current cldemo.c -lOpenCL
|
* gcc -o cldemo -std=gnu99 -Wall -I/usr/include/nvidia-current cldemo.c
|
||||||
|
* -lOpenCL
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <CL/cl.h>
|
#include <CL/cl.h>
|
||||||
#include <iostream>
|
|
||||||
#include <fstream>
|
|
||||||
#include <sstream>
|
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
|
#include <fstream>
|
||||||
|
#include <iostream>
|
||||||
|
#include <sstream>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
@@ -33,469 +34,180 @@
|
|||||||
|
|
||||||
#define NUM_DATA 64
|
#define NUM_DATA 64
|
||||||
|
|
||||||
#define CL_CHECK(_expr) \
|
#define CL_CHECK(_expr) \
|
||||||
do { \
|
do { \
|
||||||
cl_int _err = _expr; \
|
cl_int _err = _expr; \
|
||||||
if (_err == CL_SUCCESS) \
|
if (_err == CL_SUCCESS) \
|
||||||
break; \
|
break; \
|
||||||
fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
|
fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
|
||||||
abort(); \
|
abort(); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define CL_CHECK_ERR(_expr) \
|
#define CL_CHECK_ERR(_expr) \
|
||||||
({ \
|
({ \
|
||||||
cl_int _err = CL_INVALID_VALUE; \
|
cl_int _err = CL_INVALID_VALUE; \
|
||||||
typeof(_expr) _ret = _expr; \
|
typeof(_expr) _ret = _expr; \
|
||||||
if (_err != CL_SUCCESS) { \
|
if (_err != CL_SUCCESS) { \
|
||||||
fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
|
fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
|
||||||
abort(); \
|
abort(); \
|
||||||
} \
|
} \
|
||||||
_ret; \
|
_ret; \
|
||||||
})
|
})
|
||||||
|
|
||||||
void pfn_notify(const char *errinfo, const void *private_info, size_t cb, void *user_data)
|
void pfn_notify(const char *errinfo, const void *private_info, size_t cb,
|
||||||
{
|
void *user_data) {
|
||||||
fprintf(stderr, "OpenCL Error (via pfn_notify): %s\n", errinfo);
|
fprintf(stderr, "OpenCL Error (via pfn_notify): %s\n", errinfo);
|
||||||
}
|
|
||||||
|
|
||||||
///
|
|
||||||
// Create an OpenCL program from the kernel source file
|
|
||||||
//
|
|
||||||
cl_program CreateProgram(cl_context context, cl_device_id device, const char* fileName)
|
|
||||||
{
|
|
||||||
cl_int errNum;
|
|
||||||
cl_program program;
|
|
||||||
|
|
||||||
std::ifstream kernelFile(fileName, std::ios::in);
|
|
||||||
if (!kernelFile.is_open())
|
|
||||||
{
|
|
||||||
std::cerr << "Failed to open file for reading: " << fileName << std::endl;
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::ostringstream oss;
|
|
||||||
oss << kernelFile.rdbuf();
|
|
||||||
|
|
||||||
std::string srcStdStr = oss.str();
|
|
||||||
const char *srcStr = srcStdStr.c_str();
|
|
||||||
program = clCreateProgramWithSource(context, 1,
|
|
||||||
(const char**)&srcStr,
|
|
||||||
NULL, NULL);
|
|
||||||
if (program == NULL)
|
|
||||||
{
|
|
||||||
std::cerr << "Failed to create CL program from source." << std::endl;
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
errNum = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
|
|
||||||
if (errNum != CL_SUCCESS)
|
|
||||||
{
|
|
||||||
// Determine the reason for the error
|
|
||||||
char buildLog[16384];
|
|
||||||
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
|
|
||||||
sizeof(buildLog), buildLog, NULL);
|
|
||||||
|
|
||||||
std::cerr << "Error in kernel: " << std::endl;
|
|
||||||
std::cerr << buildLog;
|
|
||||||
clReleaseProgram(program);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
return program;
|
|
||||||
}
|
|
||||||
|
|
||||||
//
|
|
||||||
///
|
|
||||||
// Retrieve program binary for all of the devices attached to the
|
|
||||||
// program and store the one for the device passed in
|
|
||||||
//
|
|
||||||
bool SaveProgramBinary(cl_program program, cl_device_id device, const char* fileName)
|
|
||||||
{
|
|
||||||
//cl_uint numDevices = malloc(sizeof(cl_uint));
|
|
||||||
//cl_uint* numDevices = malloc(sizeof(cl_uint));
|
|
||||||
cl_int errNum;
|
|
||||||
|
|
||||||
printf("try getting program info\n");
|
|
||||||
// 1 - Query for number of devices attached to program
|
|
||||||
/*errNum = clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint),
|
|
||||||
&numDevices, NULL);
|
|
||||||
printf("Got program_num_devices\n");
|
|
||||||
if (errNum != CL_SUCCESS)
|
|
||||||
{
|
|
||||||
std::cerr << "Error querying for number of devices." << std::endl;
|
|
||||||
return false;
|
|
||||||
}*/
|
|
||||||
|
|
||||||
// 2 - Get all of the Device IDs
|
|
||||||
cl_device_id *devices = new cl_device_id[1];
|
|
||||||
errNum = clGetProgramInfo(program, CL_PROGRAM_DEVICES,
|
|
||||||
sizeof(cl_device_id) * 1,
|
|
||||||
devices, NULL);
|
|
||||||
printf("Got program_devices\n");
|
|
||||||
if (errNum != CL_SUCCESS)
|
|
||||||
{
|
|
||||||
std::cerr << "Error querying for devices." << std::endl;
|
|
||||||
delete [] devices;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// 3 - Determine the size of each program binary
|
|
||||||
size_t *programBinarySizes = new size_t [1];
|
|
||||||
errNum = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES,
|
|
||||||
sizeof(size_t) * 1,
|
|
||||||
programBinarySizes, NULL);
|
|
||||||
printf("Got program_binary_sizes\n");
|
|
||||||
if (errNum != CL_SUCCESS)
|
|
||||||
{
|
|
||||||
std::cerr << "Error querying for program binary sizes." << std::endl;
|
|
||||||
delete [] devices;
|
|
||||||
delete [] programBinarySizes;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned char **programBinaries = new unsigned char*[1];
|
|
||||||
for (cl_uint i = 0; i < 1; i++)
|
|
||||||
{
|
|
||||||
programBinaries[i] = new unsigned char[programBinarySizes[i]];
|
|
||||||
}
|
|
||||||
|
|
||||||
// 4 - Get all of the program binaries
|
|
||||||
errNum = clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(unsigned char*) * 1,
|
|
||||||
programBinaries, NULL);
|
|
||||||
printf("Got program_binarys\n");
|
|
||||||
if (errNum != CL_SUCCESS)
|
|
||||||
{
|
|
||||||
std::cerr << "Error querying for program binaries" << std::endl;
|
|
||||||
|
|
||||||
delete [] devices;
|
|
||||||
delete [] programBinarySizes;
|
|
||||||
for (cl_uint i = 0; i < 1; i++)
|
|
||||||
{
|
|
||||||
delete [] programBinaries[i];
|
|
||||||
}
|
|
||||||
delete [] programBinaries;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// 5 - Finally store the binaries for the device requested out to disk for future reading.
|
|
||||||
for (cl_uint i = 0; i < 1; i++)
|
|
||||||
{
|
|
||||||
// Store the binary just for the device requested. In a scenario where
|
|
||||||
// multiple devices were being used you would save all of the binaries out here.
|
|
||||||
if (devices[i] == device)
|
|
||||||
{
|
|
||||||
FILE *fp = fopen(fileName, "wb");
|
|
||||||
if(fp ==NULL){
|
|
||||||
delete [] devices;
|
|
||||||
delete [] programBinarySizes;
|
|
||||||
for (cl_uint i = 0; i < 1; i++)
|
|
||||||
{
|
|
||||||
delete [] programBinaries[i];
|
|
||||||
}
|
|
||||||
delete [] programBinaries;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
printf("Opened file\n");
|
|
||||||
fwrite(programBinaries[i], 1, programBinarySizes[i], fp);
|
|
||||||
printf("wrote file\n");
|
|
||||||
fclose(fp);
|
|
||||||
printf("close file\n");
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Cleanup
|
|
||||||
delete [] devices;
|
|
||||||
delete [] programBinarySizes;
|
|
||||||
for (cl_uint i = 0; i < 1; i++)
|
|
||||||
{
|
|
||||||
delete [] programBinaries[i];
|
|
||||||
}
|
|
||||||
delete [] programBinaries;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
///
|
|
||||||
// Attempt to create the program object from a cached binary. Note that
|
|
||||||
// on first run this will fail because the binary has not yet been created.
|
|
||||||
//
|
|
||||||
cl_program CreateProgramFromBinary(cl_context context, cl_device_id device, const char* fileName)
|
|
||||||
{
|
|
||||||
FILE *fp = fopen(fileName, "rb");
|
|
||||||
if (fp == NULL)
|
|
||||||
{
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Determine the size of the binary
|
|
||||||
size_t binarySize;
|
|
||||||
fseek(fp, 0, SEEK_END);
|
|
||||||
binarySize = ftell(fp);
|
|
||||||
rewind(fp);
|
|
||||||
|
|
||||||
unsigned char *programBinary = new unsigned char[binarySize];
|
|
||||||
fread(programBinary, 1, binarySize, fp);
|
|
||||||
fclose(fp);
|
|
||||||
|
|
||||||
cl_int errNum = 0;
|
|
||||||
cl_program program;
|
|
||||||
cl_int binaryStatus;
|
|
||||||
|
|
||||||
program = clCreateProgramWithBinary(context,
|
|
||||||
1,
|
|
||||||
&device,
|
|
||||||
&binarySize,
|
|
||||||
(const unsigned char**)&programBinary,
|
|
||||||
&binaryStatus,
|
|
||||||
&errNum);
|
|
||||||
delete [] programBinary;
|
|
||||||
if (errNum != CL_SUCCESS)
|
|
||||||
{
|
|
||||||
std::cerr << "Error loading program binary." << std::endl;
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (binaryStatus != CL_SUCCESS)
|
|
||||||
{
|
|
||||||
std::cerr << "Invalid binary for device" << std::endl;
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
errNum = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
|
|
||||||
if (errNum != CL_SUCCESS)
|
|
||||||
{
|
|
||||||
printf("build errNum:%d\n", errNum);
|
|
||||||
// Determine the reason for the error
|
|
||||||
char buildLog[16384];
|
|
||||||
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
|
|
||||||
sizeof(buildLog), buildLog, NULL);
|
|
||||||
|
|
||||||
std::cerr << "Error in program: " << std::endl;
|
|
||||||
std::cerr << buildLog << std::endl;
|
|
||||||
clReleaseProgram(program);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
return program;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
///
|
///
|
||||||
// Cleanup any created OpenCL resources
|
// Cleanup any created OpenCL resources
|
||||||
//
|
//
|
||||||
void Cleanup(cl_context context, cl_command_queue commandQueue,
|
void Cleanup(cl_context context, cl_command_queue commandQueue,
|
||||||
cl_program program, cl_kernel kernel, cl_mem memObjects[3])
|
cl_program program, cl_kernel kernel, cl_mem memObjects[3]) {
|
||||||
{
|
for (int i = 0; i < 3; i++) {
|
||||||
for (int i = 0; i < 3; i++)
|
if (memObjects[i] != 0)
|
||||||
{
|
clReleaseMemObject(memObjects[i]);
|
||||||
if (memObjects[i] != 0)
|
}
|
||||||
clReleaseMemObject(memObjects[i]);
|
if (commandQueue != 0)
|
||||||
}
|
clReleaseCommandQueue(commandQueue);
|
||||||
if (commandQueue != 0)
|
|
||||||
clReleaseCommandQueue(commandQueue);
|
|
||||||
|
|
||||||
if (kernel != 0)
|
if (kernel != 0)
|
||||||
clReleaseKernel(kernel);
|
clReleaseKernel(kernel);
|
||||||
|
|
||||||
if (program != 0)
|
if (program != 0)
|
||||||
clReleaseProgram(program);
|
clReleaseProgram(program);
|
||||||
|
|
||||||
if (context != 0)
|
|
||||||
clReleaseContext(context);
|
|
||||||
|
|
||||||
|
if (context != 0)
|
||||||
|
clReleaseContext(context);
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char **argv)
|
int main(int argc, char **argv) {
|
||||||
{
|
|
||||||
printf("enter demo main\n");
|
printf("enter demo main\n");
|
||||||
fflush(stdout);
|
|
||||||
putenv("POCL_VERBOSE=1");
|
|
||||||
putenv("POCL_DEVICES=basic");
|
|
||||||
putenv("POCL_LEAVE_TEMP_DIRS=1");
|
|
||||||
putenv("POCL_LEAVE_KERNEL_COMPILER_TEMP_FILES=1");
|
|
||||||
putenv("POCL_TEMP_DIR=pocl");
|
|
||||||
putenv("POCL_CACHE_DIR=pocl");
|
|
||||||
putenv("POCL_WORK_GROUP_METHOD=spmd");
|
|
||||||
if(argc >= 2){
|
|
||||||
printf("argv[1]:%s:\n",argv[1]);
|
|
||||||
if(!strcmp(argv[1], "h"))
|
|
||||||
putenv("POCL_WORK_GROUP_METHOD=spmd");
|
|
||||||
if(!strcmp(argv[1], "c"))
|
|
||||||
putenv("POCL_CROSS_COMPILE=1");
|
|
||||||
}
|
|
||||||
if(argc >= 3){
|
|
||||||
printf("argv[2]:%s:\n",argv[2]);
|
|
||||||
if(!strcmp(argv[2], "h"))
|
|
||||||
putenv("POCL_WORK_GROUP_METHOD=spmd");
|
|
||||||
if(!strcmp(argv[2], "c"))
|
|
||||||
putenv("POCL_CROSS_COMPILE=1");
|
|
||||||
}
|
|
||||||
|
|
||||||
//putenv("LD_LIBRARY_PATH=/scratch/colins/build/linux/fs/lib");
|
cl_platform_id platform_id;
|
||||||
//putenv("LTDL_LIBRARY_PATH=/scratch/colins/build/linux/fs/lib");
|
cl_device_id device_id;
|
||||||
//lt_dlsetsearchpath("/scratch/colins/build/linux/fs/lib");
|
size_t binary_size;
|
||||||
//printf("SEARCH_PATH:%s\n",lt_dlgetsearchpath());
|
int i;
|
||||||
cl_platform_id platforms[100];
|
|
||||||
cl_uint platforms_n = 0;
|
|
||||||
CL_CHECK(clGetPlatformIDs(100, platforms, &platforms_n));
|
|
||||||
|
|
||||||
printf("=== %d OpenCL platform(s) found: ===\n", platforms_n);
|
// Getting platform and device information
|
||||||
for (int i=0; i<platforms_n; i++)
|
CL_CHECK(clGetPlatformIDs(1, &platform_id, NULL));
|
||||||
{
|
CL_CHECK(clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, NULL));
|
||||||
char buffer[10240];
|
|
||||||
printf(" -- %d --\n", i);
|
|
||||||
CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_PROFILE, 10240, buffer, NULL));
|
|
||||||
printf(" PROFILE = %s\n", buffer);
|
|
||||||
CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_VERSION, 10240, buffer, NULL));
|
|
||||||
printf(" VERSION = %s\n", buffer);
|
|
||||||
CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, 10240, buffer, NULL));
|
|
||||||
printf(" NAME = %s\n", buffer);
|
|
||||||
CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, 10240, buffer, NULL));
|
|
||||||
printf(" VENDOR = %s\n", buffer);
|
|
||||||
CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, 10240, buffer, NULL));
|
|
||||||
printf(" EXTENSIONS = %s\n", buffer);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (platforms_n == 0)
|
cl_context context;
|
||||||
return 1;
|
context = CL_CHECK_ERR(
|
||||||
|
clCreateContext(NULL, 1, &device_id, &pfn_notify, NULL, &_err));
|
||||||
|
|
||||||
cl_device_id devices[100];
|
cl_command_queue queue;
|
||||||
cl_uint devices_n = 0;
|
queue = CL_CHECK_ERR(clCreateCommandQueue(context, device_id,
|
||||||
// CL_CHECK(clGetDeviceIDs(NULL, CL_DEVICE_TYPE_ALL, 100, devices, &devices_n));
|
CL_QUEUE_PROFILING_ENABLE, &_err));
|
||||||
CL_CHECK(clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_GPU, 100, devices, &devices_n));
|
|
||||||
|
|
||||||
printf("=== %d OpenCL device(s) found on platform:\n", platforms_n);
|
|
||||||
for (int i=0; i<devices_n; i++)
|
|
||||||
{
|
|
||||||
char buffer[10240];
|
|
||||||
cl_uint buf_uint;
|
|
||||||
cl_ulong buf_ulong;
|
|
||||||
printf(" -- %d --\n", i);
|
|
||||||
CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(buffer), buffer, NULL));
|
|
||||||
printf(" DEVICE_NAME = %s\n", buffer);
|
|
||||||
CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_VENDOR, sizeof(buffer), buffer, NULL));
|
|
||||||
printf(" DEVICE_VENDOR = %s\n", buffer);
|
|
||||||
CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_VERSION, sizeof(buffer), buffer, NULL));
|
|
||||||
printf(" DEVICE_VERSION = %s\n", buffer);
|
|
||||||
CL_CHECK(clGetDeviceInfo(devices[i], CL_DRIVER_VERSION, sizeof(buffer), buffer, NULL));
|
|
||||||
printf(" DRIVER_VERSION = %s\n", buffer);
|
|
||||||
CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(buf_uint), &buf_uint, NULL));
|
|
||||||
printf(" DEVICE_MAX_COMPUTE_UNITS = %u\n", (unsigned int)buf_uint);
|
|
||||||
CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(buf_uint), &buf_uint, NULL));
|
|
||||||
printf(" DEVICE_MAX_CLOCK_FREQUENCY = %u\n", (unsigned int)buf_uint);
|
|
||||||
CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(buf_ulong), &buf_ulong, NULL));
|
|
||||||
printf(" DEVICE_GLOBAL_MEM_SIZE = %llu\n", (unsigned long long)buf_ulong);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (devices_n == 0)
|
|
||||||
return 1;
|
|
||||||
|
|
||||||
cl_context context;
|
|
||||||
context = CL_CHECK_ERR(clCreateContext(NULL, 1, devices+1, &pfn_notify, NULL, &_err));
|
|
||||||
|
|
||||||
cl_command_queue queue;
|
|
||||||
queue = CL_CHECK_ERR(clCreateCommandQueue(context, devices[1], CL_QUEUE_PROFILING_ENABLE, &_err));
|
|
||||||
|
|
||||||
cl_kernel kernel = 0;
|
|
||||||
cl_mem memObjects[3] = {0,0,0};
|
|
||||||
|
|
||||||
|
cl_kernel kernel = 0;
|
||||||
|
cl_mem memObjects[3] = {0, 0, 0};
|
||||||
|
|
||||||
// Create OpenCL program - first attempt to load cached binary.
|
// Create OpenCL program - first attempt to load cached binary.
|
||||||
// If that is not available, then create the program from source
|
// If that is not available, then create the program from source
|
||||||
// and store the binary for future use.
|
// and store the binary for future use.
|
||||||
std::cout << "Attempting to create program from binary..." << std::endl;
|
std::cout << "Attempting to create program from binary..." << std::endl;
|
||||||
//cl_program program = CreateProgramFromBinary(context, devices[1], "kernel.cl.bin");
|
// cl_program program = CreateProgramFromBinary(context, device_id,
|
||||||
cl_program program = clCreateProgramWithBuiltInKernels(context, 1, &devices[1], "sgemm", NULL);
|
// "kernel.cl.bin");
|
||||||
if (program == NULL)
|
cl_program program =
|
||||||
{
|
clCreateProgramWithBuiltInKernels(context, 1, &device_id, "sgemm", NULL);
|
||||||
std::cout << "Binary not loaded, create from source..." << std::endl;
|
if (program == NULL) {
|
||||||
program = CreateProgram(context, devices[1], "kernel.cl");
|
std::cerr << "Failed to write program binary" << std::endl;
|
||||||
if (program == NULL)
|
Cleanup(context, queue, program, kernel, memObjects);
|
||||||
{
|
return 1;
|
||||||
Cleanup(context, queue, program, kernel, memObjects);
|
} else {
|
||||||
return 1;
|
std::cout << "Read program from binary." << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::cout << "Save program binary for future run..." << std::endl;
|
// Build program
|
||||||
if (SaveProgramBinary(program, devices[1], "kernel.cl.bin") == false)
|
CL_CHECK(clBuildProgram(program, 1, &device_id, NULL, NULL, NULL));
|
||||||
{
|
|
||||||
std::cerr << "Failed to write program binary" << std::endl;
|
|
||||||
Cleanup(context, queue, program, kernel, memObjects);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
std::cout << "Read program from binary." << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
printf("attempting to create input buffer\n");
|
printf("attempting to create input buffer\n");
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
cl_mem input_bufferA;
|
cl_mem input_bufferA;
|
||||||
input_bufferA = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float)*NUM_DATA*NUM_DATA, NULL, &_err));
|
input_bufferA = CL_CHECK_ERR(
|
||||||
cl_mem input_bufferB;
|
clCreateBuffer(context, CL_MEM_READ_ONLY,
|
||||||
input_bufferB = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float)*NUM_DATA*NUM_DATA, NULL, &_err));
|
sizeof(float) * NUM_DATA * NUM_DATA, NULL, &_err));
|
||||||
|
|
||||||
|
cl_mem input_bufferB;
|
||||||
|
input_bufferB = CL_CHECK_ERR(
|
||||||
|
clCreateBuffer(context, CL_MEM_READ_ONLY,
|
||||||
|
sizeof(float) * NUM_DATA * NUM_DATA, NULL, &_err));
|
||||||
|
|
||||||
printf("attempting to create output buffer\n");
|
printf("attempting to create output buffer\n");
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
cl_mem output_buffer;
|
cl_mem output_buffer;
|
||||||
output_buffer = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float)*NUM_DATA*NUM_DATA, NULL, &_err));
|
output_buffer = CL_CHECK_ERR(
|
||||||
|
clCreateBuffer(context, CL_MEM_WRITE_ONLY,
|
||||||
|
sizeof(float) * NUM_DATA * NUM_DATA, NULL, &_err));
|
||||||
|
|
||||||
memObjects[0] = input_bufferA;
|
memObjects[0] = input_bufferA;
|
||||||
memObjects[1] = input_bufferB;
|
memObjects[1] = input_bufferB;
|
||||||
memObjects[2] = output_buffer;
|
memObjects[2] = output_buffer;
|
||||||
|
|
||||||
size_t width = NUM_DATA;
|
size_t width = NUM_DATA;
|
||||||
|
|
||||||
printf("attempting to create kernel\n");
|
printf("attempting to create kernel\n");
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
kernel = CL_CHECK_ERR(clCreateKernel(program, "sgemm_single", &_err));
|
kernel = CL_CHECK_ERR(clCreateKernel(program, "sgemm", &_err));
|
||||||
CL_CHECK(clSetKernelArg(kernel, 0, sizeof(input_bufferA), &input_bufferA));
|
CL_CHECK(clSetKernelArg(kernel, 0, sizeof(input_bufferA), &input_bufferA));
|
||||||
CL_CHECK(clSetKernelArg(kernel, 1, sizeof(input_bufferB), &input_bufferB));
|
CL_CHECK(clSetKernelArg(kernel, 1, sizeof(input_bufferB), &input_bufferB));
|
||||||
CL_CHECK(clSetKernelArg(kernel, 2, sizeof(output_buffer), &output_buffer));
|
CL_CHECK(clSetKernelArg(kernel, 2, sizeof(output_buffer), &output_buffer));
|
||||||
CL_CHECK(clSetKernelArg(kernel, 3, sizeof(width), &width));
|
CL_CHECK(clSetKernelArg(kernel, 3, sizeof(width), &width));
|
||||||
|
|
||||||
printf("attempting to enqueue write buffer\n");
|
printf("attempting to enqueue write buffer\n");
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
for (int i=0; i<NUM_DATA*NUM_DATA; i++) {
|
for (int i = 0; i < NUM_DATA * NUM_DATA; i++) {
|
||||||
float in = ((float)rand()/(float)(RAND_MAX)) * 100.0;
|
|
||||||
CL_CHECK(clEnqueueWriteBuffer(queue, input_bufferA, CL_TRUE, i*sizeof(float), 4, &in, 0, NULL, NULL));
|
|
||||||
in = ((float)rand()/(float)(RAND_MAX)) * 100.0;
|
|
||||||
CL_CHECK(clEnqueueWriteBuffer(queue, input_bufferB, CL_TRUE, i*sizeof(float), 4, &in, 0, NULL, NULL));
|
|
||||||
}
|
|
||||||
|
|
||||||
cl_event kernel_completion;
|
float in = ((float)rand() / (float)(RAND_MAX)) * 100.0;
|
||||||
const size_t local_work_size[3] = { 64, 1, 1};
|
CL_CHECK(clEnqueueWriteBuffer(queue, input_bufferA, CL_TRUE,
|
||||||
|
i * sizeof(float), 4, &in, 0, NULL, NULL));
|
||||||
|
in = ((float)rand() / (float)(RAND_MAX)) * 100.0;
|
||||||
|
CL_CHECK(clEnqueueWriteBuffer(queue, input_bufferB, CL_TRUE,
|
||||||
|
i * sizeof(float), 4, &in, 0, NULL, NULL));
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("Done enqueueing\n");
|
||||||
|
|
||||||
|
cl_event kernel_completion;
|
||||||
|
const size_t local_work_size[3] = {1, 1, 1};
|
||||||
// a_offset
|
// a_offset
|
||||||
size_t global_work_size[3] = { NUM_DATA, NUM_DATA, NUM_DATA };
|
size_t global_work_size[3] = {NUM_DATA, NUM_DATA, NUM_DATA};
|
||||||
printf("attempting to enqueue kernel\n");
|
printf("attempting to enqueue kernel\n");
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &kernel_completion));
|
CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size,
|
||||||
|
local_work_size, 0, NULL,
|
||||||
|
&kernel_completion));
|
||||||
printf("Enqueue'd kerenel\n");
|
printf("Enqueue'd kerenel\n");
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
cl_ulong time_start, time_end;
|
cl_ulong time_start, time_end;
|
||||||
CL_CHECK(clWaitForEvents(1, &kernel_completion));
|
CL_CHECK(clWaitForEvents(1, &kernel_completion));
|
||||||
CL_CHECK(clGetEventProfilingInfo(kernel_completion, CL_PROFILING_COMMAND_START, sizeof(time_start), &time_start, NULL));
|
CL_CHECK(clGetEventProfilingInfo(kernel_completion,
|
||||||
CL_CHECK(clGetEventProfilingInfo(kernel_completion, CL_PROFILING_COMMAND_END, sizeof(time_end), &time_end, NULL));
|
CL_PROFILING_COMMAND_START,
|
||||||
|
sizeof(time_start), &time_start, NULL));
|
||||||
|
CL_CHECK(clGetEventProfilingInfo(kernel_completion, CL_PROFILING_COMMAND_END,
|
||||||
|
sizeof(time_end), &time_end, NULL));
|
||||||
double elapsed = time_end - time_start;
|
double elapsed = time_end - time_start;
|
||||||
printf("time(ns):%lg\n",elapsed);
|
printf("time(ns):%lg\n", elapsed);
|
||||||
CL_CHECK(clReleaseEvent(kernel_completion));
|
CL_CHECK(clReleaseEvent(kernel_completion));
|
||||||
|
|
||||||
printf("Result:");
|
printf("Result:");
|
||||||
for (int i=0; i<NUM_DATA*NUM_DATA; i++) {
|
for (int i = 0; i < NUM_DATA * NUM_DATA; i++) {
|
||||||
float data;
|
float data;
|
||||||
CL_CHECK(clEnqueueReadBuffer(queue, output_buffer, CL_TRUE, i*sizeof(float), 4, &data, 0, NULL, NULL));
|
CL_CHECK(clEnqueueReadBuffer(queue, output_buffer, CL_TRUE,
|
||||||
//printf(" %f", data);
|
i * sizeof(float), 4, &data, 0, NULL, NULL));
|
||||||
}
|
// printf(" %f", data);
|
||||||
printf("\n");
|
}
|
||||||
|
printf("\n");
|
||||||
|
|
||||||
CL_CHECK(clReleaseMemObject(memObjects[0]));
|
CL_CHECK(clReleaseMemObject(memObjects[0]));
|
||||||
CL_CHECK(clReleaseMemObject(memObjects[1]));
|
CL_CHECK(clReleaseMemObject(memObjects[1]));
|
||||||
CL_CHECK(clReleaseMemObject(memObjects[2]));
|
CL_CHECK(clReleaseMemObject(memObjects[2]));
|
||||||
|
|
||||||
CL_CHECK(clReleaseKernel(kernel));
|
CL_CHECK(clReleaseKernel(kernel));
|
||||||
CL_CHECK(clReleaseProgram(program));
|
CL_CHECK(clReleaseProgram(program));
|
||||||
CL_CHECK(clReleaseContext(context));
|
CL_CHECK(clReleaseContext(context));
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@@ -1,36 +1,35 @@
|
|||||||
|
RISCV_TOOL_PATH = $(wildcard ~/dev/riscv-gnu-toolchain/drops)
|
||||||
|
POCL_CC_PATH = $(wildcard ~/dev/pocl/drops_riscv_cc)
|
||||||
|
POCL_INC_PATH = $(wildcard ../include)
|
||||||
|
POCL_LIB_PATH = $(wildcard ../lib)
|
||||||
|
VX_RT_PATH = $(wildcard ../../../runtime)
|
||||||
|
VX_SIMX_PATH = $(wildcard ../../../simX/obj_dir)
|
||||||
|
|
||||||
RISCV_TOOL_PATH=$(wildcard ~/dev/riscv-gnu-toolchain/drops)
|
CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
|
||||||
|
CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
|
||||||
|
DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
|
||||||
|
HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
|
||||||
|
GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
|
||||||
|
|
||||||
POCL_CC_PATH=$(wildcard ~/dev/pocl/drops_riscv_cc)
|
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
|
||||||
POCL_RT_PATH=$(wildcard ~/dev/pocl/drops_riscv_rt)
|
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.s
|
||||||
POCL_RT0_PATH=$(wildcard ~/dev/pocl/drops_riscv_rt0)
|
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
|
||||||
|
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
|
||||||
VX_RT_PATH=$(wildcard ../../../runtime)
|
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
|
||||||
VX_SIMX_PATH=$(wildcard ../../../simX/obj_dir)
|
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
|
||||||
|
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c
|
||||||
CC=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
|
VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST)
|
||||||
CXX=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
|
|
||||||
DMP=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
|
|
||||||
HEX=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
|
|
||||||
NEWLIB_PATH=$(RISCV_TOOL_PATH)/riscv32-unknown-elf/lib
|
|
||||||
|
|
||||||
VX_NEWLIB = $(VX_RT_PATH)/newlib/newlib.c
|
|
||||||
VX_STR = $(VX_RT_PATH)/startup/vx_start.s
|
|
||||||
VX_INT = $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
|
|
||||||
VX_IO = $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
|
|
||||||
VX_FIO = $(VX_RT_PATH)/fileio/fileio.s
|
|
||||||
VX_API = $(VX_RT_PATH)/vx_api/vx_api.c
|
|
||||||
|
|
||||||
VX_SRCS = $(VX_STR) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST)
|
|
||||||
|
|
||||||
VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld
|
VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld
|
||||||
|
|
||||||
CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32
|
CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32
|
||||||
CXXFLAGS += -ffreestanding # program may not begin at main()
|
CXXFLAGS += -ffreestanding # program may not begin at main()
|
||||||
CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
|
|
||||||
CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections
|
CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections
|
||||||
|
CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
|
||||||
|
CXXFLAGS += -I$(POCL_INC_PATH)
|
||||||
|
|
||||||
LIBS = -lOpenCL
|
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
||||||
|
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
|
||||||
|
|
||||||
PROJECT=vecadd
|
PROJECT=vecadd
|
||||||
|
|
||||||
@@ -40,10 +39,10 @@ lib$(PROJECT).a: kernel.cl
|
|||||||
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
|
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
|
||||||
|
|
||||||
$(PROJECT).elf: main.cc lib$(PROJECT).a
|
$(PROJECT).elf: main.cc lib$(PROJECT).a
|
||||||
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) -I$(POCL_RT_PATH)/include -L$(POCL_RT_PATH)/lib/static -L. $(VX_SRCS) main.cc -Wl,--whole-archive -l$(PROJECT) -Wl,--no-whole-archive $(LIBS) -o $(PROJECT).elf
|
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) main.cc $(VX_LIBS) -o $(PROJECT).elf
|
||||||
|
|
||||||
$(PROJECT).qemu: main.cc lib$(PROJECT).a
|
$(PROJECT).qemu: main.cc lib$(PROJECT).a
|
||||||
$(CXX) $(CXXFLAGS) -I$(POCL_RT0_PATH)/include -L$(POCL_RT0_PATH)/lib/static -L. main.cc -Wl,--whole-archive -l$(PROJECT) -Wl,--no-whole-archive $(LIBS) -o $(PROJECT).qemu
|
$(CXX) $(CXXFLAGS) main.cc $(QEMU_LIBS) -o $(PROJECT).qemu
|
||||||
|
|
||||||
$(PROJECT).hex: $(PROJECT).elf
|
$(PROJECT).hex: $(PROJECT).elf
|
||||||
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
|
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
|
||||||
@@ -52,10 +51,16 @@ $(PROJECT).dump: $(PROJECT).elf
|
|||||||
$(DMP) -D $(PROJECT).elf > $(PROJECT).dump
|
$(DMP) -D $(PROJECT).elf > $(PROJECT).dump
|
||||||
|
|
||||||
run: $(PROJECT).hex
|
run: $(PROJECT).hex
|
||||||
$(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug
|
POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug
|
||||||
|
|
||||||
qemu: $(PROJECT).qemu
|
qemu: $(PROJECT).qemu
|
||||||
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -d in_asm -D debug.log $(PROJECT).qemu
|
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -d in_asm -D debug.log $(PROJECT).qemu
|
||||||
|
|
||||||
|
gdb-s: $(PROJECT).qemu
|
||||||
|
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu
|
||||||
|
|
||||||
|
gdb-c: $(PROJECT).qemu
|
||||||
|
$(GDB) $(PROJECT).qemu
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf *.elf *.dump *.hex *.a *.pocl
|
rm -rf *.o *.elf *.dump *.hex *.a *.pocl *.qemu
|
||||||
Binary file not shown.
@@ -31,46 +31,6 @@
|
|||||||
_ret; \
|
_ret; \
|
||||||
})
|
})
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
const char* name;
|
|
||||||
const void* pfn;
|
|
||||||
uint32_t num_args;
|
|
||||||
uint32_t num_locals;
|
|
||||||
const uint8_t* arg_types;
|
|
||||||
const uint32_t* local_sizes;
|
|
||||||
} kernel_info_t;
|
|
||||||
|
|
||||||
static int g_num_kernels = 0;
|
|
||||||
static kernel_info_t g_kernels [MAX_KERNELS];
|
|
||||||
|
|
||||||
int _pocl_register_kernel(const char* name, const void* pfn, uint32_t num_args, uint32_t num_locals, const uint8_t* arg_types, const uint32_t* local_sizes) {
|
|
||||||
if (g_num_kernels == MAX_KERNELS)
|
|
||||||
return -1;
|
|
||||||
kernel_info_t* kernel = g_kernels + g_num_kernels++;
|
|
||||||
kernel->name = name;
|
|
||||||
kernel->pfn = pfn;
|
|
||||||
kernel->num_args = num_args;
|
|
||||||
kernel->num_locals = num_locals;
|
|
||||||
kernel->arg_types = arg_types;
|
|
||||||
kernel->local_sizes = local_sizes;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int _pocl_query_kernel(const char* name, const void** p_pfn, uint32_t* p_num_args, uint32_t* p_num_locals, const uint8_t** p_arg_types, const uint32_t** p_local_sizes) {
|
|
||||||
for (int i = 0; i < g_num_kernels; ++i) {
|
|
||||||
kernel_info_t* kernel = g_kernels + i;
|
|
||||||
if (strcmp(kernel->name, name) != 0)
|
|
||||||
continue;
|
|
||||||
if (p_pfn) *p_pfn = kernel->pfn;
|
|
||||||
if (p_num_args) *p_num_args = kernel->num_args;
|
|
||||||
if (p_num_locals) *p_num_locals = kernel->num_locals;
|
|
||||||
if (p_arg_types) *p_arg_types = kernel->arg_types;
|
|
||||||
if (p_local_sizes) *p_local_sizes = kernel->local_sizes;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
int exitcode = 0;
|
int exitcode = 0;
|
||||||
cl_context context = NULL;
|
cl_context context = NULL;
|
||||||
cl_command_queue commandQueue = NULL;
|
cl_command_queue commandQueue = NULL;
|
||||||
@@ -99,6 +59,8 @@ void cleanup() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
int main (int argc, char **argv) {
|
int main (int argc, char **argv) {
|
||||||
|
printf("enter demo main\n");
|
||||||
|
|
||||||
cl_platform_id platform_id;
|
cl_platform_id platform_id;
|
||||||
cl_device_id device_id;
|
cl_device_id device_id;
|
||||||
size_t binary_size;
|
size_t binary_size;
|
||||||
|
|||||||
@@ -31,6 +31,10 @@ unsigned vx_threadID(void);
|
|||||||
// Get hardware warp ID
|
// Get hardware warp ID
|
||||||
unsigned vx_warpID(void);
|
unsigned vx_warpID(void);
|
||||||
|
|
||||||
|
// Get Number cycles/Inst
|
||||||
|
unsigned vx_getCycles(void);
|
||||||
|
unsigned vx_getInst(void);
|
||||||
|
|
||||||
void vx_resetStack(void);
|
void vx_resetStack(void);
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -49,6 +49,19 @@ vx_threadID:
|
|||||||
csrr a0, 0x20 # read thread IDs
|
csrr a0, 0x20 # read thread IDs
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
.type vx_getCycles, @function
|
||||||
|
.global vx_getCycles
|
||||||
|
vx_getCycles:
|
||||||
|
csrr a0, 0x26 # read thread IDs
|
||||||
|
ret
|
||||||
|
|
||||||
|
|
||||||
|
.type vx_getInst, @function
|
||||||
|
.global vx_getInst
|
||||||
|
vx_getInst:
|
||||||
|
csrr a0, 0x25 # read thread IDs
|
||||||
|
ret
|
||||||
|
|
||||||
|
|
||||||
.type vx_resetStack, @function
|
.type vx_resetStack, @function
|
||||||
.global vx_resetStack
|
.global vx_resetStack
|
||||||
|
|||||||
@@ -19,4 +19,5 @@ vx_vec_test:
|
|||||||
vsw.v v2, (a3) # Store result
|
vsw.v v2, (a3) # Store result
|
||||||
add a3, a3, t0 # Bump pointer
|
add a3, a3, t0 # Bump pointer
|
||||||
bnez a0, loop # Loop back
|
bnez a0, loop # Loop back
|
||||||
|
vmacc.vv v1, v2, v2
|
||||||
ret # Finished
|
ret # Finished
|
||||||
22
runtime/mains/vector_test/vx_vec_original.s
Normal file
22
runtime/mains/vector_test/vx_vec_original.s
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
.type vx_vec_test, @function
|
||||||
|
.global vx_vec_test
|
||||||
|
vx_vec_test:
|
||||||
|
# vector-vector add routine of 32-bit integers
|
||||||
|
# void vvaddint32(size_t n, const int*x, const int*y, int*z)
|
||||||
|
# { for (size_t i=0; i<n; i++) { z[i]=x[i]+y[i]; } }
|
||||||
|
#
|
||||||
|
# a0 = n, a1 = x, a2 = y, a3 = z
|
||||||
|
# Non-vector instructions are indented
|
||||||
|
vsetvli t0, a0, e32 # Set vector length based on 32-bit vectors
|
||||||
|
loop:
|
||||||
|
vlw.v v0, (a1) # Get first vector
|
||||||
|
sub a0, a0, t0 # Decrement number done
|
||||||
|
slli t0, t0, 2 # Multiply number done by 4 bytes
|
||||||
|
add a1, a1, t0 # Bump pointer
|
||||||
|
vlw.v v1, (a2) # Get second vector
|
||||||
|
add a2, a2, t0 # Bump pointer
|
||||||
|
vadd.vv v2, v0, v1 # Sum vectors
|
||||||
|
vsw.v v2, (a3) # Store result
|
||||||
|
add a3, a3, t0 # Bump pointer
|
||||||
|
bnez a0, loop # Loop back
|
||||||
|
ret # Finished
|
||||||
@@ -5,7 +5,7 @@ int main()
|
|||||||
{
|
{
|
||||||
vx_tmc(1);
|
vx_tmc(1);
|
||||||
|
|
||||||
int n = 5;
|
int n = 32;
|
||||||
int *a = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
|
int *a = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
|
||||||
int *b = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
|
int *b = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
|
||||||
int *c = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
|
int *c = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
Binary file not shown.
@@ -1,11 +1,11 @@
|
|||||||
:0200000480007A
|
:0200000480007A
|
||||||
:100000009705000093854506130540006B10B50069
|
:100000009705000093854506130540006B10B50069
|
||||||
:10001000EF004005130510006B0005001385C108B3
|
:10001000EF004005130510006B0005001385C108B3
|
||||||
:100020001386810C3306A64093050000EF10003CB8
|
:100020001386810C3306A64093050000EF10403C78
|
||||||
:10003000171500001305C5AEEF00102AEF001034AD
|
:1000300017150000130505AFEF00502AEF005034EC
|
||||||
:08004000EF0090156F00902AFB
|
:08004000EF00D0156F00D02A7B
|
||||||
:10004800B70700009387070063880700371500800B
|
:10004800B70700009387070063880700371500800B
|
||||||
:100058001305C5B16F00D027678000001305400065
|
:10005800130505B26F0010286780000013054000E3
|
||||||
:100068006B000500972100009381C179F3261002E7
|
:100068006B000500972100009381C179F3261002E7
|
||||||
:100078009396A601732600029315A6001316260070
|
:100078009396A601732600029315A6001316260070
|
||||||
:1000880037F1FF6F3301B1403301D1403301C10073
|
:1000880037F1FF6F3301B1403301D1403301C10073
|
||||||
@@ -13,448 +13,448 @@
|
|||||||
:1000A80067800000D772850007E0051233055540C8
|
:1000A80067800000D772850007E0051233055540C8
|
||||||
:1000B80093922200B38555008760061233065600D6
|
:1000B80093922200B38555008760061233065600D6
|
||||||
:1000C8005781000227E10602B3865600E31E05FCAD
|
:1000C8005781000227E10602B3865600E31E05FCAD
|
||||||
:1000D80067800000130101FD232681021304010338
|
:1000D800D72021B667800000130101FD2326810285
|
||||||
:1000E800232EA4FC232CB4FC232AC4FC8327C4FDA0
|
:1000E80013040103232EA4FC232CB4FC232AC4FCF0
|
||||||
:1000F80083A707002326F4FE930744FD2322F4FE7A
|
:1000F8008327C4FD83A707002326F4FE930744FD46
|
||||||
:10010800832744FE03C707008327C4FE2380E70034
|
:100108002322F4FE832744FE03C707008327C4FE87
|
||||||
:100118008327C4FE93871700032744FE034717006D
|
:100118002380E7008327C4FE93871700032744FE44
|
||||||
:100128002380E7008327C4FE93872700032744FE24
|
:10012800034717002380E7008327C4FE938727002F
|
||||||
:10013800034727002380E7008327C4FE93873700FF
|
:10013800032744FE034727002380E7008327C4FEE4
|
||||||
:10014800032744FE034737002380E7008327C4FEC4
|
:1001480093873700032744FE034737002380E700DF
|
||||||
:10015800938747002326F4FE232404FE6F00400300
|
:100158008327C4FE938747002326F4FE232404FE46
|
||||||
:10016800832784FE032784FDB307F70003C707002E
|
:100168006F004003832784FE032784FDB307F7004D
|
||||||
:100178008327C4FE2380E7008327C4FE93871700E4
|
:1001780003C707008327C4FE2380E7008327C4FE44
|
||||||
:100188002326F4FE832784FE938717002324F4FE96
|
:10018800938717002326F4FE832784FE938717009E
|
||||||
:10019800832744FD032784FEE344F7FC8327C4FE3A
|
:100198002324F4FE832744FD032784FEE344F7FC6D
|
||||||
:1001A8002320F4FE832704FE93F73700032704FE79
|
:1001A8008327C4FE2320F4FE832704FE93F7370039
|
||||||
:1001B800B307F7002320F4FE832704FE2326F4FE6A
|
:1001B800032704FEB307F7002320F4FE832704FE79
|
||||||
:1001C8008327C4FD0327C4FE23A0E7001300000013
|
:1001C8002326F4FE8327C4FD0327C4FE23A0E700EB
|
||||||
:1001D8000324C1021301010367800000130101FC1D
|
:1001D800130000000324C10213010103678000001B
|
||||||
:1001E800232E8102130401042326A4FC2324B4FC37
|
:1001E800130101FC232E8102130401042326A4FC1D
|
||||||
:1001F8008327C4FC83A707002326F4FE9307C4FDC6
|
:1001F8002324B4FC8327C4FC83A707002326F4FE2A
|
||||||
:100208002322F4FE8327C4FE03C70700832744FE86
|
:100208009307C4FD2322F4FE8327C4FE03C7070017
|
||||||
:100218002380E700832744FE938717000327C4FE43
|
:10021800832744FE2380E700832744FE9387170043
|
||||||
:10022800034717002380E700832744FE93872700AE
|
:100228000327C4FE034717002380E700832744FE03
|
||||||
:100238000327C4FE034727002380E700832744FEE3
|
:10023800938727000327C4FE034727002380E7008E
|
||||||
:10024800938737000327C4FE034737002380E7005E
|
:10024800832744FE938737000327C4FE03473700FC
|
||||||
:100258008327C4FE938747002326F4FE232404FE45
|
:100258002380E7008327C4FE938747002326F4FE04
|
||||||
:100268006F004003832784FE032784FCB307F7004D
|
:10026800232404FE6F004003832784FE032784FCB5
|
||||||
:100278000327C4FE034707002380E7008327C4FE43
|
:10027800B307F7000327C4FE034707002380E700FE
|
||||||
:10028800938717002326F4FE832784FE938717009D
|
:100288008327C4FE938717002326F4FE832784FE62
|
||||||
:100298002324F4FE8327C4FD032784FEE344F7FCEC
|
:10029800938717002324F4FE8327C4FD032784FED5
|
||||||
:1002A8008327C4FE2320F4FE832704FE93F7370038
|
:1002A800E344F7FC8327C4FE2320F4FE832704FEDF
|
||||||
:1002B800032704FEB307F7002320F4FE832704FE78
|
:1002B80093F73700032704FEB307F7002320F4FE63
|
||||||
:1002C8002326F4FE8327C4FC0327C4FE23A0E700EB
|
:1002C800832704FE2326F4FE8327C4FC0327C4FEE9
|
||||||
:1002D800130000000324C103130101046780000018
|
:1002D80023A0E700130000000324C1031301010455
|
||||||
:1002E800130101FF232681001304010113000000FC
|
:1002E80067800000130101FF232681001304010128
|
||||||
:1002F8000324C1001301010167800000130101FEFE
|
:1002F800130000000324C1001301010167800000FE
|
||||||
:10030800232E8100130401022326A4FE2324B4FE15
|
:10030800130101FE232E8100130401022326A4FEFB
|
||||||
:10031800832784FE3727000023A2E7009307000005
|
:100318002324B4FE832784FE3727000023A2E700A6
|
||||||
:10032800138507000324C10113010102678000003F
|
:1003280093070000138507000324C101130101028C
|
||||||
:10033800130101FE232E1100232C81001304010256
|
:1003380067800000130101FE232E1100232C810089
|
||||||
:100348002326A4FEB72700801385C7B2EF00C04F4D
|
:10034800130401022326A4FEB7270080138507B3F0
|
||||||
:1003580093071000138507008320C101032481013E
|
:10035800EF00C04F93071000138507008320C101E9
|
||||||
:100368001301010267800000130101FD2326110219
|
:10036800032481011301010267800000130101FDCC
|
||||||
:100378002324810213040103232EA4FC232CB4FCA0
|
:10037800232611022324810213040103232EA4FC43
|
||||||
:10038800232AC4FCB70700712326F4FEB7070072BE
|
:10038800232CB4FC232AC4FCB70700712326F4FEEF
|
||||||
:100398002324F4FE930730002322F4FE130744FEBF
|
:10039800B70700722324F4FE930730002322F4FEEB
|
||||||
:1003A8009307C4FE13064000930507001385070052
|
:1003A800130744FE9307C4FE130640009305070095
|
||||||
:1003B800EFF05FD21307C4FD9307C4FE1306400095
|
:1003B80013850700EFF05FD21307C4FD9307C4FE4F
|
||||||
:1003C8009305070013850700EFF0DFD0130784FDBE
|
:1003C800130640009305070013850700EFF0DFD000
|
||||||
:1003D8009307C4FE13064000930507001385070022
|
:1003D800130784FD9307C4FE130640009305070026
|
||||||
:1003E800EFF05FCF130744FD9307C4FE13064000E8
|
:1003E80013850700EFF05FCF130744FD9307C4FEA2
|
||||||
:1003F8009305070013850700EFF0DFCD83A74107BA
|
:1003F800130640009305070013850700EFF0DFCDD3
|
||||||
:10040800E7800700130704FE930784FE930507009F
|
:1004080083A74107E7800700130704FE930784FECC
|
||||||
:1004180013850700EFF09FDC832704FE1385070090
|
:100418009305070013850700EFF09FDC832704FE90
|
||||||
:100428008320C102032481021301010367800000B5
|
:10042800138507008320C1020324810213010103FD
|
||||||
:10043800130101FD23261102232481021304010361
|
:1004380067800000130101FD232611022324810295
|
||||||
:10044800232EA4FC232CB4FC232AC4FCB707007178
|
:1004480013040103232EA4FC232CB4FC232AC4FC8C
|
||||||
:100458002324F4FEB70700722326F4FE9307400016
|
:10045800B70700712324F4FEB70700722326F4FEC1
|
||||||
:100468002322F4FE130744FE930784FE130640007C
|
:10046800930740002322F4FE130744FE930784FEFB
|
||||||
:100478009305070013850700EFF0DFC51307C4FDD8
|
:10047800130640009305070013850700EFF0DFC55A
|
||||||
:10048800930784FE130640009305070013850700B1
|
:100488001307C4FD930784FE130640009305070075
|
||||||
:10049800EFF05FC4130784FD930784FE1306400042
|
:1004980013850700EFF05FC4130784FD930784FEFC
|
||||||
:1004A8009305070013850700EFF0DFC2130744FD2B
|
:1004A800130640009305070013850700EFF0DFC22D
|
||||||
:1004B800930784FE13064000930507001385070081
|
:1004B800130744FD930784FE1306400093050700C5
|
||||||
:1004C800EFF05FC183A74107E7800700832744FD5A
|
:1004C80013850700EFF05FC183A74107E7800700A6
|
||||||
:1004D800138507008320C10203248102130101034D
|
:1004D800832744FD138507008320C102032481027A
|
||||||
:1004E80067800000130101FD2326110223248102E5
|
:1004E8001301010367800000130101FD2326110297
|
||||||
:1004F80013040103232EA4FC232CB4FC232AC4FCDC
|
:1004F8002324810213040103232EA4FC232CB4FC1F
|
||||||
:10050800B70700712326F4FE930750002324F4FE56
|
:10050800232AC4FCB70700712326F4FE9307500082
|
||||||
:10051800130784FE9307C4FE1306400093050700E3
|
:100518002324F4FE130784FE9307C4FE1306400049
|
||||||
:1005280013850700EFF01FBB1307C4FD9307C4FE34
|
:100528009305070013850700EFF01FBB1307C4FDF1
|
||||||
:10053800130640009305070013850700EFF09FB9E5
|
:100538009307C4FE130640009305070013850700C0
|
||||||
:100548009307C4FE032644FD832584FD1385070015
|
:10054800EFF09FB99307C4FE032644FD832584FD7D
|
||||||
:10055800EFF05FB883A74107E7800700832744FDD2
|
:1005580013850700EFF05FB883A74107E78007001E
|
||||||
:10056800138507008320C1020324810213010103BC
|
:10056800832744FD138507008320C10203248102E9
|
||||||
:1005780067800000130101FD232681021304010393
|
:100578001301010367800000130101FD2326810296
|
||||||
:10058800232EA4FC8327C4FD63D807008327C4FD5A
|
:1005880013040103232EA4FC8327C4FD63D80700AA
|
||||||
:10059800B307F040232EF4FC0327C4FDB73700004F
|
:100598008327C4FDB307F040232EF4FC0327C4FDD2
|
||||||
:1005A8009387078063D8E700B737000093870780F1
|
:1005A800B73700009387078063D8E700B7370000A4
|
||||||
:1005B800232EF4FC83A781072326F4FE03A78107D3
|
:1005B80093870780232EF4FC83A781072326F4FE64
|
||||||
:1005C8008327C4FD3307F70023ACE1068327C4FE65
|
:1005C80003A781078327C4FD3307F70023ACE1069F
|
||||||
:1005D800138507000324C10213010103678000008B
|
:1005D8008327C4FE138507000324C1021301010306
|
||||||
:1005E800130101FE232E1100232C810013040102A4
|
:1005E80067800000130101FE232E1100232C8100D7
|
||||||
:1005F8002326A4FE13050000EF00001E13000000D0
|
:1005F800130401022326A4FE13050000EF00001EC9
|
||||||
:100608008320C101032481011301010267800000D6
|
:10060800130000008320C1010324810113010102AA
|
||||||
:10061800130101FD2326110223248102130401037F
|
:1006180067800000130101FD2326110223248102B3
|
||||||
:10062800232EA4FC232CB4FC232AC4FCB707007196
|
:1006280013040103232EA4FC232CB4FC232AC4FCAA
|
||||||
:100638002326F4FEB70700722324F4FE9307700004
|
:10063800B70700712326F4FEB70700722324F4FEDF
|
||||||
:100648002322F4FE130744FE9307C4FE130640005A
|
:10064800930770002322F4FE130744FE9307C4FEA9
|
||||||
:100658009305070013850700EFF0DFA71307C4FD14
|
:10065800130640009305070013850700EFF0DFA796
|
||||||
:100668009307C4FE1306400093050700138507008F
|
:100668001307C4FD9307C4FE130640009305070053
|
||||||
:10067800EFF05FA6130784FD9307C4FE130640003E
|
:1006780013850700EFF05FA6130784FD9307C4FEF8
|
||||||
:100688009305070013850700EFF0DFA4130744FD67
|
:10068800130640009305070013850700EFF0DFA469
|
||||||
:100698009307C4FE1306400093050700138507005F
|
:10069800130744FD9307C4FE1306400093050700A3
|
||||||
:1006A800EFF05FA383A74107E7800700130704FE65
|
:1006A80013850700EFF05FA383A74107E7800700E2
|
||||||
:1006B800930784FE9305070013850700EFF01FB228
|
:1006B800130704FE930784FE9305070013850700BC
|
||||||
:1006C800832704FE138507008320C10203248102C7
|
:1006C800EFF01FB2832704FE138507008320C102C1
|
||||||
:1006D8001301010367800000130101FF23261100A5
|
:1006D800032481021301010367800000130101FF55
|
||||||
:1006E8002324810013040101B7270080138507B470
|
:1006E800232611002324810013040101B727008069
|
||||||
:1006F800EF008015130000008320C100032481004F
|
:1006F800138547B4EF008015130000008320C10064
|
||||||
:100708001301010167800000130101FF2326110076
|
:10070800032481001301010167800000130101FF28
|
||||||
:100718002324810013040101EF00800E93070500D4
|
:10071800232611002324810013040101EF00800E19
|
||||||
:10072800138507008320C100032481001301010100
|
:1007280093070500138507008320C1000324810077
|
||||||
:1007380067800000130101FF232611002324810094
|
:100738001301010167800000130101FF2326110046
|
||||||
:1007480013040101B7270080138547B6EF00C00FD7
|
:100748002324810013040101B7270080138587B68D
|
||||||
:10075800130000008320C10003248100130101015C
|
:10075800EF00C00F130000008320C10003248100B4
|
||||||
:1007680067800000130101FF232611002324810064
|
:100768001301010167800000130101FF2326110016
|
||||||
:1007780013040101B7270080138587B8EF00C00C68
|
:100778002324810013040101B72700801385C7B81B
|
||||||
:1007880083A781099386170023ACD1081385070036
|
:10078800EF00C00C83A781099386170023ACD1081A
|
||||||
:100798008320C10003248100130101016780000048
|
:10079800138507008320C100032481001301010190
|
||||||
:1007A800130101FF232611002324810013040101F2
|
:1007A80067800000130101FF232611002324810024
|
||||||
:1007B800B7270080138547BBEF000009130000002E
|
:1007B80013040101B7270080138587BBEF000009E8
|
||||||
:1007C8008320C10003248100130101016780000018
|
:1007C800130000008320C1000324810013010101EC
|
||||||
:1007D8006B10B500678000006B00050067800000A3
|
:1007D800678000006B10B500678000006B000500A3
|
||||||
:1007E8006B40B500678000006B2005006780000043
|
:1007E800678000006B40B500678000006B20050043
|
||||||
:1007F8006B300000678000007325100267800000DE
|
:1007F800678000006B3000006780000073251002DE
|
||||||
:100808007325000267800000130540006B00050097
|
:100808006780000073250002678000001305400020
|
||||||
:10081800F32610029396F600732600029315A6009D
|
:100818006B000500F32610029396F600732600027B
|
||||||
:100828001316260037F1FF6F3301B1403301D14071
|
:100828009315A6001316260037F1FF6F3301B14068
|
||||||
:100838003301C100F3261002638606001305000089
|
:100838003301D1403301C100F3261002638606005C
|
||||||
:100848006B00050067800000130141FF23201100A1
|
:10084800130500006B00050067800000130141FFDD
|
||||||
:100858002322B1008345050063880500EF00C0012D
|
:10085800232011002322B100834505006388050089
|
||||||
:10086800130515006FF01FFF832001008325410049
|
:10086800EF00C001130515006FF01FFF8320010082
|
||||||
:100878001301C10067800000B702010023A0B20085
|
:10087800832541001301C10067800000B702010011
|
||||||
:1008880067800000130101FD232611022324810241
|
:1008880023A0B20067800000130101FD2326110296
|
||||||
:1008980013040103232EA4FC0327C4FD9307F000CF
|
:100898002324810213040103232EA4FC0327C4FD8F
|
||||||
:1008A80063E4E702B72700800327C4FD1317270076
|
:1008A8009307F00063E4E702B72700800327C4FD3D
|
||||||
:1008B80093878700B307F70083A70700138507000E
|
:1008B8001317270093878700B307F70083A707005C
|
||||||
:1008C800EFF09FF86F004007930700022326F4FE1D
|
:1008C80013850700EFF09FF86F00400793070002B9
|
||||||
:1008D800A30504FE8327C4FE9387C7FF0327C4FD2F
|
:1008D8002326F4FEA30504FE8327C4FE9387C7FFDF
|
||||||
:1008E800B357F70093F7F7002322F4FE832744FE5B
|
:1008E8000327C4FDB357F70093F7F7002322F4FE5C
|
||||||
:1008F8006386070093071000A305F4FE8347B4FE40
|
:1008F800832744FE6386070093071000A305F4FED0
|
||||||
:1009080063820702B7270080032744FE13172700D6
|
:100908008347B4FE63820702B7270080032744FEAB
|
||||||
:1009180093878700B307F70083A7070013850700AD
|
:100918001317270093878700B307F70083A70700FB
|
||||||
:10092800EFF09FF28327C4FE9387C7FF2326F4FEC8
|
:1009280013850700EFF09FF28327C4FE9387C7FF64
|
||||||
:100938008327C4FEE340F0FA8320C1020324810226
|
:100938002326F4FE8327C4FEE340F0FA8320C10295
|
||||||
:100948001301010367800000130101FE232E11002B
|
:10094800032481021301010367800000130101FEE3
|
||||||
:10095800232C8100130401022326A4FE2324B4FEC1
|
:10095800232E1100232C8100130401022326A4FE58
|
||||||
:100968000325C4FEEFF05FEE032584FEEFF09FF150
|
:100968002324B4FE0325C4FEEFF05FEE032584FEC6
|
||||||
:10097800B7270080138587C1EFF01FED1300000033
|
:10097800EFF09FF1B72700801385C7C1EFF01FED97
|
||||||
:100988008320C10103248101130101026780000053
|
:10098800130000008320C101032481011301010227
|
||||||
:10099800130101FD232611022324810213040103FC
|
:1009980067800000130101FD232611022324810230
|
||||||
:1009A80013051000EFF05FE3930750002322F4FED5
|
:1009A8001304010313051000EFF05FE3930700023F
|
||||||
:1009B800832744FE9397270013850700EF004025FF
|
:1009B8002322F4FE832744FE93972700138507001C
|
||||||
:1009C800930705002320F4FE832744FE939727000E
|
:1009C800EF004025930705002320F4FE832744FE0B
|
||||||
:1009D80013850700EF00C02393070500232EF4FCBE
|
:1009D8009397270013850700EF00C02393070500AE
|
||||||
:1009E800832744FE9397270013850700EF004022D2
|
:1009E800232EF4FC832744FE9397270013850700E2
|
||||||
:1009F80093070500232CF4FC232604FE6F008005D2
|
:1009F800EF00402293070500232CF4FC232604FE75
|
||||||
:100A08008327C4FE93972700032704FEB307F70044
|
:100A08006F0080058327C4FE93972700032704FE01
|
||||||
:100A18001307100023A0E7008327C4FE939727003D
|
:100A1800B307F7001307100023A0E7008327C4FEDD
|
||||||
:100A28000327C4FDB307F7001307100023A0E7004E
|
:100A2800939727000327C4FDB307F70013071000A7
|
||||||
:100A38008327C4FE93972700032784FDB307F70095
|
:100A380023A0E7008327C4FE93972700032784FD9C
|
||||||
:100A48001307100023A0E7008327C4FE938717002D
|
:100A4800B307F7001307100023A0E7008327C4FEAD
|
||||||
:100A58002326F4FE0327C4FE832744FEE342F7FA65
|
:100A5800938717002326F4FE0327C4FE832744FE4A
|
||||||
:100A6800832684FD0326C4FD832504FE032544FE56
|
:100A6800E342F7FA832684FD0326C4FD832504FEAA
|
||||||
:100A7800EFF04FE3232404FE6F00C002832784FEB7
|
:100A7800032544FEEFF00FE3232404FE6F00C002B9
|
||||||
:100A880093972700032784FDB307F70083A7070080
|
:100A8800832784FE93972700032784FDB307F70085
|
||||||
:100A980013850700EFF01FDF832784FE9387170075
|
:100A980083A7070013850700EFF01FDF832784FE75
|
||||||
:100AA8002324F4FE032784FE832744FEE348F7FC4F
|
:100AA800938717002324F4FE032784FE832744FE3C
|
||||||
:100AB80013050000EFF05FD2130000001385070054
|
:100AB800E348F7FC13050000EFF05FD213000000D5
|
||||||
:100AC8008320C1020324810213010103678000000F
|
:100AC800138507008320C102032481021301010357
|
||||||
:100AD80093050500930600001306000013050000A7
|
:100AD80067800000930505009306000013060000D8
|
||||||
:100AE8006F005024130101FF9305000023248100A7
|
:100AE800130500006F005024130101FF9305000057
|
||||||
:100AF8002326110013040500EF00502C03A501075D
|
:100AF800232481002326110013040500EF00502C45
|
||||||
:100B08008327C50363840700E780070013050400F3
|
:100B080003A501078327C50363840700E78007005F
|
||||||
:100B1800EFF01FAD130101FF232481002322910070
|
:100B180013050400EFF01FAD130101FF232481002A
|
||||||
:100B280037240080B72400809387440013044400CE
|
:100B28002322910037240080B72400809387440053
|
||||||
:100B38003304F440232611001354244063020402B2
|
:100B3800130444003304F4402326110013542440C2
|
||||||
:100B4800931424009384C4FFB384F40083A704009F
|
:100B480063020402931424009384C4FFB384F40062
|
||||||
:100B58001304F4FF9384C4FFE7800700E31804FE3E
|
:100B580083A704001304F4FF9384C4FFE78007000D
|
||||||
:100B68008320C10003248100832441001301010173
|
:100B6800E31804FE8320C10003248100832441008C
|
||||||
:100B780067800000130101FF232481002320210145
|
:100B78001301010167800000130101FF2324810094
|
||||||
:100B880037240080372900809307040013090900DF
|
:100B8800232021013724008037290080930704009F
|
||||||
:100B98003309F940232611002322910013592940D3
|
:100B9800130909003309F940232611002322910083
|
||||||
:100BA800630009021304040093040000832704006F
|
:100BA8001359294063000902130404009304000048
|
||||||
:100BB8009384140013044400E7800700E31899FEA7
|
:100BB800832704009384140013044400E78007008B
|
||||||
:100BC800372400803729008093070400130949005F
|
:100BC800E31899FE37240080372900809307040032
|
||||||
:100BD8003309F9401359294063000902130404003A
|
:100BD800130949003309F9401359294063000902F0
|
||||||
:100BE8009304000083270400938414001304440032
|
:100BE8001304040093040000832704009384140072
|
||||||
:100BF800E7800700E31899FE8320C10003248100E1
|
:100BF80013044400E7800700E31899FE8320C1002E
|
||||||
:100C080083244100032901001301010167800000CA
|
:100C08000324810083244100032901001301010109
|
||||||
:100C18009305050003A501086F0000019305050071
|
:100C1800678000009305050003A501086F00000127
|
||||||
:100C280003A501086F00103D130101FD2322910265
|
:100C28009305050003A501086F00103D130101FDA0
|
||||||
:100C3800232E31012326110223248102232021039C
|
:100C380023229102232E310123261102232481022B
|
||||||
:100C4800232C4101232A5101232861012326710104
|
:100C480023202103232C4101232A51012328610158
|
||||||
:100C580023248101232291019384B5009307600125
|
:100C58002326710123248101232291019384B50065
|
||||||
:100C68009309050063E497069307000163ECB72036
|
:100C6800930760019309050063E497069307000161
|
||||||
:100C7800EF0010059304000193078001130620007C
|
:100C780063ECB720EF00100593040001930780018F
|
||||||
:100C8800138981C6B307F90003A44700138787FFB8
|
:100C880013062000138981C6B307F90003A447009F
|
||||||
:100C98006302E424832744008326C40003268400D7
|
:100C9800138787FF6302E424832744008326C40064
|
||||||
:100CA80093F7C7FFB307F40003A747002326D6002E
|
:100CA8000326840093F7C7FFB307F40003A74700A0
|
||||||
:100CB80023A4C600136717001385090023A2E700C1
|
:100CB8002326D60023A4C60013671700138509004E
|
||||||
:100CC800EF005000130584006F00801C93F484FF2C
|
:100CC80023A2E700EF005000130584006F00801C8A
|
||||||
:100CD80063CA041A63E8B41AEF00807E9307701F92
|
:100CD80093F484FF63CA041A63E8B41AEF00807EB1
|
||||||
:100CE80063F8974693D79400638E071C1307400058
|
:100CE8009307701F63F8974693D79400638E071C89
|
||||||
:100CF800636CF73E93D764001386970313858703C5
|
:100CF80013074000636CF73E93D76400138697038D
|
||||||
:100D080093163600138981C6B306D90003A446009A
|
:100D08001385870393163600138981C6B306D90065
|
||||||
:100D1800938686FF638C8602832744009305F00040
|
:100D180003A44600938686FF638C860283274400DB
|
||||||
:100D280093F7C7FF3387974063C0E50263580734DA
|
:100D28009305F00093F7C7FF3387974063C0E50248
|
||||||
:100D38000324C400638C86008327440093F7C7FF0D
|
:100D3800635807340324C400638C86008327440067
|
||||||
:100D480033879740E3D4E5FE130605000324090121
|
:100D480093F7C7FF33879740E3D4E5FE1306050002
|
||||||
:100D580013088900630C0419832544001307F00065
|
:100D58000324090113088900630C0419832544003E
|
||||||
:100D680093F5C5FFB3879540634AF740232A0901E5
|
:100D68001307F00093F5C5FFB3879540634AF74032
|
||||||
:100D78002328090163D4073E9307F01F63E6B730C1
|
:100D7800232A09012328090163D4073E9307F01F9A
|
||||||
:100D880093D53500938715009397370003254900BD
|
:100D880063E6B73093D535009387150093973700FE
|
||||||
:100D9800B307F90083A6070093D525401307100071
|
:100D980003254900B307F90083A6070093D525402A
|
||||||
:100DA8003317B7003367A700938587FF2326B4005E
|
:100DA800130710003317B7003367A700938587FF31
|
||||||
:100DB8002324D4002322E90023A0870023A6860049
|
:100DB8002326B4002324D4002322E90023A087009B
|
||||||
:100DC8009357264093061000B396F600636AD7122D
|
:100DC80023A686009357264093061000B396F60094
|
||||||
:100DD800B3F7E60063940702939616001376C6FFEE
|
:100DD800636AD712B3F7E600639407029396160086
|
||||||
:100DE800B3F7E60013064600639A070093961600C9
|
:100DE8001376C6FFB3F7E60013064600639A0700BA
|
||||||
:100DF800B3F7E60013064600E38A07FE1305F00082
|
:100DF80093961600B3F7E60013064600E38A07FE4B
|
||||||
:100E080093183600B30819019385080013030600E8
|
:100E08001305F00093183600B308190193850800FC
|
||||||
:100E180003A4C500639A85006F00402F6352073012
|
:100E18001303060003A4C500639A85006F00402FE2
|
||||||
:100E28000324C4006384852E8327440093F7C7FFF7
|
:100E2800635207300324C4006384852E832744005B
|
||||||
:100E380033879740E354E5FE8326C40003268400E5
|
:100E380093F7C7FF33879740E354E5FE8326C40042
|
||||||
:100E480093E514002322B4002326D60023A4C60069
|
:100E48000326840093E514002322B4002326D60049
|
||||||
:100E5800B3049400232A9900232899009366170065
|
:100E580023A4C600B3049400232A990023289900E8
|
||||||
:100E680023A6040123A4040123A2D400B307F40099
|
:100E68009366170023A6040123A4040123A2D40037
|
||||||
:100E78001385090023A0E700EF00C0641305840070
|
:100E7800B307F4001385090023A0E700EF00C0645E
|
||||||
:100E88006F0000019307C00023A0F90013050000BC
|
:100E8800130584006F0000019307C00023A0F90038
|
||||||
:100E98008320C10203248102832441020329010221
|
:100E9800130500008320C102032481028324410238
|
||||||
:100EA8008329C101032A8101832A4101032B0101FE
|
:100EA800032901028329C101032A8101832A4101FF
|
||||||
:100EB800832BC100032C8100832C41001301010303
|
:100EB800032B0101832BC100032C8100832C4100EB
|
||||||
:100EC8006780000093060020130600041305F00352
|
:100EC8001301010367800000930600201306000445
|
||||||
:100ED8006FF05FE303A4C70013062600E39C87DADC
|
:100ED8001305F0036FF05FE303A4C70013062600B1
|
||||||
:100EE8000324090113088900E31804E703274900CC
|
:100EE800E39C87DA0324090113088900E31804E75F
|
||||||
:100EF8009357264093061000B396F600E37AD7EC92
|
:100EF800032749009357264093061000B396F6003F
|
||||||
:100F080003248900832A440013FCCAFF63689C00F9
|
:100F0800E37AD7EC03248900832A440013FCCAFF40
|
||||||
:100F1800B3079C401307F000634EF71283AA4109F8
|
:100F180063689C00B3079C401307F000634EF71208
|
||||||
:100F280003A741089307F0FF330A8401B38A5401E9
|
:100F280083AA410903A741089307F0FF330A840104
|
||||||
:100F38006308F732B71700009387F700B38AFA00FF
|
:100F3800B38A54016308F732B71700009387F700A4
|
||||||
:100F4800B7F7FFFFB3FAFA0093850A001385090083
|
:100F4800B38AFA00B7F7FFFFB3FAFA0093850A00ED
|
||||||
:100F5800EF0080579307F0FF130B0500630AF5268F
|
:100F580013850900EF0080579307F0FF130B050076
|
||||||
:100F680063664527938BC10983A70B00B387FA00F3
|
:100F6800630AF52663664527938BC10983A70B009F
|
||||||
:100F780023A0FB00138707006306AA3883A641084D
|
:100F7800B387FA0023A0FB00138707006306AA388B
|
||||||
:100F88009307F0FF638EF638330A4B413307EA00C4
|
:100F880083A641089307F0FF638EF638330A4B4176
|
||||||
:100F980023A0EB00937C7B0063860C2E330B9B41D4
|
:100F98003307EA0023A0EB00937C7B0063860C2ECA
|
||||||
:100FA800B7170000130B8B00138AF7FFB30A5B0116
|
:100FA800330B9B41B7170000130B8B00138AF7FF15
|
||||||
:100FB8009387870033F74A01B3879741B387E740A0
|
:100FB800B30A5B019387870033F74A01B3879741E8
|
||||||
:100FC80033FA470193050A0013850900EF00C04F63
|
:100FC800B387E74033FA470193050A001385090000
|
||||||
:100FD8009307F0FF6300F53A33056541B30A45010D
|
:100FD800EF00C04F9307F0FF6300F53A3305654112
|
||||||
:100FE80083A70B002324690193EA1A00B307FA00C8
|
:100FE800B30A450183A70B002324690193EA1A0079
|
||||||
:100FF80023A0FB0023225B01630824331306F000BF
|
:100FF800B307FA0023A0FB0023225B016308243314
|
||||||
:10100800637886338326440013074CFF137787FFE2
|
:101008001306F000637886338326440013074CFFE9
|
||||||
:1010180093F61600B3E6E6002322D40093055000A9
|
:10101800137787FF93F61600B3E6E6002322D40081
|
||||||
:10102800B306E40023A2B60023A4B600636EE63438
|
:1010280093055000B306E40023A2B60023A4B6003B
|
||||||
:10103800832A4B0013040B0083A6010963F4F6000E
|
:10103800636EE634832A4B0013040B0083A6010970
|
||||||
:1010480023A8F10883A6C10863F8F61823A6F108B7
|
:1010480063F4F60023A8F10883A6C10863F8F6182C
|
||||||
:101058006F00801813E714002322E400B3049400FF
|
:1010580023A6F1086F00801813E714002322E40088
|
||||||
:101068002324990093E717001385090023A2F400AD
|
:10106800B30494002324990093E71700138509001B
|
||||||
:10107800EF004045130584006FF09FE18326C4000C
|
:1010780023A2F400EF004045130584006FF09FE1C0
|
||||||
:10108800032684006FF01FC293D795001307400012
|
:101088008326C400032684006FF01FC293D79500FF
|
||||||
:101098006374F712130740016360F7229386C7054C
|
:10109800130740006374F712130740016360F722D7
|
||||||
:1010A8001387B70593963600B306D90083A70600C1
|
:1010A8009386C7051387B70593963600B306D9000C
|
||||||
:1010B800938686FF638AF61A03A747001377C7FF4C
|
:1010B80083A70600938686FF638AF61A03A747006C
|
||||||
:1010C80063F6E50083A78700E398F6FE83A6C700CA
|
:1010C8001377C7FF63F6E50083A78700E398F6FE6A
|
||||||
:1010D800032749002326D4002324F40023A48600F0
|
:1010D80083A6C700032749002326D4002324F4004D
|
||||||
:1010E80023A687006FF0DFCD130740016378F71060
|
:1010E80023A4860023A687006FF0DFCD13074001F5
|
||||||
:1010F800130740056360F71E93D7C4001386F706ED
|
:1010F8006378F710130740056360F71E93D7C400A1
|
||||||
:101108001385E706931636006FF0DFBF130313004D
|
:101108001386F7061385E706931636006FF0DFBFE0
|
||||||
:101118009377330093858500E39C07CE6F00C00F5B
|
:10111800130313009377330093858500E39C07CE70
|
||||||
:10112800B307F40003A747008326C40003268400FE
|
:101128006F00C00FB307F40003A747008326C4006D
|
||||||
:101138001367170023A2E7002326D60013850900AA
|
:10113800032684001367170023A2E7002326D6009E
|
||||||
:1011480023A4C600EF000038130584006FF05FD4B5
|
:101148001385090023A4C600EF00003813058400A6
|
||||||
:1011580013D63400938784006FF09FB2B305B400B0
|
:101158006FF05FD413D63400938784006FF09FB28A
|
||||||
:1011680083A745001385090093E7170023A2F5001C
|
:10116800B305B40083A745001385090093E717006A
|
||||||
:10117800EF004035130584006FF09FD113E714008A
|
:1011780023A2F500EF004035130584006FF09FD1DE
|
||||||
:101188002322E400B3049400232A99002328990019
|
:1011880013E714002322E400B3049400232A9900EF
|
||||||
:1011980013E7170023A6040123A4040123A2E400F3
|
:101198002328990013E7170023A6040123A40401B8
|
||||||
:1011A800B305B4001385090023A0F500EF008031D2
|
:1011A80023A2E400B305B4001385090023A0F500C9
|
||||||
:1011B800130584006FF0DFCD93D7650093869703FE
|
:1011B800EF008031130584006FF0DFCD93D7650011
|
||||||
:1011C80013878703939636006FF01FEE630224138C
|
:1011C8009386970313878703939636006FF01FEE75
|
||||||
:1011D80003248900832A440093FACAFFB3879A40FC
|
:1011D8006302241303248900832A440093FACAFF74
|
||||||
:1011E80063E69A001307F000E346F7E61385090063
|
:1011E800B3879A4063E69A001307F000E346F7E6F0
|
||||||
:1011F800EF00402D130500006FF09FC91386C70547
|
:1011F80013850900EF00402D130500006FF09FC90B
|
||||||
:101208001385B705931636006FF0DFAF83A7880004
|
:101208001386C7051385B705931636006FF0DFAF51
|
||||||
:101218001306F6FF6394171D93773600938888FFAB
|
:1012180083A788001306F6FF6394171D937736009B
|
||||||
:10122800E39607FE0327490093C7F6FFB3F7E700E5
|
:10122800938888FFE39607FE0327490093C7F6FFD4
|
||||||
:101238002322F90093961600E3E4D7CCE38206CC88
|
:10123800B3F7E7002322F90093961600E3E4D7CC2E
|
||||||
:1012480033F7F600631A07009396160033F7F60093
|
:10124800E38206CC33F7F600631A0700939616007C
|
||||||
:1012580013034300E30A07FE130603006FF05FBAA7
|
:1012580033F7F60013034300E30A07FE13060300FF
|
||||||
:10126800938A0A016FF05FCE032549009355274002
|
:101268006FF05FBA938A0A016FF05FCE03254900D9
|
||||||
:10127800130710003317B7003367A7002322E900CC
|
:1012780093552740130710003317B7003367A700AB
|
||||||
:101288006FF05FE5B71700001387F7FF330A5B01BC
|
:101288002322E9006FF05FE5B71700001387F7FF27
|
||||||
:10129800337AEA00B387474133FAE70093050A0037
|
:10129800330A5B01337AEA00B387474133FAE70040
|
||||||
:1012A80013850900EF0040229307F0FFE316F5D2FB
|
:1012A80093050A0013850900EF0040229307F0FF19
|
||||||
:1012B800130A00006FF0DFD2130740056360F708D8
|
:1012B800E316F5D2130A00006FF0DFD213074005DA
|
||||||
:1012C80093D7C5009386F7061387E70693963600EB
|
:1012C8006360F70893D7C5009386F7061387E70688
|
||||||
:1012D8006FF09FDD130740156360F70893D7F4009C
|
:1012D800939636006FF09FDD130740156360F7089B
|
||||||
:1012E8001386870713857707931636006FF09FA13B
|
:1012E80093D7F4001386870713857707931636007C
|
||||||
:1012F800938BC10903A70B003387EA0023A0EB00F7
|
:1012F8006FF09FA1938BC10903A70B003387EA0006
|
||||||
:101308006FF0DFC793164A01E39A06C603248900E3
|
:1013080023A0EB006FF0DFC793164A01E39A06C6E5
|
||||||
:10131800B30A5C0193EA1A00232254016FF0DFD16B
|
:1013180003248900B30A5C0193EA1A0023225401CA
|
||||||
:1013280023A261096FF01FC713040B006FF0DFD011
|
:101328006FF0DFD123A261096FF01FC713040B0010
|
||||||
:10133800930710002322FB006FF05FEB13074015A3
|
:101338006FF0DFD0930710002322FB006FF05FEB04
|
||||||
:101348006362F70693D7F5009386870713877707B5
|
:10134800130740156362F70693D7F500938687075E
|
||||||
:10135800939636006FF05FD5130740556362F70622
|
:1013580013877707939636006FF05FD513074055CC
|
||||||
:1013680093D724011386D7071385C707931636002A
|
:101368006362F70693D724011386D7071385C70747
|
||||||
:101378006FF05F99938C8CFFB38A9A01B38A6A41A4
|
:10137800931636006FF05F99938C8CFFB38A9A01AD
|
||||||
:10138800130A00006FF0DFC5930584001385090078
|
:10138800B38A6A41130A00006FF0DFC59305840031
|
||||||
:10139800EF0040460324890083A70B00832A4400FA
|
:1013980013850900EF0040460324890083A70B004A
|
||||||
:1013A8006FF09FC9130740556364F70293D725016F
|
:1013A800832A44006FF09FC9130740556364F7020E
|
||||||
:1013B8009386D7071387C707939636006FF0DFCE5B
|
:1013B80093D725019386D7071387C70793963600D7
|
||||||
:1013C8009306803F1306F0071305E0076FF09F931D
|
:1013C8006FF0DFCE9306803F1306F0071305E007A2
|
||||||
:1013D8009306803F1307E0076FF01FCD832749006E
|
:1013D8006FF09F939306803F1307E0076FF01FCDD0
|
||||||
:1013E8006FF05FE51303F00013070500637EC30287
|
:1013E800832749006FF05FE51303F000130705003A
|
||||||
:1013F8009377F7006390070A63920508937606FFD0
|
:1013F800637EC3029377F7006390070A6392050838
|
||||||
:101408001376F600B386E6002320B7002322B70040
|
:10140800937606FF1376F600B386E6002320B7002E
|
||||||
:101418002324B7002326B70013070701E366D7FE86
|
:101418002322B7002324B7002326B70013070701A8
|
||||||
:101428006314060067800000B306C3409396260045
|
:10142800E366D7FE6314060067800000B306C34076
|
||||||
:1014380097020000B38656006780C6002307B700EE
|
:101438009396260097020000B38656006780C60080
|
||||||
:10144800A306B7002306B700A305B7002305B70016
|
:101448002307B700A306B7002306B700A305B70014
|
||||||
:10145800A304B7002304B700A303B7002303B7000E
|
:101458002305B700A304B7002304B700A303B7000C
|
||||||
:10146800A302B7002302B700A301B7002301B70006
|
:101468002303B700A302B7002302B700A301B70004
|
||||||
:10147800A300B7002300B7006780000093F5F50FBD
|
:101478002301B700A300B7002300B700678000006E
|
||||||
:1014880093968500B3E5D50093960501B3E5D5009D
|
:1014880093F5F50F93968500B3E5D500939605017E
|
||||||
:101498006FF0DFF69396270097020000B386560098
|
:10149800B3E5D5006FF0DFF69396270097020000BA
|
||||||
:1014A80093820000E78006FA93800200938707FF83
|
:1014A800B386560093820000E78006FA9380020014
|
||||||
:1014B8003307F7403306F600E378C3F66FF0DFF33F
|
:1014B800938707FF3307F7403306F600E378C3F650
|
||||||
:1014C8006780000067800000130101FF232481006A
|
:1014C8006FF0DFF36780000067800000130101FF01
|
||||||
:1014D800232291009304050013850500232611009B
|
:1014D800232481002322910093040500138505002D
|
||||||
:1014E80023A2010CEFF00F899307F0FF630CF500BE
|
:1014E8002326110023A2010CEFF00F899307F0FFC8
|
||||||
:1014F8008320C100032481008324410013010101DA
|
:1014F800630CF5008320C10003248100832441008C
|
||||||
:101508006780000083A7410CE38407FE8320C100A5
|
:10150800130101016780000083A7410CE38407FEF3
|
||||||
:101518000324810023A0F400832441001301010166
|
:101518008320C1000324810023A0F4008324410018
|
||||||
:101528006780000003A7010783278714638C0704DB
|
:10152800130101016780000003A7010783278714BF
|
||||||
:1015380003A747001308F001634EE80613182700B5
|
:10153800638C070403A747001308F001634EE8060D
|
||||||
:1015480063060502338307012324C30883A8871889
|
:101548001318270063060502338307012324C30801
|
||||||
:10155800130610003316E600B3E8C80023A41719D1
|
:1015580083A88718130610003316E600B3E8C800FE
|
||||||
:101568002324D310930620006304D5021307170021
|
:1015680023A417192324D310930620006304D5025B
|
||||||
:1015780023A2E700B387070123A4B70013050000DF
|
:101578001307170023A2E700B387070123A4B700C6
|
||||||
:10158800678000009307C7142324F7146FF05FFAED
|
:1015880013050000678000009307C7142324F7148D
|
||||||
:1015980083A6C7181307170023A2E70033E6C6007F
|
:101598006FF05FFA83A6C7181307170023A2E700A6
|
||||||
:1015A80023A6C718B387070123A4B70013050000B3
|
:1015A80033E6C60023A6C718B387070123A4B700EC
|
||||||
:1015B800678000001305F0FF67800000130101FD3C
|
:1015B80013050000678000001305F0FF6780000036
|
||||||
:1015C8002324810103AC0107232E3101232C41017F
|
:1015C800130101FD2324810103AC0107232E3101FE
|
||||||
:1015D800232A510123286101232611022324810291
|
:1015D800232C4101232A51012328610123261102CA
|
||||||
:1015E800232291022320210323267101930A050057
|
:1015E800232481022322910223202103232671012F
|
||||||
:1015F800138B0500130A10009309F0FF03298C14BC
|
:1015F800930A0500138B0500130A10009309F0FFE6
|
||||||
:1016080063080902832449001384F4FF6342040237
|
:1016080003298C1463080902832449001384F4FF16
|
||||||
:1016180093942400B304990063040B0483A7441033
|
:101618006342040293942400B304990063040B0406
|
||||||
:10162800638067051304F4FF9384C4FFE31634FF53
|
:1016280083A74410638067051304F4FF9384C4FF01
|
||||||
:101638008320C10203248102832441020329010279
|
:10163800E31634FF8320C10203248102832441027C
|
||||||
:101648008329C101032A8101832A4101032B010156
|
:10164800032901028329C101032A8101832A410157
|
||||||
:10165800832BC100032C8100130101036780000064
|
:10165800032B0101832BC100032C8100130101031B
|
||||||
:101668008327490083A644009387F7FF638A87048A
|
:10166800678000008327490083A644009387F7FF1B
|
||||||
:1016780023A20400E38806FA8327891833178A000F
|
:10167800638A870423A20400E38806FA832789186B
|
||||||
:10168800832B4900B377F700639E0700E7800600C5
|
:1016880033178A00832B4900B377F700639E07005E
|
||||||
:1016980083274900E39477F783278C14E38427F999
|
:10169800E780060083274900E39477F783278C14B3
|
||||||
:1016A8006FF0DFF58327C91883A544083377F7005F
|
:1016A800E38427F96FF0DFF58327C91883A5440879
|
||||||
:1016B800631C070013850A00E78006006FF05FFDD2
|
:1016B8003377F700631C070013850A00E7800600EC
|
||||||
:1016C800232289006FF01FFB13850500E7800600C1
|
:1016C8006FF05FFD232289006FF01FFB1385050073
|
||||||
:1016D8006FF01FFC130101FE23282101232C810038
|
:1016D800E78006006FF01FFC130101FE232821019B
|
||||||
:1016E800232A91002326310113840500232E11009B
|
:1016E800232C8100232A910023263101138405002D
|
||||||
:1016F80093090500138981C6EFF09FDC83268900D2
|
:1016F800232E110093090500138981C6EFF09FDCA2
|
||||||
:10170800371700009307F7FE83A446003384874009
|
:1017080083268900371700009307F7FE83A4460055
|
||||||
:1017180093F4C4FF330494001354C4001304F4FF77
|
:101718003384874093F4C4FF330494001354C40003
|
||||||
:101728001314C400634EE4009305000013850900F8
|
:101728001304F4FF1314C400634EE400930500008F
|
||||||
:10173800EFF09FD983278900B38797006306F502E6
|
:1017380013850900EFF09FD983278900B3879700A5
|
||||||
:1017480013850900EFF01FD88320C101032481010C
|
:101748006306F50213850900EFF01FD88320C10155
|
||||||
:1017580083244101032901018329C10013050000E5
|
:101758000324810183244101032901018329C10054
|
||||||
:101768001301010267800000B3058040138509005A
|
:10176800130500001301010267800000B3058040E3
|
||||||
:10177800EFF09FD59307F0FF6304F50483A7C10931
|
:1017780013850900EFF09FD59307F0FF6304F50484
|
||||||
:1017880083268900B384844093E41400338487401B
|
:1017880083A7C10983268900B384844093E41400A5
|
||||||
:101798001385090023A2960023AE8108EFF09FD29B
|
:10179800338487401385090023A2960023AE81086D
|
||||||
:1017A8008320C1010324810183244101032901010C
|
:1017A800EFF09FD28320C1010324810183244101EA
|
||||||
:1017B8008329C1001305100013010102678000008E
|
:1017B800032901018329C100130510001301010247
|
||||||
:1017C8009305000013850900EFF01FD00327890057
|
:1017C800678000009305000013850900EFF01FD023
|
||||||
:1017D8009306F000B307E540E3D4F6F683A6410884
|
:1017D800032789009306F000B307E540E3D4F6F643
|
||||||
:1017E80093E717002322F7003305D54023AEA1085D
|
:1017E80083A6410893E717002322F7003305D54065
|
||||||
:1017F8006FF01FF563860512130101FF2324810092
|
:1017F80023AEA1086FF01FF563860512130101FFE0
|
||||||
:101808002322910013840500930405002326110068
|
:1018080023248100232291001384050093040500FA
|
||||||
:10181800EFF01FCB0325C4FF130784FF9377E5FF81
|
:1018180023261100EFF01FCB0325C4FF130784FF15
|
||||||
:101828003306F700938581C68326460003A8850002
|
:101828009377E5FF3306F700938581C68326460044
|
||||||
:1018380093F6C6FF630EC8182322D6001375150049
|
:1018380003A8850093F6C6FF630EC8182322D600B6
|
||||||
:101848003308D6006310050A032384FF03284800E1
|
:10184800137515003308D6006310050A032384FFB7
|
||||||
:101858003307674083288700138501C7B38767006C
|
:10185800032848003307674083288700138501C79A
|
||||||
:10186800137818006380A8140323C70023A6680010
|
:10186800B3876700137818006380A8140323C700A0
|
||||||
:10187800232413016306081C93E617002322D700CC
|
:1018780023A66800232413016306081C93E61700B7
|
||||||
:101888002320F6009306F01F63E8F60A93D7370083
|
:101888002322D7002320F6009306F01F63E8F60A08
|
||||||
:10189800938617009396360003A84500B386D500B3
|
:1018980093D73700938617009396360003A8450020
|
||||||
:1018A80003A5060013D6274093071000B397C70077
|
:1018A800B386D50003A5060013D62740930710007A
|
||||||
:1018B800B3E70701138686FF2326C7002324A70062
|
:1018B800B397C700B3E70701138686FF2326C7003F
|
||||||
:1018C80023A2F50023A0E6002326E50003248100D7
|
:1018C8002324A70023A2F50023A0E6002326E50091
|
||||||
:1018D8008320C10013850400832441001301010102
|
:1018D800032481008320C100138504008324410070
|
||||||
:1018E8006FF05FBE0325480013751500631C0502E1
|
:1018E800130101016FF05FBE032548001375150051
|
||||||
:1018F800B387D700138501C78326860093E81700AE
|
:1018F800631C0502B387D700138501C783268600BA
|
||||||
:101908003308F700638AA6140326C60023A6C60078
|
:1019080093E817003308F700638AA6140326C60075
|
||||||
:101918002324D600232217012320F8006FF09FF616
|
:1019180023A6C6002324D600232217012320F8007B
|
||||||
:101928006780000093E61700232ED4FE2320F600DC
|
:101928006FF09FF66780000093E61700232ED4FE21
|
||||||
:101938009306F01FE3FCF6F493D6970013064000D5
|
:101938002320F6009306F01FE3FCF6F493D69700F5
|
||||||
:10194800636ED60C93D667001385960313868603B9
|
:1019480013064000636ED60C93D667001385960382
|
||||||
:10195800131535003385A50083260500130585FF7B
|
:1019580013868603131535003385A50083260500F5
|
||||||
:10196800630AD51003A646001376C6FF63F6C700C0
|
:10196800130585FF630AD51003A646001376C6FF44
|
||||||
:1019780083A68600E318D5FE03A5C6002326A70084
|
:1019780063F6C70083A68600E318D5FE03A5C60054
|
||||||
:101988002324D700032481002324E5008320C100F9
|
:101988002326A7002324D700032481002324E5006D
|
||||||
:10199800138504008324410023A6E60013010101F6
|
:101998008320C100138504008324410023A6E600A8
|
||||||
:1019A8006FF05FB263180812832586000326C6000D
|
:1019A800130101016FF05FB26318081283258600E6
|
||||||
:1019B800B387F60093E6170023A6C5002324B600D4
|
:1019B8000326C600B387F60093E6170023A6C500E2
|
||||||
:1019C8002322D7003307F7002320F7006FF01FF01A
|
:1019C8002324B6002322D7003307F7002320F7008B
|
||||||
:1019D80013751500B387D70063100502032584FF2C
|
:1019D8006FF01FF013751500B387D7006310050269
|
||||||
:1019E8003307A7408326C70003268700B387A700CD
|
:1019E800032584FF3307A7408326C7000326870003
|
||||||
:1019F8002326D60023A4C60013E6170083A6810871
|
:1019F800B387A7002326D60023A4C60013E6170042
|
||||||
:101A08002322C70023A4E500E3E2D7EC83A541091C
|
:101A080083A681082322C70023A4E500E3E2D7ECDC
|
||||||
:101A180013850400EFF01FCC6FF05FEB1306400155
|
:101A180083A5410913850400EFF01FCC6FF05FEB3D
|
||||||
:101A28006374D602130640056364D60693D6C700CE
|
:101A2800130640016374D602130640056364D606A4
|
||||||
:101A38001385F6061386E606131535006FF09FF139
|
:101A380093D6C7001385F6061386E60613153500F8
|
||||||
:101A4800B387D7006FF05FEB1385C6051386B6051D
|
:101A48006FF09FF1B387D7006FF05FEB1385C60582
|
||||||
:101A5800131535006FF01FF023AAE50023A8E50051
|
:101A58001386B605131535006FF01FF023AAE500AD
|
||||||
:101A68002326A7002324A700232217012320F800F8
|
:101A680023A8E5002326A7002324A7002322170183
|
||||||
:101A78006FF0DFE503A845001356264093071000D2
|
:101A78002320F8006FF0DFE503A845001356264041
|
||||||
:101A88003396C7003366060123A2C5006FF01FEF27
|
:101A8800930710003396C7003366060123A2C500EA
|
||||||
:101A980013064015636CD60093D6F70013858607A6
|
:101A98006FF01FEF13064015636CD60093D6F7005E
|
||||||
:101AA80013867607131535006FF0DFEA13064055E5
|
:101AA8001385860713867607131535006FF0DFEA6E
|
||||||
:101AB800636CD60093D627011385D6071386C6070D
|
:101AB80013064055636CD60093D627011385D607C5
|
||||||
:101AC800131535006FF01FE91305803F1306E00773
|
:101AC8001386C607131535006FF01FE91305803F0D
|
||||||
:101AD8006FF05FE893E617002322D7002320F60073
|
:101AD8001306E0076FF05FE893E617002322D700AC
|
||||||
:041AE8006FF0DFDEDE
|
:081AE8002320F6006FF0DFDEA1
|
||||||
:101AEC003000000031000000320000003300000024
|
:101AF0003000000031000000320000003300000020
|
||||||
:101AFC003400000035000000360000003700000004
|
:101B000034000000350000003600000037000000FF
|
||||||
:101B0C003800000039000000610000006200000095
|
:101B10003800000039000000610000006200000091
|
||||||
:101B1C006300000064000000650000006600000027
|
:101B20006300000064000000650000006600000023
|
||||||
:101B2C0048656C6C6F2066726F6D205F69736174B1
|
:101B300048656C6C6F2066726F6D205F69736174AD
|
||||||
:101B3C0074790A004552524F523A205F6B696C6CB3
|
:101B400074790A004552524F523A205F6B696C6CAF
|
||||||
:101B4C00206E6F742079657420696D706C656D659D
|
:101B5000206E6F742079657420696D706C656D6599
|
||||||
:101B5C006E7465640A0000004552524F523A205F81
|
:101B60006E7465640A0000004552524F523A205F7D
|
||||||
:101B6C00756E6C696E6B206E6F742079657420696C
|
:101B7000756E6C696E6B206E6F7420796574206968
|
||||||
:101B7C006D706C656D656E7465640A004552524FEC
|
:101B80006D706C656D656E7465640A004552524FE8
|
||||||
:101B8C00523A205F67657474696D656F666461793C
|
:101B9000523A205F67657474696D656F6664617938
|
||||||
:101B9C00206E6F742079657420696D706C656D654D
|
:101BA000206E6F742079657420696D706C656D6549
|
||||||
:101BAC006E7465640A0000004552524F523A205F31
|
:101BB0006E7465640A0000004552524F523A205F2D
|
||||||
:101BBC006C696E6B206E6F742079657420696D7022
|
:101BC0006C696E6B206E6F742079657420696D701E
|
||||||
:101BCC006C656D656E7465640A0000003000000081
|
:101BD0006C656D656E7465640A000000300000007D
|
||||||
:101BDC00310000003200000033000000340000002F
|
:101BE000310000003200000033000000340000002B
|
||||||
:101BEC00350000003600000037000000380000000F
|
:101BF000350000003600000037000000380000000B
|
||||||
:101BFC00390000006100000062000000630000007A
|
:101C00003900000061000000620000006300000075
|
||||||
:0E1C0C006400000065000000660000000A0091
|
:0E1C10006400000065000000660000000A008D
|
||||||
:042000004800008014
|
:042000004800008014
|
||||||
:10200800D81B0080DC1B0080E01B0080E41B0080E4
|
:10200800DC1B0080E01B0080E41B0080E81B0080D4
|
||||||
:10201800E81B0080EC1B0080F01B0080F41B008094
|
:10201800EC1B0080F01B0080F41B0080F81B008084
|
||||||
:10202800F81B0080FC1B0080001C0080041C008042
|
:10202800FC1B0080001C0080041C0080081C008031
|
||||||
:10203800081C00800C1C0080101C0080141C0080F0
|
:102038000C1C0080101C0080141C0080181C0080E0
|
||||||
:1020480000000000342300809C23008004240080CA
|
:1020480000000000342300809C23008004240080CA
|
||||||
:102058000000000000000000000000000000000078
|
:102058000000000000000000000000000000000078
|
||||||
:102068000000000000000000000000000000000068
|
:102068000000000000000000000000000000000068
|
||||||
|
|||||||
@@ -148,7 +148,7 @@ int _fstat(int file, struct stat * st)
|
|||||||
|
|
||||||
int _isatty (int file)
|
int _isatty (int file)
|
||||||
{
|
{
|
||||||
vx_print_str("Hello from _isatty\n");
|
// vx_print_str("Hello from _isatty\n");
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -237,8 +237,8 @@ static int head_end = (int) 0x20000000;
|
|||||||
|
|
||||||
void * _sbrk (int nbytes)
|
void * _sbrk (int nbytes)
|
||||||
{
|
{
|
||||||
//vx_print_str("Hello from _sbrk\n");
|
// vx_print_str("Hello from _sbrk\n");
|
||||||
//vx_printf("nbytes: ", nbytes);
|
// vx_printf("nbytes: ", nbytes);
|
||||||
|
|
||||||
//if (nbytes < 0) //vx_print_str("nbytes less than zero\n");
|
//if (nbytes < 0) //vx_print_str("nbytes less than zero\n");
|
||||||
// printf("nBytes: %d\n", nbytes);
|
// printf("nBytes: %d\n", nbytes);
|
||||||
@@ -248,19 +248,21 @@ void * _sbrk (int nbytes)
|
|||||||
nbytes = nbytes * -1;
|
nbytes = nbytes * -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nbytes > 10240)
|
// vx_printf("New nbytes: ", nbytes);
|
||||||
{
|
|
||||||
nbytes = 10240;
|
// if (nbytes > 10240)
|
||||||
}
|
// {
|
||||||
|
// nbytes = 10240;
|
||||||
|
// }
|
||||||
|
|
||||||
// if (((unsigned) head_end) > ((unsigned) (heap_ptr + nbytes)))
|
// if (((unsigned) head_end) > ((unsigned) (heap_ptr + nbytes)))
|
||||||
if (true)
|
if (true)
|
||||||
{
|
{
|
||||||
int base = heap_start;
|
int base = heap_start;
|
||||||
heap_start += nbytes;
|
heap_start += nbytes;
|
||||||
////vx_print_str("_sbrk returning: ");
|
// vx_print_str("_sbrk returning: ");
|
||||||
//vx_print_hex((unsigned) base);
|
// vx_print_hex((unsigned) base);
|
||||||
////vx_print_str("\n");
|
// vx_print_str("\n");
|
||||||
return (void *) base;
|
return (void *) base;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@@ -303,7 +305,7 @@ int _open(const char *name, int flags, int mode)
|
|||||||
|
|
||||||
void _kill()
|
void _kill()
|
||||||
{
|
{
|
||||||
vx_print_str("ERROR: _kill not yet implemented\n");
|
vx_tmc(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned _getpid()
|
unsigned _getpid()
|
||||||
@@ -320,7 +322,7 @@ static int curr_time = 0;
|
|||||||
|
|
||||||
int _gettimeofday()
|
int _gettimeofday()
|
||||||
{
|
{
|
||||||
vx_print_str("ERROR: _gettimeofday not yet implemented\n");
|
// vx_print_str("ERROR: _gettimeofday not yet implemented\n");
|
||||||
return curr_time++;
|
return curr_time++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ _start:
|
|||||||
# Initialize SP
|
# Initialize SP
|
||||||
# la sp, __stack_top
|
# la sp, __stack_top
|
||||||
la a1, vx_set_sp
|
la a1, vx_set_sp
|
||||||
li a0, 4
|
li a0, 32
|
||||||
.word 0x00b5106b # wspawn a0(numWarps), a1(PC SPAWN)
|
.word 0x00b5106b # wspawn a0(numWarps), a1(PC SPAWN)
|
||||||
jal vx_set_sp
|
jal vx_set_sp
|
||||||
li a0, 1
|
li a0, 1
|
||||||
@@ -46,7 +46,7 @@ _start:
|
|||||||
.type vx_set_sp, @function
|
.type vx_set_sp, @function
|
||||||
.global vx_set_sp
|
.global vx_set_sp
|
||||||
vx_set_sp:
|
vx_set_sp:
|
||||||
li a0, 4
|
li a0, 32
|
||||||
.word 0x0005006b # tmc 4
|
.word 0x0005006b # tmc 4
|
||||||
|
|
||||||
.option push
|
.option push
|
||||||
|
|||||||
@@ -7,6 +7,8 @@
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#define TOTAL_WARPS 2
|
||||||
|
#define TOTAL_THREADS 16
|
||||||
|
|
||||||
func_t global_function_pointer;
|
func_t global_function_pointer;
|
||||||
// void (func_t)(void *)
|
// void (func_t)(void *)
|
||||||
@@ -46,16 +48,39 @@ uint8_t * pocl_args;
|
|||||||
uint8_t * pocl_ctx;
|
uint8_t * pocl_ctx;
|
||||||
vx_pocl_workgroup_func pocl_pfn;
|
vx_pocl_workgroup_func pocl_pfn;
|
||||||
|
|
||||||
|
unsigned global_z;
|
||||||
|
unsigned global_y;
|
||||||
|
unsigned global_x;
|
||||||
|
|
||||||
|
|
||||||
void pocl_spawn_real()
|
void pocl_spawn_real()
|
||||||
{
|
{
|
||||||
vx_tmc(pocl_threads);
|
vx_tmc(pocl_threads);
|
||||||
int x = vx_threadID();
|
int base_x = vx_threadID();
|
||||||
int y = vx_warpID();
|
int base_y = vx_warpID();
|
||||||
|
|
||||||
(pocl_pfn)( pocl_args, pocl_ctx, x, y, 0);
|
int local_x;
|
||||||
|
int local_y;
|
||||||
|
|
||||||
if (y != 0)
|
for (int iter_z = 0; iter_z < global_z; iter_z++)
|
||||||
|
{
|
||||||
|
for (int iter_x = 0; iter_x < global_x; iter_x++)
|
||||||
|
{
|
||||||
|
for (int iter_y = 0; iter_y < global_y; iter_y++)
|
||||||
|
{
|
||||||
|
|
||||||
|
local_x = (iter_x * TOTAL_THREADS) + base_x;
|
||||||
|
local_y = (iter_y * TOTAL_WARPS ) + base_y;
|
||||||
|
|
||||||
|
(pocl_pfn)( pocl_args, pocl_ctx, local_x, local_y, iter_z);
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// (pocl_pfn)( pocl_args, pocl_ctx, x, y, 0);
|
||||||
|
|
||||||
|
if (base_y != 0)
|
||||||
{
|
{
|
||||||
vx_tmc(0);
|
vx_tmc(0);
|
||||||
}
|
}
|
||||||
@@ -66,24 +91,67 @@ void pocl_spawn_real()
|
|||||||
void pocl_spawn(struct context_t * ctx, const void * pfn, void * arguments)
|
void pocl_spawn(struct context_t * ctx, const void * pfn, void * arguments)
|
||||||
{
|
{
|
||||||
|
|
||||||
if (ctx->num_groups[2] > 1)
|
|
||||||
|
// printf("ctx->num_groups[0]: %d\n", ctx->num_groups[0]);
|
||||||
|
// printf("ctx->num_groups[1]: %d\n", ctx->num_groups[1]);
|
||||||
|
// printf("ctx->num_groups[2]: %d\n", ctx->num_groups[2]);
|
||||||
|
|
||||||
|
// printf("\n\n");
|
||||||
|
|
||||||
|
// printf("ctx->local_size[0]: %d\n", ctx->local_size[0]);
|
||||||
|
// printf("ctx->local_size[1]: %d\n", ctx->local_size[1]);
|
||||||
|
// printf("ctx->local_size[2]: %d\n", ctx->local_size[2]);
|
||||||
|
if (ctx->num_groups[0] > TOTAL_THREADS)
|
||||||
{
|
{
|
||||||
printf("ERROR: pocl_spawn doesn't support Z dimension yet!\n");
|
pocl_threads = TOTAL_THREADS;
|
||||||
return;
|
global_x = ctx->num_groups[0] / TOTAL_THREADS;
|
||||||
|
printf("pocl_threads: %d\n", pocl_threads);
|
||||||
|
// printf("global_x: %d\n", global_x);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
pocl_threads = ctx->num_groups[0];
|
||||||
|
global_x = 1;
|
||||||
|
// printf("pocl_threads: %d\n", pocl_threads);
|
||||||
|
// printf("global_x: %d\n", global_x);
|
||||||
}
|
}
|
||||||
|
|
||||||
pocl_threads = ctx->num_groups[0];
|
|
||||||
|
global_z = ctx->num_groups[2];
|
||||||
pocl_pfn = (vx_pocl_workgroup_func) pfn;
|
pocl_pfn = (vx_pocl_workgroup_func) pfn;
|
||||||
pocl_ctx = (uint8_t *) ctx;
|
pocl_ctx = (uint8_t *) ctx;
|
||||||
pocl_args = (uint8_t *) arguments;
|
pocl_args = (uint8_t *) arguments;
|
||||||
|
|
||||||
if (ctx->num_groups[1] > 1)
|
if (ctx->num_groups[1] > 1)
|
||||||
{
|
{
|
||||||
vx_wspawn(ctx->num_groups[1], (unsigned) &pocl_spawn_real);
|
if (ctx->num_groups[1] > TOTAL_WARPS)
|
||||||
|
{
|
||||||
|
global_y = ctx->num_groups[1] / TOTAL_WARPS;
|
||||||
|
vx_wspawn(TOTAL_WARPS, (unsigned) &pocl_spawn_real);
|
||||||
|
// printf("global_y: %d\n", global_y);
|
||||||
|
// printf("Warps: %d\n", TOTAL_WARPS);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
global_y = 1;
|
||||||
|
vx_wspawn(ctx->num_groups[1], (unsigned) &pocl_spawn_real);
|
||||||
|
// printf("global_y: %d\n", global_y);
|
||||||
|
// printf("Warps: %d\n", ctx->num_groups[1]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
unsigned starting_cycles = vx_getCycles();
|
||||||
|
unsigned starting_inst = vx_getInst();
|
||||||
|
|
||||||
pocl_spawn_real();
|
pocl_spawn_real();
|
||||||
|
|
||||||
|
unsigned end_cycles = vx_getCycles();
|
||||||
|
unsigned end_inst = vx_getInst();
|
||||||
|
|
||||||
|
|
||||||
|
printf("pocl_spawn: Total Cycles: %d\n", (end_cycles - starting_cycles));
|
||||||
|
printf("pocl_spawn: Total Inst : %d\n", (end_inst - starting_inst ));
|
||||||
|
|
||||||
// int z;
|
// int z;
|
||||||
// int y;
|
// int y;
|
||||||
// int x;
|
// int x;
|
||||||
|
|||||||
@@ -35,3 +35,8 @@ HEX: ELF
|
|||||||
|
|
||||||
ELF:
|
ELF:
|
||||||
$(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf
|
$(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf
|
||||||
|
|
||||||
|
run:
|
||||||
|
../../simX/obj_dir/Vcache_simX -E -a rv32i --core vx_vector_main.hex -s -b 1> emulator.debug
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
30
rvvector/basic/_1_vx_vec.s
Normal file
30
rvvector/basic/_1_vx_vec.s
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
.type vx_vec_test, @function
|
||||||
|
.global vx_vec_test
|
||||||
|
vx_vec_test:
|
||||||
|
li a1, 7
|
||||||
|
sw a1, 0(a0)
|
||||||
|
ret
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# slli a0, a0, 2
|
||||||
|
# add a0, a0, a3
|
||||||
|
# vmv.v.x vv0, a2
|
||||||
|
# # vsplat4 vv0, a2
|
||||||
|
# stripmine_loop:
|
||||||
|
# vlb4 vv1, (a1)
|
||||||
|
# vcmpez4 vp0, vv1
|
||||||
|
# !vp0 vlw4 vv1, (a3)
|
||||||
|
# !vp0 vlw4 vv2, (a4)
|
||||||
|
# !vp0 vfma4 vv1, vv0, vv1, vv2
|
||||||
|
# !vp0 vsw4 vv1, (a4)
|
||||||
|
# addi a1, a1, 4
|
||||||
|
# addi a3, a3, 16
|
||||||
|
# addi a4, a4, 16
|
||||||
|
# bleu a3, a0, stripmine_loop
|
||||||
|
# handle edge cases
|
||||||
|
# when (n % 4) != 0 ...
|
||||||
32
rvvector/basic/_1_vx_vector_main.c
Normal file
32
rvvector/basic/_1_vx_vector_main.c
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
|
||||||
|
#include "../../runtime/intrinsics/vx_intrinsics.h"
|
||||||
|
#include "vx_vec.h"
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
vx_tmc(1);
|
||||||
|
// int * a = malloc(4);
|
||||||
|
// int * b = malloc(4);
|
||||||
|
// int * c = malloc(4);
|
||||||
|
|
||||||
|
|
||||||
|
int * a = malloc(4);
|
||||||
|
*a = 5;
|
||||||
|
printf("Value of a: %d\n", *a);
|
||||||
|
|
||||||
|
vx_vec_test(a);
|
||||||
|
|
||||||
|
printf("Value of a: %d\n", *a);
|
||||||
|
|
||||||
|
|
||||||
|
// for (int i = 0; i < 4; i++)
|
||||||
|
// {
|
||||||
|
// if (c[i] != (a[i] + b[i]))
|
||||||
|
// {
|
||||||
|
// printf("Fail\n");
|
||||||
|
// break;
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
|
vx_tmc(0);
|
||||||
|
}
|
||||||
91
rvvector/basic/__vx_vector_main.c
Normal file
91
rvvector/basic/__vx_vector_main.c
Normal file
@@ -0,0 +1,91 @@
|
|||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "../../runtime/intrinsics/vx_intrinsics.h"
|
||||||
|
#include "vx_vec.h"
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
vx_tmc(1);
|
||||||
|
#if 0
|
||||||
|
# vector-vector add routine of 32-bit integers
|
||||||
|
# void vvaddint32(size_t n, const int*x, const int*y, int*z)
|
||||||
|
# { for (size_t i=0; i<n; i++) { z[i]=x[i]+y[i]; } }
|
||||||
|
#
|
||||||
|
# a0 = n, a1 = x, a2 = y, a3 = z
|
||||||
|
# Non-vector instructions are indented
|
||||||
|
#endif
|
||||||
|
#if 1
|
||||||
|
int n = 5;
|
||||||
|
int *a = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
|
||||||
|
int *b = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
|
||||||
|
int *c = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
|
||||||
|
|
||||||
|
for(int i = 0; i < n; ++i)
|
||||||
|
{
|
||||||
|
a[i] = b[i] = c[i] = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int i = 0; i < n; ++i) printf("%d, ", a[i]);
|
||||||
|
printf("\n");
|
||||||
|
// for(int i = 0; i < n; ++i) printf("%d, ", b[i]);
|
||||||
|
// printf("\n");
|
||||||
|
// for(int i = 0; i < n; ++i) printf("%d, ", c[i]);
|
||||||
|
|
||||||
|
int *d;
|
||||||
|
*d = 1;
|
||||||
|
vx_vec_test(n, d, b, c);
|
||||||
|
|
||||||
|
|
||||||
|
printf("(after: n = %d, %d)\n", n, *d);
|
||||||
|
for(int i = 0; i < n; ++i) printf("%d, ", a[i]);
|
||||||
|
// printf("\n");
|
||||||
|
// for(int i = 0; i < n; ++i) printf("%d, ", b[i]);
|
||||||
|
// printf("\n");
|
||||||
|
// for(int i = 0; i < n; ++i) printf("%d, ", c[i]);
|
||||||
|
|
||||||
|
#endif
|
||||||
|
#if 0
|
||||||
|
int * a = malloc(sizeof(int) * 10);
|
||||||
|
for(int i = 0; i < 10; ++i) a[i] = 5;
|
||||||
|
|
||||||
|
|
||||||
|
for(int i = 0; i < 10; ++i)
|
||||||
|
printf("%d, ", a[i]);
|
||||||
|
|
||||||
|
vx_vec_test(a);
|
||||||
|
//vx_vec_test(2, a, a, a);
|
||||||
|
|
||||||
|
printf("after--------\n");
|
||||||
|
for(int i = 0; i < 10; ++i)
|
||||||
|
printf("%d, ", a[i]);
|
||||||
|
#endif
|
||||||
|
#if 0
|
||||||
|
int n = 5;
|
||||||
|
int *a = (int*)malloc(sizeof(int) * 5); //{1, 1, 1, 1, 1};
|
||||||
|
int *b = (int*)malloc(sizeof(int) * 5); //{1, 1, 1, 1, 1};
|
||||||
|
int *c = (int*)malloc(sizeof(int) * 5); //{1, 1, 1, 1, 1};
|
||||||
|
|
||||||
|
for(int i = 0; i < n; ++i)
|
||||||
|
{
|
||||||
|
a[i] = 1;
|
||||||
|
b[i] = 1;
|
||||||
|
c[i] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("Value of a: %d, b: %d, c: %d, n: %d\n", a[0], b[0], c[0], n);
|
||||||
|
vx_vec_test(n, a, b, c);
|
||||||
|
printf("Value of a: %d, b: %d, c: %d, n: %d\n", a[0], b[0], c[0], n);
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// for (int i = 0; i < 4; i++)
|
||||||
|
// {
|
||||||
|
// if (c[i] != (a[i] + b[i]))
|
||||||
|
// {
|
||||||
|
// printf("Fail\n");
|
||||||
|
// break;
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
|
vx_tmc(0);
|
||||||
|
}
|
||||||
@@ -7,7 +7,7 @@
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void vx_vec_test(int *);
|
void vx_vec_test(int n, int* a, int* b, int* c); //vvaddint32
|
||||||
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|||||||
@@ -1,30 +1,23 @@
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
.type vx_vec_test, @function
|
.type vx_vec_test, @function
|
||||||
.global vx_vec_test
|
.global vx_vec_test
|
||||||
vx_vec_test:
|
vx_vec_test:
|
||||||
li a1, 7
|
# vector-vector add routine of 32-bit integers
|
||||||
sw a1, 0(a0)
|
# void vvaddint32(size_t n, const int*x, const int*y, int*z)
|
||||||
ret
|
# { for (size_t i=0; i<n; i++) { z[i]=x[i]+y[i]; } }
|
||||||
|
#
|
||||||
|
# a0 = n, a1 = x, a2 = y, a3 = z
|
||||||
|
# Non-vector instructions are indented
|
||||||
|
vsetvli t0, a0, e32 # Set vector length based on 32-bit vectors
|
||||||
# slli a0, a0, 2
|
vlw.v v0, (a1) # Get first vector
|
||||||
# add a0, a0, a3
|
sub a0, a0, t0 # Decrement number done
|
||||||
# vmv.v.x vv0, a2
|
slli t0, t0, 2 # Multiply number done by 4 bytes
|
||||||
# # vsplat4 vv0, a2
|
add a1, a1, t0 # Bump pointer
|
||||||
# stripmine_loop:
|
vlw.v v1, (a2) # Get second vector
|
||||||
# vlb4 vv1, (a1)
|
add a2, a2, t0 # Bump pointer
|
||||||
# vcmpez4 vp0, vv1
|
vadd.vv v2, v0, v1 # Sum vectors
|
||||||
# !vp0 vlw4 vv1, (a3)
|
vsw.v v2, (a3) # Store result
|
||||||
# !vp0 vlw4 vv2, (a4)
|
add a3, a3, t0 # Bump pointer
|
||||||
# !vp0 vfma4 vv1, vv0, vv1, vv2
|
bnez a0, vx_vec_test # Loop back
|
||||||
# !vp0 vsw4 vv1, (a4)
|
ret # Finished
|
||||||
# addi a1, a1, 4
|
|
||||||
# addi a3, a3, 16
|
|
||||||
# addi a4, a4, 16
|
|
||||||
# bleu a3, a0, stripmine_loop
|
|
||||||
# handle edge cases
|
|
||||||
# when (n % 4) != 0 ...
|
|
||||||
27
rvvector/basic/vx_vec_main.c
Normal file
27
rvvector/basic/vx_vec_main.c
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
#include "../../runtime/intrinsics/vx_intrinsics.h"
|
||||||
|
#include "vx_vec.h"
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
vx_tmc(1);
|
||||||
|
printf("----------------hello!!! \n");
|
||||||
|
|
||||||
|
int n = 8;
|
||||||
|
int *a = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
|
||||||
|
int *b = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
|
||||||
|
int *c = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
|
||||||
|
|
||||||
|
printf("hello!!! \n");
|
||||||
|
|
||||||
|
for(int i = 0; i < n; ++i)
|
||||||
|
{
|
||||||
|
a[i] = b[i] = c[i] = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
vx_vec_test(n, a, b, c);
|
||||||
|
|
||||||
|
for(int i = 0; i < n; ++i)
|
||||||
|
printf("%d ", c[i]);
|
||||||
|
|
||||||
|
vx_tmc(0);
|
||||||
|
}
|
||||||
@@ -1,32 +1,29 @@
|
|||||||
|
|
||||||
#include "../../runtime/intrinsics/vx_intrinsics.h"
|
#include "../../runtime/intrinsics/vx_intrinsics.h"
|
||||||
#include "vx_vec.h"
|
#include "vx_vec.h"
|
||||||
|
|
||||||
int main()
|
int main()
|
||||||
{
|
{
|
||||||
vx_tmc(1);
|
vx_tmc(1);
|
||||||
// int * a = malloc(4);
|
|
||||||
// int * b = malloc(4);
|
printf("Hello\n");
|
||||||
// int * c = malloc(4);
|
|
||||||
|
int n = 64;
|
||||||
|
int *a = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
|
||||||
|
int *b = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
|
||||||
|
int *c = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
|
||||||
|
|
||||||
|
for(int i = 0; i < n; ++i)
|
||||||
|
{
|
||||||
|
a[i] = b[i] = c[i] = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
vx_vec_test(n, a, b, c);
|
||||||
|
|
||||||
|
for (int i = 0; i < n; ++i)
|
||||||
|
{
|
||||||
|
printf("a[%d]=%d, b[%d]=%d, c[%d]=%d\n", i, a[i], i, b[i], i, c[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
int * a = malloc(4);
|
vx_tmc(0);
|
||||||
*a = 5;
|
|
||||||
printf("Value of a: %d\n", *a);
|
|
||||||
|
|
||||||
vx_vec_test(a);
|
|
||||||
|
|
||||||
printf("Value of a: %d\n", *a);
|
|
||||||
|
|
||||||
|
|
||||||
// for (int i = 0; i < 4; i++)
|
|
||||||
// {
|
|
||||||
// if (c[i] != (a[i] + b[i]))
|
|
||||||
// {
|
|
||||||
// printf("Fail\n");
|
|
||||||
// break;
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
vx_tmc(0);
|
|
||||||
}
|
}
|
||||||
File diff suppressed because it is too large
Load Diff
166
rvvector/benchmark_temp/1
Normal file
166
rvvector/benchmark_temp/1
Normal file
@@ -0,0 +1,166 @@
|
|||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "../../runtime/intrinsics/vx_intrinsics.h"
|
||||||
|
#include "vx_vec_benchmark.h"
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
vx_tmc(1);
|
||||||
|
|
||||||
|
int n = 65536;
|
||||||
|
int scalar = 10;
|
||||||
|
|
||||||
|
int *a = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
|
||||||
|
int *b = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
|
||||||
|
int *c = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
|
||||||
|
|
||||||
|
for (int i = 0; i < n; ++i) { a[i] = 1; b[i] = 2; c[i] = 5; }
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
//---------------------------------------------------------------
|
||||||
|
/* vvaddint32
|
||||||
|
* # vector-vector add routine of 32-bit integers
|
||||||
|
* # void vvaddint32(size_t n, const int*x, const int*y, int*z)
|
||||||
|
* # { for (size_t i=0; i<n; i++) { z[i]=x[i]+y[i]; } } */
|
||||||
|
printf("vvaddint...\na[%d]: ", n);
|
||||||
|
for(int i = 0; i < n; ++i) printf("%d ", a[i]);
|
||||||
|
printf("\nb[%d]: ", n);
|
||||||
|
for(int i = 0; i < n; ++i) printf("%d ", b[i]);
|
||||||
|
printf("\nc[%d] = a[%d] + b[%d]: ", n, n, n);
|
||||||
|
for(int i = 0; i < n; ++i) printf("%d ", c[i]);
|
||||||
|
|
||||||
|
vx_vec_vvaddint32(n, a, b, c);
|
||||||
|
|
||||||
|
for(int i = 0; i < n; ++i)
|
||||||
|
{
|
||||||
|
if(c[i] != (a[i]+b[i]))
|
||||||
|
{
|
||||||
|
printf("\n<vddint32> failed at <index: %d>! \n", i);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
printf("\nPASSED.......................... <vddint32> \n");
|
||||||
|
#endif
|
||||||
|
#if 0
|
||||||
|
//---------------------------------------------------------------
|
||||||
|
/* # vector-scalar add
|
||||||
|
# for (i=0; i<N; i++) { C[i] = A[i] + B; } // 32-bit ints */
|
||||||
|
for (int i = 0; i < n; ++i) { a[i] = 1; b[i] = 1;}
|
||||||
|
printf("vsadd...scalar:%d\na[%d]: ", scalar, n);
|
||||||
|
for(int i = 0; i < n; ++i) printf("%d \n", a[i]);
|
||||||
|
printf("\nb: %d", scalar);
|
||||||
|
|
||||||
|
vx_vec_vsadd(n, a, scalar);
|
||||||
|
|
||||||
|
for(int i = 0; i < n; ++i)
|
||||||
|
{
|
||||||
|
if(a[i] != (b[i] * scalar))
|
||||||
|
{
|
||||||
|
printf("\n<vsadd> failed at <index: %d>! \n", i);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
printf("\nPASSED.......................... <vsadd> \n");
|
||||||
|
|
||||||
|
#endif
|
||||||
|
#if 0
|
||||||
|
//---------------------------------------------------------------
|
||||||
|
/* # memory copy
|
||||||
|
# void *memcpy(void* dest, const void* src, size_t n) */
|
||||||
|
for (int i = 0; i < n; ++i) { a[i] = 1; b[i] = 2;}
|
||||||
|
printf("memcpy\na[%d]: ", n);
|
||||||
|
for(int i = 0; i < n; ++i) printf("%d \n", a[i]);
|
||||||
|
printf("\nb[%d]: ", n);
|
||||||
|
for(int i = 0; i < n; ++i) printf("%d \n", b[i]);
|
||||||
|
|
||||||
|
vx_vec_memcpy(a, b, n);
|
||||||
|
|
||||||
|
for(int i = 0; i < n; ++i)
|
||||||
|
{
|
||||||
|
if(a[i] != b[i])
|
||||||
|
{
|
||||||
|
printf("\n<memcpy> failed at <index: %d>! \n", i);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
printf("\nPASSED.......................... <memcpy> \n");
|
||||||
|
#endif
|
||||||
|
#if 1
|
||||||
|
//---------------------------------------------------------------
|
||||||
|
/* # void saxpy(size_t n, const float a, const float *x, float *y)
|
||||||
|
# ==> convert to int!!
|
||||||
|
# void saxpy(size_t n, const int a, const int *x, int *y)
|
||||||
|
# {
|
||||||
|
# size_t i;
|
||||||
|
# for (i=0; i<n; i++) y[i] = a * x[i] + y[i];
|
||||||
|
# } */
|
||||||
|
for (int i = 0; i < n; ++i) { a[i] = 4; b[i] = 2; c[i] = 2;}
|
||||||
|
|
||||||
|
printf("saxpy\na[%d]: ", n);
|
||||||
|
for(int i = 0; i < n; ++i) printf("%d \n", a[i]);
|
||||||
|
printf("\nb[%d]: ", n);
|
||||||
|
for(int i = 0; i < n; ++i) printf("%d \n", b[i]);
|
||||||
|
|
||||||
|
vx_vec_saxpy(n, scalar, a, b);
|
||||||
|
|
||||||
|
printf("saxpy\na[%d]: ", n);
|
||||||
|
for(int i = 0; i < n; ++i) printf("%d \n", a[i]);
|
||||||
|
printf("\nb[%d]: ", n);
|
||||||
|
for(int i = 0; i < n; ++i) printf("%d \n", b[i]);
|
||||||
|
|
||||||
|
for(int i = 0; i < n; ++i)
|
||||||
|
{
|
||||||
|
if(b[i] != ((a[i] * scalar) + c[i]))
|
||||||
|
{
|
||||||
|
printf("\n<saxpy> failed at <index: %d>! \n", i);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
printf("\nPASSED.......................... <saxpy> \n");
|
||||||
|
#endif
|
||||||
|
#if 0
|
||||||
|
//---------------------------------------------------------------
|
||||||
|
/* # void sgemm_nn(size_t n, size_t m, size_t k, const float*a, // m * k matrix
|
||||||
|
# size_t lda, const float*b, // k * n matrix
|
||||||
|
# size_t ldb, float*c, // m * n matrix
|
||||||
|
# size_t ldc)
|
||||||
|
# c += a*b (alpha=1, no transpose on input matrices)
|
||||||
|
# matrices stored in C row-major order */
|
||||||
|
|
||||||
|
int m = 8;
|
||||||
|
int k = 8;
|
||||||
|
int n = 8
|
||||||
|
int lda = 4;
|
||||||
|
int ldb = 4;
|
||||||
|
int ldc = 4;
|
||||||
|
|
||||||
|
int* a1 = (int*)malloc(sizeof(m * k));
|
||||||
|
int* b1 = (int*)malloc(sizeof(k * n));
|
||||||
|
int* c1 = (int*)malloc(sizeof(m * n));
|
||||||
|
|
||||||
|
for(int i = 0; i < (m * k); ++i) a1[i] = 1;
|
||||||
|
for(int i = 0; i < (k * n); ++i) b1[i] = 1;
|
||||||
|
for(int i = 0; i < (m * n); ++i) c1[i] = 1;
|
||||||
|
|
||||||
|
printf("sgemm_nn\na[%d]: ", n);
|
||||||
|
for(int i = 0; i < n; ++i) printf("%d \n", a1[i]);
|
||||||
|
printf("\nb[%d]: ", n);
|
||||||
|
for(int i = 0; i < n; ++i) printf("%d \n", b1[i]);
|
||||||
|
|
||||||
|
vx_vec_sgemm_nn(n, m, k, a1, lda, b1, ldb, c1, ldc);
|
||||||
|
|
||||||
|
//for(int i = 0; i < n; ++i)
|
||||||
|
//{
|
||||||
|
// if(b[i] != ((a[i] * scalar) + c[i]))
|
||||||
|
// {
|
||||||
|
// printf("\n<sgemm_nn> failed at <index: %d>! \n", i);
|
||||||
|
// return;
|
||||||
|
// }
|
||||||
|
//}
|
||||||
|
printf("\nNOT TESTED.......................... <sgemm_nn> \n");
|
||||||
|
//---------------------------------------------------------------
|
||||||
|
#endif
|
||||||
|
|
||||||
|
vx_tmc(0);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
@@ -34,8 +34,7 @@ HEX: ELF
|
|||||||
$(CPY) -O ihex $(VX_MAIN).elf $(VX_MAIN).hex
|
$(CPY) -O ihex $(VX_MAIN).elf $(VX_MAIN).hex
|
||||||
|
|
||||||
ELF:
|
ELF:
|
||||||
$(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC1) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf
|
$(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC2) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf
|
||||||
# $(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC2) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf
|
|
||||||
# $(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC3) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf
|
# $(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC3) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf
|
||||||
# $(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC4) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf
|
# $(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC4) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf
|
||||||
# $(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC5) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf~
|
# $(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC5) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf~
|
||||||
|
|||||||
@@ -6,14 +6,17 @@
|
|||||||
int main()
|
int main()
|
||||||
{
|
{
|
||||||
vx_tmc(1);
|
vx_tmc(1);
|
||||||
|
|
||||||
int n = 5;
|
int n = 5;
|
||||||
|
int scalar = 10;
|
||||||
|
|
||||||
int *a = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
|
int *a = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
|
||||||
int *b = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
|
int *b = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
|
||||||
int *c = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
|
int *c = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
|
||||||
|
|
||||||
for (int i = 0; i < n; ++i) { a[i] = 1; b[i] = 2; c[i] = 5; }
|
for (int i = 0; i < n; ++i) { a[i] = 1; b[i] = 2; c[i] = 5; }
|
||||||
|
|
||||||
#if 1
|
#if 0
|
||||||
//---------------------------------------------------------------
|
//---------------------------------------------------------------
|
||||||
/* vvaddint32
|
/* vvaddint32
|
||||||
* # vector-vector add routine of 32-bit integers
|
* # vector-vector add routine of 32-bit integers
|
||||||
@@ -43,7 +46,6 @@ int main()
|
|||||||
/* # vector-scalar add
|
/* # vector-scalar add
|
||||||
# for (i=0; i<N; i++) { C[i] = A[i] + B; } // 32-bit ints */
|
# for (i=0; i<N; i++) { C[i] = A[i] + B; } // 32-bit ints */
|
||||||
for (int i = 0; i < n; ++i) { a[i] = 1; b[i] = 1;}
|
for (int i = 0; i < n; ++i) { a[i] = 1; b[i] = 1;}
|
||||||
int scalar = 10;
|
|
||||||
printf("vsadd...scalar:%d\na[%d]: ", scalar, n);
|
printf("vsadd...scalar:%d\na[%d]: ", scalar, n);
|
||||||
for(int i = 0; i < n; ++i) printf("%d \n", a[i]);
|
for(int i = 0; i < n; ++i) printf("%d \n", a[i]);
|
||||||
printf("\nb: %d", scalar);
|
printf("\nb: %d", scalar);
|
||||||
@@ -78,10 +80,18 @@ int main()
|
|||||||
if(a[i] != b[i])
|
if(a[i] != b[i])
|
||||||
{
|
{
|
||||||
printf("\n<memcpy> failed at <index: %d>! \n", i);
|
printf("\n<memcpy> failed at <index: %d>! \n", i);
|
||||||
|
<<<<<<< HEAD
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
printf("\nPASSED.......................... <memcpy> \n");
|
printf("\nPASSED.......................... <memcpy> \n");
|
||||||
|
=======
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
printf("\nPASSED.......................... <memcpy> \n");
|
||||||
|
#endif
|
||||||
|
#if 1
|
||||||
//---------------------------------------------------------------
|
//---------------------------------------------------------------
|
||||||
/* # void saxpy(size_t n, const float a, const float *x, float *y)
|
/* # void saxpy(size_t n, const float a, const float *x, float *y)
|
||||||
# ==> convert to int!!
|
# ==> convert to int!!
|
||||||
@@ -99,6 +109,11 @@ int main()
|
|||||||
|
|
||||||
vx_vec_saxpy(n, scalar, a, b);
|
vx_vec_saxpy(n, scalar, a, b);
|
||||||
|
|
||||||
|
printf("saxpy\na[%d]: ", n);
|
||||||
|
for(int i = 0; i < n; ++i) printf("%d \n", a[i]);
|
||||||
|
printf("\nb[%d]: ", n);
|
||||||
|
for(int i = 0; i < n; ++i) printf("%d \n", b[i]);
|
||||||
|
|
||||||
for(int i = 0; i < n; ++i)
|
for(int i = 0; i < n; ++i)
|
||||||
{
|
{
|
||||||
if(b[i] != ((a[i] * scalar) + c[i]))
|
if(b[i] != ((a[i] * scalar) + c[i]))
|
||||||
@@ -109,6 +124,12 @@ int main()
|
|||||||
}
|
}
|
||||||
printf("\nPASSED.......................... <saxpy> \n");
|
printf("\nPASSED.......................... <saxpy> \n");
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
printf("\nPASSED.......................... <saxpy> \n");
|
||||||
|
#endif
|
||||||
|
#if 0
|
||||||
//---------------------------------------------------------------
|
//---------------------------------------------------------------
|
||||||
/* # void sgemm_nn(size_t n, size_t m, size_t k, const float*a, // m * k matrix
|
/* # void sgemm_nn(size_t n, size_t m, size_t k, const float*a, // m * k matrix
|
||||||
# size_t lda, const float*b, // k * n matrix
|
# size_t lda, const float*b, // k * n matrix
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
BIN
rvvector/basic/vx_vector_main.elf → rvvector/benchmark_temp/vx_vec_benchmark.elf
Normal file → Executable file
BIN
rvvector/basic/vx_vector_main.elf → rvvector/benchmark_temp/vx_vec_benchmark.elf
Normal file → Executable file
Binary file not shown.
@@ -5,10 +5,10 @@
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void vx_vec_vvaddint32(int n, int* a, int* b, int *c);
|
//void vx_vec_vvaddint32(int n, int* a, int* b, int *c);
|
||||||
//void vx_vec_vsadd(int n, int* a, int scalar);
|
//void vx_vec_vsadd(int n, int* a, int scalar);
|
||||||
//void vx_vec_memcpy(int* a, int* b, int n);
|
//void vx_vec_memcpy(int* a, int* b, int n);
|
||||||
//void vx_vec_saxpy(int n, int scalar, int* a, int* b);
|
void vx_vec_saxpy(int n, int scalar, int* a, int* b);
|
||||||
//void vx_vec_sgemm_nn(int n, int m, int k, int* a1, int lda, int* b1, int ldb, int* c1, int ldc);
|
//void vx_vec_sgemm_nn(int n, int m, int k, int* a1, int lda, int* b1, int ldb, int* c1, int ldc);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|||||||
@@ -26,3 +26,31 @@ saxpy:
|
|||||||
add a2, a2, a4
|
add a2, a2, a4
|
||||||
bnez a0, saxpy
|
bnez a0, saxpy
|
||||||
ret
|
ret
|
||||||
|
#vx_vec_saxpy:
|
||||||
|
# vsetvli a4, a0, e32, m8
|
||||||
|
#saxpy:
|
||||||
|
# vlw.v v0, (a1)
|
||||||
|
# sub a0, a0, a4
|
||||||
|
# slli a4, a4, 2
|
||||||
|
# add a1, a1, a4
|
||||||
|
# vlw.v v8, (a2)
|
||||||
|
# vfmacc.vf v8, fa0, v0
|
||||||
|
# vsw.v v8, (a2)
|
||||||
|
# add a2, a2, a4
|
||||||
|
# bnez a0, saxpy
|
||||||
|
# ret
|
||||||
|
|
||||||
|
# a0 n, rs1 a, a2 x, a3 y
|
||||||
|
vx_vec_saxpy:
|
||||||
|
vsetvli a4, a0, e32, m8
|
||||||
|
saxpy:
|
||||||
|
vlw.v v0, (a2)
|
||||||
|
sub a0, a0, a4
|
||||||
|
slli a4, a4, 2
|
||||||
|
add a2, a2, a4
|
||||||
|
vlw.v v1, (a3)
|
||||||
|
vmacc.vx v1, rs1, v0
|
||||||
|
vsw.v v1, (a3)
|
||||||
|
add a3, a3, a4
|
||||||
|
bnez a0, saxpy
|
||||||
|
ret
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
################################################################################
|
################################################################################
|
||||||
# HARPtools by Chad D. Kersey, Summer 2011 #
|
# HARPtools by Chad D. Kersey, Summer 2011 #
|
||||||
################################################################################
|
################################################################################
|
||||||
CXXFLAGS ?= -std=c++11 -fPIC -O3 # -g -DUSE_DEBUG=3 -DPRINT_ACTIVE_THREADS
|
CXXFLAGS ?= -std=c++11 -fPIC -O3 -g # -g -DUSE_DEBUG=3 -DPRINT_ACTIVE_THREADS
|
||||||
|
|
||||||
LIB_OBJS=simX.cpp args.cpp mem.cpp core.cpp instruction.cpp enc.cpp util.cpp
|
LIB_OBJS=simX.cpp args.cpp mem.cpp core.cpp instruction.cpp enc.cpp util.cpp
|
||||||
|
|
||||||
|
|||||||
131
simX/core.cpp
131
simX/core.cpp
@@ -46,7 +46,8 @@
|
|||||||
trace_inst.vd = -1; \
|
trace_inst.vd = -1; \
|
||||||
trace_inst.is_lw = false; \
|
trace_inst.is_lw = false; \
|
||||||
trace_inst.is_sw = false; \
|
trace_inst.is_sw = false; \
|
||||||
trace_inst.mem_addresses = new unsigned[a.getNThds()]; \
|
if (trace_inst.mem_addresses != NULL) free(trace_inst.mem_addresses); \
|
||||||
|
trace_inst.mem_addresses = (unsigned *) malloc(32 * sizeof(unsigned)); \
|
||||||
for (int tid = 0; tid < a.getNThds(); tid++) trace_inst.mem_addresses[tid] = 0xdeadbeef; \
|
for (int tid = 0; tid < a.getNThds(); tid++) trace_inst.mem_addresses[tid] = 0xdeadbeef; \
|
||||||
trace_inst.mem_stall_cycles = 0; \
|
trace_inst.mem_stall_cycles = 0; \
|
||||||
trace_inst.fetch_stall_cycles = 0; \
|
trace_inst.fetch_stall_cycles = 0; \
|
||||||
@@ -79,19 +80,19 @@ using namespace std;
|
|||||||
|
|
||||||
void printTrace(trace_inst_t * trace, const char * stage_name)
|
void printTrace(trace_inst_t * trace, const char * stage_name)
|
||||||
{
|
{
|
||||||
cout << "********************************** " << stage_name << " *********************************\n";
|
D(3, "********************************** " << stage_name << " *********************************");
|
||||||
cout << "valid: " << trace->valid_inst << '\n';
|
D(3, "valid: " << trace->valid_inst);
|
||||||
cout << "PC: " << hex << trace->pc << dec << '\n';
|
D(3, "PC: " << hex << trace->pc << dec);
|
||||||
cout << "wid: " << trace->wid << '\n';
|
D(3, "wid: " << trace->wid);
|
||||||
cout << "rd: " << trace->rd << "\trs1: " << trace->rs1 << "\trs2: " << trace->rs2 << '\n';
|
D(3, "rd: " << trace->rd << "\trs1: " << trace->rs1 << "\trs2: " << trace->rs2);
|
||||||
cout << "is_lw: " << trace->is_lw << '\n';
|
D(3, "is_lw: " << trace->is_lw);
|
||||||
cout << "is_sw: " << trace->is_sw << '\n';
|
D(3, "is_sw: " << trace->is_sw);
|
||||||
cout << "fetch_stall_cycles: " << trace->fetch_stall_cycles << '\n';
|
D(3, "fetch_stall_cycles: " << trace->fetch_stall_cycles);
|
||||||
cout << "mem_stall_cycles: " << trace->mem_stall_cycles << '\n';
|
D(3, "mem_stall_cycles: " << trace->mem_stall_cycles);
|
||||||
|
|
||||||
cout << "stall_warp: " << trace->stall_warp << '\n';
|
D(3, "stall_warp: " << trace->stall_warp);
|
||||||
cout << "wspawn: " << trace->wspawn << '\n';
|
D(3, "wspawn: " << trace->wspawn);
|
||||||
cout << "stalled: " << trace->stalled << '\n';
|
D(3, "stalled: " << trace->stalled);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef EMU_INSTRUMENTATION
|
#ifdef EMU_INSTRUMENTATION
|
||||||
@@ -105,7 +106,7 @@ void Harp::reg_doWrite(Word cpuId, Word regNum) {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
Core::Core(const ArchDef &a, Decoder &d, MemoryUnit &mem, Word id):
|
Core::Core(const ArchDef &a, Decoder &d, MemoryUnit &mem, Word id):
|
||||||
a(a), iDec(d), mem(mem), steps(4)
|
a(a), iDec(d), mem(mem), steps(4), num_cycles(0), num_instructions(0)
|
||||||
{
|
{
|
||||||
release_warp = false;
|
release_warp = false;
|
||||||
foundSchedule = true;
|
foundSchedule = true;
|
||||||
@@ -133,9 +134,9 @@ Core::Core(const ArchDef &a, Decoder &d, MemoryUnit &mem, Word id):
|
|||||||
|
|
||||||
cache_simulator = new Vcache_simX;
|
cache_simulator = new Vcache_simX;
|
||||||
|
|
||||||
m_trace = new VerilatedVcdC;
|
// m_trace = new VerilatedVcdC;
|
||||||
cache_simulator->trace(m_trace, 99);
|
// cache_simulator->trace(m_trace, 99);
|
||||||
m_trace->open("simXtrace.vcd");
|
// m_trace->open("simXtrace.vcd");
|
||||||
|
|
||||||
cache_simulator->reset = 1;
|
cache_simulator->reset = 1;
|
||||||
cache_simulator->clk = 0;
|
cache_simulator->clk = 0;
|
||||||
@@ -161,17 +162,19 @@ bool Core::interrupt(Word r0) {
|
|||||||
|
|
||||||
void Core::step()
|
void Core::step()
|
||||||
{
|
{
|
||||||
cout << "\n\n\n------------------------------------------------------\n";
|
D(3, "\n\n\n------------------------------------------------------");
|
||||||
|
|
||||||
|
D(3, "Started core::step" << flush);
|
||||||
|
|
||||||
steps++;
|
steps++;
|
||||||
cout << "CYCLE: " << steps << '\n';
|
this->num_cycles++;
|
||||||
|
D(3, "CYCLE: " << this->num_cycles);
|
||||||
|
|
||||||
cout << "Stalled Warps:\n";
|
D(3, "Stalled Warps:");
|
||||||
for (int widd = 0; widd < a.getNWarps(); widd++)
|
for (int widd = 0; widd < a.getNWarps(); widd++)
|
||||||
{
|
{
|
||||||
cout << stallWarp[widd] << " ";
|
D(3, stallWarp[widd] << " ");
|
||||||
}
|
}
|
||||||
cout << '\n';
|
|
||||||
|
|
||||||
// cout << "Rename table\n";
|
// cout << "Rename table\n";
|
||||||
// for (int regii = 0; regii < 32; regii++)
|
// for (int regii = 0; regii < 32; regii++)
|
||||||
@@ -179,20 +182,30 @@ void Core::step()
|
|||||||
// cout << regii << ": " << renameTable[0][regii] << '\n';
|
// cout << regii << ": " << renameTable[0][regii] << '\n';
|
||||||
// }
|
// }
|
||||||
|
|
||||||
cout << '\n';
|
// cout << '\n' << flush;
|
||||||
|
|
||||||
|
// cout << "About to call writeback" << endl;
|
||||||
this->writeback();
|
this->writeback();
|
||||||
|
// cout << "About to call load_store" << endl;
|
||||||
this->load_store();
|
this->load_store();
|
||||||
|
// cout << "About to call execute_unit" << endl;
|
||||||
this->execute_unit();
|
this->execute_unit();
|
||||||
|
// cout << "About to call scheduler" << endl;
|
||||||
this->scheduler();
|
this->scheduler();
|
||||||
|
// cout << "About to call decode" << endl;
|
||||||
this->decode();
|
this->decode();
|
||||||
|
// D(3, "About to call fetch" << flush);
|
||||||
this->fetch();
|
this->fetch();
|
||||||
|
// D(3, "Finished fetch" << flush);
|
||||||
|
|
||||||
if (release_warp)
|
if (release_warp)
|
||||||
{
|
{
|
||||||
release_warp = false;
|
release_warp = false;
|
||||||
stallWarp[release_warp_num] = false;
|
stallWarp[release_warp_num] = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
D(3, "released warp" << flush);
|
||||||
|
D(3, "Finished core::step" << flush);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Core::getCacheDelays(trace_inst_t * trace_inst)
|
void Core::getCacheDelays(trace_inst_t * trace_inst)
|
||||||
@@ -238,7 +251,7 @@ void Core::getCacheDelays(trace_inst_t * trace_inst)
|
|||||||
|
|
||||||
cache_simulator->clk = 1;
|
cache_simulator->clk = 1;
|
||||||
cache_simulator->eval();
|
cache_simulator->eval();
|
||||||
m_trace->dump(2*curr_cycle);
|
// m_trace->dump(2*curr_cycle);
|
||||||
|
|
||||||
cache_simulator->in_icache_pc_addr = trace_inst->pc;
|
cache_simulator->in_icache_pc_addr = trace_inst->pc;
|
||||||
cache_simulator->in_icache_valid_pc_addr = 1;
|
cache_simulator->in_icache_valid_pc_addr = 1;
|
||||||
@@ -254,7 +267,7 @@ void Core::getCacheDelays(trace_inst_t * trace_inst)
|
|||||||
// DCache end
|
// DCache end
|
||||||
cache_simulator->clk = 0;
|
cache_simulator->clk = 0;
|
||||||
cache_simulator->eval();
|
cache_simulator->eval();
|
||||||
m_trace->dump(2*curr_cycle+1);
|
// m_trace->dump(2*curr_cycle+1);
|
||||||
|
|
||||||
curr_cycle++;
|
curr_cycle++;
|
||||||
|
|
||||||
@@ -296,7 +309,7 @@ void Core::getCacheDelays(trace_inst_t * trace_inst)
|
|||||||
|
|
||||||
cache_simulator->clk = 1;
|
cache_simulator->clk = 1;
|
||||||
cache_simulator->eval();
|
cache_simulator->eval();
|
||||||
m_trace->dump(2*curr_cycle);
|
// m_trace->dump(2*curr_cycle);
|
||||||
|
|
||||||
//////// Feed input
|
//////// Feed input
|
||||||
if (cache_simulator->out_icache_stall)
|
if (cache_simulator->out_icache_stall)
|
||||||
@@ -331,7 +344,7 @@ void Core::getCacheDelays(trace_inst_t * trace_inst)
|
|||||||
|
|
||||||
cache_simulator->clk = 0;
|
cache_simulator->clk = 0;
|
||||||
cache_simulator->eval();
|
cache_simulator->eval();
|
||||||
m_trace->dump(2*curr_cycle+1);
|
// m_trace->dump(2*curr_cycle+1);
|
||||||
|
|
||||||
|
|
||||||
curr_cycle++;
|
curr_cycle++;
|
||||||
@@ -378,9 +391,9 @@ void Core::warpScheduler()
|
|||||||
void Core::fetch()
|
void Core::fetch()
|
||||||
{
|
{
|
||||||
|
|
||||||
#ifdef PRINT_ACTIVE_THREADS
|
// #ifdef PRINT_ACTIVE_THREADS
|
||||||
cout << endl << "Threads:";
|
D(3, "Threads:");
|
||||||
#endif
|
// #endif
|
||||||
|
|
||||||
// D(-1, "Found schedule: " << foundSchedule);
|
// D(-1, "Found schedule: " << foundSchedule);
|
||||||
|
|
||||||
@@ -395,16 +408,22 @@ void Core::fetch()
|
|||||||
if (foundSchedule)
|
if (foundSchedule)
|
||||||
{
|
{
|
||||||
D(3, "Core step stepping warp " << schedule_w << '[' << w[schedule_w].activeThreads << ']');
|
D(3, "Core step stepping warp " << schedule_w << '[' << w[schedule_w].activeThreads << ']');
|
||||||
|
this->num_instructions = this->num_instructions + w[schedule_w].activeThreads;
|
||||||
|
// this->num_instructions++;
|
||||||
w[schedule_w].step(&inst_in_fetch);
|
w[schedule_w].step(&inst_in_fetch);
|
||||||
D(3, "Now " << w[schedule_w].activeThreads << " active threads in " << schedule_w);
|
D(3, "Now " << w[schedule_w].activeThreads << " active threads in " << schedule_w << flush);
|
||||||
|
|
||||||
this->getCacheDelays(&inst_in_fetch);
|
// this->getCacheDelays(&inst_in_fetch);
|
||||||
|
D(3, "Got cache delays" << flush);
|
||||||
if (inst_in_fetch.stall_warp)
|
if (inst_in_fetch.stall_warp)
|
||||||
{
|
{
|
||||||
stallWarp[inst_in_fetch.wid] = true;
|
stallWarp[inst_in_fetch.wid] = true;
|
||||||
}
|
}
|
||||||
|
D(3, "staled warps\n" << flush);
|
||||||
}
|
}
|
||||||
|
D(3, "About to schedule warp\n" << flush);
|
||||||
warpScheduler();
|
warpScheduler();
|
||||||
|
D(3, "Scheduled warp" << flush);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@@ -413,21 +432,33 @@ void Core::fetch()
|
|||||||
if (inst_in_fetch.fetch_stall_cycles > 0) inst_in_fetch.fetch_stall_cycles--;
|
if (inst_in_fetch.fetch_stall_cycles > 0) inst_in_fetch.fetch_stall_cycles--;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
D(3, "Printing trace" << flush);
|
||||||
printTrace(&inst_in_fetch, "Fetch");
|
printTrace(&inst_in_fetch, "Fetch");
|
||||||
|
D(3, "printed trace" << flush);
|
||||||
|
|
||||||
// #ifdef PRINT_ACTIVE_THREADS
|
// #ifdef PRINT_ACTIVE_THREADS
|
||||||
|
D(3, "About to print active threads" << flush << "\n");
|
||||||
for (unsigned j = 0; j < w[schedule_w].tmask.size(); ++j) {
|
for (unsigned j = 0; j < w[schedule_w].tmask.size(); ++j) {
|
||||||
if (w[schedule_w].activeThreads > j && w[schedule_w].tmask[j]) cout << " 1";
|
if (w[schedule_w].activeThreads > j && w[schedule_w].tmask[j])
|
||||||
else cout << " 0";
|
{
|
||||||
if (j != w[schedule_w].tmask.size()-1 || schedule_w != w.size()-1) cout << ',';
|
D(3, " 1");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
D(3, " 0");
|
||||||
|
}
|
||||||
|
if (j != w[schedule_w].tmask.size()-1 || schedule_w != w.size()-1)
|
||||||
|
{
|
||||||
|
D(3, ',');
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
D(3, "\nPrinted active threads" << flush);
|
||||||
// #endif
|
// #endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef PRINT_ACTIVE_THREADS
|
// #ifdef PRINT_ACTIVE_THREADS
|
||||||
cout << endl;
|
// #endif
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Core::decode()
|
void Core::decode()
|
||||||
@@ -522,7 +553,7 @@ void Core::load_store()
|
|||||||
|
|
||||||
void Core::execute_unit()
|
void Core::execute_unit()
|
||||||
{
|
{
|
||||||
// cout << "$$$$$$$$$$$$$$$$$$$ EXE START\n";
|
D(3, "$$$$$$$$$$$$$$$$$$$ EXE START\n" << flush);
|
||||||
bool do_nothing = false;
|
bool do_nothing = false;
|
||||||
// EXEC is always not busy
|
// EXEC is always not busy
|
||||||
if (inst_in_scheduler.is_lw || inst_in_scheduler.is_sw)
|
if (inst_in_scheduler.is_lw || inst_in_scheduler.is_sw)
|
||||||
@@ -546,6 +577,7 @@ void Core::execute_unit()
|
|||||||
// cout << "Rename RS2: " << inst_in_scheduler.rs1 << " is " << renameTable[inst_in_scheduler.wid][inst_in_scheduler.rs2] << " wid: " << inst_in_scheduler.wid << '\n';
|
// cout << "Rename RS2: " << inst_in_scheduler.rs1 << " is " << renameTable[inst_in_scheduler.wid][inst_in_scheduler.rs2] << " wid: " << inst_in_scheduler.wid << '\n';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// cout << "About to check vs*\n" << flush;
|
||||||
if(inst_in_scheduler.vs1 > 0)
|
if(inst_in_scheduler.vs1 > 0)
|
||||||
{
|
{
|
||||||
scheduler_srcs_ready = scheduler_srcs_ready && vecRenameTable[inst_in_scheduler.vs1];
|
scheduler_srcs_ready = scheduler_srcs_ready && vecRenameTable[inst_in_scheduler.vs1];
|
||||||
@@ -554,6 +586,7 @@ void Core::execute_unit()
|
|||||||
{
|
{
|
||||||
scheduler_srcs_ready = scheduler_srcs_ready && vecRenameTable[inst_in_scheduler.vs2];
|
scheduler_srcs_ready = scheduler_srcs_ready && vecRenameTable[inst_in_scheduler.vs2];
|
||||||
}
|
}
|
||||||
|
// cout << "Finished sources\n" << flush;
|
||||||
|
|
||||||
if (scheduler_srcs_ready)
|
if (scheduler_srcs_ready)
|
||||||
{
|
{
|
||||||
@@ -561,15 +594,19 @@ void Core::execute_unit()
|
|||||||
// cout << "rename setting rd: " << inst_in_scheduler.rd << " to not useabel wid: " << inst_in_scheduler.wid << '\n';
|
// cout << "rename setting rd: " << inst_in_scheduler.rd << " to not useabel wid: " << inst_in_scheduler.wid << '\n';
|
||||||
renameTable[inst_in_scheduler.wid][inst_in_scheduler.rd] = false;
|
renameTable[inst_in_scheduler.wid][inst_in_scheduler.rd] = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// cout << "About to check vector wb: " << inst_in_scheduler.vd << "\n" << flush;
|
||||||
if(inst_in_scheduler.vd != -1) {
|
if(inst_in_scheduler.vd != -1) {
|
||||||
vecRenameTable[inst_in_scheduler.vd] = false;
|
vecRenameTable[inst_in_scheduler.vd] = false;
|
||||||
}
|
}
|
||||||
|
// cout << "Finished wb checking" << "\n" << flush;
|
||||||
CPY_TRACE(inst_in_exe, inst_in_scheduler);
|
CPY_TRACE(inst_in_exe, inst_in_scheduler);
|
||||||
INIT_TRACE(inst_in_scheduler);
|
INIT_TRACE(inst_in_scheduler);
|
||||||
|
// cout << "Finished trace copying and clearning" << "\n" << flush;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
cout << "&&&&&&&&&&&&&&&&&&&&&&&& EXECUTE SRCS NOT READY\n";
|
D(3, "&&&&&&&&&&&&&&&&&&&&&&&& EXECUTE SRCS NOT READY");
|
||||||
inst_in_scheduler.stalled = true;
|
inst_in_scheduler.stalled = true;
|
||||||
// INIT_TRACE(inst_in_exe);
|
// INIT_TRACE(inst_in_exe);
|
||||||
do_nothing = true;
|
do_nothing = true;
|
||||||
@@ -583,6 +620,7 @@ void Core::execute_unit()
|
|||||||
|
|
||||||
//printTrace(&inst_in_exe, "execute_unit");
|
//printTrace(&inst_in_exe, "execute_unit");
|
||||||
// INIT_TRACE(inst_in_exe);
|
// INIT_TRACE(inst_in_exe);
|
||||||
|
D(3, "EXECUTE END" << flush);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Core::writeback()
|
void Core::writeback()
|
||||||
@@ -623,7 +661,7 @@ void Core::writeback()
|
|||||||
{
|
{
|
||||||
if (serviced_exe)
|
if (serviced_exe)
|
||||||
{
|
{
|
||||||
cout << "$$$$$$$$$$$$$$$$$$$$ Stalling LSU because EXE is being used\n";
|
D(3, "$$$$$$$$$$$$$$$$$$$$ Stalling LSU because EXE is being used");
|
||||||
inst_in_lsu.stalled = true;
|
inst_in_lsu.stalled = true;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@@ -652,7 +690,7 @@ bool Core::running() const {
|
|||||||
for (unsigned i = 0; i < w.size(); ++i)
|
for (unsigned i = 0; i < w.size(); ++i)
|
||||||
if (w[i].running())
|
if (w[i].running())
|
||||||
{
|
{
|
||||||
cout << "Warp ID " << i << " is running\n";
|
D(3, "Warp ID " << i << " is running");
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
@@ -665,7 +703,7 @@ void Core::printStats() const {
|
|||||||
|
|
||||||
cerr << "Total steps: " << steps << endl;
|
cerr << "Total steps: " << steps << endl;
|
||||||
for (unsigned i = 0; i < w.size(); ++i) {
|
for (unsigned i = 0; i < w.size(); ++i) {
|
||||||
cout << "=== Warp " << i << " ===" << endl;
|
// cout << "=== Warp " << i << " ===" << endl;
|
||||||
w[i].printStats();
|
w[i].printStats();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -715,7 +753,7 @@ void Warp::step(trace_inst_t * trace_inst) {
|
|||||||
// ++steps;
|
// ++steps;
|
||||||
|
|
||||||
D(3, "in step pc=0x" << hex << pc);
|
D(3, "in step pc=0x" << hex << pc);
|
||||||
cout << "help: in PC: " << hex << pc << dec << '\n';
|
D(3, "help: in PC: " << hex << pc << dec);
|
||||||
|
|
||||||
// std::cout << "pc: " << hex << pc << "\n";
|
// std::cout << "pc: " << hex << pc << "\n";
|
||||||
|
|
||||||
@@ -727,8 +765,9 @@ void Warp::step(trace_inst_t * trace_inst) {
|
|||||||
bool fetchMore;
|
bool fetchMore;
|
||||||
|
|
||||||
fetchMore = false;
|
fetchMore = false;
|
||||||
unsigned fetchSize(wordSize - (pc+fetchPos)%wordSize);
|
// unsigned fetchSize(wordSize - (pc+fetchPos)%wordSize);
|
||||||
fetchBuffer.resize(fetchPos + fetchSize);
|
unsigned fetchSize = 4;
|
||||||
|
fetchBuffer.resize(fetchSize);
|
||||||
Word fetched = core->mem.fetch(pc + fetchPos, supervisorMode);
|
Word fetched = core->mem.fetch(pc + fetchPos, supervisorMode);
|
||||||
writeWord(fetchBuffer, fetchPos, fetchSize, fetched);
|
writeWord(fetchBuffer, fetchPos, fetchSize, fetched);
|
||||||
decPos = 0;
|
decPos = 0;
|
||||||
|
|||||||
15
simX/enc.cpp
15
simX/enc.cpp
@@ -104,7 +104,8 @@ Instruction *WordDecoder::decode(const std::vector<Byte> &v, Size &idx, trace_in
|
|||||||
bool predicated = false;
|
bool predicated = false;
|
||||||
if (predicated) { inst.setPred((code>>(inst_s-p-1))&pMask); }
|
if (predicated) { inst.setPred((code>>(inst_s-p-1))&pMask); }
|
||||||
|
|
||||||
printf("CUrrent CODE: %x\n", code);
|
// printf("CUrrent CODE: %x\n", code);
|
||||||
|
D(3, "Curr Code: " << hex << code << dec);
|
||||||
|
|
||||||
Opcode op = (Opcode)((code>>shift_opcode)&opcode_mask);
|
Opcode op = (Opcode)((code>>shift_opcode)&opcode_mask);
|
||||||
// std::cout << "opcode: " << op << "\n";
|
// std::cout << "opcode: " << op << "\n";
|
||||||
@@ -234,17 +235,17 @@ Instruction *WordDecoder::decode(const std::vector<Byte> &v, Size &idx, trace_in
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case InstType::V_TYPE:
|
case InstType::V_TYPE:
|
||||||
cout << "Entered here: instr type = vector" << op << endl;
|
D(3, "Entered here: instr type = vector" << op);
|
||||||
switch(op) {
|
switch(op) {
|
||||||
case Opcode::VSET_ARITH: //TODO: arithmetic ops
|
case Opcode::VSET_ARITH: //TODO: arithmetic ops
|
||||||
inst.setDestReg((code>>shift_rd) & reg_mask);
|
inst.setDestReg((code>>shift_rd) & reg_mask);
|
||||||
inst.setSrcReg((code>>shift_rs1) & reg_mask);
|
inst.setSrcReg((code>>shift_rs1) & reg_mask);
|
||||||
func3 = (code>>shift_func3) & func3_mask;
|
func3 = (code>>shift_func3) & func3_mask;
|
||||||
inst.setFunc3 (func3);
|
inst.setFunc3 (func3);
|
||||||
cout << "Entered here: instr type = vector" << endl;
|
D(3, "Entered here: instr type = vector");
|
||||||
|
|
||||||
if(func3 == 7) {
|
if(func3 == 7) {
|
||||||
cout << "Entered here: imm instr";
|
D(3, "Entered here: imm instr");
|
||||||
|
|
||||||
inst.setVsetImm(!(code>>shift_vset));
|
inst.setVsetImm(!(code>>shift_vset));
|
||||||
|
|
||||||
@@ -318,9 +319,9 @@ Instruction *WordDecoder::decode(const std::vector<Byte> &v, Size &idx, trace_in
|
|||||||
Ref *srcRef = refMap[idx-n/8];
|
Ref *srcRef = refMap[idx-n/8];
|
||||||
|
|
||||||
/* Create a new ref tied to this instruction. */
|
/* Create a new ref tied to this instruction. */
|
||||||
Ref *r = new SimpleRef(srcRef->name, *(Addr*)inst.setSrcImm(),
|
// Ref *r = new SimpleRef(srcRef->name, *(Addr*)inst.setSrcImm(),
|
||||||
inst.hasRelImm());
|
// inst.hasRelImm());
|
||||||
inst.setImmRef(*r);
|
// inst.setImmRef(*r);
|
||||||
}
|
}
|
||||||
|
|
||||||
D(2, "Decoded 0x" << hex << code << " into: " << inst << '\n');
|
D(2, "Decoded 0x" << hex << code << " into: " << inst << '\n');
|
||||||
|
|||||||
@@ -23,8 +23,8 @@ namespace Harp {
|
|||||||
encChar = 'w';
|
encChar = 'w';
|
||||||
nRegs = 32;
|
nRegs = 32;
|
||||||
nPRegs = 0;
|
nPRegs = 0;
|
||||||
nThds = 8;
|
nThds = 32;
|
||||||
nWarps = 8;
|
nWarps = 32;
|
||||||
|
|
||||||
extent = EXT_WARPS;
|
extent = EXT_WARPS;
|
||||||
|
|
||||||
|
|||||||
@@ -144,6 +144,8 @@ namespace Harp {
|
|||||||
Word interruptEntry;
|
Word interruptEntry;
|
||||||
|
|
||||||
unsigned long steps;
|
unsigned long steps;
|
||||||
|
unsigned long num_cycles;
|
||||||
|
unsigned long num_instructions;
|
||||||
std::vector<Warp> w;
|
std::vector<Warp> w;
|
||||||
std::map<Word, std::set<Warp *> > b; // Barriers
|
std::map<Word, std::set<Warp *> > b; // Barriers
|
||||||
int schedule_w;
|
int schedule_w;
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
#define __DEBUG_H
|
#define __DEBUG_H
|
||||||
|
|
||||||
// #define USE_DEBUG 9
|
// #define USE_DEBUG 9
|
||||||
#define USE_DEBUG 3
|
// #define USE_DEBUG 3
|
||||||
|
|
||||||
#ifdef USE_DEBUG
|
#ifdef USE_DEBUG
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
@@ -21,10 +21,11 @@
|
|||||||
#define D_RAW(x) do { \
|
#define D_RAW(x) do { \
|
||||||
std::cout << x; \
|
std::cout << x; \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
#define D(lvl, x) do {} while(0)
|
#define D(lvl, x) do {} while(0)
|
||||||
|
#define D_RAW(x) do {} while(0)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
BIN
simX/obj_dir/Vcache_simX
Executable file
BIN
simX/obj_dir/Vcache_simX
Executable file
Binary file not shown.
208
simX/obj_dir/Vcache_simX.cpp
Normal file
208
simX/obj_dir/Vcache_simX.cpp
Normal file
@@ -0,0 +1,208 @@
|
|||||||
|
// Verilated -*- C++ -*-
|
||||||
|
// DESCRIPTION: Verilator output: Design implementation internals
|
||||||
|
// See Vcache_simX.h for the primary calling header
|
||||||
|
|
||||||
|
#include "Vcache_simX.h" // For This
|
||||||
|
#include "Vcache_simX__Syms.h"
|
||||||
|
|
||||||
|
//--------------------
|
||||||
|
// STATIC VARIABLES
|
||||||
|
|
||||||
|
|
||||||
|
//--------------------
|
||||||
|
|
||||||
|
VL_CTOR_IMP(Vcache_simX) {
|
||||||
|
Vcache_simX__Syms* __restrict vlSymsp = __VlSymsp = new Vcache_simX__Syms(this, name());
|
||||||
|
Vcache_simX* __restrict vlTOPp VL_ATTR_UNUSED = vlSymsp->TOPp;
|
||||||
|
VL_CELL (__PVT__v, Vcache_simX_cache_simX);
|
||||||
|
// Reset internal values
|
||||||
|
|
||||||
|
// Reset structure values
|
||||||
|
clk = VL_RAND_RESET_I(1);
|
||||||
|
reset = VL_RAND_RESET_I(1);
|
||||||
|
in_icache_pc_addr = VL_RAND_RESET_I(32);
|
||||||
|
in_icache_valid_pc_addr = VL_RAND_RESET_I(1);
|
||||||
|
out_icache_stall = VL_RAND_RESET_I(1);
|
||||||
|
in_dcache_mem_read = VL_RAND_RESET_I(3);
|
||||||
|
in_dcache_mem_write = VL_RAND_RESET_I(3);
|
||||||
|
{ int __Vi0=0; for (; __Vi0<4; ++__Vi0) {
|
||||||
|
in_dcache_in_valid[__Vi0] = VL_RAND_RESET_I(1);
|
||||||
|
}}
|
||||||
|
{ int __Vi0=0; for (; __Vi0<4; ++__Vi0) {
|
||||||
|
in_dcache_in_address[__Vi0] = VL_RAND_RESET_I(32);
|
||||||
|
}}
|
||||||
|
out_dcache_stall = VL_RAND_RESET_I(1);
|
||||||
|
__Vclklast__TOP__clk = VL_RAND_RESET_I(1);
|
||||||
|
__Vclklast__TOP__reset = VL_RAND_RESET_I(1);
|
||||||
|
__Vchglast__TOP__v__dmem_controller__shared_memory__DOT__block_addr = VL_RAND_RESET_I(28);
|
||||||
|
__Vm_traceActivity = VL_RAND_RESET_I(32);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Vcache_simX::__Vconfigure(Vcache_simX__Syms* vlSymsp, bool first) {
|
||||||
|
if (0 && first) {} // Prevent unused
|
||||||
|
this->__VlSymsp = vlSymsp;
|
||||||
|
}
|
||||||
|
|
||||||
|
Vcache_simX::~Vcache_simX() {
|
||||||
|
delete __VlSymsp; __VlSymsp=NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
//--------------------
|
||||||
|
|
||||||
|
|
||||||
|
void Vcache_simX::eval() {
|
||||||
|
Vcache_simX__Syms* __restrict vlSymsp = this->__VlSymsp; // Setup global symbol table
|
||||||
|
Vcache_simX* __restrict vlTOPp VL_ATTR_UNUSED = vlSymsp->TOPp;
|
||||||
|
// Initialize
|
||||||
|
if (VL_UNLIKELY(!vlSymsp->__Vm_didInit)) _eval_initial_loop(vlSymsp);
|
||||||
|
// Evaluate till stable
|
||||||
|
VL_DEBUG_IF(VL_PRINTF("\n----TOP Evaluate Vcache_simX::eval\n"); );
|
||||||
|
int __VclockLoop = 0;
|
||||||
|
QData __Vchange=1;
|
||||||
|
while (VL_LIKELY(__Vchange)) {
|
||||||
|
VL_DEBUG_IF(VL_PRINTF(" Clock loop\n"););
|
||||||
|
vlSymsp->__Vm_activity = true;
|
||||||
|
_eval(vlSymsp);
|
||||||
|
__Vchange = _change_request(vlSymsp);
|
||||||
|
if (++__VclockLoop > 100) vl_fatal(__FILE__,__LINE__,__FILE__,"Verilated model didn't converge");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Vcache_simX::_eval_initial_loop(Vcache_simX__Syms* __restrict vlSymsp) {
|
||||||
|
vlSymsp->__Vm_didInit = true;
|
||||||
|
_eval_initial(vlSymsp);
|
||||||
|
vlSymsp->__Vm_activity = true;
|
||||||
|
int __VclockLoop = 0;
|
||||||
|
QData __Vchange=1;
|
||||||
|
while (VL_LIKELY(__Vchange)) {
|
||||||
|
_eval_settle(vlSymsp);
|
||||||
|
_eval(vlSymsp);
|
||||||
|
__Vchange = _change_request(vlSymsp);
|
||||||
|
if (++__VclockLoop > 100) vl_fatal(__FILE__,__LINE__,__FILE__,"Verilated model didn't DC converge");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//--------------------
|
||||||
|
// Internal Methods
|
||||||
|
|
||||||
|
VL_INLINE_OPT void Vcache_simX::_combo__TOP__1(Vcache_simX__Syms* __restrict vlSymsp) {
|
||||||
|
VL_DEBUG_IF(VL_PRINTF(" Vcache_simX::_combo__TOP__1\n"); );
|
||||||
|
Vcache_simX* __restrict vlTOPp VL_ATTR_UNUSED = vlSymsp->TOPp;
|
||||||
|
// Body
|
||||||
|
vlSymsp->TOP__v.in_dcache_in_valid[3U] = vlTOPp->in_dcache_in_valid
|
||||||
|
[3U];
|
||||||
|
vlSymsp->TOP__v.in_dcache_in_valid[2U] = vlTOPp->in_dcache_in_valid
|
||||||
|
[2U];
|
||||||
|
vlSymsp->TOP__v.in_dcache_in_valid[1U] = vlTOPp->in_dcache_in_valid
|
||||||
|
[1U];
|
||||||
|
vlSymsp->TOP__v.in_dcache_in_valid[0U] = vlTOPp->in_dcache_in_valid
|
||||||
|
[0U];
|
||||||
|
vlSymsp->TOP__v.in_dcache_in_address[3U] = vlTOPp->in_dcache_in_address
|
||||||
|
[3U];
|
||||||
|
vlSymsp->TOP__v.in_dcache_in_address[2U] = vlTOPp->in_dcache_in_address
|
||||||
|
[2U];
|
||||||
|
vlSymsp->TOP__v.in_dcache_in_address[1U] = vlTOPp->in_dcache_in_address
|
||||||
|
[1U];
|
||||||
|
vlSymsp->TOP__v.in_dcache_in_address[0U] = vlTOPp->in_dcache_in_address
|
||||||
|
[0U];
|
||||||
|
}
|
||||||
|
|
||||||
|
VL_INLINE_OPT void Vcache_simX::_combo__TOP__3(Vcache_simX__Syms* __restrict vlSymsp) {
|
||||||
|
VL_DEBUG_IF(VL_PRINTF(" Vcache_simX::_combo__TOP__3\n"); );
|
||||||
|
Vcache_simX* __restrict vlTOPp VL_ATTR_UNUSED = vlSymsp->TOPp;
|
||||||
|
// Body
|
||||||
|
vlTOPp->out_icache_stall = ((IData)(vlSymsp->TOP__v__dmem_controller.__PVT__icache__DOT__new_stored_valid)
|
||||||
|
| (0U != (IData)(vlSymsp->TOP__v__dmem_controller.__PVT__icache__DOT__state)));
|
||||||
|
}
|
||||||
|
|
||||||
|
VL_INLINE_OPT void Vcache_simX::_combo__TOP__5(Vcache_simX__Syms* __restrict vlSymsp) {
|
||||||
|
VL_DEBUG_IF(VL_PRINTF(" Vcache_simX::_combo__TOP__5\n"); );
|
||||||
|
Vcache_simX* __restrict vlTOPp VL_ATTR_UNUSED = vlSymsp->TOPp;
|
||||||
|
// Body
|
||||||
|
vlTOPp->out_dcache_stall = ((0U != (IData)(vlSymsp->TOP__v__dmem_controller.__PVT__shared_memory__DOT__vx_priority_encoder_sm__DOT__more_than_one_valid))
|
||||||
|
| ((0U != (IData)(vlSymsp->TOP__v__dmem_controller.__PVT__dcache__DOT__new_stored_valid))
|
||||||
|
| (0U != (IData)(vlSymsp->TOP__v__dmem_controller.__PVT__dcache__DOT__state))));
|
||||||
|
}
|
||||||
|
|
||||||
|
void Vcache_simX::_eval(Vcache_simX__Syms* __restrict vlSymsp) {
|
||||||
|
VL_DEBUG_IF(VL_PRINTF(" Vcache_simX::_eval\n"); );
|
||||||
|
Vcache_simX* __restrict vlTOPp VL_ATTR_UNUSED = vlSymsp->TOPp;
|
||||||
|
// Body
|
||||||
|
vlSymsp->TOP__v__dmem_controller._combo__TOP__v__dmem_controller__1(vlSymsp);
|
||||||
|
vlTOPp->__Vm_traceActivity = (2U | vlTOPp->__Vm_traceActivity);
|
||||||
|
vlTOPp->_combo__TOP__1(vlSymsp);
|
||||||
|
if ((((IData)(vlTOPp->clk) & (~ (IData)(vlTOPp->__Vclklast__TOP__clk)))
|
||||||
|
| ((IData)(vlTOPp->reset) & (~ (IData)(vlTOPp->__Vclklast__TOP__reset))))) {
|
||||||
|
vlSymsp->TOP__v__dmem_controller._sequent__TOP__v__dmem_controller__3(vlSymsp);
|
||||||
|
vlTOPp->__Vm_traceActivity = (4U | vlTOPp->__Vm_traceActivity);
|
||||||
|
vlSymsp->TOP__v._sequent__TOP__v__2(vlSymsp);
|
||||||
|
vlSymsp->TOP__v__dmem_controller._sequent__TOP__v__dmem_controller__4(vlSymsp);
|
||||||
|
}
|
||||||
|
vlSymsp->TOP__v._combo__TOP__v__3(vlSymsp);
|
||||||
|
vlSymsp->TOP__v__dmem_controller._combo__TOP__v__dmem_controller__5(vlSymsp);
|
||||||
|
vlSymsp->TOP__v__dmem_controller._combo__TOP__v__dmem_controller__7(vlSymsp);
|
||||||
|
if ((((IData)(vlTOPp->clk) & (~ (IData)(vlTOPp->__Vclklast__TOP__clk)))
|
||||||
|
| ((IData)(vlTOPp->reset) & (~ (IData)(vlTOPp->__Vclklast__TOP__reset))))) {
|
||||||
|
vlSymsp->TOP__v__dmem_controller._sequent__TOP__v__dmem_controller__8(vlSymsp);
|
||||||
|
vlTOPp->__Vm_traceActivity = (8U | vlTOPp->__Vm_traceActivity);
|
||||||
|
}
|
||||||
|
vlSymsp->TOP__v__dmem_controller._combo__TOP__v__dmem_controller__10(vlSymsp);
|
||||||
|
vlSymsp->TOP__v__dmem_controller._combo__TOP__v__dmem_controller__12(vlSymsp);
|
||||||
|
vlSymsp->TOP__v__dmem_controller._combo__TOP__v__dmem_controller__14(vlSymsp);
|
||||||
|
vlSymsp->TOP__v__dmem_controller._combo__TOP__v__dmem_controller__16(vlSymsp);
|
||||||
|
vlSymsp->TOP__v__dmem_controller._combo__TOP__v__dmem_controller__18(vlSymsp);
|
||||||
|
vlTOPp->_combo__TOP__3(vlSymsp);
|
||||||
|
vlSymsp->TOP__v__dmem_controller._combo__TOP__v__dmem_controller__20(vlSymsp);
|
||||||
|
vlSymsp->TOP__v__dmem_controller._combo__TOP__v__dmem_controller__22(vlSymsp);
|
||||||
|
vlTOPp->_combo__TOP__5(vlSymsp);
|
||||||
|
// Final
|
||||||
|
vlTOPp->__Vclklast__TOP__clk = vlTOPp->clk;
|
||||||
|
vlTOPp->__Vclklast__TOP__reset = vlTOPp->reset;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Vcache_simX::_eval_initial(Vcache_simX__Syms* __restrict vlSymsp) {
|
||||||
|
VL_DEBUG_IF(VL_PRINTF(" Vcache_simX::_eval_initial\n"); );
|
||||||
|
Vcache_simX* __restrict vlTOPp VL_ATTR_UNUSED = vlSymsp->TOPp;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Vcache_simX::final() {
|
||||||
|
VL_DEBUG_IF(VL_PRINTF(" Vcache_simX::final\n"); );
|
||||||
|
// Variables
|
||||||
|
Vcache_simX__Syms* __restrict vlSymsp = this->__VlSymsp;
|
||||||
|
Vcache_simX* __restrict vlTOPp VL_ATTR_UNUSED = vlSymsp->TOPp;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Vcache_simX::_eval_settle(Vcache_simX__Syms* __restrict vlSymsp) {
|
||||||
|
VL_DEBUG_IF(VL_PRINTF(" Vcache_simX::_eval_settle\n"); );
|
||||||
|
Vcache_simX* __restrict vlTOPp VL_ATTR_UNUSED = vlSymsp->TOPp;
|
||||||
|
// Body
|
||||||
|
vlSymsp->TOP__v__dmem_controller._combo__TOP__v__dmem_controller__1(vlSymsp);
|
||||||
|
vlTOPp->__Vm_traceActivity = (1U | vlTOPp->__Vm_traceActivity);
|
||||||
|
vlTOPp->_combo__TOP__1(vlSymsp);
|
||||||
|
vlSymsp->TOP__v._settle__TOP__v__1(vlSymsp);
|
||||||
|
vlSymsp->TOP__v._settle__TOP__v__4(vlSymsp);
|
||||||
|
vlSymsp->TOP__v__dmem_controller._settle__TOP__v__dmem_controller__6(vlSymsp);
|
||||||
|
vlSymsp->TOP__v__dmem_controller._settle__TOP__v__dmem_controller__9(vlSymsp);
|
||||||
|
vlSymsp->TOP__v__dmem_controller._settle__TOP__v__dmem_controller__11(vlSymsp);
|
||||||
|
vlSymsp->TOP__v__dmem_controller._settle__TOP__v__dmem_controller__13(vlSymsp);
|
||||||
|
vlSymsp->TOP__v__dmem_controller._settle__TOP__v__dmem_controller__15(vlSymsp);
|
||||||
|
vlSymsp->TOP__v__dmem_controller._settle__TOP__v__dmem_controller__17(vlSymsp);
|
||||||
|
vlSymsp->TOP__v__dmem_controller._settle__TOP__v__dmem_controller__19(vlSymsp);
|
||||||
|
vlTOPp->_combo__TOP__3(vlSymsp);
|
||||||
|
vlSymsp->TOP__v__dmem_controller._settle__TOP__v__dmem_controller__21(vlSymsp);
|
||||||
|
vlTOPp->_combo__TOP__5(vlSymsp);
|
||||||
|
}
|
||||||
|
|
||||||
|
VL_INLINE_OPT QData Vcache_simX::_change_request(Vcache_simX__Syms* __restrict vlSymsp) {
|
||||||
|
VL_DEBUG_IF(VL_PRINTF(" Vcache_simX::_change_request\n"); );
|
||||||
|
Vcache_simX* __restrict vlTOPp VL_ATTR_UNUSED = vlSymsp->TOPp;
|
||||||
|
// Body
|
||||||
|
// Change detection
|
||||||
|
QData __req = false; // Logically a bool
|
||||||
|
__req |= ((vlSymsp->TOP__v__dmem_controller.__PVT__shared_memory__DOT__block_addr ^ vlTOPp->__Vchglast__TOP__v__dmem_controller__shared_memory__DOT__block_addr));
|
||||||
|
VL_DEBUG_IF( if(__req && ((vlSymsp->TOP__v__dmem_controller.__PVT__shared_memory__DOT__block_addr ^ vlTOPp->__Vchglast__TOP__v__dmem_controller__shared_memory__DOT__block_addr))) VL_PRINTF(" CHANGE: ../rtl/shared_memory/VX_shared_memory.v:49: shared_memory.block_addr\n"); );
|
||||||
|
// Final
|
||||||
|
vlTOPp->__Vchglast__TOP__v__dmem_controller__shared_memory__DOT__block_addr
|
||||||
|
= vlSymsp->TOP__v__dmem_controller.__PVT__shared_memory__DOT__block_addr;
|
||||||
|
return __req;
|
||||||
|
}
|
||||||
113
simX/obj_dir/Vcache_simX.h
Normal file
113
simX/obj_dir/Vcache_simX.h
Normal file
@@ -0,0 +1,113 @@
|
|||||||
|
// Verilated -*- C++ -*-
|
||||||
|
// DESCRIPTION: Verilator output: Primary design header
|
||||||
|
//
|
||||||
|
// This header should be included by all source files instantiating the design.
|
||||||
|
// The class here is then constructed to instantiate the design.
|
||||||
|
// See the Verilator manual for examples.
|
||||||
|
|
||||||
|
#ifndef _Vcache_simX_H_
|
||||||
|
#define _Vcache_simX_H_
|
||||||
|
|
||||||
|
#include "verilated.h"
|
||||||
|
#include "Vcache_simX__Inlines.h"
|
||||||
|
class Vcache_simX__Syms;
|
||||||
|
class Vcache_simX_cache_simX;
|
||||||
|
class VerilatedVcd;
|
||||||
|
|
||||||
|
//----------
|
||||||
|
|
||||||
|
VL_MODULE(Vcache_simX) {
|
||||||
|
public:
|
||||||
|
// CELLS
|
||||||
|
// Public to allow access to /*verilator_public*/ items;
|
||||||
|
// otherwise the application code can consider these internals.
|
||||||
|
Vcache_simX_cache_simX* __PVT__v;
|
||||||
|
|
||||||
|
// PORTS
|
||||||
|
// The application code writes and reads these signals to
|
||||||
|
// propagate new values into/out from the Verilated model.
|
||||||
|
VL_IN8(clk,0,0);
|
||||||
|
VL_IN8(reset,0,0);
|
||||||
|
VL_IN8(in_icache_valid_pc_addr,0,0);
|
||||||
|
VL_OUT8(out_icache_stall,0,0);
|
||||||
|
VL_IN8(in_dcache_mem_read,2,0);
|
||||||
|
VL_IN8(in_dcache_mem_write,2,0);
|
||||||
|
VL_OUT8(out_dcache_stall,0,0);
|
||||||
|
//char __VpadToAlign7[1];
|
||||||
|
VL_IN(in_icache_pc_addr,31,0);
|
||||||
|
VL_IN8(in_dcache_in_valid[4],0,0);
|
||||||
|
VL_IN(in_dcache_in_address[4],31,0);
|
||||||
|
|
||||||
|
// LOCAL SIGNALS
|
||||||
|
// Internals; generally not touched by application code
|
||||||
|
|
||||||
|
// LOCAL VARIABLES
|
||||||
|
// Internals; generally not touched by application code
|
||||||
|
VL_SIG8(__Vclklast__TOP__clk,0,0);
|
||||||
|
VL_SIG8(__Vclklast__TOP__reset,0,0);
|
||||||
|
//char __VpadToAlign42[2];
|
||||||
|
VL_SIG(__Vchglast__TOP__v__dmem_controller__shared_memory__DOT__block_addr,27,0);
|
||||||
|
VL_SIG(__Vm_traceActivity,31,0);
|
||||||
|
|
||||||
|
// INTERNAL VARIABLES
|
||||||
|
// Internals; generally not touched by application code
|
||||||
|
Vcache_simX__Syms* __VlSymsp; // Symbol table
|
||||||
|
|
||||||
|
// PARAMETERS
|
||||||
|
// Parameters marked /*verilator public*/ for use by application code
|
||||||
|
|
||||||
|
// CONSTRUCTORS
|
||||||
|
private:
|
||||||
|
Vcache_simX& operator= (const Vcache_simX&); ///< Copying not allowed
|
||||||
|
Vcache_simX(const Vcache_simX&); ///< Copying not allowed
|
||||||
|
public:
|
||||||
|
/// Construct the model; called by application code
|
||||||
|
/// The special name may be used to make a wrapper with a
|
||||||
|
/// single model invisible WRT DPI scope names.
|
||||||
|
Vcache_simX(const char* name="TOP");
|
||||||
|
/// Destroy the model; called (often implicitly) by application code
|
||||||
|
~Vcache_simX();
|
||||||
|
/// Trace signals in the model; called by application code
|
||||||
|
void trace (VerilatedVcdC* tfp, int levels, int options=0);
|
||||||
|
|
||||||
|
// USER METHODS
|
||||||
|
|
||||||
|
// API METHODS
|
||||||
|
/// Evaluate the model. Application must call when inputs change.
|
||||||
|
void eval();
|
||||||
|
/// Simulation complete, run final blocks. Application must call on completion.
|
||||||
|
void final();
|
||||||
|
|
||||||
|
// INTERNAL METHODS
|
||||||
|
private:
|
||||||
|
static void _eval_initial_loop(Vcache_simX__Syms* __restrict vlSymsp);
|
||||||
|
public:
|
||||||
|
void __Vconfigure(Vcache_simX__Syms* symsp, bool first);
|
||||||
|
private:
|
||||||
|
static QData _change_request(Vcache_simX__Syms* __restrict vlSymsp);
|
||||||
|
public:
|
||||||
|
static void _combo__TOP__1(Vcache_simX__Syms* __restrict vlSymsp);
|
||||||
|
static void _combo__TOP__3(Vcache_simX__Syms* __restrict vlSymsp);
|
||||||
|
static void _combo__TOP__5(Vcache_simX__Syms* __restrict vlSymsp);
|
||||||
|
static void _eval(Vcache_simX__Syms* __restrict vlSymsp);
|
||||||
|
static void _eval_initial(Vcache_simX__Syms* __restrict vlSymsp);
|
||||||
|
static void _eval_settle(Vcache_simX__Syms* __restrict vlSymsp);
|
||||||
|
static void traceChgThis(Vcache_simX__Syms* __restrict vlSymsp, VerilatedVcd* vcdp, uint32_t code);
|
||||||
|
static void traceChgThis__2(Vcache_simX__Syms* __restrict vlSymsp, VerilatedVcd* vcdp, uint32_t code);
|
||||||
|
static void traceChgThis__3(Vcache_simX__Syms* __restrict vlSymsp, VerilatedVcd* vcdp, uint32_t code);
|
||||||
|
static void traceChgThis__4(Vcache_simX__Syms* __restrict vlSymsp, VerilatedVcd* vcdp, uint32_t code);
|
||||||
|
static void traceChgThis__5(Vcache_simX__Syms* __restrict vlSymsp, VerilatedVcd* vcdp, uint32_t code);
|
||||||
|
static void traceChgThis__6(Vcache_simX__Syms* __restrict vlSymsp, VerilatedVcd* vcdp, uint32_t code);
|
||||||
|
static void traceChgThis__7(Vcache_simX__Syms* __restrict vlSymsp, VerilatedVcd* vcdp, uint32_t code);
|
||||||
|
static void traceChgThis__8(Vcache_simX__Syms* __restrict vlSymsp, VerilatedVcd* vcdp, uint32_t code);
|
||||||
|
static void traceChgThis__9(Vcache_simX__Syms* __restrict vlSymsp, VerilatedVcd* vcdp, uint32_t code);
|
||||||
|
static void traceFullThis(Vcache_simX__Syms* __restrict vlSymsp, VerilatedVcd* vcdp, uint32_t code);
|
||||||
|
static void traceFullThis__1(Vcache_simX__Syms* __restrict vlSymsp, VerilatedVcd* vcdp, uint32_t code);
|
||||||
|
static void traceInitThis(Vcache_simX__Syms* __restrict vlSymsp, VerilatedVcd* vcdp, uint32_t code);
|
||||||
|
static void traceInitThis__1(Vcache_simX__Syms* __restrict vlSymsp, VerilatedVcd* vcdp, uint32_t code);
|
||||||
|
static void traceInit (VerilatedVcd* vcdp, void* userthis, uint32_t code);
|
||||||
|
static void traceFull (VerilatedVcd* vcdp, void* userthis, uint32_t code);
|
||||||
|
static void traceChg (VerilatedVcd* vcdp, void* userthis, uint32_t code);
|
||||||
|
} VL_ATTR_ALIGNED(128);
|
||||||
|
|
||||||
|
#endif /*guard*/
|
||||||
91
simX/obj_dir/Vcache_simX.mk
Normal file
91
simX/obj_dir/Vcache_simX.mk
Normal file
@@ -0,0 +1,91 @@
|
|||||||
|
# Verilated -*- Makefile -*-
|
||||||
|
# DESCRIPTION: Verilator output: Makefile for building Verilated archive or executable
|
||||||
|
#
|
||||||
|
# Execute this makefile from the object directory:
|
||||||
|
# make -f Vcache_simX.mk
|
||||||
|
|
||||||
|
default: Vcache_simX
|
||||||
|
|
||||||
|
### Constants...
|
||||||
|
# Perl executable (from $PERL)
|
||||||
|
PERL = perl
|
||||||
|
# Path to Verilator kit (from $VERILATOR_ROOT)
|
||||||
|
VERILATOR_ROOT = /usr/share/verilator
|
||||||
|
# Path to SystemPerl kit top (from $SYSTEMPERL)
|
||||||
|
SYSTEMPERL =
|
||||||
|
# Path to SystemPerl kit includes (from $SYSTEMPERL_INCLUDE)
|
||||||
|
SYSTEMPERL_INCLUDE =
|
||||||
|
# SystemC include directory with systemc.h (from $SYSTEMC_INCLUDE)
|
||||||
|
SYSTEMC_INCLUDE ?=
|
||||||
|
# SystemC library directory with libsystemc.a (from $SYSTEMC_LIBDIR)
|
||||||
|
SYSTEMC_LIBDIR ?=
|
||||||
|
|
||||||
|
### Switches...
|
||||||
|
# SystemPerl output mode? 0/1 (from --sp)
|
||||||
|
VM_SP = 0
|
||||||
|
# SystemC output mode? 0/1 (from --sc)
|
||||||
|
VM_SC = 0
|
||||||
|
# SystemPerl or SystemC output mode? 0/1 (from --sp/--sc)
|
||||||
|
VM_SP_OR_SC = 0
|
||||||
|
# Deprecated
|
||||||
|
VM_PCLI = 1
|
||||||
|
# Deprecated: SystemC architecture to find link library path (from $SYSTEMC_ARCH)
|
||||||
|
VM_SC_TARGET_ARCH = linux
|
||||||
|
|
||||||
|
### Vars...
|
||||||
|
# Design prefix (from --prefix)
|
||||||
|
VM_PREFIX = Vcache_simX
|
||||||
|
# Module prefix (from --prefix)
|
||||||
|
VM_MODPREFIX = Vcache_simX
|
||||||
|
# User CFLAGS (from -CFLAGS on Verilator command line)
|
||||||
|
VM_USER_CFLAGS = \
|
||||||
|
-std=c++11 -fPIC -O3 \
|
||||||
|
|
||||||
|
# User LDLIBS (from -LDFLAGS on Verilator command line)
|
||||||
|
VM_USER_LDLIBS = \
|
||||||
|
|
||||||
|
# User .cpp files (from .cpp's on Verilator command line)
|
||||||
|
VM_USER_CLASSES = \
|
||||||
|
args \
|
||||||
|
core \
|
||||||
|
enc \
|
||||||
|
instruction \
|
||||||
|
mem \
|
||||||
|
simX \
|
||||||
|
util \
|
||||||
|
|
||||||
|
# User .cpp directories (from .cpp's on Verilator command line)
|
||||||
|
VM_USER_DIR = \
|
||||||
|
. \
|
||||||
|
|
||||||
|
|
||||||
|
### Default rules...
|
||||||
|
# Include list of all generated classes
|
||||||
|
include Vcache_simX_classes.mk
|
||||||
|
# Include global rules
|
||||||
|
include $(VERILATOR_ROOT)/include/verilated.mk
|
||||||
|
|
||||||
|
### Executable rules... (from --exe)
|
||||||
|
VPATH += $(VM_USER_DIR)
|
||||||
|
|
||||||
|
args.o: args.cpp
|
||||||
|
$(CXX) $(CXXFLAGS) $(CPPFLAGS) $(OPT_FAST) -c -o $@ $<
|
||||||
|
core.o: core.cpp
|
||||||
|
$(CXX) $(CXXFLAGS) $(CPPFLAGS) $(OPT_FAST) -c -o $@ $<
|
||||||
|
enc.o: enc.cpp
|
||||||
|
$(CXX) $(CXXFLAGS) $(CPPFLAGS) $(OPT_FAST) -c -o $@ $<
|
||||||
|
instruction.o: instruction.cpp
|
||||||
|
$(CXX) $(CXXFLAGS) $(CPPFLAGS) $(OPT_FAST) -c -o $@ $<
|
||||||
|
mem.o: mem.cpp
|
||||||
|
$(CXX) $(CXXFLAGS) $(CPPFLAGS) $(OPT_FAST) -c -o $@ $<
|
||||||
|
simX.o: simX.cpp
|
||||||
|
$(CXX) $(CXXFLAGS) $(CPPFLAGS) $(OPT_FAST) -c -o $@ $<
|
||||||
|
util.o: util.cpp
|
||||||
|
$(CXX) $(CXXFLAGS) $(CPPFLAGS) $(OPT_FAST) -c -o $@ $<
|
||||||
|
|
||||||
|
### Link rules... (from --exe)
|
||||||
|
Vcache_simX: $(VK_USER_OBJS) $(VK_GLOBAL_OBJS) $(VM_PREFIX)__ALL.a
|
||||||
|
$(LINK) $(LDFLAGS) $^ $(LOADLIBES) $(LDLIBS) -o $@ $(LIBS) $(SC_LIBS) 2>&1 | c++filt
|
||||||
|
|
||||||
|
|
||||||
|
# Verilated -*- Makefile -*-
|
||||||
30
simX/obj_dir/Vcache_simX_VX_dcache_request_inter.cpp
Normal file
30
simX/obj_dir/Vcache_simX_VX_dcache_request_inter.cpp
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
// Verilated -*- C++ -*-
|
||||||
|
// DESCRIPTION: Verilator output: Design implementation internals
|
||||||
|
// See Vcache_simX.h for the primary calling header
|
||||||
|
|
||||||
|
#include "Vcache_simX_VX_dcache_request_inter.h" // For This
|
||||||
|
#include "Vcache_simX__Syms.h"
|
||||||
|
|
||||||
|
//--------------------
|
||||||
|
// STATIC VARIABLES
|
||||||
|
|
||||||
|
|
||||||
|
//--------------------
|
||||||
|
|
||||||
|
VL_CTOR_IMP(Vcache_simX_VX_dcache_request_inter) {
|
||||||
|
// Reset internal values
|
||||||
|
// Reset structure values
|
||||||
|
VL_RAND_RESET_W(128,__PVT__out_cache_driver_in_address);
|
||||||
|
__PVT__out_cache_driver_in_valid = VL_RAND_RESET_I(4);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Vcache_simX_VX_dcache_request_inter::__Vconfigure(Vcache_simX__Syms* vlSymsp, bool first) {
|
||||||
|
if (0 && first) {} // Prevent unused
|
||||||
|
this->__VlSymsp = vlSymsp;
|
||||||
|
}
|
||||||
|
|
||||||
|
Vcache_simX_VX_dcache_request_inter::~Vcache_simX_VX_dcache_request_inter() {
|
||||||
|
}
|
||||||
|
|
||||||
|
//--------------------
|
||||||
|
// Internal Methods
|
||||||
55
simX/obj_dir/Vcache_simX_VX_dcache_request_inter.h
Normal file
55
simX/obj_dir/Vcache_simX_VX_dcache_request_inter.h
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
// Verilated -*- C++ -*-
|
||||||
|
// DESCRIPTION: Verilator output: Design internal header
|
||||||
|
// See Vcache_simX.h for the primary calling header
|
||||||
|
|
||||||
|
#ifndef _Vcache_simX_VX_dcache_request_inter_H_
|
||||||
|
#define _Vcache_simX_VX_dcache_request_inter_H_
|
||||||
|
|
||||||
|
#include "verilated.h"
|
||||||
|
#include "Vcache_simX__Inlines.h"
|
||||||
|
class Vcache_simX__Syms;
|
||||||
|
class VerilatedVcd;
|
||||||
|
|
||||||
|
//----------
|
||||||
|
|
||||||
|
VL_MODULE(Vcache_simX_VX_dcache_request_inter) {
|
||||||
|
public:
|
||||||
|
// CELLS
|
||||||
|
|
||||||
|
// PORTS
|
||||||
|
|
||||||
|
// LOCAL SIGNALS
|
||||||
|
VL_SIG8(__PVT__out_cache_driver_in_valid,3,0);
|
||||||
|
//char __VpadToAlign5[3];
|
||||||
|
VL_SIGW(__PVT__out_cache_driver_in_address,127,0,4);
|
||||||
|
|
||||||
|
// LOCAL VARIABLES
|
||||||
|
|
||||||
|
// INTERNAL VARIABLES
|
||||||
|
private:
|
||||||
|
Vcache_simX__Syms* __VlSymsp; // Symbol table
|
||||||
|
public:
|
||||||
|
|
||||||
|
// PARAMETERS
|
||||||
|
|
||||||
|
// CONSTRUCTORS
|
||||||
|
private:
|
||||||
|
Vcache_simX_VX_dcache_request_inter& operator= (const Vcache_simX_VX_dcache_request_inter&); ///< Copying not allowed
|
||||||
|
Vcache_simX_VX_dcache_request_inter(const Vcache_simX_VX_dcache_request_inter&); ///< Copying not allowed
|
||||||
|
public:
|
||||||
|
Vcache_simX_VX_dcache_request_inter(const char* name="TOP");
|
||||||
|
~Vcache_simX_VX_dcache_request_inter();
|
||||||
|
void trace (VerilatedVcdC* tfp, int levels, int options=0);
|
||||||
|
|
||||||
|
// USER METHODS
|
||||||
|
|
||||||
|
// API METHODS
|
||||||
|
|
||||||
|
// INTERNAL METHODS
|
||||||
|
void __Vconfigure(Vcache_simX__Syms* symsp, bool first);
|
||||||
|
static void traceInit (VerilatedVcd* vcdp, void* userthis, uint32_t code);
|
||||||
|
static void traceFull (VerilatedVcd* vcdp, void* userthis, uint32_t code);
|
||||||
|
static void traceChg (VerilatedVcd* vcdp, void* userthis, uint32_t code);
|
||||||
|
} VL_ATTR_ALIGNED(128);
|
||||||
|
|
||||||
|
#endif /*guard*/
|
||||||
28
simX/obj_dir/Vcache_simX_VX_dcache_response_inter.cpp
Normal file
28
simX/obj_dir/Vcache_simX_VX_dcache_response_inter.cpp
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
// Verilated -*- C++ -*-
|
||||||
|
// DESCRIPTION: Verilator output: Design implementation internals
|
||||||
|
// See Vcache_simX.h for the primary calling header
|
||||||
|
|
||||||
|
#include "Vcache_simX_VX_dcache_response_inter.h" // For This
|
||||||
|
#include "Vcache_simX__Syms.h"
|
||||||
|
|
||||||
|
//--------------------
|
||||||
|
// STATIC VARIABLES
|
||||||
|
|
||||||
|
|
||||||
|
//--------------------
|
||||||
|
|
||||||
|
VL_CTOR_IMP(Vcache_simX_VX_dcache_response_inter) {
|
||||||
|
// Reset internal values
|
||||||
|
// Reset structure values
|
||||||
|
}
|
||||||
|
|
||||||
|
void Vcache_simX_VX_dcache_response_inter::__Vconfigure(Vcache_simX__Syms* vlSymsp, bool first) {
|
||||||
|
if (0 && first) {} // Prevent unused
|
||||||
|
this->__VlSymsp = vlSymsp;
|
||||||
|
}
|
||||||
|
|
||||||
|
Vcache_simX_VX_dcache_response_inter::~Vcache_simX_VX_dcache_response_inter() {
|
||||||
|
}
|
||||||
|
|
||||||
|
//--------------------
|
||||||
|
// Internal Methods
|
||||||
53
simX/obj_dir/Vcache_simX_VX_dcache_response_inter.h
Normal file
53
simX/obj_dir/Vcache_simX_VX_dcache_response_inter.h
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
// Verilated -*- C++ -*-
|
||||||
|
// DESCRIPTION: Verilator output: Design internal header
|
||||||
|
// See Vcache_simX.h for the primary calling header
|
||||||
|
|
||||||
|
#ifndef _Vcache_simX_VX_dcache_response_inter_H_
|
||||||
|
#define _Vcache_simX_VX_dcache_response_inter_H_
|
||||||
|
|
||||||
|
#include "verilated.h"
|
||||||
|
#include "Vcache_simX__Inlines.h"
|
||||||
|
class Vcache_simX__Syms;
|
||||||
|
class VerilatedVcd;
|
||||||
|
|
||||||
|
//----------
|
||||||
|
|
||||||
|
VL_MODULE(Vcache_simX_VX_dcache_response_inter) {
|
||||||
|
public:
|
||||||
|
// CELLS
|
||||||
|
|
||||||
|
// PORTS
|
||||||
|
|
||||||
|
// LOCAL SIGNALS
|
||||||
|
|
||||||
|
// LOCAL VARIABLES
|
||||||
|
|
||||||
|
// INTERNAL VARIABLES
|
||||||
|
private:
|
||||||
|
//char __VpadToAlign12[4];
|
||||||
|
Vcache_simX__Syms* __VlSymsp; // Symbol table
|
||||||
|
public:
|
||||||
|
|
||||||
|
// PARAMETERS
|
||||||
|
|
||||||
|
// CONSTRUCTORS
|
||||||
|
private:
|
||||||
|
Vcache_simX_VX_dcache_response_inter& operator= (const Vcache_simX_VX_dcache_response_inter&); ///< Copying not allowed
|
||||||
|
Vcache_simX_VX_dcache_response_inter(const Vcache_simX_VX_dcache_response_inter&); ///< Copying not allowed
|
||||||
|
public:
|
||||||
|
Vcache_simX_VX_dcache_response_inter(const char* name="TOP");
|
||||||
|
~Vcache_simX_VX_dcache_response_inter();
|
||||||
|
void trace (VerilatedVcdC* tfp, int levels, int options=0);
|
||||||
|
|
||||||
|
// USER METHODS
|
||||||
|
|
||||||
|
// API METHODS
|
||||||
|
|
||||||
|
// INTERNAL METHODS
|
||||||
|
void __Vconfigure(Vcache_simX__Syms* symsp, bool first);
|
||||||
|
static void traceInit (VerilatedVcd* vcdp, void* userthis, uint32_t code);
|
||||||
|
static void traceFull (VerilatedVcd* vcdp, void* userthis, uint32_t code);
|
||||||
|
static void traceChg (VerilatedVcd* vcdp, void* userthis, uint32_t code);
|
||||||
|
} VL_ATTR_ALIGNED(128);
|
||||||
|
|
||||||
|
#endif /*guard*/
|
||||||
13421
simX/obj_dir/Vcache_simX_VX_dmem_controller__V0_VB1000.cpp
Normal file
13421
simX/obj_dir/Vcache_simX_VX_dmem_controller__V0_VB1000.cpp
Normal file
File diff suppressed because it is too large
Load Diff
1037
simX/obj_dir/Vcache_simX_VX_dmem_controller__V0_VB1000.h
Normal file
1037
simX/obj_dir/Vcache_simX_VX_dmem_controller__V0_VB1000.h
Normal file
File diff suppressed because it is too large
Load Diff
29
simX/obj_dir/Vcache_simX_VX_dram_req_rsp_inter__N1_NB4.cpp
Normal file
29
simX/obj_dir/Vcache_simX_VX_dram_req_rsp_inter__N1_NB4.cpp
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
// Verilated -*- C++ -*-
|
||||||
|
// DESCRIPTION: Verilator output: Design implementation internals
|
||||||
|
// See Vcache_simX.h for the primary calling header
|
||||||
|
|
||||||
|
#include "Vcache_simX_VX_dram_req_rsp_inter__N1_NB4.h" // For This
|
||||||
|
#include "Vcache_simX__Syms.h"
|
||||||
|
|
||||||
|
//--------------------
|
||||||
|
// STATIC VARIABLES
|
||||||
|
|
||||||
|
|
||||||
|
//--------------------
|
||||||
|
|
||||||
|
VL_CTOR_IMP(Vcache_simX_VX_dram_req_rsp_inter__N1_NB4) {
|
||||||
|
// Reset internal values
|
||||||
|
// Reset structure values
|
||||||
|
VL_RAND_RESET_W(128,__PVT__i_m_readdata);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Vcache_simX_VX_dram_req_rsp_inter__N1_NB4::__Vconfigure(Vcache_simX__Syms* vlSymsp, bool first) {
|
||||||
|
if (0 && first) {} // Prevent unused
|
||||||
|
this->__VlSymsp = vlSymsp;
|
||||||
|
}
|
||||||
|
|
||||||
|
Vcache_simX_VX_dram_req_rsp_inter__N1_NB4::~Vcache_simX_VX_dram_req_rsp_inter__N1_NB4() {
|
||||||
|
}
|
||||||
|
|
||||||
|
//--------------------
|
||||||
|
// Internal Methods
|
||||||
54
simX/obj_dir/Vcache_simX_VX_dram_req_rsp_inter__N1_NB4.h
Normal file
54
simX/obj_dir/Vcache_simX_VX_dram_req_rsp_inter__N1_NB4.h
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
// Verilated -*- C++ -*-
|
||||||
|
// DESCRIPTION: Verilator output: Design internal header
|
||||||
|
// See Vcache_simX.h for the primary calling header
|
||||||
|
|
||||||
|
#ifndef _Vcache_simX_VX_dram_req_rsp_inter__N1_NB4_H_
|
||||||
|
#define _Vcache_simX_VX_dram_req_rsp_inter__N1_NB4_H_
|
||||||
|
|
||||||
|
#include "verilated.h"
|
||||||
|
#include "Vcache_simX__Inlines.h"
|
||||||
|
class Vcache_simX__Syms;
|
||||||
|
class VerilatedVcd;
|
||||||
|
|
||||||
|
//----------
|
||||||
|
|
||||||
|
VL_MODULE(Vcache_simX_VX_dram_req_rsp_inter__N1_NB4) {
|
||||||
|
public:
|
||||||
|
// CELLS
|
||||||
|
|
||||||
|
// PORTS
|
||||||
|
|
||||||
|
// LOCAL SIGNALS
|
||||||
|
//char __VpadToAlign4[4];
|
||||||
|
VL_SIGW(__PVT__i_m_readdata,127,0,4);
|
||||||
|
|
||||||
|
// LOCAL VARIABLES
|
||||||
|
|
||||||
|
// INTERNAL VARIABLES
|
||||||
|
private:
|
||||||
|
Vcache_simX__Syms* __VlSymsp; // Symbol table
|
||||||
|
public:
|
||||||
|
|
||||||
|
// PARAMETERS
|
||||||
|
|
||||||
|
// CONSTRUCTORS
|
||||||
|
private:
|
||||||
|
Vcache_simX_VX_dram_req_rsp_inter__N1_NB4& operator= (const Vcache_simX_VX_dram_req_rsp_inter__N1_NB4&); ///< Copying not allowed
|
||||||
|
Vcache_simX_VX_dram_req_rsp_inter__N1_NB4(const Vcache_simX_VX_dram_req_rsp_inter__N1_NB4&); ///< Copying not allowed
|
||||||
|
public:
|
||||||
|
Vcache_simX_VX_dram_req_rsp_inter__N1_NB4(const char* name="TOP");
|
||||||
|
~Vcache_simX_VX_dram_req_rsp_inter__N1_NB4();
|
||||||
|
void trace (VerilatedVcdC* tfp, int levels, int options=0);
|
||||||
|
|
||||||
|
// USER METHODS
|
||||||
|
|
||||||
|
// API METHODS
|
||||||
|
|
||||||
|
// INTERNAL METHODS
|
||||||
|
void __Vconfigure(Vcache_simX__Syms* symsp, bool first);
|
||||||
|
static void traceInit (VerilatedVcd* vcdp, void* userthis, uint32_t code);
|
||||||
|
static void traceFull (VerilatedVcd* vcdp, void* userthis, uint32_t code);
|
||||||
|
static void traceChg (VerilatedVcd* vcdp, void* userthis, uint32_t code);
|
||||||
|
} VL_ATTR_ALIGNED(128);
|
||||||
|
|
||||||
|
#endif /*guard*/
|
||||||
29
simX/obj_dir/Vcache_simX_VX_dram_req_rsp_inter__N4_NB4.cpp
Normal file
29
simX/obj_dir/Vcache_simX_VX_dram_req_rsp_inter__N4_NB4.cpp
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
// Verilated -*- C++ -*-
|
||||||
|
// DESCRIPTION: Verilator output: Design implementation internals
|
||||||
|
// See Vcache_simX.h for the primary calling header
|
||||||
|
|
||||||
|
#include "Vcache_simX_VX_dram_req_rsp_inter__N4_NB4.h" // For This
|
||||||
|
#include "Vcache_simX__Syms.h"
|
||||||
|
|
||||||
|
//--------------------
|
||||||
|
// STATIC VARIABLES
|
||||||
|
|
||||||
|
|
||||||
|
//--------------------
|
||||||
|
|
||||||
|
VL_CTOR_IMP(Vcache_simX_VX_dram_req_rsp_inter__N4_NB4) {
|
||||||
|
// Reset internal values
|
||||||
|
// Reset structure values
|
||||||
|
VL_RAND_RESET_W(512,__PVT__i_m_readdata);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Vcache_simX_VX_dram_req_rsp_inter__N4_NB4::__Vconfigure(Vcache_simX__Syms* vlSymsp, bool first) {
|
||||||
|
if (0 && first) {} // Prevent unused
|
||||||
|
this->__VlSymsp = vlSymsp;
|
||||||
|
}
|
||||||
|
|
||||||
|
Vcache_simX_VX_dram_req_rsp_inter__N4_NB4::~Vcache_simX_VX_dram_req_rsp_inter__N4_NB4() {
|
||||||
|
}
|
||||||
|
|
||||||
|
//--------------------
|
||||||
|
// Internal Methods
|
||||||
54
simX/obj_dir/Vcache_simX_VX_dram_req_rsp_inter__N4_NB4.h
Normal file
54
simX/obj_dir/Vcache_simX_VX_dram_req_rsp_inter__N4_NB4.h
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
// Verilated -*- C++ -*-
|
||||||
|
// DESCRIPTION: Verilator output: Design internal header
|
||||||
|
// See Vcache_simX.h for the primary calling header
|
||||||
|
|
||||||
|
#ifndef _Vcache_simX_VX_dram_req_rsp_inter__N4_NB4_H_
|
||||||
|
#define _Vcache_simX_VX_dram_req_rsp_inter__N4_NB4_H_
|
||||||
|
|
||||||
|
#include "verilated.h"
|
||||||
|
#include "Vcache_simX__Inlines.h"
|
||||||
|
class Vcache_simX__Syms;
|
||||||
|
class VerilatedVcd;
|
||||||
|
|
||||||
|
//----------
|
||||||
|
|
||||||
|
VL_MODULE(Vcache_simX_VX_dram_req_rsp_inter__N4_NB4) {
|
||||||
|
public:
|
||||||
|
// CELLS
|
||||||
|
|
||||||
|
// PORTS
|
||||||
|
|
||||||
|
// LOCAL SIGNALS
|
||||||
|
//char __VpadToAlign4[4];
|
||||||
|
VL_SIGW(__PVT__i_m_readdata,511,0,16);
|
||||||
|
|
||||||
|
// LOCAL VARIABLES
|
||||||
|
|
||||||
|
// INTERNAL VARIABLES
|
||||||
|
private:
|
||||||
|
Vcache_simX__Syms* __VlSymsp; // Symbol table
|
||||||
|
public:
|
||||||
|
|
||||||
|
// PARAMETERS
|
||||||
|
|
||||||
|
// CONSTRUCTORS
|
||||||
|
private:
|
||||||
|
Vcache_simX_VX_dram_req_rsp_inter__N4_NB4& operator= (const Vcache_simX_VX_dram_req_rsp_inter__N4_NB4&); ///< Copying not allowed
|
||||||
|
Vcache_simX_VX_dram_req_rsp_inter__N4_NB4(const Vcache_simX_VX_dram_req_rsp_inter__N4_NB4&); ///< Copying not allowed
|
||||||
|
public:
|
||||||
|
Vcache_simX_VX_dram_req_rsp_inter__N4_NB4(const char* name="TOP");
|
||||||
|
~Vcache_simX_VX_dram_req_rsp_inter__N4_NB4();
|
||||||
|
void trace (VerilatedVcdC* tfp, int levels, int options=0);
|
||||||
|
|
||||||
|
// USER METHODS
|
||||||
|
|
||||||
|
// API METHODS
|
||||||
|
|
||||||
|
// INTERNAL METHODS
|
||||||
|
void __Vconfigure(Vcache_simX__Syms* symsp, bool first);
|
||||||
|
static void traceInit (VerilatedVcd* vcdp, void* userthis, uint32_t code);
|
||||||
|
static void traceFull (VerilatedVcd* vcdp, void* userthis, uint32_t code);
|
||||||
|
static void traceChg (VerilatedVcd* vcdp, void* userthis, uint32_t code);
|
||||||
|
} VL_ATTR_ALIGNED(128);
|
||||||
|
|
||||||
|
#endif /*guard*/
|
||||||
28
simX/obj_dir/Vcache_simX_VX_icache_request_inter.cpp
Normal file
28
simX/obj_dir/Vcache_simX_VX_icache_request_inter.cpp
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
// Verilated -*- C++ -*-
|
||||||
|
// DESCRIPTION: Verilator output: Design implementation internals
|
||||||
|
// See Vcache_simX.h for the primary calling header
|
||||||
|
|
||||||
|
#include "Vcache_simX_VX_icache_request_inter.h" // For This
|
||||||
|
#include "Vcache_simX__Syms.h"
|
||||||
|
|
||||||
|
//--------------------
|
||||||
|
// STATIC VARIABLES
|
||||||
|
|
||||||
|
|
||||||
|
//--------------------
|
||||||
|
|
||||||
|
VL_CTOR_IMP(Vcache_simX_VX_icache_request_inter) {
|
||||||
|
// Reset internal values
|
||||||
|
// Reset structure values
|
||||||
|
}
|
||||||
|
|
||||||
|
void Vcache_simX_VX_icache_request_inter::__Vconfigure(Vcache_simX__Syms* vlSymsp, bool first) {
|
||||||
|
if (0 && first) {} // Prevent unused
|
||||||
|
this->__VlSymsp = vlSymsp;
|
||||||
|
}
|
||||||
|
|
||||||
|
Vcache_simX_VX_icache_request_inter::~Vcache_simX_VX_icache_request_inter() {
|
||||||
|
}
|
||||||
|
|
||||||
|
//--------------------
|
||||||
|
// Internal Methods
|
||||||
53
simX/obj_dir/Vcache_simX_VX_icache_request_inter.h
Normal file
53
simX/obj_dir/Vcache_simX_VX_icache_request_inter.h
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
// Verilated -*- C++ -*-
|
||||||
|
// DESCRIPTION: Verilator output: Design internal header
|
||||||
|
// See Vcache_simX.h for the primary calling header
|
||||||
|
|
||||||
|
#ifndef _Vcache_simX_VX_icache_request_inter_H_
|
||||||
|
#define _Vcache_simX_VX_icache_request_inter_H_
|
||||||
|
|
||||||
|
#include "verilated.h"
|
||||||
|
#include "Vcache_simX__Inlines.h"
|
||||||
|
class Vcache_simX__Syms;
|
||||||
|
class VerilatedVcd;
|
||||||
|
|
||||||
|
//----------
|
||||||
|
|
||||||
|
VL_MODULE(Vcache_simX_VX_icache_request_inter) {
|
||||||
|
public:
|
||||||
|
// CELLS
|
||||||
|
|
||||||
|
// PORTS
|
||||||
|
|
||||||
|
// LOCAL SIGNALS
|
||||||
|
|
||||||
|
// LOCAL VARIABLES
|
||||||
|
|
||||||
|
// INTERNAL VARIABLES
|
||||||
|
private:
|
||||||
|
//char __VpadToAlign12[4];
|
||||||
|
Vcache_simX__Syms* __VlSymsp; // Symbol table
|
||||||
|
public:
|
||||||
|
|
||||||
|
// PARAMETERS
|
||||||
|
|
||||||
|
// CONSTRUCTORS
|
||||||
|
private:
|
||||||
|
Vcache_simX_VX_icache_request_inter& operator= (const Vcache_simX_VX_icache_request_inter&); ///< Copying not allowed
|
||||||
|
Vcache_simX_VX_icache_request_inter(const Vcache_simX_VX_icache_request_inter&); ///< Copying not allowed
|
||||||
|
public:
|
||||||
|
Vcache_simX_VX_icache_request_inter(const char* name="TOP");
|
||||||
|
~Vcache_simX_VX_icache_request_inter();
|
||||||
|
void trace (VerilatedVcdC* tfp, int levels, int options=0);
|
||||||
|
|
||||||
|
// USER METHODS
|
||||||
|
|
||||||
|
// API METHODS
|
||||||
|
|
||||||
|
// INTERNAL METHODS
|
||||||
|
void __Vconfigure(Vcache_simX__Syms* symsp, bool first);
|
||||||
|
static void traceInit (VerilatedVcd* vcdp, void* userthis, uint32_t code);
|
||||||
|
static void traceFull (VerilatedVcd* vcdp, void* userthis, uint32_t code);
|
||||||
|
static void traceChg (VerilatedVcd* vcdp, void* userthis, uint32_t code);
|
||||||
|
} VL_ATTR_ALIGNED(128);
|
||||||
|
|
||||||
|
#endif /*guard*/
|
||||||
28
simX/obj_dir/Vcache_simX_VX_icache_response_inter.cpp
Normal file
28
simX/obj_dir/Vcache_simX_VX_icache_response_inter.cpp
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
// Verilated -*- C++ -*-
|
||||||
|
// DESCRIPTION: Verilator output: Design implementation internals
|
||||||
|
// See Vcache_simX.h for the primary calling header
|
||||||
|
|
||||||
|
#include "Vcache_simX_VX_icache_response_inter.h" // For This
|
||||||
|
#include "Vcache_simX__Syms.h"
|
||||||
|
|
||||||
|
//--------------------
|
||||||
|
// STATIC VARIABLES
|
||||||
|
|
||||||
|
|
||||||
|
//--------------------
|
||||||
|
|
||||||
|
VL_CTOR_IMP(Vcache_simX_VX_icache_response_inter) {
|
||||||
|
// Reset internal values
|
||||||
|
// Reset structure values
|
||||||
|
}
|
||||||
|
|
||||||
|
void Vcache_simX_VX_icache_response_inter::__Vconfigure(Vcache_simX__Syms* vlSymsp, bool first) {
|
||||||
|
if (0 && first) {} // Prevent unused
|
||||||
|
this->__VlSymsp = vlSymsp;
|
||||||
|
}
|
||||||
|
|
||||||
|
Vcache_simX_VX_icache_response_inter::~Vcache_simX_VX_icache_response_inter() {
|
||||||
|
}
|
||||||
|
|
||||||
|
//--------------------
|
||||||
|
// Internal Methods
|
||||||
53
simX/obj_dir/Vcache_simX_VX_icache_response_inter.h
Normal file
53
simX/obj_dir/Vcache_simX_VX_icache_response_inter.h
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
// Verilated -*- C++ -*-
|
||||||
|
// DESCRIPTION: Verilator output: Design internal header
|
||||||
|
// See Vcache_simX.h for the primary calling header
|
||||||
|
|
||||||
|
#ifndef _Vcache_simX_VX_icache_response_inter_H_
|
||||||
|
#define _Vcache_simX_VX_icache_response_inter_H_
|
||||||
|
|
||||||
|
#include "verilated.h"
|
||||||
|
#include "Vcache_simX__Inlines.h"
|
||||||
|
class Vcache_simX__Syms;
|
||||||
|
class VerilatedVcd;
|
||||||
|
|
||||||
|
//----------
|
||||||
|
|
||||||
|
VL_MODULE(Vcache_simX_VX_icache_response_inter) {
|
||||||
|
public:
|
||||||
|
// CELLS
|
||||||
|
|
||||||
|
// PORTS
|
||||||
|
|
||||||
|
// LOCAL SIGNALS
|
||||||
|
|
||||||
|
// LOCAL VARIABLES
|
||||||
|
|
||||||
|
// INTERNAL VARIABLES
|
||||||
|
private:
|
||||||
|
//char __VpadToAlign12[4];
|
||||||
|
Vcache_simX__Syms* __VlSymsp; // Symbol table
|
||||||
|
public:
|
||||||
|
|
||||||
|
// PARAMETERS
|
||||||
|
|
||||||
|
// CONSTRUCTORS
|
||||||
|
private:
|
||||||
|
Vcache_simX_VX_icache_response_inter& operator= (const Vcache_simX_VX_icache_response_inter&); ///< Copying not allowed
|
||||||
|
Vcache_simX_VX_icache_response_inter(const Vcache_simX_VX_icache_response_inter&); ///< Copying not allowed
|
||||||
|
public:
|
||||||
|
Vcache_simX_VX_icache_response_inter(const char* name="TOP");
|
||||||
|
~Vcache_simX_VX_icache_response_inter();
|
||||||
|
void trace (VerilatedVcdC* tfp, int levels, int options=0);
|
||||||
|
|
||||||
|
// USER METHODS
|
||||||
|
|
||||||
|
// API METHODS
|
||||||
|
|
||||||
|
// INTERNAL METHODS
|
||||||
|
void __Vconfigure(Vcache_simX__Syms* symsp, bool first);
|
||||||
|
static void traceInit (VerilatedVcd* vcdp, void* userthis, uint32_t code);
|
||||||
|
static void traceFull (VerilatedVcd* vcdp, void* userthis, uint32_t code);
|
||||||
|
static void traceChg (VerilatedVcd* vcdp, void* userthis, uint32_t code);
|
||||||
|
} VL_ATTR_ALIGNED(128);
|
||||||
|
|
||||||
|
#endif /*guard*/
|
||||||
BIN
simX/obj_dir/Vcache_simX__ALL.a
Normal file
BIN
simX/obj_dir/Vcache_simX__ALL.a
Normal file
Binary file not shown.
11
simX/obj_dir/Vcache_simX__ALLcls.cpp
Normal file
11
simX/obj_dir/Vcache_simX__ALLcls.cpp
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
// DESCRIPTION: Generated by verilator_includer via makefile
|
||||||
|
#define VL_INCLUDE_OPT include
|
||||||
|
#include "Vcache_simX.cpp"
|
||||||
|
#include "Vcache_simX_cache_simX.cpp"
|
||||||
|
#include "Vcache_simX_VX_dmem_controller__V0_VB1000.cpp"
|
||||||
|
#include "Vcache_simX_VX_icache_request_inter.cpp"
|
||||||
|
#include "Vcache_simX_VX_icache_response_inter.cpp"
|
||||||
|
#include "Vcache_simX_VX_dram_req_rsp_inter__N4_NB4.cpp"
|
||||||
|
#include "Vcache_simX_VX_dram_req_rsp_inter__N1_NB4.cpp"
|
||||||
|
#include "Vcache_simX_VX_dcache_request_inter.cpp"
|
||||||
|
#include "Vcache_simX_VX_dcache_response_inter.cpp"
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user