project directories reorganization

This commit is contained in:
Blaise Tine
2020-04-14 06:35:20 -04:00
parent 1de06fd9c0
commit fc155e1223
1056 changed files with 8120 additions and 8120 deletions

23
sw/driver/Makefile Normal file
View File

@@ -0,0 +1,23 @@
# Top-level driver build: dispatches to the per-backend sub-makefiles.
all: stub

stub:
	$(MAKE) -C stub

opae:
	$(MAKE) -C opae

rtlsim:
	$(MAKE) -C rtlsim

simx:
	$(MAKE) -C simx

clean:
	$(MAKE) clean -C stub
	$(MAKE) clean -C opae
	$(MAKE) clean -C rtlsim
	$(MAKE) clean -C simx

# Each backend target shares its name with a directory, so it must be phony;
# otherwise make sees the directory and reports the target as up to date.
.PHONY: all stub opae rtlsim simx clean

View File

@@ -0,0 +1,118 @@
#include <iostream>
#include <fstream>
#include <cstring>
#include <vortex.h>
#include <config.h>
// Returns the device configuration value selected by `caps_id` (one of the
// VX_CAPS_* ids). An unknown id is reported on stdout and aborts the process.
extern int vx_dev_caps(int caps_id) {
  int value = 0;
  switch (caps_id) {
  case VX_CAPS_VERSION:
    value = 0;
    break;
  case VX_CAPS_MAX_CORES:
    value = NUMBER_CORES;
    break;
  case VX_CAPS_MAX_WARPS:
    value = NW;
    break;
  case VX_CAPS_MAX_THREADS:
    value = NT;
    break;
  case VX_CAPS_CACHE_LINESIZE:
    value = GLOBAL_BLOCK_SIZE_BYTES;
    break;
  case VX_CAPS_LOCAL_MEM_SIZE:
    // does not fit in a signed int: callers receive the converted value
    value = 0xffffffff;
    break;
  case VX_CAPS_ALLOC_BASE_ADDR:
    value = 0x10000000;
    break;
  case VX_CAPS_KERNEL_BASE_ADDR:
    // does not fit in a signed int: callers receive the converted value
    value = 0x80000000;
    break;
  default:
    std::cout << "invalid caps id: " << caps_id << std::endl;
    std::abort();
  }
  return value;
}
// Uploads `size` bytes of kernel image from `content` to the device, starting
// at the address reported by VX_CAPS_KERNEL_BASE_ADDR. The image is staged
// through a temporary 64 KiB shared buffer and copied chunk by chunk.
// Returns 0 on success, -1 on invalid arguments or allocation failure, or the
// error code of the failing vx_copy_to_dev call.
extern int vx_upload_kernel_bytes(vx_device_h device, const void* content, size_t size) {
  if (NULL == content || 0 == size)
    return -1;

  const uint32_t buffer_transfer_size = 65536;
  const uint32_t kernel_base_addr = vx_dev_caps(VX_CAPS_KERNEL_BASE_ADDR);

  // allocate device buffer
  vx_buffer_h buffer;
  if (vx_alloc_shared_mem(device, buffer_transfer_size, &buffer) != 0)
    return -1;

  // get buffer address; a backend may return null, never write through it
  auto buf_ptr = (uint8_t*)vx_host_ptr(buffer);
  if (NULL == buf_ptr) {
    vx_buf_release(buffer);
    return -1;
  }

  int err = 0;

#if defined(USE_SIMX)
  {
    auto words = (uint32_t*)buf_ptr;

    // default startup routine
    words[0] = 0xf1401073;
    words[1] = 0xf1401073;
    words[2] = 0x30101073;
    words[3] = 0x800000b7;
    words[4] = 0x000080e7;
    err = vx_copy_to_dev(buffer, 0, 5 * 4, 0);

    // newlib io simulator trap
    if (0 == err) {
      words[0] = 0x00008067;
      err = vx_copy_to_dev(buffer, 0x70000000, 4, 0);
    }
  }
#endif

  //
  // upload content; the loop stops at the first failing transfer
  //
  for (size_t offset = 0; 0 == err && offset < size;) {
    size_t chunk_size = size - offset;
    if (chunk_size > buffer_transfer_size)
      chunk_size = buffer_transfer_size;
    std::memcpy(buf_ptr, (const uint8_t*)content + offset, chunk_size);
    err = vx_copy_to_dev(buffer, kernel_base_addr + offset, chunk_size, 0);
    offset += chunk_size;
  }

  // single release point for both the success and the error paths
  vx_buf_release(buffer);
  return err;
}
// Reads the whole file `filename` into memory and uploads it with
// vx_upload_kernel_bytes. Returns -1 when the file cannot be opened, is empty
// or cannot be read completely; otherwise returns the upload result.
extern int vx_upload_kernel_file(vx_device_h device, const char* filename) {
  if (NULL == filename)
    return -1;

  // kernel images are raw bytes: open in binary mode so no platform performs
  // newline translation on the content
  std::ifstream ifs(filename, std::ios::binary);
  if (!ifs) {
    std::cout << "error: " << filename << " not found" << std::endl;
    return -1;
  }

  // get length of file; tellg() reports -1 on failure
  ifs.seekg(0, ifs.end);
  const std::streamoff length = ifs.tellg();
  ifs.seekg(0, ifs.beg);
  if (!ifs || length <= 0) {
    std::cout << "error: failed to read " << filename << std::endl;
    return -1;
  }
  const size_t size = static_cast<size_t>(length);

  // allocate buffer
  char* content = new char[size];

  // read file content and upload it only if the read was complete
  int err = -1;
  if (ifs.read(content, length)) {
    err = vx_upload_kernel_bytes(device, content, size);
  } else {
    std::cout << "error: failed to read " << filename << std::endl;
  }

  // release buffer
  delete[] content;
  return err;
}

View File

@@ -0,0 +1,72 @@
// Public C API of the Vortex driver. The same interface is implemented by
// each driver backend; all functions returning int use 0 for success.
#ifndef __VX_DRIVER_H__
#define __VX_DRIVER_H__
#include <stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
// opaque handles to a device and to a host/device shared buffer
typedef void* vx_device_h;
typedef void* vx_buffer_h;
// device caps ids (argument of vx_dev_caps)
#define VX_CAPS_VERSION 0x0
#define VX_CAPS_MAX_CORES 0x1
#define VX_CAPS_MAX_WARPS 0x2
#define VX_CAPS_MAX_THREADS 0x3
#define VX_CAPS_CACHE_LINESIZE 0x4
#define VX_CAPS_LOCAL_MEM_SIZE 0x5
#define VX_CAPS_ALLOC_BASE_ADDR 0x6
#define VX_CAPS_KERNEL_BASE_ADDR 0x7
// return the device configuration value selected by caps_id
int vx_dev_caps(int caps_id);
// open the device and connect to it
int vx_dev_open(vx_device_h* hdevice);
// Close the device when all the operations are done
int vx_dev_close(vx_device_h hdevice);
// Allocate shared buffer with device
int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer);
// Get host pointer address of a shared buffer
volatile void* vx_host_ptr(vx_buffer_h hbuffer);
// release buffer
int vx_buf_release(vx_buffer_h hbuffer);
// allocate device memory and return address
int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr);
// Flush the device caches for the given device memory range
int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size);
// Copy bytes from buffer to device local memory
int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t src_offset);
// Copy bytes from device local memory to buffer
int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t dst_offset);
// Start device execution
int vx_start(vx_device_h hdevice);
// Wait for device ready with milliseconds timeout
int vx_ready_wait(vx_device_h hdevice, long long timeout);
////////////////////////////// UTILITY FUNCTIONS //////////////////////////////
// upload kernel bytes to device
int vx_upload_kernel_bytes(vx_device_h device, const void* content, size_t size);
// upload kernel file to device
int vx_upload_kernel_file(vx_device_h device, const char* filename);
#ifdef __cplusplus
}
#endif
#endif // __VX_DRIVER_H__

68
sw/driver/opae/Makefile Normal file
View File

@@ -0,0 +1,68 @@
CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -I../include -I/tools/opae/1.4.0/include -I../../../runtime

LDFLAGS += -L/tools/opae/1.4.0/lib

# stack execution protection
LDFLAGS +=-z noexecstack

# data relocation and protection
LDFLAGS +=-z relro -z now

# stack buffer overrun detection
CXXFLAGS +=-fstack-protector

# Position independent code
CXXFLAGS += -fPIC

CXXFLAGS += -DGLOBAL_BLOCK_SIZE_BYTES=64

LDFLAGS += -luuid
LDFLAGS += -shared

FPGA_LIBS += -lopae-c
ASE_LIBS += -lopae-c-ase

LIB_DIR=../lib
ASE_DIR = ase

PROJECT = libvortex.so
PROJECT_ASE = $(ASE_DIR)/libvortex.so

AFU_JSON_INFO = vortex_afu.h

SRCS = vortex.cpp ../vx_utils.cpp

all: $(PROJECT) $(PROJECT_ASE)

# AFU info from JSON file, including AFU UUID
$(AFU_JSON_INFO): ../../hw/vortex_afu.json
	afu_json_mgr json-info --afu-json=$^ --c-hdr=$@

# vortex.cpp includes the generated AFU header, so both libraries depend on it.
# The recipes list $(SRCS) explicitly so the header is not passed to the compiler.
$(PROJECT): $(SRCS) $(AFU_JSON_INFO)
	$(CXX) $(CXXFLAGS) $(SRCS) $(LDFLAGS) $(FPGA_LIBS) -o $@

# The output directory is an order-only prerequisite: its timestamp changes
# whenever a file is written into it and must not trigger a rebuild.
$(PROJECT_ASE): $(SRCS) $(AFU_JSON_INFO) | $(ASE_DIR)
	$(CXX) $(CXXFLAGS) -DUSE_ASE $(SRCS) $(LDFLAGS) $(ASE_LIBS) -o $@

vortex.o: vortex.cpp $(AFU_JSON_INFO)
	$(CXX) $(CXXFLAGS) -c vortex.cpp -o $@

$(ASE_DIR):
	mkdir -p $(ASE_DIR)

.depend: $(SRCS) $(AFU_JSON_INFO)
	$(CXX) $(CXXFLAGS) -MM $(SRCS) > .depend;

clean:
	rm -rf $(PROJECT) $(PROJECT_ASE) $(AFU_JSON_INFO) *.o .depend

.PHONY: all clean

ifneq ($(MAKECMDGOALS),clean)
-include .depend
endif

338
sw/driver/opae/vortex.cpp Executable file
View File

@@ -0,0 +1,338 @@
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <assert.h>
#include <uuid/uuid.h>
#include <opae/fpga.h>
#include <vortex.h>
#include "vortex_afu.h"
// Evaluates the OPAE call `_expr`. If the result is not FPGA_OK, prints the
// failing expression, its numeric result and the fpgaErrStr() text, then
// returns -1 from the ENCLOSING function. Only usable inside functions that
// return int; it declares a local named `res` in its own scope.
#define CHECK_RES(_expr) \
do { \
fpga_result res = _expr; \
if (res == FPGA_OK) \
break; \
printf("OPAE Error: '%s' returned %d, %s!\n", \
#_expr, (int)res, fpgaErrStr(res)); \
return -1; \
} while (false)
///////////////////////////////////////////////////////////////////////////////
// Command codes written to MMIO_CSR_CMD, taken from the AFU_IMAGE_* constants
// (not defined in this file; presumably from the AFU description header).
#define CMD_TYPE_READ AFU_IMAGE_CMD_TYPE_READ
#define CMD_TYPE_WRITE AFU_IMAGE_CMD_TYPE_WRITE
#define CMD_TYPE_RUN AFU_IMAGE_CMD_TYPE_RUN
#define CMD_TYPE_CLFLUSH AFU_IMAGE_CMD_TYPE_CLFLUSH
// MMIO register offsets: the AFU_IMAGE_MMIO_CSR_* values scaled by 4
// (presumably converting register indices to byte offsets -- TODO confirm).
#define MMIO_CSR_CMD (AFU_IMAGE_MMIO_CSR_CMD * 4)
#define MMIO_CSR_STATUS (AFU_IMAGE_MMIO_CSR_STATUS * 4)
#define MMIO_CSR_IO_ADDR (AFU_IMAGE_MMIO_CSR_IO_ADDR * 4)
#define MMIO_CSR_MEM_ADDR (AFU_IMAGE_MMIO_CSR_MEM_ADDR * 4)
#define MMIO_CSR_DATA_SIZE (AFU_IMAGE_MMIO_CSR_DATA_SIZE * 4)
///////////////////////////////////////////////////////////////////////////////
// Driver-side state behind a vx_device_h handle.
typedef struct vx_device_ {
// handle of the opened accelerator
fpga_handle fpga;
// next device address handed out by vx_alloc_dev_mem (bump allocation)
size_t mem_allocation;
} vx_device_t;
// Driver-side state behind a vx_buffer_h handle.
typedef struct vx_buffer_ {
// workspace id returned by fpgaPrepareBuffer, needed to release the buffer
uint64_t wsid;
// host address of the shared buffer
volatile void* host_ptr;
// address of the buffer as returned by fpgaGetIOAddress
uint64_t io_addr;
// owning device
vx_device_h hdevice;
// size requested by the caller (before cache-line alignment)
size_t size;
} vx_buffer_t;
// Rounds `size` up to the next multiple of the device cache line size.
static size_t align_size(size_t size) {
  const uint32_t line_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
  const size_t line_count = (size + line_size - 1) / line_size;
  return line_size * line_count;
}
///////////////////////////////////////////////////////////////////////////////
// Finds the accelerator whose GUID is AFU_ACCEL_UUID, opens it and returns a
// newly allocated device object through `hdevice`.
// Returns 0 on success and -1 on a null argument, when no matching
// accelerator is found, or when any OPAE call or the allocation fails.
extern int vx_dev_open(vx_device_h* hdevice) {
  if (nullptr == hdevice)
    return -1;

  // ensure that the block size 64
  assert(64 == vx_dev_caps(VX_CAPS_CACHE_LINESIZE));

  // Set up a filter that will search for an accelerator
  fpga_properties filter = nullptr;
  fpga_result res = fpgaGetProperties(nullptr, &filter);
  if (FPGA_OK != res) {
    fprintf(stderr, "OPAE Error: fpgaGetProperties failed, %s!\n", fpgaErrStr(res));
    return -1;
  }
  res = fpgaPropertiesSetObjectType(filter, FPGA_ACCELERATOR);

  // Add the desired UUID to the filter
  fpga_guid guid;
  if (FPGA_OK == res && 0 != uuid_parse(AFU_ACCEL_UUID, guid))
    res = FPGA_INVALID_PARAM;
  if (FPGA_OK == res)
    res = fpgaPropertiesSetGUID(filter, guid);

  // Do the search across the available FPGA contexts. num_matches starts at
  // zero so a failed enumeration can never be mistaken for a valid token.
  fpga_token accel_token = nullptr;
  uint32_t num_matches = 0;
  if (FPGA_OK == res)
    res = fpgaEnumerate(&filter, 1, &accel_token, 1, &num_matches);

  // Not needed anymore
  fpgaDestroyProperties(&filter);

  if (FPGA_OK != res) {
    fprintf(stderr, "OPAE Error: accelerator lookup failed, %s!\n", fpgaErrStr(res));
    return -1;
  }
  if (num_matches < 1) {
    fprintf(stderr, "Accelerator %s not found!\n", AFU_ACCEL_UUID);
    return -1;
  }

  // Open accelerator
  fpga_handle accel_handle;
  res = fpgaOpen(accel_token, &accel_handle, 0);

  // Done with token, whether or not the open succeeded
  fpgaDestroyToken(&accel_token);

  if (FPGA_OK != res) {
    return -1;
  }

  // allocate device object
  vx_device_t* device = (vx_device_t*)malloc(sizeof(vx_device_t));
  if (nullptr == device) {
    fpgaClose(accel_handle);
    return -1;
  }

  device->fpga = accel_handle;
  device->mem_allocation = vx_dev_caps(VX_CAPS_ALLOC_BASE_ADDR);
  *hdevice = device;
  return 0;
}
// Closes the accelerator handle and frees the device object.
// Returns -1 for a null handle, 0 otherwise.
extern int vx_dev_close(vx_device_h hdevice) {
  if (hdevice != nullptr) {
    auto dev = static_cast<vx_device_t*>(hdevice);
    fpgaClose(dev->fpga);
    free(dev);
    return 0;
  }
  return -1;
}
// Reserves `size` bytes (rounded up to a cache line multiple) of device
// memory with a bump allocator and returns the start address in `dev_maddr`.
// Returns -1 on invalid arguments or when the request does not fit below the
// limit reported by VX_CAPS_LOCAL_MEM_SIZE.
extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr) {
  if (nullptr == hdevice
   || nullptr == dev_maddr
   || 0 == size)
    return -1;

  vx_device_t *device = ((vx_device_t*)hdevice);
  const size_t asize = align_size(size);
  if (asize < size)
    return -1; // rounding up wrapped around

  // vx_dev_caps returns int, so the 0xffffffff limit arrives as -1. Go through
  // uint32_t first; a direct conversion to a 64-bit size_t would sign-extend
  // to SIZE_MAX and the limit would never trigger.
  const size_t dev_mem_size = static_cast<uint32_t>(vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE));

  // written as a subtraction so the check itself cannot overflow
  if (device->mem_allocation > dev_mem_size
   || asize > dev_mem_size - device->mem_allocation)
    return -1;

  *dev_maddr = device->mem_allocation;
  device->mem_allocation += asize;
  return 0;
}
// Allocates a host/device shared buffer of `size` bytes (rounded up to a
// cache line multiple) and returns a buffer object through `hbuffer`.
// Returns 0 on success, -1 on invalid arguments or any failure.
extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer) {
  if (!hdevice || 0 == size || !hbuffer)
    return -1;

  auto dev = static_cast<vx_device_t*>(hdevice);

  // map a buffer of the aligned size
  void* mapped_ptr;
  uint64_t workspace_id;
  if (FPGA_OK != fpgaPrepareBuffer(dev->fpga, align_size(size), &mapped_ptr, &workspace_id, 0))
    return -1;

  // Get the physical address of the buffer in the accelerator
  uint64_t accel_addr;
  if (FPGA_OK != fpgaGetIOAddress(dev->fpga, workspace_id, &accel_addr)) {
    fpgaReleaseBuffer(dev->fpga, workspace_id);
    return -1;
  }

  // allocate buffer object
  auto buf = static_cast<vx_buffer_t*>(malloc(sizeof(vx_buffer_t)));
  if (!buf) {
    fpgaReleaseBuffer(dev->fpga, workspace_id);
    return -1;
  }

  buf->wsid = workspace_id;
  buf->host_ptr = mapped_ptr;
  buf->io_addr = accel_addr;
  buf->hdevice = hdevice;
  buf->size = size; // the requested size, not the aligned one
  *hbuffer = buf;
  return 0;
}
// Returns the host address of a shared buffer, or null for a null handle.
extern volatile void* vx_host_ptr(vx_buffer_h hbuffer) {
  auto buf = static_cast<vx_buffer_t*>(hbuffer);
  if (buf != nullptr)
    return buf->host_ptr;
  return nullptr;
}
// Releases the shared buffer on its owning device and frees the buffer
// object. Returns -1 for a null handle, 0 otherwise.
extern int vx_buf_release(vx_buffer_h hbuffer) {
  if (hbuffer != nullptr) {
    auto buf = static_cast<vx_buffer_t*>(hbuffer);
    auto dev = static_cast<vx_device_t*>(buf->hdevice);
    fpgaReleaseBuffer(dev->fpga, buf->wsid);
    free(buf);
    return 0;
  }
  return -1;
}
// Polls the status CSR until it reads zero or the timeout expires.
// `timeout` is in milliseconds; a negative value waits without a deadline and
// zero polls exactly once. Returns -1 on a null handle or MMIO read error and
// 0 otherwise -- an expired timeout also returns 0, as it always has.
extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
  if (nullptr == hdevice)
    return -1;

  vx_device_t *device = ((vx_device_t*)hdevice);

  // polling period
  struct timespec sleep_time;
#if defined(USE_ASE)
  sleep_time.tv_sec = 1;
  sleep_time.tv_nsec = 0;
#else
  sleep_time.tv_sec = 0;
  sleep_time.tv_nsec = 1000000;
#endif

  // to milliseconds
  const long long sleep_time_ms = (sleep_time.tv_sec * 1000) + (sleep_time.tv_nsec / 1000000);

  for (;;) {
    uint64_t data = 0;
    CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_CSR_STATUS, &data));
    if (0 == data || 0 == timeout)
      break;
    nanosleep(&sleep_time, nullptr);
    // Clamp at zero. A plain subtraction steps over zero whenever the timeout
    // is not a multiple of the polling period, and the wait never ends.
    if (timeout > 0)
      timeout = (timeout > sleep_time_ms) ? (timeout - sleep_time_ms) : 0;
  }
  return 0;
}
// Copies `size` bytes from the shared buffer, starting at `src_offset`, to
// device memory address `dev_maddr`, and waits for the transfer to complete.
// Returns 0 on success and -1 on invalid arguments, an out-of-range request
// or an MMIO failure.
extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t src_offset) {
  if (!hbuffer || 0 == size)
    return -1;

  auto buf = static_cast<vx_buffer_t*>(hbuffer);
  auto dev = static_cast<vx_device_t*>(buf->hdevice);

  // bound checking
  const size_t end_offset = size + src_offset;
  if (end_offset > buf->size)
    return -1;

  // Ensure ready for new command
  if (0 != vx_ready_wait(buf->hdevice, -1))
    return -1;

  // program the transfer, then issue the command
  CHECK_RES(fpgaWriteMMIO64(dev->fpga, 0, MMIO_CSR_IO_ADDR, buf->io_addr + src_offset));
  CHECK_RES(fpgaWriteMMIO64(dev->fpga, 0, MMIO_CSR_MEM_ADDR, dev_maddr));
  CHECK_RES(fpgaWriteMMIO64(dev->fpga, 0, MMIO_CSR_DATA_SIZE, size));
  CHECK_RES(fpgaWriteMMIO64(dev->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_WRITE));

  // Wait for the write operation to finish
  return (0 == vx_ready_wait(buf->hdevice, -1)) ? 0 : -1;
}
// Copies `size` bytes from device memory address `dev_maddr` into the shared
// buffer at `dest_offset`, and waits for the transfer to complete.
// Returns 0 on success and -1 on invalid arguments, an out-of-range request
// or an MMIO failure.
extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t dest_offset) {
  if (!hbuffer || 0 == size)
    return -1;

  auto buf = static_cast<vx_buffer_t*>(hbuffer);
  auto dev = static_cast<vx_device_t*>(buf->hdevice);

  // bound checking
  const size_t end_offset = size + dest_offset;
  if (end_offset > buf->size)
    return -1;

  // Ensure ready for new command
  if (0 != vx_ready_wait(buf->hdevice, -1))
    return -1;

  // program the transfer, then issue the command
  CHECK_RES(fpgaWriteMMIO64(dev->fpga, 0, MMIO_CSR_IO_ADDR, buf->io_addr + dest_offset));
  CHECK_RES(fpgaWriteMMIO64(dev->fpga, 0, MMIO_CSR_MEM_ADDR, dev_maddr));
  CHECK_RES(fpgaWriteMMIO64(dev->fpga, 0, MMIO_CSR_DATA_SIZE, size));
  CHECK_RES(fpgaWriteMMIO64(dev->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_READ));

  // Wait for the read operation to finish
  return (0 == vx_ready_wait(buf->hdevice, -1)) ? 0 : -1;
}
// Issues a cache flush command for `size` bytes at device address
// `dev_maddr` and waits for it to complete.
// Returns 0 on success and -1 on invalid arguments or an MMIO failure.
extern int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size) {
  if (!hdevice || 0 == size)
    return -1;

  auto dev = static_cast<vx_device_t*>(hdevice);

  // Ensure ready for new command
  if (0 != vx_ready_wait(hdevice, -1))
    return -1;

  CHECK_RES(fpgaWriteMMIO64(dev->fpga, 0, MMIO_CSR_MEM_ADDR, dev_maddr));
  CHECK_RES(fpgaWriteMMIO64(dev->fpga, 0, MMIO_CSR_DATA_SIZE, size));
  CHECK_RES(fpgaWriteMMIO64(dev->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_CLFLUSH));

  // Wait for the flush operation to finish
  return (0 == vx_ready_wait(hdevice, -1)) ? 0 : -1;
}
// Waits until the device accepts a command, then issues the RUN command.
// Does not wait for execution to finish.
// Returns 0 on success and -1 on a null handle or an MMIO failure.
extern int vx_start(vx_device_h hdevice) {
  auto dev = static_cast<vx_device_t*>(hdevice);
  if (!dev)
    return -1;

  // Ensure ready for new command
  if (0 != vx_ready_wait(hdevice, -1))
    return -1;

  CHECK_RES(fpgaWriteMMIO64(dev->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_RUN));
  return 0;
}

2
sw/driver/rtlsim/.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
obj_dir
*.so

50
sw/driver/rtlsim/Makefile Normal file
View File

@@ -0,0 +1,50 @@
# CFLAGS += -std=c++11 -O3 -Wall -Wextra -pedantic -Wfatal-errors
CFLAGS += -std=c++11 -O2 -Wall -Wextra -pedantic -Wfatal-errors
# CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -pedantic -Wfatal-errors

USE_MULTICORE=1

CFLAGS += -I../../include -I../../../../rtl/simulate -I../../../../runtime

CFLAGS += -fPIC

CFLAGS += -DUSE_RTLSIM

LDFLAGS += -shared -pthread

ifdef USE_MULTICORE
CFLAGS += -DUSE_MULTICORE
RTL_TOP = Vortex_SOC
else
VL_FLAGS += -DSINGLE_CORE_BENCH
RTL_TOP = Vortex
endif

SRCS = vortex.cpp ../vx_utils.cpp ../../../rtl/simulate/simulator.cpp

RTL_INCLUDE = -I../../../rtl -I../../../rtl/interfaces -I../../../rtl/cache -I../../../rtl/VX_cache -I../../../rtl/shared_memory -I../../../rtl/pipe_regs -I../../../rtl/compat

# Enable Verilator multithreaded simulation
#THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')
#VL_FLAGS += --threads $(THREADS)

VL_FLAGS += -Wno-UNDRIVEN --Wno-PINMISSING -Wno-STMTDLY -Wno-WIDTH -Wno-UNSIGNED -Wno-UNOPTFLAT -Wno-LITENDIAN -Wno-BLKLOOPINIT

# Debugging
#VL_FLAGS += --trace -DVL_DEBUG=1
#CFLAGS += -DVCD_OUTPUT

PROJECT = libvortex.so

all: $(PROJECT)

.PHONY: all clean build_config

build_config:
	(cd ../../../rtl && ./gen_config.py --rtl_locations)

# $(MAKE) rather than a bare `make`, so the sub-make uses the same make program
# and inherits the command-line flags and variables.
$(PROJECT): $(SRCS) build_config
	verilator --exe --cc $(RTL_TOP).v $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT)
	$(MAKE) -j -C obj_dir -f V$(RTL_TOP).mk

clean:
	rm -rf $(PROJECT) obj_dir

310
sw/driver/rtlsim/vortex.cpp Normal file
View File

@@ -0,0 +1,310 @@
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <iostream>
#include <thread>
#include <mutex>
#include <chrono>
#include <vortex.h>
#include <ram.h>
#include <simulator.h>
///////////////////////////////////////////////////////////////////////////////
// Rounds `size` up to the next multiple of the device cache line size.
static size_t align_size(size_t size) {
  const uint32_t line_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
  const size_t line_count = (size + line_size - 1) / line_size;
  return line_size * line_count;
}
///////////////////////////////////////////////////////////////////////////////
class vx_device;
class vx_buffer {
public:
vx_buffer(size_t size, vx_device* device)
: size_(size)
, device_(device) {
auto aligned_asize = align_size(size);
data_ = malloc(aligned_asize);
}
~vx_buffer() {
if (data_) {
free(data_);
}
}
void* data() const {
return data_;
}
size_t size() const {
return size_;
}
vx_device* device() const {
return device_;
}
private:
size_t size_;
vx_device* device_;
void* data_;
};
///////////////////////////////////////////////////////////////////////////////
// Device object of the RTL simulation backend. A background thread steps the
// simulator until the object is destroyed; mutex_ serializes access to the
// simulator and to is_done_ between that thread and the API calls.
class vx_device {
public:
  vx_device()
    : is_done_(false)
    , mem_allocation_(vx_dev_caps(VX_CAPS_ALLOC_BASE_ADDR))
    , simulator_(&ram_)
    , thread_(nullptr) {
    simulator_.reset();
    // started last, from the constructor body, so every member exists
    // before the worker touches it
    thread_ = new std::thread(thread_entry, this);
  }

  ~vx_device() {
    if (thread_) {
      {
        std::lock_guard<std::mutex> guard(mutex_);
        is_done_ = true;
      }
      thread_->join();
      delete thread_;
    }
  }

  // Bump-allocates `size` bytes (cache line aligned) of device memory.
  // Returns -1 when the request does not fit below VX_CAPS_LOCAL_MEM_SIZE.
  int alloc_local_mem(size_t size, size_t* dev_maddr) {
    const size_t asize = align_size(size);
    if (asize < size)
      return -1; // rounding up wrapped around
    // vx_dev_caps returns int, so the 0xffffffff limit arrives as -1; go
    // through uint32_t so it is not sign-extended to SIZE_MAX.
    const size_t dev_mem_size = static_cast<uint32_t>(vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE));
    if (mem_allocation_ > dev_mem_size
     || asize > dev_mem_size - mem_allocation_)
      return -1;
    *dev_maddr = mem_allocation_;
    mem_allocation_ += asize;
    return 0;
  }

  // Writes the cache-line-aligned size starting at src + src_offset into
  // simulated RAM at dest_addr.
  // NOTE(review): ram_ is accessed without holding mutex_ while the simulator
  // thread may be stepping -- confirm this is safe.
  int upload(void* src, size_t dest_addr, size_t size, size_t src_offset) {
    size_t asize = align_size(size);
    if (dest_addr + asize > ram_.size())
      return -1;

    /*printf("VXDRV: upload %d bytes to 0x%x\n", size, dest_addr);
    for (int i = 0; i < size; i += 4) {
      printf("mem-write: 0x%x <- 0x%x\n", dest_addr + i, *(uint32_t*)((uint8_t*)src + src_offset + i));
    }*/

    ram_.write(dest_addr, asize, (uint8_t*)src + src_offset);
    return 0;
  }

  // Reads the cache-line-aligned size from simulated RAM at src_addr into
  // dest + dest_offset. `dest` is written to, hence non-const.
  int download(void* dest, size_t src_addr, size_t size, size_t dest_offset) {
    size_t asize = align_size(size);
    if (src_addr + asize > ram_.size())
      return -1;

    ram_.read(src_addr, asize, (uint8_t*)dest + dest_offset);

    /*printf("VXDRV: download %d bytes from 0x%x\n", size, src_addr);
    for (int i = 0; i < size; i += 4) {
      printf("mem-read: 0x%x -> 0x%x\n", src_addr + i, *(uint32_t*)((uint8_t*)dest + dest_offset + i));
    }*/

    return 0;
  }

  int flush_caches(size_t dev_maddr, size_t size) {
    std::lock_guard<std::mutex> guard(mutex_);
    simulator_.flush_caches(dev_maddr, size);
    return 0;
  }

  // Starting execution is implemented as a simulator reset.
  int start() {
    std::lock_guard<std::mutex> guard(mutex_);
    simulator_.reset();
    return 0;
  }

  // Polls once per second until the simulator is idle or the timeout
  // (milliseconds, truncated to whole seconds) runs out. A negative timeout
  // waits without a deadline. Always returns 0.
  int wait(long long timeout) {
    auto timeout_sec = (timeout < 0) ? timeout : (timeout / 1000);
    for (;;) {
      bool is_busy;
      {
        std::lock_guard<std::mutex> guard(mutex_);
        is_busy = simulator_.is_busy();
      }
      if (!is_busy || 0 == timeout_sec--)
        break;
      std::this_thread::sleep_for(std::chrono::seconds(1));
    }
    return 0;
  }

private:

  // Worker loop: one simulator step per iteration until shutdown is
  // requested. The lock is dropped between the check and the step so API
  // calls get a chance to acquire it.
  void thread_proc() {
    std::cout << "Device ready..." << std::endl;

    for (;;) {
      bool is_done;
      {
        std::lock_guard<std::mutex> guard(mutex_);
        is_done = is_done_;
      }

      if (is_done)
        break;

      {
        std::lock_guard<std::mutex> guard(mutex_);
        simulator_.step();
      }
    }

    std::cout << "Device shutdown..." << std::endl;
  }

  // thread entry point (identifiers with double underscores are reserved)
  static void thread_entry(vx_device* device) {
    device->thread_proc();
  }

  bool is_done_;
  size_t mem_allocation_;
  RAM ram_;
  Simulator simulator_;
  std::thread* thread_;
  std::mutex mutex_;
};
///////////////////////////////////////////////////////////////////////////////
// Creates a simulated device and returns it through `hdevice`.
// Returns -1 for a null output pointer, 0 otherwise.
extern int vx_dev_open(vx_device_h* hdevice) {
  if (hdevice != nullptr) {
    *hdevice = new vx_device();
    return 0;
  }
  return -1;
}
// Destroys the device object. Returns -1 for a null handle, 0 otherwise.
extern int vx_dev_close(vx_device_h hdevice) {
  auto dev = static_cast<vx_device*>(hdevice);
  if (!dev)
    return -1;
  delete dev;
  return 0;
}
// Validates the arguments and forwards to vx_device::alloc_local_mem.
extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr) {
  const bool bad_args = (hdevice == nullptr) || (dev_maddr == nullptr) || (size == 0);
  if (bad_args)
    return -1;
  return static_cast<vx_device*>(hdevice)->alloc_local_mem(size, dev_maddr);
}
// Validates the arguments and forwards to vx_device::flush_caches.
extern int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size) {
  const bool bad_args = (hdevice == nullptr) || (size == 0);
  if (bad_args)
    return -1;
  return static_cast<vx_device*>(hdevice)->flush_caches(dev_maddr, size);
}
// Creates a host buffer of `size` bytes bound to the device and returns it
// through `hbuffer`. Returns -1 on invalid arguments or when the buffer
// storage could not be allocated.
extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer) {
  if (!hdevice || 0 == size || !hbuffer)
    return -1;

  auto buf = new vx_buffer(size, static_cast<vx_device*>(hdevice));
  if (buf->data() == nullptr) {
    delete buf;
    return -1;
  }

  *hbuffer = buf;
  return 0;
}
// Returns the buffer's host storage, or null for a null handle.
extern volatile void* vx_host_ptr(vx_buffer_h hbuffer) {
  auto buf = static_cast<vx_buffer*>(hbuffer);
  if (buf != nullptr)
    return buf->data();
  return nullptr;
}
// Destroys the buffer object. Returns -1 for a null handle, 0 otherwise.
extern int vx_buf_release(vx_buffer_h hbuffer) {
  auto buf = static_cast<vx_buffer*>(hbuffer);
  if (!buf)
    return -1;
  delete buf;
  return 0;
}
// Checks that [src_offset, src_offset + size) lies inside the buffer, then
// uploads that range to device address `dev_maddr`.
extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t src_offset) {
  if (!hbuffer || 0 == size)
    return -1;

  auto buf = static_cast<vx_buffer*>(hbuffer);
  const size_t end_offset = size + src_offset;
  if (end_offset > buf->size())
    return -1;

  return buf->device()->upload(buf->data(), dev_maddr, size, src_offset);
}
// Checks that [dest_offset, dest_offset + size) lies inside the buffer, then
// downloads from device address `dev_maddr` into that range.
extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t dest_offset) {
  if (!hbuffer || 0 == size)
    return -1;

  auto buf = static_cast<vx_buffer*>(hbuffer);
  const size_t end_offset = size + dest_offset;
  if (end_offset > buf->size())
    return -1;

  return buf->device()->download(buf->data(), dev_maddr, size, dest_offset);
}
// Forwards to vx_device::start. Returns -1 for a null handle.
extern int vx_start(vx_device_h hdevice) {
  auto dev = static_cast<vx_device*>(hdevice);
  return (dev != nullptr) ? dev->start() : -1;
}
// Forwards to vx_device::wait. Returns -1 for a null handle.
extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
  auto dev = static_cast<vx_device*>(hdevice);
  return (dev != nullptr) ? dev->wait(timeout) : -1;
}

2
sw/driver/simx/.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
obj_dir
libvortex.so

32
sw/driver/simx/Makefile Normal file
View File

@@ -0,0 +1,32 @@
CFLAGS += -std=c++11 -O3 -Wall -Wextra -pedantic -Wfatal-errors
#CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -pedantic -Wfatal-errors

CFLAGS += -I../../include -I../../../../simX/include -I../../../../runtime

CFLAGS += -fPIC

CFLAGS += -DUSE_SIMX

LDFLAGS += -shared -pthread

SRCS = vortex.cpp ../vx_utils.cpp ../../../simX/args.cpp ../../../simX/mem.cpp ../../../simX/core.cpp ../../../simX/instruction.cpp ../../../simX/enc.cpp ../../../simX/util.cpp

RTL_TOP = ../../../simX/cache_simX.v

RTL_INCLUDE = -I../../../old_rtl -I../../../old_rtl/interfaces -I../../../old_rtl/cache -I../../../old_rtl/shared_memory

THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')
VL_FLAGS += --threads $(THREADS)

VL_FLAGS += -Wno-UNOPTFLAT -Wno-WIDTH

PROJECT = libvortex.so

all: $(PROJECT)

# $(MAKE) rather than a bare `make`, so the sub-make uses the same make program
# and inherits the command-line flags and variables.
$(PROJECT): $(SRCS)
	verilator --exe --cc $(RTL_TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT)
	$(MAKE) -j -C obj_dir -f Vcache_simX.mk

clean:
	rm -rf $(PROJECT) obj_dir

.PHONY: all clean

318
sw/driver/simx/vortex.cpp Normal file
View File

@@ -0,0 +1,318 @@
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <iostream>
#include <thread>
#include <mutex>
#include <chrono>
#include <vortex.h>
#include <core.h>
#include <config.h>
#define PAGE_SIZE 4096
///////////////////////////////////////////////////////////////////////////////
// Rounds `size` up to the next multiple of the device cache line size.
static size_t align_size(size_t size) {
  const uint32_t line_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
  const size_t line_count = (size + line_size - 1) / line_size;
  return line_size * line_count;
}
///////////////////////////////////////////////////////////////////////////////
class vx_device;
class vx_buffer {
public:
vx_buffer(size_t size, vx_device* device)
: size_(size)
, device_(device) {
auto aligned_asize = align_size(size);
data_ = malloc(aligned_asize);
}
~vx_buffer() {
if (data_) {
free(data_);
}
}
void* data() const {
return data_;
}
size_t size() const {
return size_;
}
vx_device* device() const {
return device_;
}
private:
size_t size_;
vx_device* device_;
void* data_;
};
///////////////////////////////////////////////////////////////////////////////
// Device object of the simX backend. A background thread idles until start()
// is called, then runs a simulated core to completion. mutex_ protects the
// is_done_ / is_running_ flags shared with that thread.
class vx_device {
public:
  vx_device()
    : is_done_(false)
    , is_running_(false)
    , mem_allocation_(vx_dev_caps(VX_CAPS_ALLOC_BASE_ADDR))
    , thread_(thread_entry, this)
  {}

  ~vx_device() {
    {
      std::lock_guard<std::mutex> guard(mutex_);
      is_done_ = true;
    }
    thread_.join();
  }

  // Bump-allocates `size` bytes (cache line aligned) of device memory.
  // Returns -1 when the request does not fit below VX_CAPS_LOCAL_MEM_SIZE.
  int alloc_local_mem(size_t size, size_t* dev_maddr) {
    const size_t asize = align_size(size);
    if (asize < size)
      return -1; // rounding up wrapped around
    // vx_dev_caps returns int, so the 0xffffffff limit arrives as -1; go
    // through uint32_t so it is not sign-extended to SIZE_MAX.
    const size_t dev_mem_size = static_cast<uint32_t>(vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE));
    if (mem_allocation_ > dev_mem_size
     || asize > dev_mem_size - mem_allocation_)
      return -1;
    *dev_maddr = mem_allocation_;
    mem_allocation_ += asize;
    return 0;
  }

  // Writes the cache-line-aligned size starting at src + src_offset into
  // simulated RAM at dest_addr.
  int upload(void* src, size_t dest_addr, size_t size, size_t src_offset) {
    auto asize = align_size(size);
    if (dest_addr + asize > ram_.size())
      return -1;

    /*printf("VXDRV: upload %d bytes to 0x%x\n", size, dest_addr);
    for (int i = 0; i < size; i += 4) {
      printf("mem-write: 0x%x <- 0x%x\n", dest_addr + i, *(uint32_t*)((uint8_t*)src + src_offset + i));
    }*/

    ram_.write(dest_addr, asize, (uint8_t*)src + src_offset);
    return 0;
  }

  // Reads the cache-line-aligned size from simulated RAM at src_addr into
  // dest + dest_offset. `dest` is written to, hence non-const.
  int download(void* dest, size_t src_addr, size_t size, size_t dest_offset) {
    size_t asize = align_size(size);
    if (src_addr + asize > ram_.size())
      return -1;

    ram_.read(src_addr, asize, (uint8_t*)dest + dest_offset);

    /*printf("VXDRV: download %d bytes from 0x%x\n", size, src_addr);
    for (int i = 0; i < size; i += 4) {
      printf("mem-read: 0x%x -> 0x%x\n", src_addr + i, *(uint32_t*)((uint8_t*)dest + dest_offset + i));
    }*/

    return 0;
  }

  // Requests a run; the worker thread picks the flag up.
  int start() {
    std::lock_guard<std::mutex> guard(mutex_);
    is_running_ = true;
    return 0;
  }

  // Polls once per second until the run has finished or the timeout
  // (milliseconds, truncated to whole seconds) runs out. A negative timeout
  // waits without a deadline. Always returns 0.
  int wait(long long timeout) {
    auto timeout_sec = (timeout < 0) ? timeout : (timeout / 1000);
    for (;;) {
      bool is_running;
      {
        std::lock_guard<std::mutex> guard(mutex_);
        is_running = is_running_;
      }
      if (!is_running || 0 == timeout_sec--)
        break;
      std::this_thread::sleep_for(std::chrono::seconds(1));
    }
    return 0;
  }

private:

  // Builds a core attached to ram_ and steps it until it stops running.
  void run() {
    Harp::ArchDef arch("rv32i", NW, NT);
    Harp::WordDecoder dec(arch);
    Harp::MemoryUnit mu(PAGE_SIZE, arch.getWordSize(), true);
    Harp::Core core(arch, dec, mu);
    mu.attach(ram_, 0);

    while (core.running()) {
      core.step();
    }
    core.printStats();
  }

  // Worker loop: polls the flags and executes a run whenever one has been
  // requested, until shutdown is requested.
  void thread_proc() {
    std::cout << "Device ready..." << std::endl;

    for (;;) {
      bool is_done;
      bool is_running;
      {
        std::lock_guard<std::mutex> guard(mutex_);
        is_done = is_done_;
        is_running = is_running_;
      }

      if (is_done)
        break;

      if (is_running) {
        std::cout << "Device running..." << std::endl;

        this->run();

        {
          std::lock_guard<std::mutex> guard(mutex_);
          is_running_ = false;
        }

        std::cout << "Device ready..." << std::endl;
      }
    }

    std::cout << "Device shutdown..." << std::endl;
  }

  // thread entry point (identifiers with double underscores are reserved)
  static void thread_entry(vx_device* device) {
    device->thread_proc();
  }

  bool is_done_;
  bool is_running_;
  size_t mem_allocation_;
  Harp::RAM ram_;
  std::mutex mutex_;
  // Must remain the LAST member. Members are constructed in declaration
  // order and the worker locks mutex_ as soon as it starts, so the thread
  // has to be created after ram_ and mutex_.
  std::thread thread_;
};
///////////////////////////////////////////////////////////////////////////////
// Creates a simulated device and returns it through `hdevice`.
// Returns -1 for a null output pointer, 0 otherwise.
extern int vx_dev_open(vx_device_h* hdevice) {
  if (hdevice != nullptr) {
    *hdevice = new vx_device();
    return 0;
  }
  return -1;
}
// Destroys the device object. Returns -1 for a null handle, 0 otherwise.
extern int vx_dev_close(vx_device_h hdevice) {
  auto dev = static_cast<vx_device*>(hdevice);
  if (!dev)
    return -1;
  delete dev;
  return 0;
}
// Validates the arguments and forwards to vx_device::alloc_local_mem.
extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr) {
  const bool bad_args = (hdevice == nullptr) || (dev_maddr == nullptr) || (size == 0);
  if (bad_args)
    return -1;
  return static_cast<vx_device*>(hdevice)->alloc_local_mem(size, dev_maddr);
}
// Cache flushing is a no-op for simX; only the arguments are validated.
extern int vx_flush_caches(vx_device_h hdevice, size_t /*dev_maddr*/, size_t size) {
  const bool args_ok = (hdevice != nullptr) && (size != 0);
  // this functionality is not needed by simX
  return args_ok ? 0 : -1;
}
// Creates a host buffer of `size` bytes bound to the device and returns it
// through `hbuffer`. Returns -1 on invalid arguments or when the buffer
// storage could not be allocated.
extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer) {
  if (!hdevice || 0 == size || !hbuffer)
    return -1;

  auto buf = new vx_buffer(size, static_cast<vx_device*>(hdevice));
  if (buf->data() == nullptr) {
    delete buf;
    return -1;
  }

  *hbuffer = buf;
  return 0;
}
// Returns the buffer's host storage, or null for a null handle.
extern volatile void* vx_host_ptr(vx_buffer_h hbuffer) {
  auto buf = static_cast<vx_buffer*>(hbuffer);
  if (buf != nullptr)
    return buf->data();
  return nullptr;
}
// Destroys the buffer object. Returns -1 for a null handle, 0 otherwise.
extern int vx_buf_release(vx_buffer_h hbuffer) {
  auto buf = static_cast<vx_buffer*>(hbuffer);
  if (!buf)
    return -1;
  delete buf;
  return 0;
}
// Checks that [src_offset, src_offset + size) lies inside the buffer, then
// uploads that range to device address `dev_maddr`.
extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t src_offset) {
  if (!hbuffer || 0 == size)
    return -1;

  auto buf = static_cast<vx_buffer*>(hbuffer);
  const size_t end_offset = size + src_offset;
  if (end_offset > buf->size())
    return -1;

  return buf->device()->upload(buf->data(), dev_maddr, size, src_offset);
}
// Checks that [dest_offset, dest_offset + size) lies inside the buffer, then
// downloads from device address `dev_maddr` into that range.
extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t dest_offset) {
  if (!hbuffer || 0 == size)
    return -1;

  auto buf = static_cast<vx_buffer*>(hbuffer);
  const size_t end_offset = size + dest_offset;
  if (end_offset > buf->size())
    return -1;

  return buf->device()->download(buf->data(), dev_maddr, size, dest_offset);
}
// Forwards to vx_device::start. Returns -1 for a null handle.
extern int vx_start(vx_device_h hdevice) {
  auto dev = static_cast<vx_device*>(hdevice);
  return (dev != nullptr) ? dev->start() : -1;
}
// Forwards to vx_device::wait. Returns -1 for a null handle.
extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
  auto dev = static_cast<vx_device*>(hdevice);
  return (dev != nullptr) ? dev->wait(timeout) : -1;
}

20
sw/driver/stub/Makefile Normal file
View File

@@ -0,0 +1,20 @@
CXXFLAGS += -std=c++11 -O3 -Wall -Wextra -pedantic -Wfatal-errors
#CXXFLAGS += -std=c++11 -g -O0 -Wall -Wextra -pedantic -Wfatal-errors

CXXFLAGS += -I../include -I../../../runtime

CXXFLAGS += -fPIC

LDFLAGS += -shared -pthread

SRCS = vortex.cpp ../vx_utils.cpp

PROJECT = libvortex.so

all: $(PROJECT)

$(PROJECT): $(SRCS)
	$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@

clean:
	rm -rf $(PROJECT) obj_dir

# neither target produces a file of its own name
.PHONY: all clean

45
sw/driver/stub/vortex.cpp Normal file
View File

@@ -0,0 +1,45 @@
#include <vortex.h>
// Stub backend: there is no device to open, always fails with -1.
extern int vx_dev_open(vx_device_h* /*hdevice*/) {
return -1;
}
// Stub backend: always fails with -1.
extern int vx_dev_close(vx_device_h /*hdevice*/) {
return -1;
}
// Stub backend: always fails with -1; dev_maddr is not written.
extern int vx_alloc_dev_mem(vx_device_h /*hdevice*/, size_t /*size*/, size_t* /*dev_maddr*/) {
return -1;
}
// Stub backend: always fails with -1.
extern int vx_flush_caches(vx_device_h /*hdevice*/, size_t /*dev_maddr*/, size_t /*size*/) {
return -1;
}
// Stub backend: always fails with -1; hbuffer is not written.
extern int vx_alloc_shared_mem(vx_device_h /*hdevice*/, size_t /*size*/, vx_buffer_h* /*hbuffer*/) {
return -1;
}
// Stub backend: there are no buffers, always returns null.
extern volatile void* vx_host_ptr(vx_buffer_h /*hbuffer*/) {
return nullptr;
}
// Stub backend: always fails with -1.
extern int vx_buf_release(vx_buffer_h /*hbuffer*/) {
return -1;
}
// Stub backend: always fails with -1; nothing is copied.
extern int vx_copy_to_dev(vx_buffer_h /*hbuffer*/, size_t /*dev_maddr*/, size_t /*size*/, size_t /*src_offset*/) {
return -1;
}
// Stub backend: always fails with -1; nothing is copied.
extern int vx_copy_from_dev(vx_buffer_h /*hbuffer*/, size_t /*dev_maddr*/, size_t /*size*/, size_t /*dest_offset*/) {
return -1;
}
// Stub backend: always fails with -1.
extern int vx_start(vx_device_h /*hdevice*/) {
return -1;
}
// Stub backend: always fails with -1 without waiting.
extern int vx_ready_wait(vx_device_h /*hdevice*/, long long /*timeout*/) {
return -1;
}

View File

@@ -0,0 +1,67 @@
# Build/run script for the `basic` driver test.
#  - host side  : basic.cpp  -> ./basic   (links against the stub libvortex.so;
#                 the run-* targets select the real backend via LD_LIBRARY_PATH)
#  - device side: kernel.c   -> kernel.elf / kernel.bin / kernel.hex / kernel.dump
# NOTE(review): relative paths assume this file lives in sw/driver/tests/basic,
# next to sw/driver/{include,stub,opae,rtlsim,simx} -- confirm against the tree.

RISCV_TOOL_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops)
VX_RT_PATH ?= $(wildcard ../../../runtime)

VX_CC  = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
VX_CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
VX_DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
VX_CPY = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy

VX_NEWLIB = $(VX_RT_PATH)/newlib/newlib.c
VX_STR = $(VX_RT_PATH)/startup/vx_start.S
VX_INT = $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_IO = $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_API = $(VX_RT_PATH)/vx_api/vx_api.c
VX_FIO = $(VX_RT_PATH)/fileio/fileio.s

VX_CFLAGS = -march=rv32im -mabi=ilp32 -O3 -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld -ffreestanding -nostartfiles -Wl,--gc-sections

VX_SRCS = kernel.c

CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -I../../include

LDFLAGS +=

PROJECT = basic

SRCS = basic.cpp

all: $(PROJECT)

kernel.dump: kernel.elf
	$(VX_DMP) -D kernel.elf > kernel.dump

kernel.hex: kernel.elf
	$(VX_CPY) -O ihex kernel.elf kernel.hex

kernel.bin: kernel.elf
	$(VX_CPY) -O binary kernel.elf kernel.bin

# The device image is built from the kernel sources, not from the host sources.
kernel.elf: $(VX_SRCS)
	$(VX_CC) $(VX_CFLAGS) $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_SRCS) -I$(VX_RT_PATH) -o kernel.elf

$(PROJECT): $(SRCS)
	$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L../../stub -lvortex -o $@

run-fpga: $(PROJECT)
	LD_LIBRARY_PATH=../../opae:$(LD_LIBRARY_PATH) ./$(PROJECT)

run-ase: $(PROJECT)
	ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT)

run-rtlsim: $(PROJECT)
	LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT)

run-simx: $(PROJECT)
	LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT)

.depend: $(SRCS)
	$(CXX) $(CXXFLAGS) -MM $^ > .depend;

clean:
	rm -rf $(PROJECT) *.o .depend

.PHONY: all run-fpga run-ase run-rtlsim run-simx clean

ifneq ($(MAKECMDGOALS),clean)
    -include .depend
endif

BIN
sw/driver/tests/basic/basic Executable file

Binary file not shown.

233
sw/driver/tests/basic/basic.cpp Executable file
View File

@@ -0,0 +1,233 @@
#include <iostream>
#include <unistd.h>
#include <vortex.h>
// Test selector set by "-t <n>"; the default -1 selects every test.
int test = -1;

// Parses the command line.
//   -t <n>  : store atoi(<n>) into `test`
//   -h / -? : print usage and exit(0)
// Any other value reported by getopt terminates the process with exit(-1).
static void parse_args(int argc, char **argv) {
    for (int opt = getopt(argc, argv, "t:h?");
         opt != -1;
         opt = getopt(argc, argv, "t:h?")) {
        if (opt == 't') {
            test = atoi(optarg);
            continue;
        }
        if (opt == 'h' || opt == '?') {
            std::cout << "Test." << std::endl;
            std::cout << "Usage: [-t testno][-h: help]" << std::endl;
            exit(0);
        }
        exit(-1);
    }
}
// Produces the i-th test pattern derived from `value`: `value` shifted left by
// `i` bits, OR-ed with the low `i` bits of `value`.
//
// Precondition: 0 <= i < 64 (callers in this file pass at most 63).
//
// The mask is built from a 64-bit one: a plain `1 << i` is an `int` shift and
// is undefined once i reaches the width of int, which callers do exceed.
uint64_t shuffle(int i, uint64_t value) {
    const uint64_t low_mask = (uint64_t(1) << i) - 1;
    return (value << i) | (value & low_mask);
}
// Round-trips a generated pattern through device memory and checks it.
//
//   sbuf       : shared buffer filled with the pattern and uploaded
//   dbuf       : shared buffer that receives the data read back
//   address    : device address used for both the upload and the download
//   value      : seed handed to shuffle() for every word
//   num_blocks : number of 64-byte blocks (8 uint64_t words each) to transfer
//
// Returns 0 on success, 1 when the data read back differs from the pattern,
// or the non-zero code of the first failing vx_copy_* call.
int run_memcopy_test(vx_buffer_h sbuf,
                     vx_buffer_h dbuf,
                     uint32_t address,
                     uint64_t value,
                     int num_blocks) {
    const int num_words = 8 * num_blocks;
    const int num_bytes = 64 * num_blocks;
    int ret;
    int errors = 0;

    // write sbuf data
    auto src = (uint64_t*)vx_host_ptr(sbuf);
    for (int i = 0; i < num_words; ++i) {
        src[i] = shuffle(i, value);
    }

    // write buffer to local memory
    std::cout << "write buffer to local memory" << std::endl;
    ret = vx_copy_to_dev(sbuf, address, num_bytes, 0);
    if (ret != 0)
        return ret;

    // read buffer from local memory
    std::cout << "read buffer from local memory" << std::endl;
    ret = vx_copy_from_dev(dbuf, address, num_bytes, 0);
    if (ret != 0)
        return ret;

    // verify result
    std::cout << "verify result" << std::endl;
    auto dst = (uint64_t*)vx_host_ptr(dbuf);
    for (int i = 0; i < num_words; ++i) {
        auto curr = dst[i];
        auto ref = shuffle(i, value);
        if (curr != ref) {
            // `i` indexes 8-byte words, so the byte offset is sizeof(uint64_t) * i.
            // std::dec restores decimal output so the error count below is not
            // printed in hex.
            std::cout << "error @ " << std::hex << (address + sizeof(uint64_t) * i)
                      << ": actual " << curr << ", expected " << ref
                      << std::dec << std::endl;
            ++errors;
        }
    }

    if (errors != 0) {
        std::cout << "Found " << errors << " errors!" << std::endl;
        std::cout << "FAILED!" << std::endl;
        return 1;
    }
    return 0;
}
// Uploads a pattern and a kernel image, runs the kernel, and checks that the
// destination region holds the same pattern as the source region.
//
//   device  : device to run on
//   sbuf    : shared buffer used to upload the source pattern
//   dbuf    : shared buffer that receives the destination region
//   program : path of the kernel image passed to vx_upload_kernel_file()
//
// Returns 0 on success, 1 on a data mismatch, or the non-zero code of the
// first failing driver call.
int run_kernel_test(vx_device_h device,
                    vx_buffer_h sbuf,
                    vx_buffer_h dbuf,
                    const char* program) {
    int ret;
    int errors = 0;
    uint64_t seed = 0x0badf00d40ff40ff;
    int num_blocks = 4;
    unsigned src_dev_addr  = 0x10000000;
    unsigned dest_dev_addr = 0x20000000;
    const int num_words = 8 * num_blocks;   // 8 uint64_t words per 64-byte block
    const int num_bytes = 64 * num_blocks;

    // write sbuf data
    auto src = (uint64_t*)vx_host_ptr(sbuf);
    for (int i = 0; i < num_words; ++i) {
        src[i] = shuffle(i, seed);
    }

    // write buffer to local memory
    std::cout << "write buffer to local memory" << std::endl;
    ret = vx_copy_to_dev(sbuf, src_dev_addr, num_bytes, 0);
    if (ret != 0)
        return ret;

    // upload program
    std::cout << "upload program" << std::endl;
    ret = vx_upload_kernel_file(device, program);
    if (ret != 0) {
        return ret;
    }

    // start device
    std::cout << "start device" << std::endl;
    ret = vx_start(device);
    if (ret != 0) {
        return ret;
    }

    // wait for completion
    std::cout << "wait for completion" << std::endl;
    ret = vx_ready_wait(device, -1);
    if (ret != 0) {
        return ret;
    }

    // flush the caches
    std::cout << "flush the caches" << std::endl;
    ret = vx_flush_caches(device, dest_dev_addr, num_bytes);
    if (ret != 0) {
        return ret;
    }

    // read buffer from local memory
    std::cout << "read buffer from local memory" << std::endl;
    ret = vx_copy_from_dev(dbuf, dest_dev_addr, num_bytes, 0);
    if (ret != 0)
        return ret;

    // verify result
    std::cout << "verify result" << std::endl;
    auto dst = (uint64_t*)vx_host_ptr(dbuf);
    for (int i = 0; i < num_words; ++i) {
        auto curr = dst[i];
        auto ref = shuffle(i, seed);
        if (curr != ref) {
            // `i` indexes 8-byte words, so the byte offset is sizeof(uint64_t) * i.
            // std::dec restores decimal output so the error count below is not
            // printed in hex.
            std::cout << "error @ " << std::hex << (dest_dev_addr + sizeof(uint64_t) * i)
                      << ": actual " << curr << ", expected " << ref
                      << std::dec << std::endl;
            ++errors;
        }
    }

    if (errors != 0) {
        std::cout << "Found " << errors << " errors!" << std::endl;
        std::cout << "FAILED!" << std::endl;
        return 1;
    }
    return 0;
}
// Handles shared between main() and cleanup(); null means "not acquired".
vx_device_h device = nullptr;
vx_buffer_h sbuf = nullptr;
vx_buffer_h dbuf = nullptr;

// Releases the source buffer, the destination buffer and then the device,
// skipping any handle that is still null.
void cleanup() {
    if (sbuf != nullptr)
        vx_buf_release(sbuf);

    if (dbuf != nullptr)
        vx_buf_release(dbuf);

    if (device != nullptr)
        vx_dev_close(device);
}
// Test driver entry point: opens the device, allocates the two shared
// buffers, runs the tests selected by `test` (-1 = all, 0 = memcopy,
// 1 = kernel) and releases everything through cleanup().
// Returns 0 when every selected test passes, otherwise the first error code.
int main(int argc, char *argv[]) {
    int ret;

    // parse command arguments
    parse_args(argc, argv);

    // open device connection
    // The handle is stored in the file-scope `device`, not in a local, so that
    // cleanup() sees it and closes the device on every exit path.
    std::cout << "open device connection" << std::endl;
    ret = vx_dev_open(&device);
    if (ret != 0)
        return ret;

    // create source buffer
    std::cout << "create source buffer" << std::endl;
    ret = vx_alloc_shared_mem(device, 4096, &sbuf);
    if (ret != 0) {
        cleanup();
        return ret;
    }

    // create destination buffer
    std::cout << "create destination buffer" << std::endl;
    ret = vx_alloc_shared_mem(device, 4096, &dbuf);
    if (ret != 0) {
        cleanup();
        return ret;
    }

    // run tests
    if (0 == test || -1 == test) {
        std::cout << "run memcopy test" << std::endl;

        ret = run_memcopy_test(sbuf, dbuf, 0x10000000, 0x0badf00d00ff00ff, 1);
        if (ret != 0) {
            cleanup();
            return ret;
        }

        ret = run_memcopy_test(sbuf, dbuf, 0x20000000, 0x0badf00d40ff40ff, 8);
        if (ret != 0) {
            cleanup();
            return ret;
        }
    }

    if (1 == test || -1 == test) {
        std::cout << "run kernel test" << std::endl;

        ret = run_kernel_test(device, sbuf, dbuf, "kernel.bin");
        if (ret != 0) {
            cleanup();
            return ret;
        }
    }

    // cleanup
    std::cout << "cleanup" << std::endl;
    cleanup();

    std::cout << "Test PASSED" << std::endl;

    return 0;
}

BIN
sw/driver/tests/basic/kernel.bin Executable file

Binary file not shown.

View File

@@ -0,0 +1,9 @@
#include <stdint.h>
/* Copies 8 * 4 64-bit words from address 0x10000000 to address 0x20000000. */
void main() {
    const int64_t* src = (const int64_t*)0x10000000;
    int64_t* dst = (int64_t*)0x20000000;
    const int word_count = 8 * 4;

    for (int k = 0; k != word_count; ++k) {
        dst[k] = src[k];
    }
}

View File

@@ -0,0 +1,65 @@
# Build/run script for the `demo` driver test.
#  - host side  : demo.cpp   -> ./demo    (links against the stub libvortex.so;
#                 the run-* targets select the real backend via LD_LIBRARY_PATH)
#  - device side: kernel.c   -> kernel.elf / kernel.bin / kernel.hex / kernel.dump
# NOTE(review): relative paths assume this file lives in sw/driver/tests/demo,
# next to sw/driver/{include,stub,opae,rtlsim,simx} -- confirm against the tree.

RISCV_TOOL_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops)
VX_RT_PATH ?= $(wildcard ../../../runtime)

VX_CC  = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
VX_CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
VX_DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
VX_CPY = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy

VX_NEWLIB = $(VX_RT_PATH)/newlib/newlib.c
VX_STR = $(VX_RT_PATH)/startup/vx_start.S
VX_INT = $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_IO = $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_API = $(VX_RT_PATH)/vx_api/vx_api.c
VX_FIO = $(VX_RT_PATH)/fileio/fileio.s

VX_CFLAGS = -march=rv32im -mabi=ilp32 -O3 -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld -ffreestanding -nostartfiles -Wl,--gc-sections

VX_SRCS = kernel.c

CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -I../../include

PROJECT = demo

SRCS = demo.cpp

all: $(PROJECT)

kernel.dump: kernel.elf
	$(VX_DMP) -D kernel.elf > kernel.dump

kernel.hex: kernel.elf
	$(VX_CPY) -O ihex kernel.elf kernel.hex

kernel.bin: kernel.elf
	$(VX_CPY) -O binary kernel.elf kernel.bin

# The device image is built from the kernel sources, not from the host sources.
kernel.elf: $(VX_SRCS)
	$(VX_CC) $(VX_CFLAGS) $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_SRCS) -I$(VX_RT_PATH) -o kernel.elf

$(PROJECT): $(SRCS)
	$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L../../stub -lvortex -o $@

run-fpga: $(PROJECT)
	LD_LIBRARY_PATH=../../opae:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16

run-ase: $(PROJECT)
	ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16

run-rtlsim: $(PROJECT)
	LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16

run-simx: $(PROJECT)
	LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16

.depend: $(SRCS)
	$(CXX) $(CXXFLAGS) -MM $^ > .depend;

clean:
	rm -rf $(PROJECT) *.o *.dump .depend

.PHONY: all run-fpga run-ase run-rtlsim run-simx clean

ifneq ($(MAKECMDGOALS),clean)
    -include .depend
endif

View File

@@ -0,0 +1,15 @@
#ifndef _COMMON_H_
#define _COMMON_H_

/* uint32_t is used below; include its definition so this header does not
   depend on what the including file happened to include first. */
#include <stdint.h>

/* Device address of the kernel argument block. */
#define KERNEL_ARG_DEV_MEM_ADDR 0x7fffff00

/* Kernel argument block: six 32-bit fields. */
struct kernel_arg_t {
  uint32_t num_warps;    /* number of warps */
  uint32_t num_threads;  /* number of threads */
  uint32_t stride;       /* stride */
  uint32_t src0_ptr;     /* first source pointer, as a 32-bit value */
  uint32_t src1_ptr;     /* second source pointer, as a 32-bit value */
  uint32_t dst_ptr;      /* destination pointer, as a 32-bit value */
};

#endif

BIN
sw/driver/tests/demo/demo Executable file

Binary file not shown.

View File

@@ -0,0 +1,241 @@
#include <iostream>
#include <unistd.h>
#include <string.h>
#include <vortex.h>
#include "common.h"
// Kernel image to upload; overridden by "-f".
const char* program_file = "kernel.bin";

// Stride given by "-n"; 0xffffffff means "not set on the command line".
uint32_t data_stride = 0xffffffff;

// Prints the two-line usage banner to stdout.
static void show_usage() {
    std::cout << "Vortex Driver Test." << std::endl
              << "Usage: [-f: program] [-n stride] [-h: help]" << std::endl;
}
// Parses the command line.
//   -n <stride> : store atoi(<stride>) into data_stride
//   -f <file>   : store <file> into program_file
//   -h / -?     : print usage and exit(0)
// Any other value reported by getopt prints usage and exits with -1, as does
// a null program_file once parsing is finished.
static void parse_args(int argc, char **argv) {
    for (int opt = getopt(argc, argv, "n:f:h?");
         opt != -1;
         opt = getopt(argc, argv, "n:f:h?")) {
        if (opt == 'n') {
            data_stride = atoi(optarg);
        } else if (opt == 'f') {
            program_file = optarg;
        } else if (opt == 'h' || opt == '?') {
            show_usage();
            exit(0);
        } else {
            show_usage();
            exit(-1);
        }
    }

    if (program_file == nullptr) {
        show_usage();
        exit(-1);
    }
}
// Runs the already-uploaded kernel once and validates its output.
//
//   device     : device to start and wait on
//   buffer     : shared buffer that receives the destination data
//   kernel_arg : only dst_ptr is read, as the device address to flush/download
//   buf_size   : number of bytes to flush and download
//   num_points : number of int results to check; element i must equal i + i
//
// Returns 0 on success, 1 when any element mismatches, or the non-zero code of
// the first failing driver call.
int run_test(vx_device_h device,
             vx_buffer_h buffer,
             const kernel_arg_t& kernel_arg,
             uint32_t buf_size,
             uint32_t num_points) {
    // start device
    std::cout << "start device" << std::endl;
    int ret = vx_start(device);
    if (ret != 0)
        return ret;

    // wait for completion
    std::cout << "wait for completion" << std::endl;
    ret = vx_ready_wait(device, -1);
    if (ret != 0)
        return ret;

    // flush the destination buffer caches
    std::cout << "flush the destination buffer caches" << std::endl;
    ret = vx_flush_caches(device, kernel_arg.dst_ptr, buf_size);
    if (ret != 0)
        return ret;

    // download destination buffer
    std::cout << "download destination buffer" << std::endl;
    ret = vx_copy_from_dev(buffer, kernel_arg.dst_ptr, buf_size, 0);
    if (ret != 0)
        return ret;

    // verify result
    std::cout << "verify result" << std::endl;
    const int* results = (int*)vx_host_ptr(buffer);
    int mismatches = 0;
    for (uint32_t idx = 0; idx < num_points; ++idx) {
        const int expected = idx + idx;
        if (results[idx] != expected)
            ++mismatches;
    }

    if (mismatches == 0)
        return 0;

    std::cout << "Found " << mismatches << " errors!" << std::endl;
    std::cout << "FAILED!" << std::endl;
    return 1;
}
// Handles shared between main() and cleanup(); null means "not acquired".
vx_device_h device = nullptr;
vx_buffer_h buffer = nullptr;

// Releases the shared buffer and then the device, skipping null handles.
void cleanup() {
    if (buffer != nullptr)
        vx_buf_release(buffer);

    if (device != nullptr)
        vx_dev_close(device);
}
// Demo entry point: sizes the workload from the device capabilities, uploads
// the kernel image, the two source arrays and the kernel argument block, then
// runs and verifies the kernel twice.
// Returns 0 on success, otherwise the first non-zero error code.
int main(int argc, char *argv[]) {
    // Common error exit once the device is open: release resources, keep the code.
    auto fail = [](int code) {
        cleanup();
        return code;
    };

    size_t value;
    kernel_arg_t kernel_arg;

    // parse command arguments
    parse_args(argc, argv);

    const uint32_t block_size  = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
    const uint32_t max_cores   = vx_dev_caps(VX_CAPS_MAX_CORES);
    const uint32_t max_warps   = vx_dev_caps(VX_CAPS_MAX_WARPS);
    const uint32_t max_threads = vx_dev_caps(VX_CAPS_MAX_THREADS);

    // No "-n" given: default to one cache line worth of 32-bit elements.
    if (0xffffffff == data_stride) {
        data_stride = block_size / sizeof(uint32_t);
    }

    const uint32_t num_points = max_cores * max_warps * max_threads * data_stride;
    const uint32_t buf_size   = num_points * sizeof(uint32_t);

    std::cout << "number of workitems: " << num_points << std::endl;

    // open device connection
    std::cout << "open device connection" << std::endl;
    int ret = vx_dev_open(&device);
    if (ret != 0)
        return ret;

    // upload program
    std::cout << "upload program" << std::endl;
    ret = vx_upload_kernel_file(device, program_file);
    if (ret != 0)
        return fail(ret);

    // allocate device memory: src0, src1, dst (in that order)
    std::cout << "allocate device memory" << std::endl;
    uint32_t* const dev_slots[] = {
        &kernel_arg.src0_ptr, &kernel_arg.src1_ptr, &kernel_arg.dst_ptr
    };
    for (uint32_t* slot : dev_slots) {
        ret = vx_alloc_dev_mem(device, buf_size, &value);
        if (ret != 0)
            return fail(ret);
        *slot = value;
    }

    // allocate shared memory, large enough for either the data or the argument block
    std::cout << "allocate shared memory" << std::endl;
    uint32_t alloc_size = std::max<uint32_t>(buf_size, sizeof(kernel_arg_t));
    ret = vx_alloc_shared_mem(device, alloc_size, &buffer);
    if (ret != 0)
        return fail(ret);

    // populate source buffer values
    std::cout << "populate source buffer values" << std::endl;
    {
        auto host_data = (int*)vx_host_ptr(buffer);
        for (uint32_t i = 0; i < num_points; ++i) {
            host_data[i] = i;
        }
    }

    // upload source buffers (the same host data goes to both sources)
    std::cout << "upload source buffers" << std::endl;
    const uint32_t src_addrs[] = { kernel_arg.src0_ptr, kernel_arg.src1_ptr };
    for (uint32_t dev_addr : src_addrs) {
        ret = vx_copy_to_dev(buffer, dev_addr, buf_size, 0);
        if (ret != 0)
            return fail(ret);
    }

    // upload kernel argument
    std::cout << "upload kernel argument" << std::endl;
    {
        kernel_arg.num_warps   = max_warps;
        kernel_arg.num_threads = max_threads;
        kernel_arg.stride      = data_stride;

        auto host_data = (int*)vx_host_ptr(buffer);
        memcpy(host_data, &kernel_arg, sizeof(kernel_arg_t));
        ret = vx_copy_to_dev(buffer, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0);
        if (ret != 0)
            return fail(ret);
    }

    // run tests (two back-to-back runs)
    std::cout << "run tests" << std::endl;
    for (int pass = 0; pass < 2; ++pass) {
        ret = run_test(device, buffer, kernel_arg, buf_size, num_points);
        if (ret != 0)
            return fail(ret);
    }

    // cleanup
    std::cout << "cleanup" << std::endl;
    cleanup();

    std::cout << "PASSED!" << std::endl;

    return 0;
}

BIN
sw/driver/tests/demo/kernel.bin Executable file

Binary file not shown.

View File

@@ -0,0 +1,32 @@
#include <stdlib.h>
#include <stdio.h>
#include "intrinsics/vx_intrinsics.h"
#include "vx_api/vx_api.h"
#include "common.h"
/* Per-thread work item: adds `stride` consecutive ints of the two source
   arrays into the destination array, starting at
   ((warp * num_threads) + thread) * stride.
   `arg` points to a struct kernel_arg_t. */
void kernel_body(void* arg) {
    struct kernel_arg_t* args = (struct kernel_arg_t*)arg;

    int* src0 = (int*)args->src0_ptr;
    int* src1 = (int*)args->src1_ptr;
    int* dst  = (int*)args->dst_ptr;

    unsigned warp   = vx_warpNum();
    unsigned thread = vx_threadID();
    unsigned first  = ((warp * args->num_threads) + thread) * args->stride;

    for (unsigned k = 0; k < args->stride; ++k) {
        unsigned idx = first + k;
        dst[idx] = src0[idx] + src1[idx];
    }
}
/* Kernel entry point: reads the argument block at KERNEL_ARG_DEV_MEM_ADDR and
   passes its warp/thread counts, kernel_body and the block itself to
   vx_spawnWarps. */
void main() {
    struct kernel_arg_t* kargs = (struct kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;

    /* Debug dump of the argument block (disabled):
    printf("num_warps=%d\n", kargs->num_warps);
    printf("num_threads=%d\n", kargs->num_threads);
    printf("stride=%d\n", kargs->stride);
    printf("src0_ptr=0x%x\n", kargs->src0_ptr);
    printf("src1_ptr=0x%x\n", kargs->src1_ptr);
    printf("dst_ptr=0x%x\n", kargs->dst_ptr);*/

    vx_spawnWarps(kargs->num_warps, kargs->num_threads, kernel_body, kargs);
}

BIN
sw/driver/tests/demo/kernel.elf Executable file

Binary file not shown.

49274
sw/driver/tests/demo/run.log Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,603 @@
//
// Copyright (c) 2017, Intel Corporation
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// Neither the name of the Intel Corporation nor the names of its contributors
// may be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
// Read from the memory locations first and then write to the memory locations
`include "platform_if.vh"
`include "afu_json_info.vh"
module ccip_std_afu
(
// CCI-P Clocks and Resets
input logic pClk, // 400MHz - CCI-P clock domain. Primary interface clock
input logic pClkDiv2, // 200MHz - CCI-P clock domain.
input logic pClkDiv4, // 100MHz - CCI-P clock domain.
input logic uClk_usr, // User clock domain. Refer to clock programming guide ** Currently provides fixed 300MHz clock **
input logic uClk_usrDiv2, // User clock domain. Half the programmed frequency ** Currently provides fixed 150MHz clock **
input logic pck_cp2af_softReset, // CCI-P ACTIVE HIGH Soft Reset
input logic [1:0] pck_cp2af_pwrState, // CCI-P AFU Power State
input logic pck_cp2af_error, // CCI-P Protocol Error Detected
// Interface structures
input t_if_ccip_Rx pck_cp2af_sRx, // CCI-P Rx Port
output t_if_ccip_Tx pck_af2cp_sTx // CCI-P Tx Port
);
//
// Run the entire design at the standard CCI-P frequency (400 MHz).
//
logic clk;
assign clk = pClk;
logic reset;
assign reset = pck_cp2af_softReset;
logic [511:0] wr_data;
logic [511:0] rd_data;
logic get_write_addr;
logic do_update;
logic rd_end_of_list;
logic rd_needed;
logic wr_needed;
logic [15:0] cnt_list_length;
// =========================================================================
//
// Register requests.
//
// =========================================================================
//
// The incoming pck_cp2af_sRx and outgoing pck_af2cp_sTx must both be
// registered. Here we register pck_cp2af_sRx and assign it to sRx.
// We also assign pck_af2cp_sTx to sTx here but don't register it.
// The code below never uses combinational logic to write sTx.
//
t_if_ccip_Rx sRx;
always_ff @(posedge clk)
begin
sRx <= pck_cp2af_sRx;
end
t_if_ccip_Tx sTx;
assign pck_af2cp_sTx = sTx;
// =========================================================================
//
// CSR (MMIO) handling.
//
// =========================================================================
// The AFU ID is a unique ID for a given program. Here we generated
// one with the "uuidgen" program and stored it in the AFU's JSON file.
// ASE and synthesis setup scripts automatically invoke afu_json_mgr
// to extract the UUID into afu_json_info.vh.
logic [127:0] afu_id = `AFU_ACCEL_UUID;
//
// A valid AFU must implement a device feature list, starting at MMIO
// address 0. Every entry in the feature list begins with 5 64-bit
// words: a device feature header, two AFU UUID words and two reserved
// words.
//
// Is a CSR read request active this cycle?
logic is_csr_read;
assign is_csr_read = sRx.c0.mmioRdValid;
// Is a CSR write request active this cycle?
logic is_csr_write;
assign is_csr_write = sRx.c0.mmioWrValid;
// The MMIO request header is overlayed on the normal c0 memory read
// response data structure. Cast the c0Rx header to an MMIO request
// header.
t_ccip_c0_ReqMmioHdr mmio_req_hdr;
assign mmio_req_hdr = t_ccip_c0_ReqMmioHdr'(sRx.c0.hdr);
//
// Implement the device feature list by responding to MMIO reads.
//
always_ff @(posedge clk)
begin
if (reset)
begin
sTx.c2.mmioRdValid <= 1'b0;
end
else
begin
// Always respond with something for every read request
sTx.c2.mmioRdValid <= is_csr_read;
// The unique transaction ID matches responses to requests
sTx.c2.hdr.tid <= mmio_req_hdr.tid;
// Addresses are of 32-bit objects in MMIO space. Addresses
// of 64-bit objects are thus multiples of 2.
case (mmio_req_hdr.address)
0: // AFU DFH (device feature header)
begin
// Here we define a trivial feature list. In this
// example, our AFU is the only entry in this list.
sTx.c2.data <= t_ccip_mmioData'(0);
// Feature type is AFU
sTx.c2.data[63:60] <= 4'h1;
// End of list (last entry in list)
sTx.c2.data[40] <= 1'b1;
end
// AFU_ID_L
2: sTx.c2.data <= afu_id[63:0];
// AFU_ID_H
4: sTx.c2.data <= afu_id[127:64];
// DFH_RSVD0
6: sTx.c2.data <= t_ccip_mmioData'(0);
// DFH_RSVD1
8: sTx.c2.data <= t_ccip_mmioData'(0);
default: sTx.c2.data <= t_ccip_mmioData'(0);
endcase
end
end
//
// CSR write handling. Host software must tell the AFU the memory address
// to which it should be writing. The address is set by writing a CSR.
//
// We use MMIO address 0 to set the memory address. The read and
// write MMIO spaces are logically separate so we are free to use
// whatever we like. This may not be good practice for cleanly
// organizing the MMIO address space, but it is legal.
// True for a CSR write to MMIO address 0 while get_write_addr is still set,
// i.e. for the first such write after reset.
logic is_mem_addr_csr_write;
assign is_mem_addr_csr_write = get_write_addr && is_csr_write &&
(mmio_req_hdr.address == t_ccip_mmioAddr'(0));
// Memory address to which this AFU will write.
t_ccip_clAddr write_mem_addr;
// Capture the write address once: reset arms get_write_addr, and the first
// qualifying CSR write latches the data and disarms it. write_mem_addr itself
// has no reset value.
always_ff @(posedge clk)
begin
if (reset)
begin
get_write_addr <= 1'b1;
end
else if (is_mem_addr_csr_write)
begin
write_mem_addr <= t_ccip_clAddr'(sRx.c0.data);
get_write_addr <= 1'b0;
end
end
// We use MMIO address 0 to set the memory address for reading data.
// A CSR write to address 0 is treated as the read address once
// get_write_addr has been cleared.
logic is_mem_addr_csr_read;
assign is_mem_addr_csr_read = !get_write_addr && is_csr_write &&
                              (mmio_req_hdr.address == t_ccip_mmioAddr'(0));

// =========================================================================
//
//   Main AFU logic
//
// =========================================================================

//
// States in our simple example.
//
typedef enum logic [1:0]
{
    STATE_IDLE,
    STATE_READ,
    STATE_UPDATE,
    STATE_WRITE
}
t_state;

t_state state;

// Memory address from which this AFU will read.
logic start_read;
t_ccip_clAddr read_mem_addr;

//
// start_read is written by this process only (a variable assigned in an
// always_ff must not be assigned by any other process):
//   - set when the read address CSR is written,
//   - cleared in STATE_READ once the read has been queued (rd_needed), which
//     is the cycle in which the state machine moves on to STATE_UPDATE.
//
always_ff @(posedge clk)
begin
    if (reset)
    begin
        start_read <= 1'b0;
    end
    else if (is_mem_addr_csr_read)
    begin
        read_mem_addr <= t_ccip_clAddr'(sRx.c0.data);
        start_read <= 1'b1;
    end
    else if ((state == STATE_READ) && rd_needed)
    begin
        start_read <= 1'b0;
    end
end

//
// State machine: IDLE -> READ -> UPDATE -> WRITE -> (READ | IDLE).
// It only drives `state`. rd_end_of_list is not assigned here; it is driven
// by the process that compares cnt_list_length.
//
always_ff @(posedge clk)
begin
    if (reset)
    begin
        state <= STATE_IDLE;
    end
    else
    begin
        case (state)
          STATE_IDLE:
            begin
                // Traversal begins once the read address CSR has been written
                if (start_read)
                begin
                    state <= STATE_READ;
                    $display("AFU starting traversal at 0x%x", t_ccip_clAddr'(read_mem_addr));
                end
            end

          STATE_READ:
            begin
                // A read has been queued: move on to the update step
                if (rd_needed)
                begin
                    state <= STATE_UPDATE;
                    $display("AFU reading data and pointing to next read address...");
                end
            end

          STATE_UPDATE:
            begin
                // Update the read value to be written back
                if (do_update)
                begin
                    state <= STATE_WRITE;
                    $display("AFU performing comutations on the read values...");
                end
            end

          STATE_WRITE:
            begin
                // Done when the end of the list has been reached; otherwise
                // go back to reading once a write has been queued.
                if (rd_end_of_list)
                begin
                    state <= STATE_IDLE;
                    $display("AFU done...");
                end
                else
                begin
                    if (wr_needed)
                    begin
                        state <= STATE_READ;
                        $display("AFU reading again from read address...");
                    end
                end
            end
        endcase
    end
end
// =========================================================================
//
// Read logic.
//
// =========================================================================
//
// READ REQUEST
//
// Did a write response just arrive
logic addr_next_valid;
// Next read address
t_ccip_clAddr addr_next;
always_ff @(posedge clk)
begin
// Next read address is valid when we have got the write response back
// and channel is not full
//addr_next_valid <= sRx.c0TxAlmFull;
addr_next_valid <= sRx.c1.rspValid;
// Next address is current address plus address length
// Apurve
//addr_next <= addr_next + addr_size;
addr_next <= addr_next + 0;
// End of list reached if we have read 10 times
rd_end_of_list <= (cnt_list_length == 'h10);
end
//
// Since back pressure may prevent an immediate read request, we must
// record whether a read is needed and hold it until the request can
// be sent to the FIU.
//
// Address used for the next read request.
t_ccip_clAddr rd_addr;
// rd_needed records a pending read request; rd_addr is latched together with it.
always_ff @(posedge clk)
begin
if (reset)
begin
rd_needed <= 1'b0;
end
else
begin
// While a read is pending, keep it pending only if the c0 request
// channel is almost full this cycle; otherwise it is cleared (the
// request process emits it in the same cycle).
if (rd_needed)
begin
rd_needed <= sRx.c0TxAlmFull;
end
else
begin
// Need a read under two conditions:
// - Starting a new walk (start_read)
// - addr_next_valid is set and the end of the list has not been reached.
// NOTE(review): addr_next_valid is derived from sRx.c1.rspValid, i.e.
// a write response, not a read response.
rd_needed <= (start_read || (addr_next_valid && ! rd_end_of_list));
rd_addr <= (start_read ? read_mem_addr : addr_next);
end
end
end
//
// Emit read requests to the FIU.
//
// Read header defines the request to the FIU
// Combinational read request header, rebuilt from rd_addr.
t_cci_c0_ReqMemHdr rd_hdr;
always_comb
begin
// Start from an all-zero header, then fill in the fields used here
rd_hdr = t_cci_c0_ReqMemHdr'(0);
// Read request type
rd_hdr.req_type = eREQ_RDLINE_I;
// Request address.
// NOTE(review): the original comment says MPF virtual addressing is enabled;
// no MPF instance is visible in this module -- confirm.
rd_hdr.address = rd_addr;
// Let the FIU pick the channel
rd_hdr.vc_sel = eVC_VA;
// Read 4 lines per request
rd_hdr.cl_len = eCL_LEN_4;
end
// Send read requests to the FIU
// Drives the c0 (read request) channel and counts issued reads.
always_ff @(posedge clk)
begin
if (reset)
begin
sTx.c0.valid <= 1'b0;
cnt_list_length <= 0;
end
else
begin
// Generate a read request when needed and the FIU isn't full
sTx.c0.valid <= (rd_needed && ! sRx.c0TxAlmFull);
sTx.c0.hdr <= rd_hdr;
// Count every request that is actually issued; the counter is only
// cleared by reset.
if (rd_needed && ! sRx.c0TxAlmFull)
begin
cnt_list_length <= cnt_list_length + 1;
//$display(" Reading from VA 0x%x", clAddrToByteAddr(rd_addr));
$display("Incrementing read count...");
end
end
end
//
// READ RESPONSE HANDLING
//
//
// Receive data (read responses).
//
// Captures read data and produces the value to write back (rd_data + 1).
// do_update is set while in STATE_READ and cleared while in STATE_UPDATE.
always_ff @(posedge clk)
begin
if (reset)
begin
do_update <= 1'b0;
end
else
begin
if (state == STATE_READ)
begin
// NOTE(review): sRx.c0.data is sampled on every cycle spent in
// STATE_READ; sRx.c0.rspValid is not checked here -- confirm the data
// is guaranteed to be a read response at this point.
rd_data <= sRx.c0.data;
do_update <= 1'b1;
end
if (state == STATE_UPDATE)
begin
// Update the read data and put it in the write data to be written
wr_data <= rd_data + 1;
do_update <= 1'b0;
end
end
end
// =========================================================================
//
// Write logic.
//
// =========================================================================
//
// WRITE REQUEST
//
// Did a write response just arrive
// Registered copy of sRx.c0.rspValid (a read response arrived last cycle)
logic wr_addr_next_valid;
// Next write address
t_ccip_clAddr wr_addr_next;
// This process has no reset branch.
always_ff @(posedge clk)
begin
// Next write address is considered valid once the read response is back.
// (An earlier variant used the channel-full flag instead.)
//wr_addr_next_valid <= sRx.c1TxAlmFull;
wr_addr_next_valid <= sRx.c0.rspValid;
// Address advance is currently disabled: the register is re-assigned its own
// value (+ 0).
// NOTE(review): wr_addr_next is never loaded with an initial value in this
// process -- confirm where it is meant to be initialised.
// Apurve
//wr_addr_next <= wr_addr_next + addr_size;
wr_addr_next <= wr_addr_next + 0;
end
//
// Since back pressure may prevent an immediate write request, we must
// record whether a write is needed and hold it until the request can
// be sent to the FIU.
//
// Address used for the next write request.
t_ccip_clAddr wr_addr;
// wr_needed records a pending write request; wr_addr is latched together with it.
// NOTE(review): start_write is read below but no declaration or driver for it
// appears in this module -- confirm where it is supposed to come from.
always_ff @(posedge clk)
begin
if (reset)
begin
wr_needed <= 1'b0;
end
else
begin
// While a write is pending, keep it pending only if the c1 request
// channel is almost full this cycle; otherwise it is cleared (the
// request process emits it in the same cycle).
if (wr_needed)
begin
wr_needed <= sRx.c1TxAlmFull;
end
else
begin
// Need a write under two conditions:
// - start_write is set
// - wr_addr_next_valid is set, i.e. a read response arrived.
// (An earlier variant also required ! rd_end_of_list.)
//wr_needed <= (start_write || (wr_addr_next_valid && ! rd_end_of_list));
wr_needed <= (start_write || wr_addr_next_valid);
wr_addr <= (start_write ? write_mem_addr : wr_addr_next);
end
end
end
//
// Emit write requests to the FIU.
//
// Write header defines the request to the FIU
// Combinational write request header, rebuilt from wr_addr.
t_ccip_c1_ReqMemHdr wr_hdr;
always_comb
begin
    // Start from an all-zero header, then fill in the fields used here
    wr_hdr = t_cci_c1_ReqMemHdr'(0);
    // Write request type. A c1 header takes a write request encoding; the
    // read encoding eREQ_RDLINE_I belongs to the c0 (read) channel.
    wr_hdr.req_type = eREQ_WRLINE_I;
    // Request address
    wr_hdr.address = wr_addr;
    // Let the FIU pick the channel
    wr_hdr.vc_sel = eVC_VA;
    // Write 4 lines.
    // NOTE(review): only a single 512-bit wr_data line is sent per request
    // while cl_len asks for 4 lines -- confirm the intended burst length.
    wr_hdr.cl_len = eCL_LEN_4;
    // Start of packet
    wr_hdr.sop = 1'b1;
end
// Send write requests to the FIU
// Drives the c1 (write request) channel.
always_ff @(posedge clk)
begin
    if (reset)
    begin
        sTx.c1.valid <= 1'b0;
        //cnt_list_length <= 0;
    end
    else
    begin
        // Generate a write request when needed and the FIU isn't full
        sTx.c1.valid <= (wr_needed && ! sRx.c1TxAlmFull);
        sTx.c1.hdr <= wr_hdr;
        // Non-blocking, like every other register written in this process:
        // mixing a blocking assignment into clocked logic makes the result
        // depend on simulation ordering.
        sTx.c1.data <= t_ccip_clData'(wr_data);
        //if (wr_needed && ! sRx.c1TxAlmFull)
        //begin
        //    cnt_list_length <= cnt_list_length + 1;
        //    //$display(" Writing from VA 0x%x", clAddrToByteAddr(rd_addr));
        //    $display("Incrementing write count...");
        //end
    end
end
//
// WRITE RESPONSE HANDLING
//
// Apurve: Check if a signal is to be sent to read to start reading in case
// write response does not work
//
// Send data (write requests).
//
//always_ff @(posedge clk)
//begin
// if (state == STATE_WRITE)
// begin
// rd_data <= sRx.c0.data;
// end
// if (state == STATE_UPDATE)
// begin
// // Update the write data and put it in the write data to be written
// wr_data <= rd_data + 1;
// end
//end
endmodule

View File

@@ -0,0 +1,18 @@
{
"version": 1,
"afu-image": {
"power": 0,
"afu-top-interface":
{
"name": "ccip_std_afu"
},
"accelerator-clusters":
[
{
"name": "cci_hello",
"total-contexts": 1,
"accelerator-type-uuid": "c6aa954a-9b91-4a37-abc1-1d9f0709dcc3"
}
]
}
}

View File

@@ -0,0 +1,653 @@
//
// Copyright (c) 2017, Intel Corporation
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// Neither the name of the Intel Corporation nor the names of its contributors
// may be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
// Read from the memory locations first and then write to the memory locations
`include "platform_if.vh"
`include "afu_json_info.vh"
module ccip_std_afu
(
// CCI-P Clocks and Resets
input logic pClk, // 400MHz - CCI-P clock domain. Primary interface clock
input logic pClkDiv2, // 200MHz - CCI-P clock domain.
input logic pClkDiv4, // 100MHz - CCI-P clock domain.
input logic uClk_usr, // User clock domain. Refer to clock programming guide ** Currently provides fixed 300MHz clock **
input logic uClk_usrDiv2, // User clock domain. Half the programmed frequency ** Currently provides fixed 150MHz clock **
input logic pck_cp2af_softReset, // CCI-P ACTIVE HIGH Soft Reset
input logic [1:0] pck_cp2af_pwrState, // CCI-P AFU Power State
input logic pck_cp2af_error, // CCI-P Protocol Error Detected
// Interface structures
input t_if_ccip_Rx pck_cp2af_sRx, // CCI-P Rx Port
output t_if_ccip_Tx pck_af2cp_sTx // CCI-P Tx Port
);
//
// Run the entire design at the standard CCI-P frequency (400 MHz).
//
logic clk;
assign clk = pClk;
logic reset;
assign reset = pck_cp2af_softReset;
logic [511:0] wr_data;
logic [511:0] rd_data;
logic do_update;
logic start_read;
logic start_write;
logic wr_addr_next_valid;
logic addr_next_valid;
logic rd_end_of_list;
logic rd_needed;
logic wr_needed;
logic read_req;
logic write_req;
logic [15:0] cnt_list_length;
t_ccip_clAddr rd_addr;
t_ccip_clAddr wr_addr;
t_ccip_clAddr addr_next;
t_ccip_clAddr wr_addr_next;
// =========================================================================
//
// Register requests.
//
// =========================================================================
//
// The incoming pck_cp2af_sRx and outgoing pck_af2cp_sTx must both be
// registered. Here we register pck_cp2af_sRx and assign it to sRx.
// We also assign pck_af2cp_sTx to sTx here but don't register it.
// The code below never uses combinational logic to write sTx.
//
t_if_ccip_Rx sRx;
always_ff @(posedge clk)
begin
sRx <= pck_cp2af_sRx;
end
t_if_ccip_Tx sTx;
assign pck_af2cp_sTx = sTx;
// =========================================================================
//
// CSR (MMIO) handling.
//
// =========================================================================
// The AFU ID is a unique ID for a given program. Here we generated
// one with the "uuidgen" program and stored it in the AFU's JSON file.
// ASE and synthesis setup scripts automatically invoke afu_json_mgr
// to extract the UUID into afu_json_info.vh.
logic [127:0] afu_id = `AFU_ACCEL_UUID;
//
// A valid AFU must implement a device feature list, starting at MMIO
// address 0. Every entry in the feature list begins with 5 64-bit
// words: a device feature header, two AFU UUID words and two reserved
// words.
//
// Is a CSR read request active this cycle?
logic is_csr_read;
assign is_csr_read = sRx.c0.mmioRdValid;
// Is a CSR write request active this cycle?
logic is_csr_write;
assign is_csr_write = sRx.c0.mmioWrValid;
// The MMIO request header is overlayed on the normal c0 memory read
// response data structure. Cast the c0Rx header to an MMIO request
// header.
t_ccip_c0_ReqMmioHdr mmio_req_hdr;
assign mmio_req_hdr = t_ccip_c0_ReqMmioHdr'(sRx.c0.hdr);
//
// Implement the device feature list by responding to MMIO reads.
//
always_ff @(posedge clk)
begin
if (reset)
begin
sTx.c2.mmioRdValid <= 1'b0;
end
else
begin
// Always respond with something for every read request
sTx.c2.mmioRdValid <= is_csr_read;
// The unique transaction ID matches responses to requests
sTx.c2.hdr.tid <= mmio_req_hdr.tid;
// Addresses are of 32-bit objects in MMIO space. Addresses
// of 64-bit objects are thus multiples of 2.
case (mmio_req_hdr.address)
0: // AFU DFH (device feature header)
begin
// Here we define a trivial feature list. In this
// example, our AFU is the only entry in this list.
sTx.c2.data <= t_ccip_mmioData'(0);
// Feature type is AFU
sTx.c2.data[63:60] <= 4'h1;
// End of list (last entry in list)
sTx.c2.data[40] <= 1'b1;
end
// AFU_ID_L
2: sTx.c2.data <= afu_id[63:0];
// AFU_ID_H
4: sTx.c2.data <= afu_id[127:64];
// DFH_RSVD0
6: sTx.c2.data <= t_ccip_mmioData'(0);
// DFH_RSVD1
8: sTx.c2.data <= t_ccip_mmioData'(0);
// Updated by apurve to check fpgaReadMMIO
10: sTx.c2.data <= t_ccip_mmioData'(start_read);
default: sTx.c2.data <= t_ccip_mmioData'(0);
endcase
end
end
//
// CSR write handling. Host software must tell the AFU the memory address
// to which it should be writing. The address is set by writing a CSR.
//
// We use MMIO address 0 to set the memory address. The read and
// write MMIO spaces are logically separate so we are free to use
// whatever we like. This may not be good practice for cleanly
// organizing the MMIO address space, but it is legal.
logic is_mem_addr_csr_write;
assign is_mem_addr_csr_write = is_csr_write &&
(mmio_req_hdr.address == t_ccip_mmioAddr'(0));
// Memory address to which this AFU will write.
t_ccip_clAddr write_mem_addr;
always_ff @(posedge clk)
begin
if (reset)
begin
start_write <= 1'b0;
end
else if (is_mem_addr_csr_write)
begin
write_mem_addr <= t_ccip_clAddr'(sRx.c0.data);
start_write <= 1'b1;
//$display("Write mem address is 0x%x", t_ccip_clAddr'(write_mem_addr));
end
end
// We use MMIO byte offset 8 (32-bit word address 2, as decoded below) to set
// the memory address for reading data.
logic is_mem_addr_csr_read;
assign is_mem_addr_csr_read = is_csr_write &&
(mmio_req_hdr.address == t_ccip_mmioAddr'(2));
// Memory address from which this AFU will read.
t_ccip_clAddr read_mem_addr;
//logic start_traversal = 'b0;
//t_ccip_clAddr start_traversal_addr;
always_ff @(posedge clk)
begin
if (reset)
begin
start_read <= 1'b0;
end
else if (is_mem_addr_csr_read)
begin
read_mem_addr <= t_ccip_clAddr'(sRx.c0.data);
start_read <= 1'b1;
//$display("Read mem address is 0x%x", t_ccip_clAddr'(read_mem_addr));
end
end
// =========================================================================
//
// Main AFU logic
//
// =========================================================================
//
// States in our simple example.
//
//typedef enum logic [0:0]
typedef enum logic [1:0]
{
STATE_IDLE,
STATE_READ,
STATE_UPDATE,
STATE_WRITE
}
t_state;
t_state state;
//
// State machine
//
// Top-level sequencer for the AFU: IDLE -> READ -> UPDATE -> WRITE, then
// back to READ, or to IDLE once rd_end_of_list is set. Reset forces IDLE.
// NOTE(review): rd_end_of_list, wr_needed, start_write and write_req are
// also assigned by other always_ff blocks in this module. A variable
// written in an always_ff is not supposed to be written by any other
// process -- confirm the target simulator/synthesis flow accepts this.
always_ff @(posedge clk)
begin
if (reset)
begin
state <= STATE_IDLE;
rd_end_of_list <= 1'b0;
end
else
begin
case (state)
STATE_IDLE:
begin
// Traversal begins once start_read is set, which happens when the
// host writes the read-address CSR.
if (start_read)
begin
state <= STATE_READ;
$display("AFU starting traversal at 0x%x", t_ccip_clAddr'(read_mem_addr));
end
end
STATE_READ:
begin
// Debug trace of the signals that gate the READ -> UPDATE transition.
$display("AFU in READ...");
$display("do_update is %d...",do_update);
$display("addr_next_valid is %d...",addr_next_valid);
$display("rd_needed is %d...",rd_needed);
// Leave READ once no read is pending and read data has been latched
// (do_update is raised by the read-response block).
if (!rd_needed && do_update)
begin
state <= STATE_UPDATE;
$display("AFU moving to UPDATE...");
end
end
STATE_UPDATE:
begin
// Update the read value to be written back
$display("AFU in UPDATE...");
// do_update is cleared by the read-response block once wr_data has
// been computed; that is the cue to start the write.
if (!do_update)
begin
state <= STATE_WRITE;
wr_needed <= 1'b1;
$display("AFU moving to WRITE...");
end
end
STATE_WRITE:
begin
// Write the updated value to the address
// Point to new address after that
// if done then point to IDLE; else read new values
$display("AFU in WRITE...");
if (rd_end_of_list)
begin
state <= STATE_IDLE;
$display("AFU done...");
end
else if (!wr_needed)
begin
// The write is no longer pending: clear the write bookkeeping and
// go back to READ for the next iteration.
state <= STATE_READ;
$display("AFU moving to READ from WRITE...");
start_write <= 1'b0;
write_req <= 1'b0;
end
end
endcase
end
end
// =========================================================================
//
// Read logic.
//
// =========================================================================
//
// READ REQUEST
//
// Did a write response just arrive
// Next read address
// Tracks the address of the next read. Registers are only updated in
// cycles where a write response (sRx.c1.rspValid) is seen.
// NOTE(review): addr_next_valid and addr_next have no reset value here, and
// addr_next_valid / rd_end_of_list are also assigned by other always_ff
// blocks in this module -- confirm this is intended.
always_ff @(posedge clk)
begin
// Next read address is valid when we have got the write response back
if (sRx.c1.rspValid)
begin
addr_next_valid <= sRx.c1.rspValid;
//if (state == STATE_READ && !rd_needed)
//begin
// Apurve: Next address is current address plus address length
//addr_next <= addr_next + addr_size;
// NOTE(review): the increment is currently 0, so both arms of the
// conditional select the current rd_addr and the address never advances.
addr_next <= (addr_next_valid ? rd_addr + 0 : rd_addr);
// End of list reached if we have read 5 times
rd_end_of_list <= (cnt_list_length == 'h5);
//end
end
end
//
// Since back pressure may prevent an immediate read request, we must
// record whether a read is needed and hold it until the request can
// be sent to the FIU.
//
always_ff @(posedge clk)
begin
if (reset)
begin
rd_needed <= 1'b0;
end
else
begin
// Once a read has been flagged, keep it flagged until the read
// response arrives on c0. This simple AFU has only one read in
// flight at a time since it is walking a pointer chain.
if (rd_needed)
begin
//rd_needed <= sRx.c0TxAlmFull;
//rd_needed <= (!sRx.c0TxAlmFull && !sRx.c0.rspValid);
rd_needed <= !sRx.c0.rspValid;
end
else if (state == STATE_READ)
begin
// Need a read under two conditions:
// - Starting a new walk (start_read)
// - A write response has arrived (addr_next_valid), the read channel
// is not almost full, and the end of the list has not been reached.
rd_needed <= (start_read || (!sRx.c0TxAlmFull && (addr_next_valid && ! rd_end_of_list)));
rd_addr <= (start_read ? read_mem_addr : addr_next);
//$display("rd_addr is 0x%x", t_ccip_clAddr'(rd_addr));
//$display("read mem addr is 0x%x", t_ccip_clAddr'(read_mem_addr));
//$display("start read is %d", start_read);
end
end
end
//
// Emit read requests to the FIU.
//
// Read header defines the request to the FIU
t_ccip_c0_ReqMemHdr rd_hdr;
always_comb
begin
    // Start from an all-zero request header, then fill in the only field
    // this AFU drives explicitly: the cache-line address to read.
    // Request type, channel select and line count are left at zero (the
    // original kept their explicit assignments commented out:
    // req_type = 4'h0, vc_sel = 2'h0, cl_len = 2'h0).
    rd_hdr = '0;
    rd_hdr.address = rd_addr;
end
// Send read requests to the FIU
always_ff @(posedge clk)
begin
if (reset)
begin
sTx.c0.valid <= 1'b0;
cnt_list_length <= 0;
read_req <= 1'b0;
end
else
begin
// Generate a read request when needed and the FIU isn't full
if (state == STATE_READ)
begin
sTx.c0.valid <= (rd_needed && !sRx.c0TxAlmFull && !read_req);
if (rd_needed && !sRx.c0TxAlmFull && !read_req)
begin
sTx.c0.hdr <= rd_hdr;
cnt_list_length <= cnt_list_length + 1;
read_req <= 1'b1;
$display("Incrementing read count...%d",cnt_list_length);
$display("Read address is 0x%x...",rd_hdr.address);
addr_next_valid <= 1'b0;
// Apurve: Add something to stop read once this section has been accessed
//rd_needed <= 1'b0;
end
end
end
end
//
// READ RESPONSE HANDLING
//
//
// Receive data (read responses).
//
// Read-response handling.
//  - When a read response arrives and no update is pending, latch the line
//    into rd_data and raise do_update.
//  - While in STATE_UPDATE with do_update set, compute wr_data = rd_data + 2,
//    then clear do_update, read_req and start_read so the next read uses the
//    updated address.
// The debug prints show the values being latched this cycle. Printing the
// registers themselves would show the previous contents, because the
// non-blocking assignments have not taken effect yet.
// NOTE(review): read_req and start_read are also assigned by other
// always_ff blocks in this module -- confirm the tool flow accepts this.
always_ff @(posedge clk)
begin
    if (reset)
    begin
        do_update <= 1'b0;
    end
    else
    begin
        if (!do_update && sRx.c0.rspValid)
        begin
            rd_data <= sRx.c0.data;
            do_update <= 1'b1;
            $display("rd data is %d...", sRx.c0.data);
        end
        if ((state == STATE_UPDATE) && (do_update == 1'b1))
        begin
            // Update the read data and put it in the write data to be written
            wr_data <= rd_data + 2;
            do_update <= 1'b0;
            read_req <= 1'b0;
            $display("write data is %d...", rd_data + 2);
            // First read done. Next reads should be from the updated addresses
            start_read <= 1'b0;
        end
    end
end
// =========================================================================
//
// Write logic.
//
// =========================================================================
//
// WRITE REQUEST
//
// Did a write response just arrive
// Next write address
always_ff @(posedge clk)
begin
if (sRx.c0.rspValid)
begin
// Next write address is valid when we have got the read response back
wr_addr_next_valid <= sRx.c0.rspValid;
//wr_addr_next_valid <= (!start_write && sRx.c0.rspValid);
//if (state == STATE_WRITE && !wr_needed)
//begin
// Apurve: Next address is current address plus address length
//wr_addr_next <= wr_addr + 0;
wr_addr_next <= (wr_addr_next_valid ? wr_addr + 0 : wr_addr);
//end
end
end
//
// Since back pressure may prevent an immediate write request, we must
// record whether a write is needed and hold it until the request can
// be sent to the FIU.
//
always_ff @(posedge clk)
begin
if (reset)
begin
wr_needed <= 1'b0;
end
else
begin
// Once a write has been flagged, keep it flagged until the write
// response arrives on c1. This simple AFU has only one write in
// flight at a time since it is walking a pointer chain.
if (wr_needed)
begin
//wr_needed <= sRx.c1TxAlmFull;
//wr_needed <= (!sRx.c1TxAlmFull && !sRx.c1.rspValid);
wr_needed <= !sRx.c1.rspValid;
end
else
begin
// Need a write under two conditions:
// - Starting a new walk (start_write)
// - A read response has arrived (wr_addr_next_valid) and the write
// channel is not almost full.
wr_needed <= (start_write || (!sRx.c1TxAlmFull && wr_addr_next_valid));
wr_addr <= (start_write ? write_mem_addr : wr_addr_next);
//$display("Write mem address later is 0x%x", t_ccip_clAddr'(write_mem_addr));
end
end
end
//
// Emit write requests to the FIU.
//
// Write header defines the request to the FIU
t_ccip_c1_ReqMemHdr wr_hdr;
always_comb
begin
    // Zero the whole write request header first. req_type, vc_sel and
    // cl_len therefore stay at zero (the original left their explicit
    // assignments commented out).
    wr_hdr = '0;
    // Start of packet is true (single line write)
    wr_hdr.sop = 1'b1;
    // Cache-line address the write targets
    wr_hdr.address = wr_addr;
end
// Send write requests to the FIU
always_ff @(posedge clk)
begin
if (reset)
begin
sTx.c1.valid <= 1'b0;
write_req <= 1'b0;
end
else
begin
// Generate a write request when needed and the FIU isn't full
if (state == STATE_WRITE)
begin
sTx.c1.valid <= (wr_needed && !sRx.c1TxAlmFull && !write_req);
if (wr_needed && !sRx.c1TxAlmFull && !write_req)
begin
sTx.c1.hdr <= wr_hdr;
sTx.c1.data <= t_ccip_clData'(wr_data);
write_req <= 1'b1;
wr_addr_next_valid <= 1'b0;
$display("Write address is 0x%x...", wr_hdr.address);
end
end
end
end
//
// WRITE RESPONSE HANDLING
//
// Apurve: Check if a signal is to be sent to read to start reading in case
// write response does not work
//
// Send data (write requests).
//
//always_ff @(posedge clk)
//begin
// if (state == STATE_WRITE)
// begin
// rd_data <= sRx.c0.data;
// end
// if (state == STATE_UPDATE)
// begin
// // Update the write data and put it in the write data to be written
// wr_data <= rd_data + 1;
// end
//end
endmodule

View File

@@ -0,0 +1,621 @@
//
// Copyright (c) 2017, Intel Corporation
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// Neither the name of the Intel Corporation nor the names of its contributors
// may be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
// Read from the memory locations first and then write to the memory locations
`include "platform_if.vh"
`include "afu_json_info.vh"
module ccip_std_afu
(
// CCI-P Clocks and Resets
input logic pClk, // 400MHz - CCI-P clock domain. Primary interface clock
input logic pClkDiv2, // 200MHz - CCI-P clock domain.
input logic pClkDiv4, // 100MHz - CCI-P clock domain.
input logic uClk_usr, // User clock domain. Refer to clock programming guide ** Currently provides fixed 300MHz clock **
input logic uClk_usrDiv2, // User clock domain. Half the programmed frequency ** Currently provides fixed 150MHz clock **
input logic pck_cp2af_softReset, // CCI-P ACTIVE HIGH Soft Reset
input logic [1:0] pck_cp2af_pwrState, // CCI-P AFU Power State
input logic pck_cp2af_error, // CCI-P Protocol Error Detected
// Interface structures
input t_if_ccip_Rx pck_cp2af_sRx, // CCI-P Rx Port
output t_if_ccip_Tx pck_af2cp_sTx // CCI-P Tx Port
);
//
// Run the entire design at the standard CCI-P frequency (400 MHz).
//
logic clk;
assign clk = pClk;
logic reset;
assign reset = pck_cp2af_softReset;
logic [511:0] wr_data;
logic [511:0] rd_data;
logic do_update;
logic start_read;
logic start_write;
logic wr_addr_next_valid;
logic addr_next_valid;
logic rd_end_of_list;
logic rd_needed;
logic wr_needed;
logic [15:0] cnt_list_length;
t_ccip_clAddr rd_addr;
t_ccip_clAddr wr_addr;
t_ccip_clAddr addr_next;
t_ccip_clAddr wr_addr_next;
// =========================================================================
//
// Register requests.
//
// =========================================================================
//
// The incoming pck_cp2af_sRx and outgoing pck_af2cp_sTx must both be
// registered. Here we register pck_cp2af_sRx and assign it to sRx.
// We also assign pck_af2cp_sTx to sTx here but don't register it.
// The code below never uses combinational logic to write sTx.
//
t_if_ccip_Rx sRx;
always_ff @(posedge clk)
begin
sRx <= pck_cp2af_sRx;
end
t_if_ccip_Tx sTx;
assign pck_af2cp_sTx = sTx;
// =========================================================================
//
// CSR (MMIO) handling.
//
// =========================================================================
// The AFU ID is a unique ID for a given program. Here we generated
// one with the "uuidgen" program and stored it in the AFU's JSON file.
// ASE and synthesis setup scripts automatically invoke afu_json_mgr
// to extract the UUID into afu_json_info.vh.
logic [127:0] afu_id = `AFU_ACCEL_UUID;
//
// A valid AFU must implement a device feature list, starting at MMIO
// address 0. Every entry in the feature list begins with 5 64-bit
// words: a device feature header, two AFU UUID words and two reserved
// words.
//
// Is a CSR read request active this cycle?
logic is_csr_read;
assign is_csr_read = sRx.c0.mmioRdValid;
// Is a CSR write request active this cycle?
logic is_csr_write;
assign is_csr_write = sRx.c0.mmioWrValid;
// The MMIO request header is overlayed on the normal c0 memory read
// response data structure. Cast the c0Rx header to an MMIO request
// header.
t_ccip_c0_ReqMmioHdr mmio_req_hdr;
assign mmio_req_hdr = t_ccip_c0_ReqMmioHdr'(sRx.c0.hdr);
//
// Implement the device feature list by responding to MMIO reads.
//
always_ff @(posedge clk)
begin
if (reset)
begin
sTx.c2.mmioRdValid <= 1'b0;
end
else
begin
// Always respond with something for every read request
sTx.c2.mmioRdValid <= is_csr_read;
// The unique transaction ID matches responses to requests
sTx.c2.hdr.tid <= mmio_req_hdr.tid;
// Addresses are of 32-bit objects in MMIO space. Addresses
// of 64-bit objects are thus multiples of 2.
case (mmio_req_hdr.address)
0: // AFU DFH (device feature header)
begin
// Here we define a trivial feature list. In this
// example, our AFU is the only entry in this list.
sTx.c2.data <= t_ccip_mmioData'(0);
// Feature type is AFU
sTx.c2.data[63:60] <= 4'h1;
// End of list (last entry in list)
sTx.c2.data[40] <= 1'b1;
end
// AFU_ID_L
2: sTx.c2.data <= afu_id[63:0];
// AFU_ID_H
4: sTx.c2.data <= afu_id[127:64];
// DFH_RSVD0
6: sTx.c2.data <= t_ccip_mmioData'(0);
// DFH_RSVD1
8: sTx.c2.data <= t_ccip_mmioData'(0);
// Updated by apurve to check fpgaReadMMIO
10: sTx.c2.data <= t_ccip_mmioData'(start_read);
default: sTx.c2.data <= t_ccip_mmioData'(0);
endcase
end
end
//
// CSR write handling. Host software must tell the AFU the memory address
// to which it should be writing. The address is set by writing a CSR.
//
// We use MMIO address 0 to set the memory address. The read and
// write MMIO spaces are logically separate so we are free to use
// whatever we like. This may not be good practice for cleanly
// organizing the MMIO address space, but it is legal.
logic is_mem_addr_csr_write;
assign is_mem_addr_csr_write = is_csr_write &&
(mmio_req_hdr.address == t_ccip_mmioAddr'(0));
// Memory address to which this AFU will write.
t_ccip_clAddr write_mem_addr;
always_ff @(posedge clk)
begin
if (reset)
begin
start_write <= 1'b0;
end
else if (is_mem_addr_csr_write)
begin
write_mem_addr <= t_ccip_clAddr'(sRx.c0.data);
start_write <= 1'b1;
//$display("Write mem address is 0x%x", t_ccip_clAddr'(write_mem_addr));
end
end
// We use MMIO byte offset 8 (32-bit word address 2, as decoded below) to set
// the memory address for reading data.
logic is_mem_addr_csr_read;
assign is_mem_addr_csr_read = is_csr_write &&
(mmio_req_hdr.address == t_ccip_mmioAddr'(2));
// Memory address from which this AFU will read.
t_ccip_clAddr read_mem_addr;
//logic start_traversal = 'b0;
//t_ccip_clAddr start_traversal_addr;
always_ff @(posedge clk)
begin
if (reset)
begin
start_read <= 1'b0;
end
else if (is_mem_addr_csr_read)
begin
read_mem_addr <= t_ccip_clAddr'(sRx.c0.data);
start_read <= 1'b1;
//$display("Read mem address is 0x%x", t_ccip_clAddr'(read_mem_addr));
end
end
// =========================================================================
//
// Main AFU logic
//
// =========================================================================
//
// States in our simple example.
//
//typedef enum logic [0:0]
typedef enum logic [1:0]
{
STATE_IDLE,
STATE_READ,
STATE_UPDATE,
STATE_WRITE
}
t_state;
t_state state;
//
// State machine
//
// Top-level sequencer for the AFU: IDLE -> READ -> UPDATE -> WRITE, then
// back to READ, or to IDLE once rd_end_of_list is set. Reset forces IDLE.
// NOTE(review): rd_end_of_list, wr_needed and start_write are also assigned
// by other always_ff blocks in this module. A variable written in an
// always_ff is not supposed to be written by any other process -- confirm
// the target simulator/synthesis flow accepts this.
always_ff @(posedge clk)
begin
if (reset)
begin
state <= STATE_IDLE;
rd_end_of_list <= 1'b0;
end
else
begin
case (state)
STATE_IDLE:
begin
// Traversal begins once start_read is set, which happens when the
// host writes the read-address CSR.
if (start_read)
begin
state <= STATE_READ;
$display("AFU starting traversal at 0x%x", t_ccip_clAddr'(read_mem_addr));
end
end
STATE_READ:
begin
$display("AFU in READ...");
// Leave READ once no read is pending and read data has been latched
// (do_update is raised by the read-response block).
if (!rd_needed && do_update)
begin
state <= STATE_UPDATE;
$display("AFU moving to UPDATE...");
end
end
STATE_UPDATE:
begin
// Update the read value to be written back
$display("AFU in UPDATE...");
// do_update is cleared by the read-response block while in this state;
// that is the cue to start the write.
if (!do_update)
begin
state <= STATE_WRITE;
wr_needed <= 1'b1;
$display("AFU moving to WRITE...");
end
end
STATE_WRITE:
begin
// Write the updated value to the address
// Point to new address after that
// if done then point to IDLE; else read new values
$display("AFU in WRITE...");
if (rd_end_of_list)
begin
state <= STATE_IDLE;
$display("AFU done...");
end
else if (!wr_needed)
begin
// The write is no longer pending: clear start_write and go back to
// READ for the next iteration.
state <= STATE_READ;
$display("AFU moving to READ from WRITE...");
start_write <= 1'b0;
end
end
endcase
end
end
// =========================================================================
//
// Read logic.
//
// =========================================================================
//
// READ REQUEST
//
// Did a write response just arrive
// Next read address
// Tracks the next read address; all three registers are rewritten every
// clock cycle and have no reset branch in this block.
// NOTE(review): rd_end_of_list is also assigned (on reset) by the state
// machine always_ff block -- confirm the tool flow accepts two writers.
always_ff @(posedge clk)
begin
// Next read address is valid when we have got the write response back
addr_next_valid <= sRx.c1.rspValid;
// Apurve: Next address is current address plus address length
//addr_next <= addr_next + addr_size;
// NOTE(review): the increment is currently 0, so the next address is
// always the current rd_addr and the walk never advances.
addr_next <= rd_addr + 0;
// End of list reached if we have read 5 times
rd_end_of_list <= (cnt_list_length == 'h5);
end
//
// Since back pressure may prevent an immediate read request, we must
// record whether a read is needed and hold it until the request can
// be sent to the FIU.
//
always_ff @(posedge clk)
begin
if (reset)
begin
rd_needed <= 1'b0;
end
else
begin
// If reads are allowed this cycle then we can safely clear
// any previously requested reads. This simple AFU has only
// one read in flight at a time since it is walking a pointer
// chain.
if (rd_needed)
begin
rd_needed <= sRx.c0TxAlmFull;
end
else
begin
// Need a read under two conditions:
// - Starting a new walk (start_read)
// - A write response has arrived (addr_next_valid), the read channel
// is not almost full, and the end of the list has not been reached.
rd_needed <= (start_read || (!sRx.c0TxAlmFull && (addr_next_valid && ! rd_end_of_list)));
rd_addr <= (start_read ? read_mem_addr : addr_next);
//$display("rd_addr is 0x%x", t_ccip_clAddr'(rd_addr));
//$display("read mem addr is 0x%x", t_ccip_clAddr'(read_mem_addr));
//$display("start read is %d", start_read);
end
end
end
//
// Emit read requests to the FIU.
//
// Read header defines the request to the FIU
t_ccip_c0_ReqMemHdr rd_hdr;
always_comb
begin
    // Start from an all-zero request header, then fill in the only field
    // this AFU drives explicitly: the cache-line address to read.
    // Request type, channel select and line count are left at zero (the
    // original kept their explicit assignments commented out:
    // req_type = 4'h0, vc_sel = 2'h0, cl_len = 2'h0).
    rd_hdr = '0;
    rd_hdr.address = rd_addr;
end
// Send read requests to the FIU
always_ff @(posedge clk)
begin
if (reset)
begin
sTx.c0.valid <= 1'b0;
cnt_list_length <= 0;
end
else
begin
// Generate a read request when needed and the FIU isn't full
if (state == STATE_READ)
begin
sTx.c0.valid <= (rd_needed && !sRx.c0TxAlmFull);
if (rd_needed && !sRx.c0TxAlmFull)
begin
sTx.c0.hdr <= rd_hdr;
cnt_list_length <= cnt_list_length + 1;
$display("Incrementing read count...%d",cnt_list_length);
$display("Read address is 0x%x...",rd_hdr.address);
// Apurve: Add something to stop read once this section has been accessed
end
end
end
end
//
// READ RESPONSE HANDLING
//
//
// Receive data (read responses).
//
// Read-response handling.
//  - Whenever a read response arrives, latch the line into rd_data and
//    raise do_update.
//  - While in STATE_UPDATE, compute wr_data = rd_data + 2, then clear
//    do_update and start_read so the next read uses the updated address.
//    This assignment to do_update comes last, so it wins if a response
//    arrives in the same cycle.
// The debug print shows the value being latched this cycle. Printing
// wr_data itself would show the previous contents, because the
// non-blocking assignment has not taken effect yet.
// NOTE(review): start_read is also assigned by the read-address CSR
// always_ff block -- confirm the tool flow accepts two writers.
always_ff @(posedge clk)
begin
    if (reset)
    begin
        do_update <= 1'b0;
    end
    else
    begin
        if (sRx.c0.rspValid)
        begin
            rd_data <= sRx.c0.data;
            do_update <= 1'b1;
            //$display("rd data is %d...", sRx.c0.data);
        end
        if (state == STATE_UPDATE)
        begin
            // Update the read data and put it in the write data to be written
            wr_data <= rd_data + 2;
            do_update <= 1'b0;
            $display("write data is %d...", rd_data + 2);
            // First read done. Next reads should be from the updated addresses
            start_read <= 1'b0;
        end
    end
end
// =========================================================================
//
// Write logic.
//
// =========================================================================
//
// WRITE REQUEST
//
// Did a write response just arrive
// Next write address
always_ff @(posedge clk)
begin
// Next write address is valid when we have got the read response back
wr_addr_next_valid <= sRx.c0.rspValid;
// Apurve: Next address is current address plus address length
wr_addr_next <= wr_addr + 0;
end
//
// Since back pressure may prevent an immediate write request, we must
// record whether a write is needed and hold it until the request can
// be sent to the FIU.
//
always_ff @(posedge clk)
begin
if (reset)
begin
wr_needed <= 1'b0;
end
else
begin
// If writes are allowed this cycle then we can safely clear
// any previously requested writes. This simple AFU has only
// one write in flight at a time since it is walking a pointer
// chain.
if (wr_needed)
begin
wr_needed <= sRx.c1TxAlmFull;
end
else
begin
// Need a write under two conditions:
// - Starting a new walk (start_write)
// - A read response has arrived (wr_addr_next_valid) and the write
// channel is not almost full.
wr_needed <= (start_write || (!sRx.c1TxAlmFull && wr_addr_next_valid));
wr_addr <= (start_write ? write_mem_addr : wr_addr_next);
//$display("Write mem address later is 0x%x", t_ccip_clAddr'(write_mem_addr));
end
end
end
//
// Emit write requests to the FIU.
//
// Write header defines the request to the FIU
t_ccip_c1_ReqMemHdr wr_hdr;
always_comb
begin
    // Zero the whole write request header first. req_type, vc_sel and
    // cl_len therefore stay at zero (the original left their explicit
    // assignments commented out).
    wr_hdr = '0;
    // Start of packet is true (single line write)
    wr_hdr.sop = 1'b1;
    // Cache-line address the write targets
    wr_hdr.address = wr_addr;
end
// Send write requests to the FIU
always_ff @(posedge clk)
begin
if (reset)
begin
sTx.c1.valid <= 1'b0;
end
else
begin
// Generate a write request when needed and the FIU isn't full
if (state == STATE_WRITE)
begin
sTx.c1.valid <= (wr_needed && !sRx.c1TxAlmFull);
if (wr_needed && !sRx.c1TxAlmFull)
begin
sTx.c1.hdr <= wr_hdr;
sTx.c1.data <= t_ccip_clData'(wr_data);
end
end
end
end
//
// WRITE RESPONSE HANDLING
//
// Apurve: Check if a signal is to be sent to read to start reading in case
// write response does not work
//
// Send data (write requests).
//
//always_ff @(posedge clk)
//begin
// if (state == STATE_WRITE)
// begin
// rd_data <= sRx.c0.data;
// end
// if (state == STATE_UPDATE)
// begin
// // Update the write data and put it in the write data to be written
// wr_data <= rd_data + 1;
// end
//end
endmodule

View File

@@ -0,0 +1,2 @@
cci_hello.json
cci_hello_afu.sv

View File

@@ -0,0 +1,11 @@
#!/bin/sh
##
## Setup ASE environment using ../rtl/sources.txt.
##
## Every command-line argument is forwarded unchanged to afu_sim_setup.
##
# Absolute path to this script
SCRIPT=$(readlink -f "$0")
SCRIPT_PATH=$(dirname "$SCRIPT")
# "$@" must be quoted so that arguments containing whitespace or glob
# characters reach afu_sim_setup as single, unmodified words.
afu_sim_setup --sources="${SCRIPT_PATH}/../rtl/sources.txt" "$@"

View File

@@ -0,0 +1,41 @@
include ../../common/sw/common_include.mk
# Primary test name
TEST = cci_hello
# Build directory
OBJDIR = obj
CFLAGS += -I./$(OBJDIR)
CPPFLAGS += -I./$(OBJDIR)
# Files and folders
SRCS = $(TEST).c
OBJS = $(addprefix $(OBJDIR)/,$(patsubst %.c,%.o,$(SRCS)))
# Targets (both $(TEST) and $(TEST)_ase are built by default)
all: $(TEST) $(TEST)_ase
# AFU info from JSON file, including AFU UUID
AFU_JSON_INFO = $(OBJDIR)/afu_json_info.h
$(AFU_JSON_INFO): ../hw/rtl/$(TEST).json | objdir
afu_json_mgr json-info --afu-json=$^ --c-hdr=$@
$(OBJS): $(AFU_JSON_INFO)
$(TEST): $(OBJS)
$(CC) -o $@ $^ $(LDFLAGS) $(FPGA_LIBS)
$(TEST)_ase: $(OBJS)
$(CC) -o $@ $^ $(LDFLAGS) $(ASE_LIBS)
$(OBJDIR)/%.o: %.c | objdir
$(CC) $(CFLAGS) -c $< -o $@
clean:
rm -rf $(TEST) $(TEST)_ase $(OBJDIR)
# Create the build directory. It is referenced only as an order-only
# prerequisite, so running it never forces objects to rebuild.
objdir:
	@mkdir -p $(OBJDIR)
# objdir names no real file, so declare it phony along with all and clean;
# otherwise a stray file called "objdir" would suppress the mkdir.
.PHONY: all clean objdir

View File

@@ -0,0 +1,210 @@
//
// Copyright (c) 2017, Intel Corporation
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// Neither the name of the Intel Corporation nor the names of its contributors
// may be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <assert.h>
#include <uuid/uuid.h>
#include <opae/fpga.h>
// State from the AFU's JSON file, extracted using OPAE's afu_json_mgr script
#include "afu_json_info.h"
#define CACHELINE_BYTES 64
#define CL(x) ((x) * CACHELINE_BYTES)
//
// Search for an accelerator matching the requested UUID and connect to it.
//
/*
 * Search for an accelerator whose GUID matches accel_uuid and open it.
 *
 * accel_uuid: textual UUID of the AFU to connect to.
 *
 * Returns the opened accelerator handle, or NULL when the UUID cannot be
 * parsed, no matching accelerator is found, or any OPAE call fails. Every
 * API result is checked so that an uninitialised token or handle is never
 * used, including in builds where assert() is compiled out.
 */
static fpga_handle connect_to_accel(const char *accel_uuid)
{
    fpga_properties filter = NULL;
    fpga_guid guid;
    fpga_token accel_token;
    uint32_t num_matches = 0;
    fpga_handle accel_handle = NULL;
    fpga_result r;

    // Don't print verbose messages in ASE by default
    //setenv("ASE_LOG", "0", 0);

    if (NULL == accel_uuid || 0 != uuid_parse(accel_uuid, guid))
    {
        fprintf(stderr, "Invalid accelerator UUID!\n");
        return NULL;
    }

    // Set up a filter that will search for an accelerator with this UUID
    r = fpgaGetProperties(NULL, &filter);
    if (FPGA_OK != r)
    {
        fprintf(stderr, "Failed to create accelerator filter (error %d)\n", (int)r);
        return NULL;
    }
    r = fpgaPropertiesSetObjectType(filter, FPGA_ACCELERATOR);
    if (FPGA_OK == r)
    {
        r = fpgaPropertiesSetGUID(filter, guid);
    }

    // Do the search across the available FPGA contexts
    if (FPGA_OK == r)
    {
        r = fpgaEnumerate(&filter, 1, &accel_token, 1, &num_matches);
    }

    // Not needed anymore
    fpgaDestroyProperties(&filter);

    if (FPGA_OK != r || num_matches < 1)
    {
        fprintf(stderr, "Accelerator %s not found!\n", accel_uuid);
        return NULL;
    }

    // Open accelerator
    r = fpgaOpen(accel_token, &accel_handle, 0);

    // Done with token, whether or not the open succeeded
    fpgaDestroyToken(&accel_token);

    if (FPGA_OK != r)
    {
        fprintf(stderr, "Failed to open accelerator %s (error %d)\n",
                accel_uuid, (int)r);
        return NULL;
    }

    return accel_handle;
}
//
// Allocate a buffer in I/O memory, shared with the FPGA.
//
// accel_handle: open accelerator handle.
// size:         requested length in bytes; negative values are rejected.
// wsid:         receives the workspace id needed to release the buffer.
// io_addr:      receives the address the accelerator uses for the buffer.
// Returns the host pointer to the buffer, or NULL on any failure. On failure
// no buffer is left allocated.
//
static volatile void* alloc_buffer(fpga_handle accel_handle,
                                   ssize_t size,
                                   uint64_t *wsid,
                                   uint64_t *io_addr)
{
    fpga_result r;
    volatile void* buf = NULL;

    // A negative ssize_t would otherwise be converted to an enormous length
    if (size < 0) return NULL;

    r = fpgaPrepareBuffer(accel_handle, (uint64_t)size, (void**)&buf, wsid, 0);
    if (FPGA_OK != r) return NULL;

    // Get the physical address of the buffer in the accelerator
    r = fpgaGetIOAddress(accel_handle, *wsid, io_addr);
    if (FPGA_OK != r)
    {
        // Without an I/O address the buffer is useless to the caller:
        // give it back instead of leaking it.
        fpgaReleaseBuffer(accel_handle, *wsid);
        return NULL;
    }
    return buf;
}
//
// Hand two shared buffers to the accelerator over MMIO, then poll the first
// byte of the write buffer until the accelerator has changed it.
//
// Returns 0 on success and 1 when the accelerator or one of the buffers is
// unavailable.
//
int main(int argc, char *argv[])
{
    fpga_handle accel_handle;
    volatile char *buf;
    volatile char *buf_r;
    uint64_t wsid1;
    uint64_t wsid2;
    uint64_t buf_pa;
    uint64_t buf_rpa;
    uint64_t ret_buf_rpa;
    fpga_result r;

    // Find and connect to the accelerator. connect_to_accel() has already
    // printed a message when it hands back a null handle.
    accel_handle = connect_to_accel(AFU_ACCEL_UUID);
    if (!accel_handle)
    {
        return 1;
    }

    // Allocate a four-page memory buffer for write
    buf = (volatile char*)alloc_buffer(accel_handle, 4 * getpagesize(),
                                       &wsid1, &buf_pa);
    if (NULL == buf)
    {
        fprintf(stderr, "Failed to allocate the write buffer\n");
        fpgaClose(accel_handle);
        return 1;
    }

    // Allocate a four-page memory buffer for read
    buf_r = (volatile char*)alloc_buffer(accel_handle, 4 * getpagesize(),
                                         &wsid2, &buf_rpa);
    if (NULL == buf_r)
    {
        fprintf(stderr, "Failed to allocate the read buffer\n");
        fpgaReleaseBuffer(accel_handle, wsid1);
        fpgaClose(accel_handle);
        return 1;
    }

    // Seed the first byte of both buffers with 5. The polling loops below
    // treat 5 as "not yet touched by the FPGA".
    buf[0] = 5;
    buf_r[0] = 5;

    // Tell the accelerator the address of the write buffer using cache line
    // addresses (MMIO offset 0).
    r = fpgaWriteMMIO64(accel_handle, 0, 0, buf_pa / CL(1));
    printf("Write address is %08lx\n", buf_pa);
    printf("Write address div 64 is %08lx\n", buf_pa/ CL(1));
    assert(FPGA_OK == r);

    // Read back the 64-bit MMIO register at byte offset 5 * 8 as a check
    r = fpgaReadMMIO64(accel_handle, 0, 5 * sizeof(uint64_t), &ret_buf_rpa);
    printf("Returned read at 10 is %08lx\n", ret_buf_rpa);
    assert(FPGA_OK == r);

    // Tell the accelerator the address of the read buffer using cache line
    // addresses (MMIO offset 8).
    r = fpgaWriteMMIO64(accel_handle, 0, sizeof(uint64_t), buf_rpa / CL(1));
    printf("Read address is %08lx\n", buf_rpa);
    printf("Read address div64 is %08lx\n", buf_rpa / CL(1));
    assert(FPGA_OK == r);

    // Spin, waiting for the first byte to change away from the seed value.
    while (5 == buf[0])
    {
        // A well-behaved program would use _mm_pause(), nanosleep() or
        // equivalent to save power here.
    };

    // Print the value written by the FPGA
    printf("%d\n", buf[0]);

    // Keep spinning until the first byte reads 10
    do {
        //printf("%d\n", buf[0]);
    } while (10 != buf[0]);

    // Done
    fpgaReleaseBuffer(accel_handle, wsid1);
    fpgaReleaseBuffer(accel_handle, wsid2);
    fpgaClose(accel_handle);
    return 0;
}

View File

@@ -0,0 +1,13 @@
//
// Generated by afu_json_mgr from ../hw/rtl/cci_hello.json
//
#ifndef __AFU_JSON_INFO__
#define __AFU_JSON_INFO__
#define AFU_ACCEL_NAME "cci_hello"
#define AFU_ACCEL_UUID "C6AA954A-9B91-4A37-ABC1-1D9F0709DCC3"
#define AFU_IMAGE_POWER 0
#define AFU_TOP_IFC "ccip_std_afu"
#endif // __AFU_JSON_INFO__

Binary file not shown.

1
sw/runtime/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
/config.h

6
sw/runtime/Makefile Normal file
View File

@@ -0,0 +1,6 @@
# build_config: runs the RTL configuration generator and asks it to write the
# C header to ./config.h.
# NOTE(review): "--outv none" presumably disables the Verilog output - confirm
# against gen_config.py.
.PHONY: build_config
build_config:
../rtl/gen_config.py --outv none --outc ./config.h

View File

@@ -0,0 +1,22 @@
#ifndef FILE_IOO
#define FILE_IOO
// #include <sys/stat.h>
// #include <errno.h>
// #include <stdio.h>
// void vx_close();
// void vx_fstat();
// void vx_isatty();
// void vx_read();
// void vx_write();
#endif

View File

@@ -0,0 +1,48 @@
# .section .FileIO
# .type vx_close, @function
# .global vx_close
# vx_close:
# nop
# ret
# nop
# nop
# .type vx_fstat, @function
# .global vx_fstat
# vx_fstat:
# nop
# ret
# nop
# nop
# .type vx_isatty, @function
# .global vx_isatty
# vx_isatty:
# nop
# ret
# nop
# nop
# .type vx_read, @function
# .global vx_read
# vx_read:
# nop
# ret
# nop
# nop
# .type vx_write, @function
# .global vx_write
# vx_write:
# nop
# ret
# nop
# nop

View File

@@ -0,0 +1,51 @@
// C declarations for the warp/thread control routines implemented in
// vx_intrinsics.s, plus the __if/__else/__endif divergence macros.
#ifndef VX_INTRINSICS
#define VX_INTRINSICS
#ifdef __cplusplus
extern "C" {
#endif
// Spawns warps: issues the wspawn instruction with numWarps and the address
// (PC_spawn) at which the spawned warps start executing.
void vx_wspawn(unsigned numWarps, unsigned PC_spawn);
// Changes thread mask (activates/deactivates threads)
void vx_tmc(unsigned numThreads);
// Warp barrier: issues the barrier instruction with a barrier id and the
// number of warps taking part.
void vx_barrier(unsigned barriedID, unsigned numWarps);
// Split on a predicate (start of a divergent region)
void vx_split(unsigned predicate);
// Join (end of a divergent region started by vx_split)
void vx_join(void);
// Get hardware thread ID
unsigned vx_threadID(void);
// Get hardware warp ID
unsigned vx_warpID(void);
// Get global warp number
unsigned vx_warpNum(void);
// Get the cycle counter / the instruction counter
unsigned vx_getCycles(void);
unsigned vx_getInst(void);
// Recomputes the stack pointer of the calling warp's threads
void vx_resetStack(void);
// Divergent if/else. __if(b) expands to TWO statements (vx_split(b); if (b)),
// so it must not be used as the unbraced body of another statement, and b is
// evaluated twice - pass a plain variable without side effects.
// __endif expands to vx_join(); and closes the region opened by __if.
#define __if(b) vx_split(b); \
if (b)
#define __else else
#define __endif vx_join();
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,85 @@
.section .text
# void vx_wspawn(unsigned numWarps, unsigned PC_spawn)
# a0 = number of warps, a1 = address the spawned warps start at.
# The instruction is emitted as a raw word.
.type vx_wspawn, @function
.global vx_wspawn
vx_wspawn:
.word 0x00b5106b # wspawn a0(numWarps), a1(PC SPAWN)
ret
# void vx_tmc(unsigned numThreads)
# a0 = argument of the tmc (thread mask control) instruction.
.type vx_tmc, @function
.global vx_tmc
vx_tmc:
.word 0x0005006b # tmc a0
ret
# void vx_barrier(unsigned barrierID, unsigned numWarps)
# a0 = barrier id, a1 = number of warps taking part in the barrier.
.type vx_barrier, @function
.global vx_barrier
vx_barrier:
.word 0x00b5406b # barrier a0(barrier id), a1(numWarps)
ret
# void vx_split(unsigned predicate)
# a0 = predicate the split instruction diverges on.
.type vx_split, @function
.global vx_split
vx_split:
.word 0x0005206b # split a0
ret
# void vx_join(void)
# Issues the join instruction that ends a region opened by vx_split.
.type vx_join, @function
.global vx_join
vx_join:
.word 0x0000306b # join
ret
# unsigned vx_warpID(void)
# Returns the value of CSR 0x21 (hardware warp ID) in a0.
.type vx_warpID, @function
.global vx_warpID
vx_warpID:
csrr a0, 0x21 # read warp ID
ret
# unsigned vx_warpNum(void)
# Returns the value of CSR 0x22 (global warp number) in a0.
.type vx_warpNum, @function
.global vx_warpNum
vx_warpNum:
csrr a0, 0x22 # read global warp number
ret
# unsigned vx_threadID(void)
# Returns the value of CSR 0x20 (hardware thread ID) in a0.
.type vx_threadID, @function
.global vx_threadID
vx_threadID:
csrr a0, 0x20 # read thread ID
ret
# unsigned vx_getCycles(void)
# Returns the value of CSR 0x26 in a0 (the cycle counter, going by the
# function name).
.type vx_getCycles, @function
.global vx_getCycles
vx_getCycles:
csrr a0, 0x26 # read cycle counter
ret
# unsigned vx_getInst(void)
# Returns the value of CSR 0x25 in a0 (the instruction counter, going by the
# function name).
.type vx_getInst, @function
.global vx_getInst
vx_getInst:
csrr a0, 0x25 # read instruction counter
ret
# void vx_resetStack(void)
# Issues tmc 4, then gives each thread the stack pointer
#   sp = 0x6ffff000 - (wid << 15) - (tid << 10) + (tid << 2)
# Warp 0 returns normally; every other warp issues tmc 0 before the ret.
# Clobbers a0-a3.
# NOTE(review): the thread count 4 is hard-coded here while vx_start.S uses
# NT - confirm they are meant to differ.
.type vx_resetStack, @function
.global vx_resetStack
vx_resetStack:
li a0, 4
.word 0x0005006b # tmc 4
csrr a3, 0x21 # get wid
slli a3, a3, 15 # a3 = wid * 32768 (per-warp offset)
csrr a2, 0x20 # get tid
slli a1, a2, 10 # multiply tid by 1024
slli a2, a2, 2 # multiply tid by 4
lui sp, 0x6ffff # load base sp (0x6ffff000)
sub sp, sp, a1 # sub sp - (1024*tid)
sub sp, sp, a3 # shift per warp
add sp, sp, a2 # shift sp for better performance
csrr a3, 0x21 # get wid
beqz a3, RETURN # warp 0 skips the tmc 0 below
li a0, 0
.word 0x0005006b # tmc 0
RETURN:
ret

38
sw/runtime/io/vx_io.c Normal file
View File

@@ -0,0 +1,38 @@
#include "vx_io.h"
#ifdef __cplusplus
extern "C" {
#endif
// Prints the low 32 bits of f in lowercase hexadecimal, without a prefix and
// without leading zeros. Digits are emitted one at a time through
// vx_print_str using the hextoa lookup table.
void vx_print_hex(unsigned f)
{
    // Values below 16 (zero included) are exactly one digit.
    if (f < 16)
    {
        vx_print_str(hextoa[f]);
        return;
    }

    // Walk the eight nibbles from most to least significant and start
    // printing at the first non-zero one.
    bool printing = false;
    for (int shift = 28; shift >= 0; shift -= 4)
    {
        int nibble = (f >> shift) & 0xf;
        if (nibble != 0)
        {
            printing = true;
        }
        if (printing)
        {
            vx_print_str(hextoa[nibble]);
        }
    }
}
// Prints the string c, then f in hexadecimal (via vx_print_hex), then a
// newline. Despite the name, c is printed verbatim: no format specifiers are
// interpreted.
void vx_printf(const char * c, unsigned f)
{
vx_print_str(c);
vx_print_hex(f);
vx_print_str("\n");
}
#ifdef __cplusplus
}
#endif

20
sw/runtime/io/vx_io.h Normal file
View File

@@ -0,0 +1,20 @@
#pragma once
#include <stdbool.h>
#ifdef __cplusplus
extern "C" {
#endif
// Lookup table mapping a nibble value (0-15) to its one-character lowercase
// hexadecimal string. Both the pointers and the characters are const: the
// entries are string literals and must never be written through.
static const char * const hextoa[] = {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f"};
// Prints a value in lowercase hexadecimal without leading zeros.
void vx_print_hex(unsigned);
// Prints a string, then a value in hexadecimal, then a newline.
void vx_printf(const char *, unsigned);
// Prints a NUL-terminated string.
void vx_print_str(const char *);
// Prints the character c. The first argument is not used by the assembly
// implementation in vx_io.s.
void vx_printc(unsigned, char c);
#ifdef __cplusplus
}
#endif

33
sw/runtime/io/vx_io.s Normal file
View File

@@ -0,0 +1,33 @@
# void vx_print_str(const char *s)      a0 = s
# Prints a NUL-terminated string by calling vx_printc once per byte.
# ra and a1 are saved in a 12-byte stack frame and restored before returning;
# a0 is not restored and points at the terminating NUL on return.
# The loop relies on vx_printc leaving a0 untouched.
# NOTE(review): the RISC-V calling convention asks for a 16-byte aligned sp;
# this frame is 12 bytes - confirm that is acceptable for this target.
.type vx_print_str, @function
.global vx_print_str
vx_print_str:
addi sp, sp, -12
sw ra, 0(sp)
sw a1, 4(sp)
bl:
lbu a1,0(a0) # a1 = next byte of the string
beqz a1,be # stop at the terminating NUL
jal vx_printc # print the byte held in a1
addi a0, a0, 1 # advance to the next byte
j bl
be:
lw ra, 0(sp)
lw a1, 4(sp)
addi sp, sp, 12
ret
# void vx_printc(unsigned, char c)      a1 = c (a0 is not read)
# Stores a1 as a 32-bit word to the address held in print_addr (0x00010000).
# Only t0 is modified.
.type vx_printc, @function
.global vx_printc
vx_printc:
la t0, print_addr
lw t0, 0(t0) # t0 = address of the output port
sw a1, 0(t0) # write the character
ret
.section .data
# Address that vx_printc writes characters to
print_addr:
.word 0x00010000

343
sw/runtime/newlib/newlib.c Normal file
View File

@@ -0,0 +1,343 @@
#include "../io/vx_io.h"
#include "../fileio/fileio.h"
#include "../intrinsics/vx_intrinsics.h"
#include <sys/stat.h>
#include <errno.h>
#include <stdio.h>
#define CLOSE 1
#define ISATTY 2
#define LSEEK 3
#define READ 4
#define WRITE 5
#define FSTAT 6
#define OPEN 7
#define FILE_IO_WRITE 0x71000000
#define FILE_IO_READ 0x72000000
#ifdef __cplusplus
extern "C" {
#endif
typedef void (*funct_t)(void);
funct_t trap_to_simulator = (funct_t) 0x70000000;
// Appends one length-prefixed record to the buffer at *ptr.
//
// Layout written: the 4 bytes of `size` (in host byte order), followed by
// `size` bytes copied from src. *ptr is then advanced past the record and
// rounded UP to the next 4-byte boundary so the following record starts
// aligned.
//
// ptr:  in/out cursor into the destination buffer.
// src:  bytes to copy.
// size: number of bytes to copy from src.
void upload(char ** ptr, char * src, int size)
{
    char * drain = *ptr;

    // Copy the length byte by byte so that the destination does not have to
    // be aligned for an int store.
    const char * size_ptr = (const char *) &size;
    for (int b = 0; b < 4; b++)
    {
        drain[b] = size_ptr[b];
    }
    drain += 4;

    for (int i = 0; i < size; i++)
    {
        *drain++ = src[i];
    }

    // Round up to a multiple of 4. (Adding addr % 4, as this code used to,
    // only produces an aligned address when the remainder is 0 or 2.)
    unsigned long misalign = ((unsigned long) drain) & 3ul;
    if (misalign != 0)
    {
        drain += 4 - misalign;
    }
    *ptr = drain;
}
// Reads one length-prefixed record from the buffer at *ptr.
//
// Layout expected: a 4-byte length (host byte order) followed by that many
// payload bytes. The payload is copied to drain, and *ptr is advanced past
// the record and rounded UP to the next 4-byte boundary.
//
// ptr:   in/out cursor into the source buffer.
// drain: destination; must have room for the length stored in the record,
//        which is not validated here.
void download(char ** ptr, char * drain)
{
    char * src = *ptr;

    // Assemble the length byte by byte so that the source does not have to
    // be aligned for an int load.
    int size;
    char * size_ptr = (char *) &size;
    for (int b = 0; b < 4; b++)
    {
        size_ptr[b] = src[b];
    }
    src += 4;

    for (int i = 0; i < size; i++)
    {
        drain[i] = *src++;
    }

    // Round up to a multiple of 4. (Adding addr % 4, as this code used to,
    // only produces an aligned address when the remainder is 0 or 2.)
    unsigned long misalign = ((unsigned long) src) & 3ul;
    if (misalign != 0)
    {
        src += 4 - misalign;
    }
    *ptr = src;
}
// Stub for the C library's close hook: does nothing.
// NOTE(review): newlib's hook is conventionally `int _close(int fd)`; this
// version takes no arguments and returns nothing - confirm callers never use
// the result.
void _close()
{
// vx_print_str("Hello from _close\n");
}
// Stub for the C library's fstat hook.
// Ignores `file`, sets only st->st_mode (to S_IFCHR, i.e. a character device)
// and returns 0. No other field of *st is written.
// The commented-out code below is an earlier version that asked the simulator
// for the real values through the FILE_IO_WRITE / FILE_IO_READ buffers.
int _fstat(int file, struct stat * st)
{
// char * write_buffer = (char *) FILE_IO_WRITE;
// int cmd_id = FSTAT;
// upload((char **) &write_buffer, (char *) &cmd_id, sizeof(int));
// upload((char **) &write_buffer, (char *) &file , sizeof(int));
// trap_to_simulator();
// char * read_buffer = (char *) FILE_IO_READ;
// unsigned value;
// download((char **) &read_buffer, (char *) &value);
// st->st_mode = value;
// download((char **) &read_buffer, (char *) &value);
// st->st_dev = value;
// download((char **) &read_buffer, (char *) &value);
// st->st_uid = value;
// download((char **) &read_buffer, (char *) &value);
// st->st_gid = value;
// download((char **) &read_buffer, (char *) &value);
// st->st_size = value;
// download((char **) &read_buffer, (char *) &value);
// st->st_blksize = value;
// download((char **) &read_buffer, (char *) &value);
// st->st_blocks = value;
// vx_print_str("Hello from fstat\n");
// // st->st_mode = 33279;
// vx_printf("st_mode: ", st->st_mode);
// vx_printf("st_dev: ", st->st_dev);
// vx_printf("st_ino: ", st->st_ino);
// vx_printf("st_uid: ", st->st_uid);
// vx_printf("st_gid: ", st->st_gid);
// vx_printf("st_rdev: ", st->st_rdev);
// vx_printf("st_size: ", st->st_size);
// vx_printf("st_blksize: ", st->st_blksize);
// vx_printf("st_blocks: ", st->st_blocks);
st->st_mode = S_IFCHR;
return 0;
}
// Stub for the C library's isatty hook: reports every descriptor as a
// terminal (always returns 1; `file` is ignored).
int _isatty (int file)
{
// vx_print_str("Hello from _isatty\n");
return 1;
}
// Stub for the C library's lseek hook: ignores all arguments and returns 0.
// The commented-out code below is an earlier version that forwarded the
// request to the simulator.
int _lseek(int fd, int offset, int whence)
{
// // vx_print_str("Hello from _lseek\n");
// char * write_buffer = (char *) FILE_IO_WRITE;
// char * read_buffer = (char *) FILE_IO_READ;
// int cmd_id = LSEEK;
// upload((char **) &write_buffer, (char *) &cmd_id , sizeof(int));
// upload((char **) &write_buffer, (char *) &fd , sizeof(int));
// upload((char **) &write_buffer, (char *) &offset , sizeof(int));
// upload((char **) &write_buffer, (char *) &whence , sizeof(int));
// trap_to_simulator();
// int retval;
// download((char **) &read_buffer, (char *) &retval);
// return retval;
return 0;
}
// void _lseek()
// {
// }
// Stub for the C library's read hook: ignores all arguments, leaves *ptr
// untouched and returns 0 (no bytes read).
// The commented-out code below is an earlier version that forwarded the
// request to the simulator.
int _read (int file, char *ptr, int len)
{
// char * write_buffer = (char *) FILE_IO_WRITE;
// char * read_buffer = (char *) FILE_IO_READ;
// int cmd_id = READ;
// upload((char **) &write_buffer, (char *) &cmd_id, sizeof(int));
// upload((char **) &write_buffer, (char *) &file , sizeof(int));
// upload((char **) &write_buffer, (char *) &ptr , sizeof(int));
// upload((char **) &write_buffer, (char *) &len , sizeof(int));
// trap_to_simulator();
// return len;
return 0;
}
// C library write hook: emits nbytes characters from buf, one store per
// character, to the memory-mapped output port at 0x00010000.
// `file` is ignored, so every descriptor ends up on the same port.
// Always reports nbytes as written.
// (An earlier version, since disabled, forwarded a WRITE command to the
// simulator through the FILE_IO_WRITE buffer instead.)
int _write (int file, char *buf, int nbytes)
{
    volatile unsigned int * const out_port = (unsigned int *) 0x00010000;
    int sent = 0;
    while (sent < nbytes)
    {
        *out_port = buf[sent];
        ++sent;
    }
    return nbytes;
}
// Heap window handed out by _sbrk: [0x90000000, 0xa0000000).
// heap_start is the current program break; head_end is one past the last
// usable byte. Both are unsigned so address comparisons above 0x7fffffff
// behave.
static unsigned heap_start = 0x90000000u;
static unsigned head_end = 0xa0000000u;

// C library sbrk hook: moves the program break by nbytes.
//
// nbytes > 0 grows the heap, nbytes < 0 shrinks it, 0 queries the break.
// Returns the break as it was BEFORE the call. When the request would leave
// the heap window, nothing changes, errno is set to ENOMEM and (void *) -1 is
// returned.
//
// The request is honoured in full or refused: the caller assumes it owns
// every byte it asked for, so handing back a shorter region would make
// consecutive allocations overlap.
void * _sbrk (int nbytes)
{
    const unsigned heap_floor = 0x90000000u;

    vx_print_str("Hello from _sbrk\n");

    unsigned base = heap_start;
    if (nbytes >= 0)
    {
        // Compare against the space left rather than base + nbytes, which
        // could wrap around.
        if ((unsigned) nbytes > head_end - base)
        {
            errno = ENOMEM;
            return (void *) -1;
        }
        heap_start = base + (unsigned) nbytes;
    }
    else
    {
        // 0u - x is the magnitude of a negative int without overflowing on
        // INT_MIN.
        unsigned shrink = 0u - (unsigned) nbytes;
        if (shrink > base - heap_floor)
        {
            errno = ENOMEM;
            return (void *) -1;
        }
        heap_start = base - shrink;
    }
    return (void *) base;
} /* _sbrk () */
// C library exit hook: ignores the exit status and issues vx_tmc(0).
// NOTE(review): _exit is conventionally not expected to return; this relies
// on tmc 0 halting the calling threads - confirm.
void _exit(int val)
{
// vx_print_str("Hello from exit\n");
vx_tmc(0);
}
// Stub for the C library's open hook: ignores all arguments and always
// returns 0 as the descriptor.
// The commented-out code below is an earlier version that forwarded the
// request to the simulator and returned the descriptor it produced.
int _open(const char *name, int flags, int mode)
{
// char * write_buffer = (char *) FILE_IO_WRITE;
// char * read_buffer = (char *) FILE_IO_READ;
// int cmd_id = OPEN;
// upload((char **) &write_buffer, (char *) &cmd_id, sizeof(int));
// upload((char **) &write_buffer, (char *) &name , sizeof (char *));
// upload((char **) &write_buffer, (char *) &flags , sizeof(int));
// upload((char **) &write_buffer, (char *) & mode , sizeof(int));
// trap_to_simulator();
// int fd;
// download((char **) &read_buffer, (char *) &fd);
// return fd;
return 0;
}
// C library kill hook: takes no arguments and issues vx_tmc(0), the same
// action as _exit.
// NOTE(review): newlib's hook is conventionally `int _kill(int pid, int sig)`
// - confirm callers never use the result.
void _kill()
{
vx_tmc(0);
}
// C library getpid hook: reports the hardware thread ID as the process id.
unsigned _getpid()
{
return vx_threadID();
}
// C library unlink hook: not implemented. Prints an error message and
// returns without reporting failure to the caller.
void _unlink()
{
vx_print_str("ERROR: _unlink not yet implemented\n");
}
// Fake clock: a counter that advances by one on every _gettimeofday call.
static int curr_time = 0;
// C library gettimeofday hook: takes no arguments, fills in no time
// structure, and returns the fake counter's value before incrementing it.
// NOTE(review): the conventional hook returns 0 on success and writes the
// time through its first argument - confirm callers only need a monotonic
// number.
int _gettimeofday()
{
// vx_print_str("ERROR: _gettimeofday not yet implemented\n");
return curr_time++;
}
// C library link hook: not implemented. Prints an error message and returns
// without reporting failure to the caller.
void _link()
{
vx_print_str("ERROR: _link not yet implemented\n");
}
#ifdef __cplusplus
}
#endif

27
sw/runtime/qemu/vx_api.c Normal file
View File

@@ -0,0 +1,27 @@
#include <stdio.h>
#include <stdlib.h>
#include "../vx_api/vx_api.h"
#ifdef __cplusplus
extern "C" {
#endif
// Signature of a work-group function: it receives the kernel arguments, the
// context, and the x/y/z coordinates of the group to execute.
typedef void (*pocl_workgroup_func) (
    void * /* args */,
    void * /* pocl_context */,
    uint32_t /* group_x */,
    uint32_t /* group_y */,
    uint32_t /* group_z */
);

// Runs pfn once for every work-group of the grid described by
// ctx->num_groups, sequentially, with x varying fastest, then y, then z.
//
// ctx:  context; only num_groups[0..2] is read here.
// pfn:  work-group function to call.
// args: kernel arguments, passed through to pfn unchanged.
void pocl_spawn(struct pocl_context_t * ctx, pocl_workgroup_func pfn, const void * args) {
    uint32_t x, y, z;
    for (z = 0; z < ctx->num_groups[2]; ++z)
        for (y = 0; y < ctx->num_groups[1]; ++y)
            for (x = 0; x < ctx->num_groups[0]; ++x)
                // pfn takes a non-const pointer, hence the cast
                (pfn)((void *) args, ctx, x, y, z);
}
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,63 @@
#include "../config.h"
# _start: program entry point.
#  1. spawns NW warps at vx_set_sp and runs vx_set_sp on the current warp too,
#     so that every thread gets its gp and sp set up;
#  2. issues tmc 1 to continue single-threaded;
#  3. zeroes the memory between _edata and _end (the bss);
#  4. registers __libc_fini_array with atexit, runs __libc_init_array,
#     calls main and finally tail-calls exit.
.section .init, "ax"
.global _start
.type _start, @function
_start:
la a1, vx_set_sp
li a0, NW # activate all warps
.word 0x00b5106b # wspawn a0(numWarps), a1(PC SPAWN)
jal vx_set_sp
li a0, 1
.word 0x0005006b # back to single thread
# Initialize global pointer
# call __cxx_global_var_init
# Clear the bss segment: memset(_edata, 0, _end - _edata)
la a0, _edata
la a2, _end
sub a2, a2, a0
li a1, 0
call memset
la a0, __libc_fini_array # Register global termination functions
call atexit # to be called upon exit
call __libc_init_array # Run global initialization functions
call main
tail exit
.size _start, .-_start
# vx_set_sp: per-thread register setup, run by every warp at start-up.
# Issues tmc NT, loads gp with __global_pointer$, and sets
#   sp = 0x6ffff000 - (global warp number << 26) - (tid << 10) + (tid << 2)
# Warp 0 (CSR 0x21 == 0) returns to its caller; every other warp issues tmc 0
# before the ret.
# Clobbers a0-a3.
.section .text
.type vx_set_sp, @function
.global vx_set_sp
vx_set_sp:
li a0, NT
.word 0x0005006b # activate all threads
# Linker relaxation is disabled while gp is loaded: a relaxed sequence could
# itself be rewritten to depend on gp.
.option push
.option norelax
1:auipc gp, %pcrel_hi(__global_pointer$)
addi gp, gp, %pcrel_lo(1b)
.option pop
csrr a3, 0x22 # get global warp number
slli a3, a3, 0x1a # a3 = global warp number << 26 (per-warp offset)
csrr a2, 0x20 # get tid
slli a1, a2, 10 # multiply tid by 1024
slli a2, a2, 2 # multiply tid by 4
lui sp, 0x6ffff # load base sp (0x6ffff000)
sub sp, sp, a1 # sub sp - (1024*tid)
sub sp, sp, a3 # shift per warp
add sp, sp, a2 # shift sp for better performance
csrr a3, 0x21 # get wid
beqz a3, RETURN # warp 0 skips the tmc 0 below
li a0, 0
.word 0x0005006b # tmc 0
RETURN:
ret
.section .data
.global __dso_handle
.weak __dso_handle
__dso_handle:
.long 0

View File

@@ -0,0 +1,142 @@
#include "tests.h"
#include "../intrinsics/vx_intrinsics.h"
#include "../io/vx_io.h"
// One slot per hardware thread; 5 marks "not written yet".
int tmc_array[4] = {5,5,5,5};

// Thread-mask test: with four threads active each thread stores its own ID
// into its slot of tmc_array; the mask is then reduced to one thread, which
// prints the four slots, one per line.
void test_tmc()
{
    vx_print_str("testing_tmc\n");

    // Region executed by four threads
    vx_tmc(4);
    unsigned tid = vx_threadID(); // Get TID
    tmc_array[tid] = tid;
    vx_tmc(1);

    // Back on a single thread: dump the results
    for (int slot = 0; slot < 4; ++slot)
    {
        vx_print_hex(tmc_array[slot]);
        vx_print_str("\n");
    }
}
// Result slot per hardware thread for test_divergence.
int div_arr[4];
// Control-divergence test built from nested __if/__else/__endif regions
// (the split/join macros from vx_intrinsics.h).
// Each thread takes a different path depending on its ID and records it:
//   tid 0 -> 10, tid 1 -> 11, tid 2 -> 12, tid 3 (and above) -> 13.
// The four slots are then printed, one per line. The caller is responsible
// for the thread mask; intrinsics_tests activates four threads around the
// call.
void test_divergence()
{
unsigned tid = vx_threadID(); // Get TID
bool b = tid < 2;
__if (b)
{
// threads 0 and 1
bool c = tid < 1;
__if (c)
{
div_arr[tid] = 10;
}
__else
{
div_arr[tid] = 11;
}
__endif
}
__else
{
// threads 2 and up
bool c = tid < 3;
__if (c)
{
div_arr[tid] = 12;
}
__else
{
div_arr[tid] = 13;
}
__endif
}
__endif
vx_print_hex(div_arr[0]);
vx_print_str("\n");
vx_print_hex(div_arr[1]);
vx_print_str("\n");
vx_print_hex(div_arr[2]);
vx_print_str("\n");
vx_print_hex(div_arr[3]);
vx_print_str("\n");
}
// Result slot per warp for the wspawn test.
unsigned wsapwn_arr[4];
// Body executed by every warp in the wspawn test: stores the warp's ID into
// its slot of wsapwn_arr. Any warp other than warp 0 then issues vx_tmc(0);
// warp 0 returns to its caller.
void simple_kernel()
{
unsigned wid = vx_warpID();
wsapwn_arr[wid] = wid;
wid = vx_warpID();
if (wid != 0)
{
vx_tmc(0);
}
}
// Warp-spawn test: spawns 4 warps at simple_kernel, runs simple_kernel on the
// calling warp as well, then prints the four wsapwn_arr slots, one per line.
void test_wsapwn()
{
// vx_wspawn takes the start address as a plain unsigned
unsigned func_ptr = (unsigned) simple_kernel;
vx_wspawn(4, func_ptr);
simple_kernel();
// Empty delay loop before the results are read.
// NOTE(review): presumably gives the spawned warps time to finish; there is
// no explicit synchronisation - confirm this is sufficient.
for (int i = 0; i < 100; i++) {}
vx_print_hex(wsapwn_arr[0]);
vx_print_str("\n");
vx_print_hex(wsapwn_arr[1]);
vx_print_str("\n");
vx_print_hex(wsapwn_arr[2]);
vx_print_str("\n");
vx_print_hex(wsapwn_arr[3]);
vx_print_str("\n");
}
// Runs the three intrinsic tests in order: thread mask, control divergence,
// warp spawn. Each test prints a banner followed by its results.
void intrinsics_tests()
{
// TMC test (prints its own banner)
test_tmc();
// Control Divergence Test: run with four threads active, then go back to one
vx_print_str("test_divergence\n");
vx_tmc(4);
test_divergence();
vx_tmc(1);
// Test wspawn
vx_print_str("test_spawn\n");
test_wsapwn();
}

View File

@@ -0,0 +1,16 @@
#ifndef TESTS
#define TESTS
void test_tmc();
void test_divergence();
void test_wsapwn();
void intrinsics_tests();
#endif

View File

@@ -0,0 +1,42 @@
#include "io/io.h" // Printing functions
#include "intrinsics/instrinsics.h" // vx_threadID and vx_WarpID
struct args
{
void * data;
};
// Template for a kernel body: recovers the typed argument struct from the
// opaque pointer, reads the thread and warp IDs, and shows the shape of a
// control-divergent if/else.
// NOTE(review): this file is a sketch - `something` is not declared, and the
// intrinsics header visible elsewhere declares vx_warpID, not vx_WarpID.
void function(void * arg)
{
struct args * real_arg = (struct args *) arg;
unsigned tid = vx_threadID();
unsigned wid = vx_WarpID();
__if(something) // Control divergent if
{
}
__else
{
}
__endif
}
// Template for a host-side main: loads the input, wraps it in the argument
// struct and launches `function` on the requested warps and threads.
int main()
{
    void * data = vx_loadfile("filename.txt"); // The raw char data will be returned by vx_loadfile
    struct args arg;
    arg.data = data;
    // Pass the argument struct itself: `function` casts its parameter to
    // struct args *, so it must receive &arg rather than &data.
    vx_spawnWarps(numWarps, numThreads, function, &arg);
}

View File

@@ -0,0 +1,31 @@
# Builds vx_dev_main.elf from the startup code, intrinsics, I/O, API and test
# sources plus vx_dev_main.c, and derives an Intel HEX image and a
# disassembly dump from it.
#
# To set a custom TOOLPATH, call make like this:
# TOOLPATH=../../../../riscv-gnu-toolchain/drops/bin make ...
TOOLPATH ?= ~/dev/riscv-gnu-toolchain/drops/bin
COMP = $(TOOLPATH)/riscv32-unknown-elf-gcc
# CC_FLAGS = -march=rv32im -mabi=ilp32 -O0 -Wl,-Bstatic,-T,linker.ld -ffreestanding -nostdlib
CC_FLAGS = -march=rv32im -mabi=ilp32 -O0 -Wl,-Bstatic,-T,../vortex_link.ld -ffreestanding -nostartfiles
DMP = $(TOOLPATH)/riscv32-unknown-elf-objdump
CPY = $(TOOLPATH)/riscv32-unknown-elf-objcopy
# Source groups linked into the image
VX_STR = ../../startup/vx_start.S
VX_INT = ../../intrinsics/vx_intrinsics.s
VX_IO = ../../io/vx_io.s ../../io/vx_io.c
VX_API = ../../vx_api/vx_api.c
VX_TEST = ../../tests/tests.c
VX_MAIN = ./vx_dev_main.c
# HEX, DUMP and ELF are target names, not files, so every invocation relinks.
all: HEX DUMP ELF
# Disassemble the whole image
DUMP: ELF
$(DMP) -D vx_dev_main.elf > vx_dev_main.dump
# Convert the image to Intel HEX
HEX: ELF
$(CPY) -O ihex vx_dev_main.elf vx_dev_main.hex
# Compile and link everything in one step
ELF:
$(COMP) $(CC_FLAGS) $(VX_STR) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN) -o vx_dev_main.elf

View File

@@ -0,0 +1,115 @@
#include "../../intrinsics/vx_intrinsics.h"
#include "../../io/vx_io.h"
#include "../../tests/tests.h"
#include "../../vx_api/vx_api.h"
// #include <utlist.h>
#include <string.h>
#include <unistd.h>
#include <stdint.h>
#include <stdbool.h>
#include <stdlib.h>
// Arguments of mat_add_kernel: z = x + y, element by element, over a
// numRows x numColums matrix stored row by row.
typedef struct
{
unsigned * x; // first input matrix
unsigned * y; // second input matrix
unsigned * z; // output matrix (main also passes it as x for the second run)
unsigned numColums; // number of columns; compared against the thread ID
unsigned numRows; // number of rows; compared against the warp ID
} mat_add_args_t;
unsigned x[] = {5, 5, 5, 5,
6, 6, 6, 6,
7, 7, 7, 7,
8, 8, 8, 8};
unsigned y[] = {1, 1, 1, 1,
1, 1, 1, 1,
1, 1, 1, 1,
1, 1, 1, 1};
unsigned z[] = {0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0};
// Matrix-add kernel: each thread handles one element, with the warp ID used
// as the row and the thread ID as the column.
// void_arguments must point to a mat_add_args_t. Threads whose (warp, thread)
// pair falls outside the matrix do nothing.
void mat_add_kernel(void * void_arguments)
{
mat_add_args_t * arguments = (mat_add_args_t *) void_arguments;
unsigned wid = vx_warpID();
unsigned tid = vx_threadID();
// Guard against warps/threads beyond the matrix bounds
bool valid = (wid < arguments->numRows) && (tid < arguments->numColums);
// Divergent region: only the in-range threads execute the body
__if (valid)
{
unsigned index = (wid * arguments->numColums) + tid;
arguments->z[index] = arguments->x[index] + arguments->y[index];
}
__endif
}
// Prints a separator line followed by the matrix at matPtr, one row per
// line, each element in hexadecimal followed by a space.
// The matrix is numRows x numCols, stored row by row.
void vx_print_mat(unsigned * matPtr, int numRows, int numCols)
{
    vx_print_str("---------------------\n");

    // Elements are contiguous row after row, so one running cursor visits
    // them in exactly the order row * numCols + col would.
    unsigned * cell = matPtr;
    for (int row = 0; row < numRows; row++)
    {
        int col = 0;
        while (col < numCols)
        {
            vx_print_hex(*cell);
            vx_print_str(" ");
            cell++;
            col++;
        }
        vx_print_str("\n");
    }
}
// Runs the matrix-add kernel twice on 4 warps x 4 threads and prints the
// result after each run: first z = x + y, then z = z + y.
// NOTE(review): returns 1 rather than 0 - confirm a non-zero status is
// intended.
int main()
{
// Main is called with all threads active of warp 0
// NOTE(review): the startup code visible in vx_start.S already issues tmc 1
// before calling main, so this call is presumably defensive - confirm.
vx_tmc(1);
// void * hellp = malloc(4);
vx_print_str("Confirm Dev Main\n");
vx_print_str("vx_spawnWarps\n");
mat_add_args_t arguments;
arguments.x = x;
arguments.y = y;
arguments.z = z;
arguments.numColums = 4;
arguments.numRows = 4;
int numWarps = 4;
int numThreads = 4;
// First kernel call
vx_spawnWarps(numWarps, numThreads, mat_add_kernel, &arguments);
vx_print_mat(z, arguments.numRows, arguments.numColums);
// Feed the previous result back in as the first operand
arguments.x = z;
arguments.y = y;
arguments.z = z;
arguments.numColums = 4;
arguments.numRows = 4;
// Second Kernel Call
vx_spawnWarps(numWarps, numThreads, mat_add_kernel, &arguments);
vx_print_mat(z, arguments.numRows, arguments.numColums);
return 1;
}

File diff suppressed because it is too large Load Diff

Binary file not shown.

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,30 @@
# Builds hello.elf from hello.cpp, the newlib glue and the intrinsics, linked
# against the toolchain's libc and libstdc++, and derives an Intel HEX image
# and a disassembly dump from it.
COMP = ../../../../riscv-gnu-toolchain/drops/bin/riscv32-unknown-elf-gcc
# CC_FLAGS = -march=rv32im -mabi=ilp32 -O0 -Wl,-Bstatic,-T,linker.ld -ffreestanding -nostdlib
CC_FLAGS = -march=rv32im -mabi=ilp32 -O0 -Wl,-Bstatic,-T,../vortex_link.ld
DMP = ../../../../riscv-gnu-toolchain/drops/bin/riscv32-unknown-elf-objdump
CPY = ../../../../riscv-gnu-toolchain/drops/bin/riscv32-unknown-elf-objcopy
NEWLIB = ../../newlib/newlib.c ../../newlib/newlib_notimp.c ../../newlib/newlib.s
# The empty groups below contribute nothing to the ELF rule in this build
VX_STR =
VX_INT = ../../intrinsics/vx_intrinsics.s
VX_IO =
VX_API =
VX_TEST =
VX_FIO =
LIBS = ../../../../riscv-gnu-toolchain/drops/riscv32-unknown-elf/lib/libc.a ../../../../riscv-gnu-toolchain/drops/riscv32-unknown-elf/lib/libstdc++.a -static-libgcc -lgcc
# Base name of the program: $(VX_MAIN).cpp -> $(VX_MAIN).elf/.hex/.dump
VX_MAIN = hello
# HEX, DUMP and ELF are target names, not files, so every invocation relinks.
all: HEX DUMP ELF
# Disassemble the whole image
DUMP: ELF
$(DMP) -D $(VX_MAIN).elf > $(VX_MAIN).dump
# Convert the image to Intel HEX
HEX: ELF
$(CPY) -O ihex $(VX_MAIN).elf $(VX_MAIN).hex
# Compile and link everything in one step
ELF:
$(COMP) $(CC_FLAGS) $(VX_STR) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN).cpp $(LIBS) -Iinclude -o $(VX_MAIN).elf

View File

@@ -0,0 +1,18 @@
// Minimal type with a user-provided constructor, used to check that
// constructors of global objects run: a freshly constructed hello has a == 55.
struct hello {
    int a;

    hello() : a(55) {}
};
// Global instance: its constructor (which sets a to 55) has to run during
// start-up, before main.
hello nameing;
// Overwrites the constructor's value with 20 and returns. The local `b` is
// declared but never used.
int main()
{
nameing.a = 20;
int b;
}

File diff suppressed because it is too large Load Diff

Binary file not shown.

View File

@@ -0,0 +1,262 @@
:0200000480007A
:1000000097110000938101A613850100138601005A
:100010003306A64093050000EF00806F171500001F
:100020001305059563080500171500001305858E57
:10003000EF00D013EF000064032501009305410099
:1000400013060000EF00C0546F00805FB707008107
:1000500003C7475863140704130101FF23248100D9
:1000600013840700B70700002326110093870700B9
:10007000638A0700370500811305054B97000000D0
:10008000E7000000930710008320C1002302F4580A
:1000900003248100130101016780000067800000D4
:1000A000B707000093870700638E0700B70500813C
:1000B00037050081938585581305054B170300000C
:1000C0006700000067800000130101FD2326810204
:1000D00013040103232EA4FC232CB4FC232AC4FC08
:1000E0008327C4FD83A707002326F4FE832744FD4E
:1000F0002322F4FE832744FE03C707008327C4FEA0
:100100002380E7008327C4FE93871700032744FE5C
:10011000034717002380E7008327C4FE9387270047
:10012000032744FE034727002380E7008327C4FEFC
:1001300093873700032744FE034737002380E700F7
:100140008327C4FE938747002326F4FE232404FE5E
:100150006F004003832784FE032784FDB307F70065
:1001600003C707008327C4FE2380E7008327C4FE5C
:10017000938717002326F4FE832784FE93871700B6
:100180002324F4FE032784FE832744FDE344F7FC85
:100190008327C4FD0327C4FE23A0E700130000004B
:1001A0000324C1021301010367800000130101FD54
:1001B0002326810213040103232EA4FC232CB4FC68
:1001C0008327C4FD83A707002326F4FE832744FE6C
:1001D0002320F4FE8327C4FE03C70700832704FE01
:1001E0002380E700832704FE938717000327C4FEBC
:1001F000034717002380E700832704FE9387270027
:100200000327C4FE034727002380E700832704FE5B
:10021000938737000327C4FE034737002380E70096
:100220008327C4FE938747002326F4FE232404FE7D
:100230006F004003832784FE032784FDB307F70084
:100240000327C4FE034707002380E7008327C4FE7B
:10025000938717002326F4FE832784FE93871700D5
:100260002324F4FE032784FE832744FEE344F7FCA3
:100270008327C4FD0327C4FE23A0E700130000006A
:100280000324C1021301010367800000130101FF71
:100290002326810013040101130000000324C10080
:1002A0001301010167800000130101FE232E81006C
:1002B000130401022326A4FE2324B4FE832784FE14
:1002C0003727000023A2E7009307000013850700EB
:1002D0000324C1011301010267800000130101FE24
:1002E000232E8100130401022326A4FE930710008D
:1002F000138507000324C101130101026780000078
:10030000130101FF232681001304010113000000E3
:100310000324C1001301010167800000130101FFE4
:100320002326810013040101130000000324C100EF
:100330001301010167800000130101FD23268102E2
:1003400013040103232EA4FC232CB4FC232AC4FC95
:10035000B70701002324F4FE232604FE6F00C00229
:100360008327C4FE032784FDB307F70083C7070074
:1003700013870700832784FE23A0E7008327C4FE9A
:10038000938717002326F4FE0327C4FE832744FD2A
:10039000E348F7FC832744FD138507000324C102CB
:1003A0001301010367800000130101FD2326810270
:1003B00013040103232EA4FC8327C4FD63D8070084
:1003C0008327C4FDB307F040232EF4FC0327C4FDAC
:1003D000B73700009387078063D8E700B73700007E
:1003E00093870780232EF4FCB707008183A78756E5
:1003F0002326F4FEB707008103A787568327C4FD91
:100400003307F700B707008123A4E7568327C4FE0C
:10041000138507000324C102130101036780000054
:10042000130101FF2326110023248100130401017D
:1004300013050000EF00C00E130000008320C10070
:10044000032481001301010167800000130101FFF3
:100450002326810013040101130000000324C100BE
:100460001301010167800000130101FF23268100B1
:1004700013040101130000000324C1001301010152
:1004800067800000130101FF23261100232481004F
:1004900013040101EF00400B9307050013850700CB
:1004A0008320C10003248100130101016780000043
:1004B000130101FF23268100130401011300000032
:1004C0000324C1001301010167800000130101FF33
:1004D0002326810013040101B707008183A7075A6F
:1004E00093861700370700812320D75A138507000A
:1004F0000324C1001301010167800000130101FF03
:100500002326810013040101130000000324C1000D
:1005100013010101678000006B10B50067800000C7
:100520006B000500678000006B40B500678000002D
:100530006B200500678000006B30000067800000C2
:100540007325100267800000732500026780000099
:10055000130540006B000500F32610029396F60089
:10056000732600029315A6001316260037F1FF6FBD
:100570003301B1403301D1403301C100F3261002F1
:1005800063860600130500006B000500678000000D
:10059000130101FF2326810013040101B707008125
:1005A0001307400123A2E75A9307000013850700B1
:1005B0000324C1001301010167800000130101FE43
:1005C000232E1100232C8100130401022326A4FEF4
:1005D0002324B4FE0327C4FE930710006310F70220
:1005E000032784FEB70701009387F7FF6318F7001E
:1005F000B70700811385475AEF00C043130000007E
:100600008320C101032481011301010267800000DE
:10061000130101FF2326110023248100130401018B
:10062000B70701009385F7FF13051000EFF01FF9DE
:100630008320C100032481001301010167800000B1
:10064000130101FF930500002324810023261100DC
:1006500013040500EF008019B707008103A5070404
:100660008327C50363840700E780070013050400A0
:10067000EFF01FDB130101FF232481002320210160
:10068000370400003709000093070400130909002C
:100690003309F940232611002322910013592940E0
:1006A000630009021304040093040000832704007C
:1006B0009384140013044400E7800700E31899FEB4
:1006C00037040000370900009307040013090900EC
:1006D0003309F94013592940630009021304040047
:1006E000930400008327040093841400130444003F
:1006F000E7800700E31899FE8320C10003248100EE
:1007000083244100032901001301010167800000D7
:100710001303F00013070500637EC3029377F7000D
:100720006390070A63920508937606FF1376F60036
:10073000B386E6002320B7002322B7002324B700A6
:100740002326B70013070701E366D7FE63140600EC
:1007500067800000B306C34093962600970200000E
:10076000B38656006780C6002307B700A306B7000C
:100770002306B700A305B7002305B700A304B700FD
:100780002304B700A303B7002303B700A302B700F5
:100790002302B700A301B7002301B700A300B700ED
:1007A0002300B7006780000093F5F50F939685004E
:1007B000B3E5D50093960501B3E5D5006FF0DFF6FC
:1007C0009396270097020000B3865600938200009C
:1007D000E78006FA93800200938707FF3307F7400C
:1007E0003306F600E378C3F66FF0DFF3130101FD83
:1007F000B7070081232C410103AA0704232021030A
:100800002326110203298A14232481022322910220
:10081000232E3101232A510123286101232671014E
:100820002324810163000904130B0500938B050049
:10083000930A10009309F0FF832449001384F4FF06
:100840006342040293942400B304990063840B046C
:1008500083A74410638077051304F4FF9384C4FFD7
:10086000E31634FF8320C102032481028324410262
:10087000032901028329C101032A8101832A41013D
:10088000032B0101832BC100032C81001301010301
:10089000678000008327490083A644009387F7FF01
:1008A000638E870423A20400E38806FA832789184D
:1008B00033978A00032C4900B377F700639207024D
:1008C000E78006000327490083278A146314870101
:1008D000E304F9F8E38807F8138907006FF0DFF500
:1008E0008327C91883A544083377F700631C0700E2
:1008F00013050B00E78006006FF0DFFC2322890060
:100900006FF09FFA13850500E78006006FF09FFBEC
:10091000130101FF23248100B70700003704000002
:100920001304040093870700B387874023229100B4
:100930002326110093D42740638004029387C7FFC6
:1009400033848700832704009384F4FF1304C4FFD7
:10095000E7800700E39804FE8320C10003248100A0
:100960008324410013010101678000009305050005
:100970009306000013060000130500006F004000FE
:10098000B707008103A7070483278714638C070434
:1009900003A747001308F001634EE8061318270069
:1009A00063060502338307012324C30883A887183D
:1009B000130610003316E600B3E8C80023A4171985
:1009C0002324D310930620006304D50213071700D5
:1009D00023A2E700B387070123A4B7001305000093
:1009E000678000009307C7142324F7146FF05FFAA1
:1009F00083A6C7181307170023A2E70033E6C60033
:100A000023A6C718B387070123A4B7001305000066
:100A1000678000001305F0FF67800000B707000043
:100A20009387070063860700138501EB6FF01FF4BF
:100A300067800000130101FE232E810013040102D0
:100A40002326A4FE8327C4FE1307700323A0E70018
:100A5000130000000324C10113010102678000009C
:02000004810079
:10000000300000003100000032000000330000002A
:10001000340000003500000036000000370000000A
:10002000380000003900000061000000620000009C
:0E003000630000006400000065000000660030
:0400400088000081B3
:100048000000008104000081080000810C0000818C
:100058001000008114000081180000811C0000813C
:100068002000008124000081280000812C000081EC
:100078003000008134000081380000813C0000819C
:100088000000000074030081DC0300814404008147
:100098000000000000000000000000000000000058
:1000A8000000000000000000000000000000000048
:1000B8000000000000000000000000000000000038
:1000C8000000000000000000000000000000000028
:1000D8000000000000000000000000000000000018
:1000E8000000000000000000000000000000000008
:1000F80000000000000000000000000000000000F8
:1001080000000000000000000000000000000000E7
:1001180000000000000000000000000000000000D7
:1001280000000000000000000100000000000000C6
:100138000E33CDAB34126DE6ECDE05000B0000008B
:1001480000000000000000000000000000000000A7
:100158000000000000000000000000000000000097
:100168000000000000000000000000000000000087
:100178000000000000000000000000000000000077
:100188000000000000000000000000000000000067
:100198000000000000000000000000000000000057
:1001A8000000000000000000000000000000000047
:1001B8000000000000000000000000000000000037
:1001C8000000000000000000000000000000000027
:1001D8000000000000000000000000000000000017
:1001E8000000000000000000000000000000000007
:1001F80000000000000000000000000000000000F7
:1002080000000000000000000000000000000000E6
:1002180000000000000000000000000000000000D6
:1002280000000000000000000000000000000000C6
:1002380000000000000000000000000000000000B6
:1002480000000000000000000000000000000000A6
:100258000000000000000000000000000000000096
:100268000000000000000000000000000000000086
:100278000000000000000000000000000000000076
:100288000000000000000000000000000000000066
:100298000000000000000000000000000000000056
:1002A8000000000000000000000000000000000046
:1002B8000000000000000000000000000000000036
:1002C8000000000000000000000000000000000026
:1002D8000000000000000000000000000000000016
:1002E8000000000000000000000000000000000006
:1002F80000000000000000000000000000000000F6
:1003080000000000000000000000000000000000E5
:1003180000000000000000000000000000000000D5
:1003280000000000000000000000000000000000C5
:1003380000000000000000000000000000000000B5
:1003480000000000000000000000000000000000A5
:100358000000000000000000000000000000000095
:100368000000000000000000000000000000000085
:100378000000000000000000000000000000000075
:100388000000000000000000000000000000000065
:100398000000000000000000000000000000000055
:1003A8000000000000000000000000000000000045
:1003B8000000000000000000000000000000000035
:1003C8000000000000000000000000000000000025
:1003D8000000000000000000000000000000000015
:1003E8000000000000000000000000000000000005
:1003F80000000000000000000000000000000000F5
:1004080000000000000000000000000000000000E4
:1004180000000000000000000000000000000000D4
:1004280000000000000000000000000000000000C4
:1004380000000000000000000000000000000000B4
:1004480000000000000000000000000000000000A4
:100458000000000000000000000000000000000094
:100468000000000000000000000000000000000084
:100478000000000000000000000000000000000074
:100488000000000000000000000000000000000064
:100498000000000000000000000000000000000054
:0804A80000000000000000004C
:1004B0001000000000000000017A5200017C0101E0
:1004C0001B0D02002000000018000000680500FF5E
:1004D0002C00000000440E20448801440C08005801
:1004E000C80C0220440E0000200000003C00000068
:1004F000A00000FF2C00000000440E1044880144BE
:100500000C080058C80C0210440E00002400000023
:1005100060000000A80000FF5400000000440E200E
:100520004881018802440C080078C144C80C0220AC
:10053000440E00002400000088000000D40000FFEA
:100540003000000000440E104881018802440C086D
:100550000054C144C80C0210440E0000000000000A
:1005600000000000000000700000001000000020EB
:04057000880000817E
:040574004C000080B7
:08057800A000008010060080C5
:040580001C0A0080D1
:040000058000000077
:00000001FF

View File

@@ -0,0 +1,34 @@
# Builds the vx_pocl_main bare-metal image for the RV32IM target.
#   ELF  - compiles and links the runtime sources, $(VX_MAIN).c and the static libs
#   HEX  - converts the ELF to Intel HEX with objcopy
#   DUMP - writes an objdump -D disassembly of the ELF

# Root of the RISC-V GNU toolchain install; every tool and library path derives from it.
TOOLCHAIN = ../../../../riscv-gnu-toolchain/drops

COMP = $(TOOLCHAIN)/bin/riscv32-unknown-elf-g++
# CC_FLAGS = -march=rv32im -mabi=ilp32 -O0 -Wl,-Bstatic,-T,linker.ld -ffreestanding -nostdlib
# CC_FLAGS = -march=rv32im -mabi=ilp32 -O0 -Wl,-Bstatic,-T,linker.ld -ffreestanding -nostartfiles
# Freestanding static link against ../vortex_link.ld (the duplicated -nostartfiles was dropped).
CC_FLAGS = -ffreestanding -O0 -Wl,--gc-sections -nostartfiles -nostdlib -nodefaultlibs -Wl,-Bstatic,-T,../vortex_link.ld -march=rv32im -mabi=ilp32

DMP = $(TOOLCHAIN)/bin/riscv32-unknown-elf-objdump
CPY = $(TOOLCHAIN)/bin/riscv32-unknown-elf-objcopy

# VX_STR = ../../startup/vx_start.S
NEWLIB = ../../newlib/newlib.c
VX_STR = ../../startup/vx_start.S
VX_INT = ../../intrinsics/vx_intrinsics.s
VX_IO = ../../io/vx_io.s ../../io/vx_io.c
VX_API = ../../vx_api/vx_api.c
VX_TEST = ../../tests/tests.c
VX_FIO = ../../fileio/fileio.s

# libvecadd.a is wrapped in --whole-archive so all of its objects are linked in.
LIBS = -Wl,--whole-archive ./libs/libvecadd.a -Wl,--no-whole-archive ./libs/libOpenCL.a $(TOOLCHAIN)/riscv32-unknown-elf/lib/libc.a $(TOOLCHAIN)/riscv32-unknown-elf/lib/libstdc++.a -static-libgcc -lgcc

VX_MAIN = vx_pocl_main

# None of these targets names a real file; declare them phony so a stray file
# called ELF/HEX/DUMP/all can never suppress the build.
.PHONY: all DUMP HEX ELF

all: HEX DUMP ELF

DUMP: ELF
	$(DMP) -D $(VX_MAIN).elf > $(VX_MAIN).dump

HEX: ELF
	$(CPY) -O ihex $(VX_MAIN).elf $(VX_MAIN).hex

ELF:
	$(COMP) $(CC_FLAGS) $(VX_STR) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf

View File

@@ -0,0 +1,46 @@
#=============================================================================
# CMake build system files
#
# Copyright (c) 2014 pocl developers
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
#=============================================================================
#cl.hpp: $(top_srcdir)/include/CL/cl.hpp.in $(top_srcdir)/tools/patches/khronos_cl.hpp.patch
# Install the OpenCL headers listed below into POCL_INSTALL_OPENCL_HEADER_DIR,
# but only when INSTALL_OPENCL_HEADERS is enabled.
if(INSTALL_OPENCL_HEADERS)
install(FILES cl.h
cl.hpp
cl2.hpp
cl_d3d10.h
cl_d3d11.h
cl_dx9_media_sharing.h
cl_dx9_media_sharing_intel.h
cl_ext.h
cl_egl.h
cl_ext_intel.h
cl_gl.h
cl_gl_ext.h
cl_platform.h
cl_va_api_media_sharing_intel.h
cl_version.h
opencl.h
DESTINATION "${POCL_INSTALL_OPENCL_HEADER_DIR}")
endif()

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,131 @@
/**********************************************************************************
* Copyright (c) 2008-2015 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
#ifndef __OPENCL_CL_D3D10_H
#define __OPENCL_CL_D3D10_H
#include <d3d10.h>
#include <CL/cl.h>
#include <CL/cl_platform.h>
#ifdef __cplusplus
extern "C" {
#endif
/******************************************************************************
* cl_khr_d3d10_sharing */
#define cl_khr_d3d10_sharing 1
typedef cl_uint cl_d3d10_device_source_khr;
typedef cl_uint cl_d3d10_device_set_khr;
/******************************************************************************/
/* Error Codes */
#define CL_INVALID_D3D10_DEVICE_KHR -1002
#define CL_INVALID_D3D10_RESOURCE_KHR -1003
#define CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR -1004
#define CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR -1005
/* cl_d3d10_device_source_khr */
#define CL_D3D10_DEVICE_KHR 0x4010
#define CL_D3D10_DXGI_ADAPTER_KHR 0x4011
/* cl_d3d10_device_set_khr */
#define CL_PREFERRED_DEVICES_FOR_D3D10_KHR 0x4012
#define CL_ALL_DEVICES_FOR_D3D10_KHR 0x4013
/* cl_context_info */
#define CL_CONTEXT_D3D10_DEVICE_KHR 0x4014
#define CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR 0x402C
/* cl_mem_info */
#define CL_MEM_D3D10_RESOURCE_KHR 0x4015
/* cl_image_info */
#define CL_IMAGE_D3D10_SUBRESOURCE_KHR 0x4016
/* cl_command_type */
#define CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR 0x4017
#define CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR 0x4018
/******************************************************************************/
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D10KHR_fn)(
cl_platform_id platform,
cl_d3d10_device_source_khr d3d_device_source,
void * d3d_object,
cl_d3d10_device_set_khr d3d_device_set,
cl_uint num_entries,
cl_device_id * devices,
cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10BufferKHR_fn)(
cl_context context,
cl_mem_flags flags,
ID3D10Buffer * resource,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture2DKHR_fn)(
cl_context context,
cl_mem_flags flags,
ID3D10Texture2D * resource,
UINT subresource,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture3DKHR_fn)(
cl_context context,
cl_mem_flags flags,
ID3D10Texture3D * resource,
UINT subresource,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D10ObjectsKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D10ObjectsKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_D3D10_H */

View File

@@ -0,0 +1,131 @@
/**********************************************************************************
* Copyright (c) 2008-2015 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
#ifndef __OPENCL_CL_D3D11_H
#define __OPENCL_CL_D3D11_H
#include <d3d11.h>
#include <CL/cl.h>
#include <CL/cl_platform.h>
#ifdef __cplusplus
extern "C" {
#endif
/******************************************************************************
* cl_khr_d3d11_sharing */
#define cl_khr_d3d11_sharing 1
typedef cl_uint cl_d3d11_device_source_khr;
typedef cl_uint cl_d3d11_device_set_khr;
/******************************************************************************/
/* Error Codes */
#define CL_INVALID_D3D11_DEVICE_KHR -1006
#define CL_INVALID_D3D11_RESOURCE_KHR -1007
#define CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR -1008
#define CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR -1009
/* cl_d3d11_device_source_khr */
#define CL_D3D11_DEVICE_KHR 0x4019
#define CL_D3D11_DXGI_ADAPTER_KHR 0x401A
/* cl_d3d11_device_set_khr */
#define CL_PREFERRED_DEVICES_FOR_D3D11_KHR 0x401B
#define CL_ALL_DEVICES_FOR_D3D11_KHR 0x401C
/* cl_context_info */
#define CL_CONTEXT_D3D11_DEVICE_KHR 0x401D
#define CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR 0x402D
/* cl_mem_info */
#define CL_MEM_D3D11_RESOURCE_KHR 0x401E
/* cl_image_info */
#define CL_IMAGE_D3D11_SUBRESOURCE_KHR 0x401F
/* cl_command_type */
#define CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR 0x4020
#define CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR 0x4021
/******************************************************************************/
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D11KHR_fn)(
cl_platform_id platform,
cl_d3d11_device_source_khr d3d_device_source,
void * d3d_object,
cl_d3d11_device_set_khr d3d_device_set,
cl_uint num_entries,
cl_device_id * devices,
cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11BufferKHR_fn)(
cl_context context,
cl_mem_flags flags,
ID3D11Buffer * resource,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture2DKHR_fn)(
cl_context context,
cl_mem_flags flags,
ID3D11Texture2D * resource,
UINT subresource,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture3DKHR_fn)(
cl_context context,
cl_mem_flags flags,
ID3D11Texture3D * resource,
UINT subresource,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D11ObjectsKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D11ObjectsKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_D3D11_H */

View File

@@ -0,0 +1,132 @@
/**********************************************************************************
* Copyright (c) 2008-2015 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
#ifndef __OPENCL_CL_DX9_MEDIA_SHARING_H
#define __OPENCL_CL_DX9_MEDIA_SHARING_H
#include <CL/cl.h>
#include <CL/cl_platform.h>
#ifdef __cplusplus
extern "C" {
#endif
/******************************************************************************/
/* cl_khr_dx9_media_sharing */
#define cl_khr_dx9_media_sharing 1
typedef cl_uint cl_dx9_media_adapter_type_khr;
typedef cl_uint cl_dx9_media_adapter_set_khr;
#if defined(_WIN32)
#include <d3d9.h>
/* Pairs a Direct3D 9 surface pointer with a HANDLE for that surface.
 * Only declared when _WIN32 is defined, because it depends on <d3d9.h>.
 * NOTE(review): presumably this is what the void *surface_info argument of
 * clCreateFromDX9MediaSurfaceKHR points to - confirm against the
 * cl_khr_dx9_media_sharing specification. */
typedef struct _cl_dx9_surface_info_khr
{
IDirect3DSurface9 *resource;
HANDLE shared_handle;
} cl_dx9_surface_info_khr;
#endif
/******************************************************************************/
/* Error Codes */
#define CL_INVALID_DX9_MEDIA_ADAPTER_KHR -1010
#define CL_INVALID_DX9_MEDIA_SURFACE_KHR -1011
#define CL_DX9_MEDIA_SURFACE_ALREADY_ACQUIRED_KHR -1012
#define CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR -1013
/* cl_dx9_media_adapter_type_khr */
#define CL_ADAPTER_D3D9_KHR 0x2020
#define CL_ADAPTER_D3D9EX_KHR 0x2021
#define CL_ADAPTER_DXVA_KHR 0x2022
/* cl_dx9_media_adapter_set_khr */
#define CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2023
#define CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2024
/* cl_context_info */
#define CL_CONTEXT_ADAPTER_D3D9_KHR 0x2025
#define CL_CONTEXT_ADAPTER_D3D9EX_KHR 0x2026
#define CL_CONTEXT_ADAPTER_DXVA_KHR 0x2027
/* cl_mem_info */
#define CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR 0x2028
#define CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR 0x2029
/* cl_image_info */
#define CL_IMAGE_DX9_MEDIA_PLANE_KHR 0x202A
/* cl_command_type */
#define CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR 0x202B
#define CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR 0x202C
/******************************************************************************/
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromDX9MediaAdapterKHR_fn)(
cl_platform_id platform,
cl_uint num_media_adapters,
cl_dx9_media_adapter_type_khr * media_adapter_type,
void * media_adapters,
cl_dx9_media_adapter_set_khr media_adapter_set,
cl_uint num_entries,
cl_device_id * devices,
cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceKHR_fn)(
cl_context context,
cl_mem_flags flags,
cl_dx9_media_adapter_type_khr adapter_type,
void * surface_info,
cl_uint plane,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireDX9MediaSurfacesKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseDX9MediaSurfacesKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_DX9_MEDIA_SHARING_H */

View File

@@ -0,0 +1,182 @@
/**********************************************************************************
* Copyright (c) 2008-2019 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/
/*****************************************************************************\
Copyright (c) 2013-2019 Intel Corporation All Rights Reserved.
THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE
MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
File Name: cl_dx9_media_sharing_intel.h
Abstract:
Notes:
\*****************************************************************************/
#ifndef __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H
#define __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H
#include <CL/cl.h>
#include <CL/cl_platform.h>
#include <d3d9.h>
#include <dxvahd.h>
#include <wtypes.h>
#include <d3d9types.h>
#ifdef __cplusplus
extern "C" {
#endif
/***************************************
* cl_intel_dx9_media_sharing extension *
****************************************/
#define cl_intel_dx9_media_sharing 1
typedef cl_uint cl_dx9_device_source_intel;
typedef cl_uint cl_dx9_device_set_intel;
/* error codes */
#define CL_INVALID_DX9_DEVICE_INTEL -1010
#define CL_INVALID_DX9_RESOURCE_INTEL -1011
#define CL_DX9_RESOURCE_ALREADY_ACQUIRED_INTEL -1012
#define CL_DX9_RESOURCE_NOT_ACQUIRED_INTEL -1013
/* cl_dx9_device_source_intel */
#define CL_D3D9_DEVICE_INTEL 0x4022
#define CL_D3D9EX_DEVICE_INTEL 0x4070
#define CL_DXVA_DEVICE_INTEL 0x4071
/* cl_dx9_device_set_intel */
#define CL_PREFERRED_DEVICES_FOR_DX9_INTEL 0x4024
#define CL_ALL_DEVICES_FOR_DX9_INTEL 0x4025
/* cl_context_info */
#define CL_CONTEXT_D3D9_DEVICE_INTEL 0x4026
#define CL_CONTEXT_D3D9EX_DEVICE_INTEL 0x4072
#define CL_CONTEXT_DXVA_DEVICE_INTEL 0x4073
/* cl_mem_info */
#define CL_MEM_DX9_RESOURCE_INTEL 0x4027
#define CL_MEM_DX9_SHARED_HANDLE_INTEL 0x4074
/* cl_image_info */
#define CL_IMAGE_DX9_PLANE_INTEL 0x4075
/* cl_command_type */
#define CL_COMMAND_ACQUIRE_DX9_OBJECTS_INTEL 0x402A
#define CL_COMMAND_RELEASE_DX9_OBJECTS_INTEL 0x402B
/******************************************************************************/
extern CL_API_ENTRY cl_int CL_API_CALL
clGetDeviceIDsFromDX9INTEL(
cl_platform_id platform,
cl_dx9_device_source_intel dx9_device_source,
void* dx9_object,
cl_dx9_device_set_intel dx9_device_set,
cl_uint num_entries,
cl_device_id* devices,
cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_1;
typedef CL_API_ENTRY cl_int (CL_API_CALL* clGetDeviceIDsFromDX9INTEL_fn)(
cl_platform_id platform,
cl_dx9_device_source_intel dx9_device_source,
void* dx9_object,
cl_dx9_device_set_intel dx9_device_set,
cl_uint num_entries,
cl_device_id* devices,
cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_1;
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromDX9MediaSurfaceINTEL(
cl_context context,
cl_mem_flags flags,
IDirect3DSurface9* resource,
HANDLE sharedHandle,
UINT plane,
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_1;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceINTEL_fn)(
cl_context context,
cl_mem_flags flags,
IDirect3DSurface9* resource,
HANDLE sharedHandle,
UINT plane,
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_1;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueAcquireDX9ObjectsINTEL(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem* mem_objects,
cl_uint num_events_in_wait_list,
const cl_event* event_wait_list,
cl_event* event) CL_EXT_SUFFIX__VERSION_1_1;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireDX9ObjectsINTEL_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem* mem_objects,
cl_uint num_events_in_wait_list,
const cl_event* event_wait_list,
cl_event* event) CL_EXT_SUFFIX__VERSION_1_1;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReleaseDX9ObjectsINTEL(
cl_command_queue command_queue,
cl_uint num_objects,
cl_mem* mem_objects,
cl_uint num_events_in_wait_list,
const cl_event* event_wait_list,
cl_event* event) CL_EXT_SUFFIX__VERSION_1_1;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseDX9ObjectsINTEL_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
cl_mem* mem_objects,
cl_uint num_events_in_wait_list,
const cl_event* event_wait_list,
cl_event* event) CL_EXT_SUFFIX__VERSION_1_1;
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H */

View File

@@ -0,0 +1,132 @@
/*******************************************************************************
* Copyright (c) 2008-2019 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
******************************************************************************/
#ifndef __OPENCL_CL_EGL_H
#define __OPENCL_CL_EGL_H
#include <CL/cl.h>
#ifdef __cplusplus
extern "C" {
#endif
/* Command type for events created with clEnqueueAcquireEGLObjectsKHR */
#define CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR 0x202F
#define CL_COMMAND_ACQUIRE_EGL_OBJECTS_KHR 0x202D
#define CL_COMMAND_RELEASE_EGL_OBJECTS_KHR 0x202E
/* Error type for clCreateFromEGLImageKHR */
#define CL_INVALID_EGL_OBJECT_KHR -1093
#define CL_EGL_RESOURCE_NOT_ACQUIRED_KHR -1092
/* CLeglImageKHR is an opaque handle to an EGLImage */
typedef void* CLeglImageKHR;
/* CLeglDisplayKHR is an opaque handle to an EGLDisplay */
typedef void* CLeglDisplayKHR;
/* CLeglSyncKHR is an opaque handle to an EGLSync object */
typedef void* CLeglSyncKHR;
/* properties passed to clCreateFromEGLImageKHR */
typedef intptr_t cl_egl_image_properties_khr;
#define cl_khr_egl_image 1
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromEGLImageKHR(cl_context context,
CLeglDisplayKHR egldisplay,
CLeglImageKHR eglimage,
cl_mem_flags flags,
const cl_egl_image_properties_khr * properties,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromEGLImageKHR_fn)(
cl_context context,
CLeglDisplayKHR egldisplay,
CLeglImageKHR eglimage,
cl_mem_flags flags,
const cl_egl_image_properties_khr * properties,
cl_int * errcode_ret);
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueAcquireEGLObjectsKHR(cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireEGLObjectsKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event);
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReleaseEGLObjectsKHR(cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseEGLObjectsKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event);
#define cl_khr_egl_event 1
extern CL_API_ENTRY cl_event CL_API_CALL
clCreateEventFromEGLSyncKHR(cl_context context,
CLeglSyncKHR sync,
CLeglDisplayKHR display,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_event (CL_API_CALL *clCreateEventFromEGLSyncKHR_fn)(
cl_context context,
CLeglSyncKHR sync,
CLeglDisplayKHR display,
cl_int * errcode_ret);
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_EGL_H */

View File

@@ -0,0 +1,762 @@
/*******************************************************************************
* Copyright (c) 2008-2019 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
******************************************************************************/
/* cl_ext.h contains OpenCL extensions which don't have external */
/* (OpenGL, D3D) dependencies. */
#ifndef __CL_EXT_H
#define __CL_EXT_H
#ifdef __cplusplus
extern "C" {
#endif
#include <CL/cl.h>
/* cl_khr_fp64 extension - no extension #define since it has no functions */
/* CL_DEVICE_DOUBLE_FP_CONFIG is defined in cl.h when CL_TARGET_OPENCL_VERSION >= 120 */
#if CL_TARGET_OPENCL_VERSION <= 110
#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032
#endif
/* cl_khr_fp16 extension - no extension #define since it has no functions */
#define CL_DEVICE_HALF_FP_CONFIG 0x1033
/* Memory object destruction
*
* Apple extension used to manage externally allocated buffers backing cl_mem objects created with CL_MEM_USE_HOST_PTR
*
* Registers a user callback function that will be called when the memory object is deleted and its resources
* freed. Each call to clSetMemObjectDestructorAPPLE registers the specified user callback function on a callback
* stack associated with memobj. The registered user callback functions are called in the reverse order in
* which they were registered. The user callback functions are called and then the memory object is deleted
* and its resources freed. This provides a mechanism for the application (and libraries) using memobj to be
* notified when the memory referenced by host_ptr, specified when the memory object is created and used as
* the storage bits for the memory object, can be reused or freed.
*
* The application may not call CL APIs with the cl_mem object passed to pfn_notify.
*
* Please check for the "cl_APPLE_SetMemObjectDestructor" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
* before using.
*/
#define cl_APPLE_SetMemObjectDestructor 1
cl_int CL_API_ENTRY clSetMemObjectDestructorAPPLE( cl_mem memobj,
void (* pfn_notify)(cl_mem memobj, void * user_data),
void * user_data) CL_EXT_SUFFIX__VERSION_1_0;
/* Context Logging Functions
*
* The next three convenience functions are intended to be used as the pfn_notify parameter to clCreateContext().
* Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
* before using.
*
* clLogMessagesToSystemLogAPPLE forwards all log messages to the Apple System Logger
*/
#define cl_APPLE_ContextLoggingFunctions 1
extern void CL_API_ENTRY clLogMessagesToSystemLogAPPLE( const char * errstr,
const void * private_info,
size_t cb,
void * user_data) CL_EXT_SUFFIX__VERSION_1_0;
/* clLogMessagesToStdoutAPPLE sends all log messages to the file descriptor stdout */
extern void CL_API_ENTRY clLogMessagesToStdoutAPPLE( const char * errstr,
const void * private_info,
size_t cb,
void * user_data) CL_EXT_SUFFIX__VERSION_1_0;
/* clLogMessagesToStderrAPPLE sends all log messages to the file descriptor stderr */
extern void CL_API_ENTRY clLogMessagesToStderrAPPLE( const char * errstr,
const void * private_info,
size_t cb,
void * user_data) CL_EXT_SUFFIX__VERSION_1_0;
/************************
* cl_khr_icd extension *
************************/
#define cl_khr_icd 1
/* cl_platform_info */
#define CL_PLATFORM_ICD_SUFFIX_KHR 0x0920
/* Additional Error Codes */
#define CL_PLATFORM_NOT_FOUND_KHR -1001
/* Entry point of the cl_khr_icd extension.
 * NOTE(review): presumably mirrors clGetPlatformIDs (up to num_entries handles
 * written to platforms, total count written to num_platforms) -- confirm
 * against the cl_khr_icd specification. */
extern CL_API_ENTRY cl_int CL_API_CALL
clIcdGetPlatformIDsKHR(cl_uint num_entries,
cl_platform_id * platforms,
cl_uint * num_platforms);
/* Function-pointer type with the same parameter list and return type as
 * clIcdGetPlatformIDsKHR. */
typedef CL_API_ENTRY cl_int
(CL_API_CALL *clIcdGetPlatformIDsKHR_fn)(cl_uint num_entries,
cl_platform_id * platforms,
cl_uint * num_platforms);
/*******************************
* cl_khr_il_program extension *
*******************************/
#define cl_khr_il_program 1
/* New property to clGetDeviceInfo for retrieving supported intermediate
* languages
*/
#define CL_DEVICE_IL_VERSION_KHR 0x105B
/* New property to clGetProgramInfo for retrieving the IL of a
* program
*/
#define CL_PROGRAM_IL_KHR 0x1169
/* Entry point of the cl_khr_il_program extension. Takes a context, a pointer
 * to the intermediate-language input (il) together with its length, and an
 * error-code output pointer; returns a cl_program.
 * (length is presumably a byte count -- confirm against the specification.)
 * NOTE(review): this prototype carries no CL_EXT_SUFFIX__VERSION_* marker,
 * while the matching _fn typedef below is tagged CL_EXT_SUFFIX__VERSION_1_2 --
 * confirm whether the asymmetry is intentional. */
extern CL_API_ENTRY cl_program CL_API_CALL
clCreateProgramWithILKHR(cl_context context,
const void * il,
size_t length,
cl_int * errcode_ret);
/* Function-pointer type with the same parameter list and return type as
 * clCreateProgramWithILKHR. */
typedef CL_API_ENTRY cl_program
(CL_API_CALL *clCreateProgramWithILKHR_fn)(cl_context context,
const void * il,
size_t length,
cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
/* Extension: cl_khr_image2d_from_buffer
*
* This extension allows a 2D image to be created from a cl_mem buffer without
* a copy. The type associated with a 2D image created from a buffer in an
* OpenCL program is image2d_t. Both the sampler and sampler-less read_image
* built-in functions are supported for 2D images and 2D images created from
* a buffer. Similarly, the write_image built-ins are also supported for 2D
* images created from a buffer.
*
* When the 2D image from buffer is created, the client must specify the
* width, height, image format (i.e. channel order and channel data type)
* and optionally the row pitch.
*
* The pitch specified must be a multiple of
* CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR pixels.
* The base address of the buffer must be aligned to
* CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR pixels.
*/
#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR 0x104A
#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR 0x104B
/**************************************
* cl_khr_initialize_memory extension *
**************************************/
#define CL_CONTEXT_MEMORY_INITIALIZE_KHR 0x2030
/**************************************
* cl_khr_terminate_context extension *
**************************************/
#define CL_DEVICE_TERMINATE_CAPABILITY_KHR 0x2031
#define CL_CONTEXT_TERMINATE_KHR 0x2032
#define cl_khr_terminate_context 1
extern CL_API_ENTRY cl_int CL_API_CALL
clTerminateContextKHR(cl_context context) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int
(CL_API_CALL *clTerminateContextKHR_fn)(cl_context context) CL_EXT_SUFFIX__VERSION_1_2;
/*
* Extension: cl_khr_spir
*
* This extension adds support to create an OpenCL program object from a
* Standard Portable Intermediate Representation (SPIR) instance
*/
#define CL_DEVICE_SPIR_VERSIONS 0x40E0
#define CL_PROGRAM_BINARY_TYPE_INTERMEDIATE 0x40E1
/*****************************************
* cl_khr_create_command_queue extension *
*****************************************/
#define cl_khr_create_command_queue 1
typedef cl_bitfield cl_queue_properties_khr;
extern CL_API_ENTRY cl_command_queue CL_API_CALL
clCreateCommandQueueWithPropertiesKHR(cl_context context,
cl_device_id device,
const cl_queue_properties_khr* properties,
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_command_queue
(CL_API_CALL *clCreateCommandQueueWithPropertiesKHR_fn)(cl_context context,
cl_device_id device,
const cl_queue_properties_khr* properties,
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
/******************************************
* cl_nv_device_attribute_query extension *
******************************************/
/* cl_nv_device_attribute_query extension - no extension #define since it has no functions */
#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000
#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001
#define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002
#define CL_DEVICE_WARP_SIZE_NV 0x4003
#define CL_DEVICE_GPU_OVERLAP_NV 0x4004
#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005
#define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006
/*********************************
* cl_amd_device_attribute_query *
*********************************/
#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD 0x4036
/*********************************
* cl_arm_printf extension
*********************************/
#define CL_PRINTF_CALLBACK_ARM 0x40B0
#define CL_PRINTF_BUFFERSIZE_ARM 0x40B1
/***********************************
* cl_ext_device_fission extension
***********************************/
#define cl_ext_device_fission 1
extern CL_API_ENTRY cl_int CL_API_CALL
clReleaseDeviceEXT(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1;
typedef CL_API_ENTRY cl_int
(CL_API_CALL *clReleaseDeviceEXT_fn)(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1;
extern CL_API_ENTRY cl_int CL_API_CALL
clRetainDeviceEXT(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1;
typedef CL_API_ENTRY cl_int
(CL_API_CALL *clRetainDeviceEXT_fn)(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1;
typedef cl_ulong cl_device_partition_property_ext;
extern CL_API_ENTRY cl_int CL_API_CALL
clCreateSubDevicesEXT(cl_device_id in_device,
const cl_device_partition_property_ext * properties,
cl_uint num_entries,
cl_device_id * out_devices,
cl_uint * num_devices) CL_EXT_SUFFIX__VERSION_1_1;
typedef CL_API_ENTRY cl_int
(CL_API_CALL * clCreateSubDevicesEXT_fn)(cl_device_id in_device,
const cl_device_partition_property_ext * properties,
cl_uint num_entries,
cl_device_id * out_devices,
cl_uint * num_devices) CL_EXT_SUFFIX__VERSION_1_1;
/* cl_device_partition_property_ext */
#define CL_DEVICE_PARTITION_EQUALLY_EXT 0x4050
#define CL_DEVICE_PARTITION_BY_COUNTS_EXT 0x4051
#define CL_DEVICE_PARTITION_BY_NAMES_EXT 0x4052
#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT 0x4053
/* clGetDeviceInfo selectors */
#define CL_DEVICE_PARENT_DEVICE_EXT 0x4054
#define CL_DEVICE_PARTITION_TYPES_EXT 0x4055
#define CL_DEVICE_AFFINITY_DOMAINS_EXT 0x4056
#define CL_DEVICE_REFERENCE_COUNT_EXT 0x4057
#define CL_DEVICE_PARTITION_STYLE_EXT 0x4058
/* error codes */
#define CL_DEVICE_PARTITION_FAILED_EXT -1057
#define CL_INVALID_PARTITION_COUNT_EXT -1058
#define CL_INVALID_PARTITION_NAME_EXT -1059
/* CL_AFFINITY_DOMAINs */
#define CL_AFFINITY_DOMAIN_L1_CACHE_EXT 0x1
#define CL_AFFINITY_DOMAIN_L2_CACHE_EXT 0x2
#define CL_AFFINITY_DOMAIN_L3_CACHE_EXT 0x3
#define CL_AFFINITY_DOMAIN_L4_CACHE_EXT 0x4
#define CL_AFFINITY_DOMAIN_NUMA_EXT 0x10
#define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT 0x100
/* cl_device_partition_property_ext list terminators.
 *
 * The generic property list and the BY_COUNTS list both end with 0.
 * The BY_NAMES terminator is written "(type) 0 - 1": the cast binds to the 0
 * only, so the expression is (cl_device_partition_property_ext)0 minus 1.
 * cl_device_partition_property_ext is a typedef of cl_ulong; assuming cl_ulong
 * is an unsigned 64-bit type (it is defined in cl_platform.h -- TODO confirm),
 * the subtraction wraps to the all-ones value instead of a signed -1.
 */
#define CL_PROPERTIES_LIST_END_EXT ((cl_device_partition_property_ext) 0)
#define CL_PARTITION_BY_COUNTS_LIST_END_EXT ((cl_device_partition_property_ext) 0)
#define CL_PARTITION_BY_NAMES_LIST_END_EXT ((cl_device_partition_property_ext) 0 - 1)
/***********************************
* cl_ext_migrate_memobject extension definitions
***********************************/
#define cl_ext_migrate_memobject 1
typedef cl_bitfield cl_mem_migration_flags_ext;
#define CL_MIGRATE_MEM_OBJECT_HOST_EXT 0x1
#define CL_COMMAND_MIGRATE_MEM_OBJECT_EXT 0x4040
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueMigrateMemObjectEXT(cl_command_queue command_queue,
cl_uint num_mem_objects,
const cl_mem * mem_objects,
cl_mem_migration_flags_ext flags,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event);
typedef CL_API_ENTRY cl_int
(CL_API_CALL *clEnqueueMigrateMemObjectEXT_fn)(cl_command_queue command_queue,
cl_uint num_mem_objects,
const cl_mem * mem_objects,
cl_mem_migration_flags_ext flags,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event);
/*********************************
* cl_qcom_ext_host_ptr extension
*********************************/
#define cl_qcom_ext_host_ptr 1
#define CL_MEM_EXT_HOST_PTR_QCOM (1 << 29)
#define CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM 0x40A0
#define CL_DEVICE_PAGE_SIZE_QCOM 0x40A1
#define CL_IMAGE_ROW_ALIGNMENT_QCOM 0x40A2
#define CL_IMAGE_SLICE_ALIGNMENT_QCOM 0x40A3
#define CL_MEM_HOST_UNCACHED_QCOM 0x40A4
#define CL_MEM_HOST_WRITEBACK_QCOM 0x40A5
#define CL_MEM_HOST_WRITETHROUGH_QCOM 0x40A6
#define CL_MEM_HOST_WRITE_COMBINING_QCOM 0x40A7
typedef cl_uint cl_image_pitch_info_qcom;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetDeviceImageInfoQCOM(cl_device_id device,
size_t image_width,
size_t image_height,
const cl_image_format *image_format,
cl_image_pitch_info_qcom param_name,
size_t param_value_size,
void *param_value,
size_t *param_value_size_ret);
/* Common header describing an external host memory allocation
 * (cl_qcom_ext_host_ptr). The layered descriptors declared later in this
 * header, cl_mem_ion_host_ptr and cl_mem_android_native_buffer_host_ptr, embed
 * this struct as their first member. */
typedef struct _cl_mem_ext_host_ptr
{
/* Type of external memory allocation. */
/* Legal values will be defined in layered extensions. */
cl_uint allocation_type;
/* Host cache policy for this external memory allocation. */
/* Presumably one of the CL_MEM_HOST_*_QCOM values defined in this header -- confirm. */
cl_uint host_cache_policy;
} cl_mem_ext_host_ptr;
/*******************************************
* cl_qcom_ext_host_ptr_iocoherent extension
********************************************/
/* Cache policy specifying io-coherence */
#define CL_MEM_HOST_IOCOHERENT_QCOM 0x40A9
/*********************************
* cl_qcom_ion_host_ptr extension
*********************************/
#define CL_MEM_ION_HOST_PTR_QCOM 0x40A8
/* Descriptor for an ION-backed external allocation (cl_qcom_ion_host_ptr):
 * the common cl_mem_ext_host_ptr header followed by the ION file descriptor
 * and the host pointer to the ION allocated memory. */
typedef struct _cl_mem_ion_host_ptr
{
/* Type of external memory allocation. */
/* Must be CL_MEM_ION_HOST_PTR_QCOM for ION allocations. */
cl_mem_ext_host_ptr ext_host_ptr;
/* ION file descriptor */
int ion_filedesc;
/* Host pointer to the ION allocated memory */
void* ion_hostptr;
} cl_mem_ion_host_ptr;
/*********************************
* cl_qcom_android_native_buffer_host_ptr extension
*********************************/
#define CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM 0x40C6
/* Descriptor for an Android-native-buffer external allocation
 * (cl_qcom_android_native_buffer_host_ptr): the common cl_mem_ext_host_ptr
 * header followed by a pointer to the native buffer. */
typedef struct _cl_mem_android_native_buffer_host_ptr
{
/* Type of external memory allocation. */
/* Must be CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM for Android native buffers. */
cl_mem_ext_host_ptr ext_host_ptr;
/* Virtual pointer to the android native buffer */
void* anb_ptr;
} cl_mem_android_native_buffer_host_ptr;
/******************************************
* cl_img_yuv_image extension *
******************************************/
/* Image formats used in clCreateImage */
#define CL_NV21_IMG 0x40D0
#define CL_YV12_IMG 0x40D1
/******************************************
* cl_img_cached_allocations extension *
******************************************/
/* Flag values used by clCreateBuffer */
#define CL_MEM_USE_UNCACHED_CPU_MEMORY_IMG (1 << 26)
#define CL_MEM_USE_CACHED_CPU_MEMORY_IMG (1 << 27)
/******************************************
* cl_img_use_gralloc_ptr extension *
******************************************/
#define cl_img_use_gralloc_ptr 1
/* Flag values used by clCreateBuffer */
#define CL_MEM_USE_GRALLOC_PTR_IMG (1 << 28)
/* To be used by clGetEventInfo: */
#define CL_COMMAND_ACQUIRE_GRALLOC_OBJECTS_IMG 0x40D2
#define CL_COMMAND_RELEASE_GRALLOC_OBJECTS_IMG 0x40D3
/* Error code from clEnqueueReleaseGrallocObjectsIMG */
#define CL_GRALLOC_RESOURCE_NOT_ACQUIRED_IMG 0x40D4
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueAcquireGrallocObjectsIMG(cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReleaseGrallocObjectsIMG(cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_EXT_SUFFIX__VERSION_1_2;
/*********************************
* cl_khr_subgroups extension
*********************************/
#define cl_khr_subgroups 1
#if !defined(CL_VERSION_2_1)
/* For OpenCL 2.1 and newer, cl_kernel_sub_group_info is declared in cl.h.
In hindsight, there should have been a khr suffix on this type for
the extension, but keeping it un-suffixed to maintain backwards
compatibility. */
typedef cl_uint cl_kernel_sub_group_info;
#endif
/* cl_kernel_sub_group_info */
#define CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR 0x2033
#define CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR 0x2034
extern CL_API_ENTRY cl_int CL_API_CALL
clGetKernelSubGroupInfoKHR(cl_kernel in_kernel,
cl_device_id in_device,
cl_kernel_sub_group_info param_name,
size_t input_value_size,
const void * input_value,
size_t param_value_size,
void * param_value,
size_t * param_value_size_ret) CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED;
typedef CL_API_ENTRY cl_int
(CL_API_CALL * clGetKernelSubGroupInfoKHR_fn)(cl_kernel in_kernel,
cl_device_id in_device,
cl_kernel_sub_group_info param_name,
size_t input_value_size,
const void * input_value,
size_t param_value_size,
void * param_value,
size_t * param_value_size_ret) CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED;
/*********************************
* cl_khr_mipmap_image extension
*********************************/
/* cl_sampler_properties */
#define CL_SAMPLER_MIP_FILTER_MODE_KHR 0x1155
#define CL_SAMPLER_LOD_MIN_KHR 0x1156
#define CL_SAMPLER_LOD_MAX_KHR 0x1157
/*********************************
* cl_khr_priority_hints extension
*********************************/
/* This extension define is for backwards compatibility.
It shouldn't be required since this extension has no new functions. */
#define cl_khr_priority_hints 1
typedef cl_uint cl_queue_priority_khr;
/* cl_command_queue_properties */
#define CL_QUEUE_PRIORITY_KHR 0x1096
/* cl_queue_priority_khr */
#define CL_QUEUE_PRIORITY_HIGH_KHR (1<<0)
#define CL_QUEUE_PRIORITY_MED_KHR (1<<1)
#define CL_QUEUE_PRIORITY_LOW_KHR (1<<2)
/*********************************
* cl_khr_throttle_hints extension
*********************************/
/* This extension define is for backwards compatibility.
It shouldn't be required since this extension has no new functions. */
#define cl_khr_throttle_hints 1
typedef cl_uint cl_queue_throttle_khr;
/* cl_command_queue_properties */
#define CL_QUEUE_THROTTLE_KHR 0x1097
/* cl_queue_throttle_khr */
#define CL_QUEUE_THROTTLE_HIGH_KHR (1<<0)
#define CL_QUEUE_THROTTLE_MED_KHR (1<<1)
#define CL_QUEUE_THROTTLE_LOW_KHR (1<<2)
/*********************************
* cl_khr_subgroup_named_barrier
*********************************/
/* This extension define is for backwards compatibility.
It shouldn't be required since this extension has no new functions. */
#define cl_khr_subgroup_named_barrier 1
/* cl_device_info */
#define CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR 0x2035
/**********************************
* cl_arm_import_memory extension *
**********************************/
#define cl_arm_import_memory 1
typedef intptr_t cl_import_properties_arm;
/* Default and valid property names for cl_arm_import_memory */
#define CL_IMPORT_TYPE_ARM 0x40B2
/* Host process memory type default value for CL_IMPORT_TYPE_ARM property */
#define CL_IMPORT_TYPE_HOST_ARM 0x40B3
/* DMA BUF memory type value for CL_IMPORT_TYPE_ARM property */
#define CL_IMPORT_TYPE_DMA_BUF_ARM 0x40B4
/* Protected DMA BUF memory type value for CL_IMPORT_TYPE_ARM property */
#define CL_IMPORT_TYPE_PROTECTED_ARM 0x40B5
/* This extension adds a new function that allows for direct memory import into
* OpenCL via the clImportMemoryARM function.
*
* Memory imported through this interface will be mapped into the device's page
* tables directly, providing zero copy access. It will never fall back to copy
* operations and aliased buffers.
*
* Types of memory supported for import are specified as additional extension
* strings.
*
* This extension produces cl_mem allocations which are compatible with all other
* users of cl_mem in the standard API.
*
* This extension maps pages with the same properties as the normal buffer creation
* function clCreateBuffer.
*/
extern CL_API_ENTRY cl_mem CL_API_CALL
clImportMemoryARM( cl_context context,
cl_mem_flags flags,
const cl_import_properties_arm *properties,
void *memory,
size_t size,
cl_int *errcode_ret) CL_EXT_SUFFIX__VERSION_1_0;
/******************************************
* cl_arm_shared_virtual_memory extension *
******************************************/
#define cl_arm_shared_virtual_memory 1
/* Used by clGetDeviceInfo */
#define CL_DEVICE_SVM_CAPABILITIES_ARM 0x40B6
/* Used by clGetMemObjectInfo */
#define CL_MEM_USES_SVM_POINTER_ARM 0x40B7
/* Used by clSetKernelExecInfoARM: */
#define CL_KERNEL_EXEC_INFO_SVM_PTRS_ARM 0x40B8
#define CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_ARM 0x40B9
/* To be used by clGetEventInfo: */
#define CL_COMMAND_SVM_FREE_ARM 0x40BA
#define CL_COMMAND_SVM_MEMCPY_ARM 0x40BB
#define CL_COMMAND_SVM_MEMFILL_ARM 0x40BC
#define CL_COMMAND_SVM_MAP_ARM 0x40BD
#define CL_COMMAND_SVM_UNMAP_ARM 0x40BE
/* Flag values returned by clGetDeviceInfo with CL_DEVICE_SVM_CAPABILITIES_ARM as the param_name. */
#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_ARM (1 << 0)
#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER_ARM (1 << 1)
#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM_ARM (1 << 2)
#define CL_DEVICE_SVM_ATOMICS_ARM (1 << 3)
/* Flag values used by clSVMAllocARM: */
#define CL_MEM_SVM_FINE_GRAIN_BUFFER_ARM (1 << 10)
#define CL_MEM_SVM_ATOMICS_ARM (1 << 11)
typedef cl_bitfield cl_svm_mem_flags_arm;
typedef cl_uint cl_kernel_exec_info_arm;
typedef cl_bitfield cl_device_svm_capabilities_arm;
extern CL_API_ENTRY void * CL_API_CALL
clSVMAllocARM(cl_context context,
cl_svm_mem_flags_arm flags,
size_t size,
cl_uint alignment) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY void CL_API_CALL
clSVMFreeARM(cl_context context,
void * svm_pointer) CL_EXT_SUFFIX__VERSION_1_2;
/* cl_arm_shared_virtual_memory entry point taking a command queue, an array of
 * num_svm_pointers SVM pointers, a callback with its user_data, and the usual
 * event wait list / output event triple.
 * pfn_free_func is a CL_CALLBACK function pointer whose parameters are a
 * command queue, a pointer count, a pointer array and a user_data pointer.
 * NOTE(review): presumably the ARM-suffixed counterpart of clEnqueueSVMFree --
 * confirm semantics against the extension specification. */
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueSVMFreeARM(cl_command_queue command_queue,
cl_uint num_svm_pointers,
void * svm_pointers[],
void (CL_CALLBACK * pfn_free_func)(cl_command_queue queue,
cl_uint num_svm_pointers,
void * svm_pointers[],
void * user_data),
void * user_data,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueSVMMemcpyARM(cl_command_queue command_queue,
cl_bool blocking_copy,
void * dst_ptr,
const void * src_ptr,
size_t size,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueSVMMemFillARM(cl_command_queue command_queue,
void * svm_ptr,
const void * pattern,
size_t pattern_size,
size_t size,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueSVMMapARM(cl_command_queue command_queue,
cl_bool blocking_map,
cl_map_flags flags,
void * svm_ptr,
size_t size,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueSVMUnmapARM(cl_command_queue command_queue,
void * svm_ptr,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clSetKernelArgSVMPointerARM(cl_kernel kernel,
cl_uint arg_index,
const void * arg_value) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clSetKernelExecInfoARM(cl_kernel kernel,
cl_kernel_exec_info_arm param_name,
size_t param_value_size,
const void * param_value) CL_EXT_SUFFIX__VERSION_1_2;
/********************************
* cl_arm_get_core_id extension *
********************************/
#ifdef CL_VERSION_1_2
#define cl_arm_get_core_id 1
/* Device info property for bitfield of cores present */
#define CL_DEVICE_COMPUTE_UNITS_BITFIELD_ARM 0x40BF
#endif /* CL_VERSION_1_2 */
/*********************************
* cl_arm_job_slot_selection
*********************************/
#define cl_arm_job_slot_selection 1
/* cl_device_info */
#define CL_DEVICE_JOB_SLOTS_ARM 0x41E0
/* cl_command_queue_properties */
#define CL_QUEUE_JOB_SLOT_ARM 0x41E1
#ifdef __cplusplus
}
#endif
#endif /* __CL_EXT_H */

View File

@@ -0,0 +1,423 @@
/*******************************************************************************
* Copyright (c) 2008-2019 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
******************************************************************************/
/*****************************************************************************\
Copyright (c) 2013-2019 Intel Corporation All Rights Reserved.
THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE
MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
File Name: cl_ext_intel.h
Abstract:
Notes:
\*****************************************************************************/
#ifndef __CL_EXT_INTEL_H
#define __CL_EXT_INTEL_H
#include <CL/cl.h>
#include <CL/cl_platform.h>
#ifdef __cplusplus
extern "C" {
#endif
/***************************************
* cl_intel_thread_local_exec extension *
****************************************/
#define cl_intel_thread_local_exec 1
/* Bit 31 flag. The 1 is cast to cl_bitfield before shifting, so the shift is
 * evaluated in cl_bitfield rather than in int. */
#define CL_QUEUE_THREAD_LOCAL_EXEC_ENABLE_INTEL (((cl_bitfield)1) << 31)
/***********************************************
* cl_intel_device_partition_by_names extension *
************************************************/
#define cl_intel_device_partition_by_names 1
/* Same numeric value (0x4052) as CL_DEVICE_PARTITION_BY_NAMES_EXT in cl_ext.h. */
#define CL_DEVICE_PARTITION_BY_NAMES_INTEL 0x4052
/* List terminator, a plain int -1.
 * NOTE(review): unlike CL_PARTITION_BY_NAMES_LIST_END_EXT in cl_ext.h this is
 * neither cast to a property type nor parenthesized -- confirm callers convert
 * it to the property type they store it in. */
#define CL_PARTITION_BY_NAMES_LIST_END_INTEL -1
/************************************************
* cl_intel_accelerator extension *
* cl_intel_motion_estimation extension *
* cl_intel_advanced_motion_estimation extension *
*************************************************/
#define cl_intel_accelerator 1
#define cl_intel_motion_estimation 1
#define cl_intel_advanced_motion_estimation 1
typedef struct _cl_accelerator_intel* cl_accelerator_intel;
typedef cl_uint cl_accelerator_type_intel;
typedef cl_uint cl_accelerator_info_intel;
/* Descriptor made of four cl_uint settings for motion estimation.
 * NOTE(review): presumably passed as the descriptor argument of
 * clCreateAcceleratorINTEL for CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL --
 * confirm against the cl_intel_motion_estimation specification. */
typedef struct _cl_motion_estimation_desc_intel {
cl_uint mb_block_type;    /* presumably a CL_ME_MB_TYPE_*_INTEL value -- confirm */
cl_uint subpixel_mode;    /* presumably a CL_ME_SUBPIXEL_MODE_*_INTEL value -- confirm */
cl_uint sad_adjust_mode;  /* presumably a CL_ME_SAD_ADJUST_MODE_*_INTEL value -- confirm */
cl_uint search_path_type; /* presumably a CL_ME_SEARCH_PATH_RADIUS_*_INTEL value -- confirm */
} cl_motion_estimation_desc_intel;
/* error codes */
#define CL_INVALID_ACCELERATOR_INTEL -1094
#define CL_INVALID_ACCELERATOR_TYPE_INTEL -1095
#define CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL -1096
#define CL_ACCELERATOR_TYPE_NOT_SUPPORTED_INTEL -1097
/* cl_accelerator_type_intel */
#define CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL 0x0
/* cl_accelerator_info_intel */
#define CL_ACCELERATOR_DESCRIPTOR_INTEL 0x4090
#define CL_ACCELERATOR_REFERENCE_COUNT_INTEL 0x4091
#define CL_ACCELERATOR_CONTEXT_INTEL 0x4092
#define CL_ACCELERATOR_TYPE_INTEL 0x4093
/* cl_motion_estimation_desc_intel flags */
#define CL_ME_MB_TYPE_16x16_INTEL 0x0
#define CL_ME_MB_TYPE_8x8_INTEL 0x1
#define CL_ME_MB_TYPE_4x4_INTEL 0x2
#define CL_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0
#define CL_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1
#define CL_ME_SUBPIXEL_MODE_QPEL_INTEL 0x2
#define CL_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0
#define CL_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x1
#define CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL 0x0
#define CL_ME_SEARCH_PATH_RADIUS_4_4_INTEL 0x1
#define CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL 0x5
#define CL_ME_SKIP_BLOCK_TYPE_16x16_INTEL 0x0
#define CL_ME_CHROMA_INTRA_PREDICT_ENABLED_INTEL 0x1
#define CL_ME_LUMA_INTRA_PREDICT_ENABLED_INTEL 0x2
#define CL_ME_SKIP_BLOCK_TYPE_8x8_INTEL 0x4
#define CL_ME_FORWARD_INPUT_MODE_INTEL 0x1
#define CL_ME_BACKWARD_INPUT_MODE_INTEL 0x2
#define CL_ME_BIDIRECTION_INPUT_MODE_INTEL 0x3
#define CL_ME_BIDIR_WEIGHT_QUARTER_INTEL 16
#define CL_ME_BIDIR_WEIGHT_THIRD_INTEL 21
#define CL_ME_BIDIR_WEIGHT_HALF_INTEL 32
#define CL_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 43
#define CL_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 48
#define CL_ME_COST_PENALTY_NONE_INTEL 0x0
#define CL_ME_COST_PENALTY_LOW_INTEL 0x1
#define CL_ME_COST_PENALTY_NORMAL_INTEL 0x2
#define CL_ME_COST_PENALTY_HIGH_INTEL 0x3
#define CL_ME_COST_PRECISION_QPEL_INTEL 0x0
#define CL_ME_COST_PRECISION_HPEL_INTEL 0x1
#define CL_ME_COST_PRECISION_PEL_INTEL 0x2
#define CL_ME_COST_PRECISION_DPEL_INTEL 0x3
/* Luma predictor mode values.
 * NOTE(review): CL_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL and
 * CL_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL are both 0x4, so the two cannot be
 * distinguished by value -- confirm against the extension specification
 * before relying on either one. */
#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0
#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
#define CL_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2
#define CL_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3
#define CL_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4
#define CL_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4
#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5
#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6
#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7
#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8
#define CL_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0
#define CL_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
#define CL_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2
#define CL_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3
/* cl_device_info */
#define CL_DEVICE_ME_VERSION_INTEL 0x407E
#define CL_ME_VERSION_LEGACY_INTEL 0x0
#define CL_ME_VERSION_ADVANCED_VER_1_INTEL 0x1
#define CL_ME_VERSION_ADVANCED_VER_2_INTEL 0x2
extern CL_API_ENTRY cl_accelerator_intel CL_API_CALL
clCreateAcceleratorINTEL(
cl_context context,
cl_accelerator_type_intel accelerator_type,
size_t descriptor_size,
const void* descriptor,
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_accelerator_intel (CL_API_CALL *clCreateAcceleratorINTEL_fn)(
cl_context context,
cl_accelerator_type_intel accelerator_type,
size_t descriptor_size,
const void* descriptor,
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetAcceleratorInfoINTEL(
cl_accelerator_intel accelerator,
cl_accelerator_info_intel param_name,
size_t param_value_size,
void* param_value,
size_t* param_value_size_ret) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetAcceleratorInfoINTEL_fn)(
cl_accelerator_intel accelerator,
cl_accelerator_info_intel param_name,
size_t param_value_size,
void* param_value,
size_t* param_value_size_ret) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clRetainAcceleratorINTEL(
cl_accelerator_intel accelerator) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clRetainAcceleratorINTEL_fn)(
cl_accelerator_intel accelerator) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clReleaseAcceleratorINTEL(
cl_accelerator_intel accelerator) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clReleaseAcceleratorINTEL_fn)(
cl_accelerator_intel accelerator) CL_EXT_SUFFIX__VERSION_1_2;
/******************************************
* cl_intel_simultaneous_sharing extension *
*******************************************/
#define cl_intel_simultaneous_sharing 1
#define CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL 0x4104
#define CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL 0x4105
/***********************************
* cl_intel_egl_image_yuv extension *
************************************/
#define cl_intel_egl_image_yuv 1
#define CL_EGL_YUV_PLANE_INTEL 0x4107
/********************************
* cl_intel_packed_yuv extension *
*********************************/
#define cl_intel_packed_yuv 1
#define CL_YUYV_INTEL 0x4076
#define CL_UYVY_INTEL 0x4077
#define CL_YVYU_INTEL 0x4078
#define CL_VYUY_INTEL 0x4079
/********************************************
* cl_intel_required_subgroup_size extension *
*********************************************/
#define cl_intel_required_subgroup_size 1
#define CL_DEVICE_SUB_GROUP_SIZES_INTEL 0x4108
#define CL_KERNEL_SPILL_MEM_SIZE_INTEL 0x4109
#define CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL 0x410A
/****************************************
* cl_intel_driver_diagnostics extension *
*****************************************/
#define cl_intel_driver_diagnostics 1
typedef cl_uint cl_diagnostics_verbose_level;
#define CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL 0x4106
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL ( 0xff )
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL ( 1 )
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL ( 1 << 1 )
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL ( 1 << 2 )
/********************************
* cl_intel_planar_yuv extension *
*********************************/
#define CL_NV12_INTEL 0x410E
#define CL_MEM_NO_ACCESS_INTEL ( 1 << 24 )
#define CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL ( 1 << 25 )
#define CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL 0x417E
#define CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL 0x417F
/*******************************************************
* cl_intel_device_side_avc_motion_estimation extension *
********************************************************/
#define CL_DEVICE_AVC_ME_VERSION_INTEL 0x410B
#define CL_DEVICE_AVC_ME_SUPPORTS_TEXTURE_SAMPLER_USE_INTEL 0x410C
#define CL_DEVICE_AVC_ME_SUPPORTS_PREEMPTION_INTEL 0x410D
/* Device-side AVC motion-estimation version values (presumably the values
 * reported for CL_DEVICE_AVC_ME_VERSION_INTEL -- confirm against the
 * extension specification).
 * The replacement lists must be bare constants: a trailing ';' becomes part of
 * the macro and breaks any use inside an expression, e.g.
 * "if (v == CL_AVC_ME_VERSION_1_INTEL)". Block comments are used so the
 * header stays consistent with the rest of the file and valid as C89. */
#define CL_AVC_ME_VERSION_0_INTEL 0x0 /* No support. */
#define CL_AVC_ME_VERSION_1_INTEL 0x1 /* First supported version. */
#define CL_AVC_ME_MAJOR_16x16_INTEL 0x0
#define CL_AVC_ME_MAJOR_16x8_INTEL 0x1
#define CL_AVC_ME_MAJOR_8x16_INTEL 0x2
#define CL_AVC_ME_MAJOR_8x8_INTEL 0x3
#define CL_AVC_ME_MINOR_8x8_INTEL 0x0
#define CL_AVC_ME_MINOR_8x4_INTEL 0x1
#define CL_AVC_ME_MINOR_4x8_INTEL 0x2
#define CL_AVC_ME_MINOR_4x4_INTEL 0x3
#define CL_AVC_ME_MAJOR_FORWARD_INTEL 0x0
#define CL_AVC_ME_MAJOR_BACKWARD_INTEL 0x1
#define CL_AVC_ME_MAJOR_BIDIRECTIONAL_INTEL 0x2
#define CL_AVC_ME_PARTITION_MASK_ALL_INTEL 0x0
#define CL_AVC_ME_PARTITION_MASK_16x16_INTEL 0x7E
#define CL_AVC_ME_PARTITION_MASK_16x8_INTEL 0x7D
#define CL_AVC_ME_PARTITION_MASK_8x16_INTEL 0x7B
#define CL_AVC_ME_PARTITION_MASK_8x8_INTEL 0x77
#define CL_AVC_ME_PARTITION_MASK_8x4_INTEL 0x6F
#define CL_AVC_ME_PARTITION_MASK_4x8_INTEL 0x5F
#define CL_AVC_ME_PARTITION_MASK_4x4_INTEL 0x3F
#define CL_AVC_ME_SEARCH_WINDOW_EXHAUSTIVE_INTEL 0x0
#define CL_AVC_ME_SEARCH_WINDOW_SMALL_INTEL 0x1
#define CL_AVC_ME_SEARCH_WINDOW_TINY_INTEL 0x2
#define CL_AVC_ME_SEARCH_WINDOW_EXTRA_TINY_INTEL 0x3
#define CL_AVC_ME_SEARCH_WINDOW_DIAMOND_INTEL 0x4
#define CL_AVC_ME_SEARCH_WINDOW_LARGE_DIAMOND_INTEL 0x5
#define CL_AVC_ME_SEARCH_WINDOW_RESERVED0_INTEL 0x6
#define CL_AVC_ME_SEARCH_WINDOW_RESERVED1_INTEL 0x7
#define CL_AVC_ME_SEARCH_WINDOW_CUSTOM_INTEL 0x8
#define CL_AVC_ME_SEARCH_WINDOW_16x12_RADIUS_INTEL 0x9
#define CL_AVC_ME_SEARCH_WINDOW_4x4_RADIUS_INTEL 0x2
#define CL_AVC_ME_SEARCH_WINDOW_2x2_RADIUS_INTEL 0xa
/* SAD adjustment modes. */
#define CL_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0
#define CL_AVC_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x2
/* Sub-pixel search modes. */
#define CL_AVC_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0
#define CL_AVC_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1
#define CL_AVC_ME_SUBPIXEL_MODE_QPEL_INTEL 0x3
/* Cost precision selectors. */
#define CL_AVC_ME_COST_PRECISION_QPEL_INTEL 0x0
#define CL_AVC_ME_COST_PRECISION_HPEL_INTEL 0x1
#define CL_AVC_ME_COST_PRECISION_PEL_INTEL 0x2
#define CL_AVC_ME_COST_PRECISION_DPEL_INTEL 0x3
/* Bidirectional weights. The values equal the named fraction of 64, rounded
   (0x10 = 16 = 64/4, 0x15 = 21 ~ 64/3, 0x20 = 32 = 64/2, ...). */
#define CL_AVC_ME_BIDIR_WEIGHT_QUARTER_INTEL 0x10
#define CL_AVC_ME_BIDIR_WEIGHT_THIRD_INTEL 0x15
#define CL_AVC_ME_BIDIR_WEIGHT_HALF_INTEL 0x20
#define CL_AVC_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 0x2B
#define CL_AVC_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 0x30
/* Border-reached indicators.
   NOTE(review): RIGHT/TOP/BOTTOM are single-bit values but LEFT is 0x0, so it
   cannot be detected with a bitwise AND -- confirm against the spec. */
#define CL_AVC_ME_BORDER_REACHED_LEFT_INTEL 0x0
#define CL_AVC_ME_BORDER_REACHED_RIGHT_INTEL 0x2
#define CL_AVC_ME_BORDER_REACHED_TOP_INTEL 0x4
#define CL_AVC_ME_BORDER_REACHED_BOTTOM_INTEL 0x8
/* Skip-block partition type. */
#define CL_AVC_ME_SKIP_BLOCK_PARTITION_16x16_INTEL 0x0
#define CL_AVC_ME_SKIP_BLOCK_PARTITION_8x8_INTEL 0x4000
/* Skip-block direction enables, stored in bits 24..31. For the 8x8 case each
   of the four blocks owns a two-bit field (forward = 0x1, backward = 0x2) at
   shifts 24, 26, 28 and 30; 0x55/0xAA/0xFF << 24 set the forward/backward/both
   bits of all four blocks at once.
   NOTE(review): ( 0xAA << 24 ), ( 0xFF << 24 ) and ( 0x2 << 30 ) shift a
   signed int constant into bit 31; check whether the consumers need these as
   unsigned constants. */
#define CL_AVC_ME_SKIP_BLOCK_16x16_FORWARD_ENABLE_INTEL ( 0x1 << 24 )
#define CL_AVC_ME_SKIP_BLOCK_16x16_BACKWARD_ENABLE_INTEL ( 0x2 << 24 )
#define CL_AVC_ME_SKIP_BLOCK_16x16_DUAL_ENABLE_INTEL ( 0x3 << 24 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_FORWARD_ENABLE_INTEL ( 0x55 << 24 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_BACKWARD_ENABLE_INTEL ( 0xAA << 24 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_DUAL_ENABLE_INTEL ( 0xFF << 24 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_0_FORWARD_ENABLE_INTEL ( 0x1 << 24 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_0_BACKWARD_ENABLE_INTEL ( 0x2 << 24 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_1_FORWARD_ENABLE_INTEL ( 0x1 << 26 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_1_BACKWARD_ENABLE_INTEL ( 0x2 << 26 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_2_FORWARD_ENABLE_INTEL ( 0x1 << 28 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_2_BACKWARD_ENABLE_INTEL ( 0x2 << 28 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_3_FORWARD_ENABLE_INTEL ( 0x1 << 30 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_3_BACKWARD_ENABLE_INTEL ( 0x2 << 30 )
/* Block-based skip granularity. */
#define CL_AVC_ME_BLOCK_BASED_SKIP_4x4_INTEL 0x00
#define CL_AVC_ME_BLOCK_BASED_SKIP_8x8_INTEL 0x80
/* Intra partition shapes. */
#define CL_AVC_ME_INTRA_16x16_INTEL 0x0
#define CL_AVC_ME_INTRA_8x8_INTEL 0x1
#define CL_AVC_ME_INTRA_4x4_INTEL 0x2
/* Intra luma partition masks: 0x7 with exactly one bit cleared
   (bit 0 for 16x16, bit 1 for 8x8, bit 2 for 4x4). */
#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_16x16_INTEL 0x6
#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_8x8_INTEL 0x5
#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_4x4_INTEL 0x3
/* Intra neighbor availability masks. */
#define CL_AVC_ME_INTRA_NEIGHBOR_LEFT_MASK_ENABLE_INTEL 0x60
#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_MASK_ENABLE_INTEL 0x10
#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL 0x8
#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL 0x4
/* Luma predictor modes.
   NOTE(review): DIAGONAL_DOWN_RIGHT and PLANE are both 0x4 -- presumably they
   apply to different partition sizes; confirm against the extension spec. */
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8
/* Chroma predictor modes. */
#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0
#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2
#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3
/* Frame direction selectors; DUAL (0x3) is FORWARD | BACKWARD. */
#define CL_AVC_ME_FRAME_FORWARD_INTEL 0x1
#define CL_AVC_ME_FRAME_BACKWARD_INTEL 0x2
#define CL_AVC_ME_FRAME_DUAL_INTEL 0x3
/* Slice types. */
#define CL_AVC_ME_SLICE_TYPE_PRED_INTEL 0x0
#define CL_AVC_ME_SLICE_TYPE_BPRED_INTEL 0x1
#define CL_AVC_ME_SLICE_TYPE_INTRA_INTEL 0x2
/* Interlaced scan field selectors. */
#define CL_AVC_ME_INTERLACED_SCAN_TOP_FIELD_INTEL 0x0
#define CL_AVC_ME_INTERLACED_SCAN_BOTTOM_FIELD_INTEL 0x1
#ifdef __cplusplus
}
#endif
#endif /* __CL_EXT_INTEL_H */

View File

@@ -0,0 +1,171 @@
/**********************************************************************************
* Copyright (c) 2008-2019 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/
#ifndef __OPENCL_CL_GL_H
#define __OPENCL_CL_GL_H
#include <CL/cl.h>
#ifdef __cplusplus
extern "C" {
#endif
/* Enumeration-style types for GL interop; all three are plain cl_uint. */
typedef cl_uint cl_gl_object_type;
typedef cl_uint cl_gl_texture_info;
typedef cl_uint cl_gl_platform_info;
/* Handle type: a pointer to the opaque struct __GLsync. */
typedef struct __GLsync *cl_GLsync;
/* cl_gl_object_type values. The 0x2000 - 0x200F range is fully taken; the
   CL_VERSION_1_2 additions below continue at 0x2010 and 0x2011. */
#define CL_GL_OBJECT_BUFFER 0x2000
#define CL_GL_OBJECT_TEXTURE2D 0x2001
#define CL_GL_OBJECT_TEXTURE3D 0x2002
#define CL_GL_OBJECT_RENDERBUFFER 0x2003
/* Object types that are only defined when CL_VERSION_1_2 is defined. */
#ifdef CL_VERSION_1_2
#define CL_GL_OBJECT_TEXTURE2D_ARRAY 0x200E
#define CL_GL_OBJECT_TEXTURE1D 0x200F
#define CL_GL_OBJECT_TEXTURE1D_ARRAY 0x2010
#define CL_GL_OBJECT_TEXTURE_BUFFER 0x2011
#endif
/* cl_gl_texture_info: values for the param_name argument of
   clGetGLTextureInfo. */
#define CL_GL_TEXTURE_TARGET 0x2004
#define CL_GL_MIPMAP_LEVEL 0x2005
/* Only defined when CL_VERSION_1_2 is defined. */
#ifdef CL_VERSION_1_2
#define CL_GL_NUM_SAMPLES 0x2012
#endif
/* Declares clCreateFromGLBuffer (tagged as an OpenCL 1.0 API): returns a
   cl_mem for the GL buffer object named by bufobj within context.
   errcode_ret is an output pointer for the error code. */
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromGLBuffer(cl_context context,
cl_mem_flags flags,
cl_GLuint bufobj,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
/* Declares clCreateFromGLTexture, available only when CL_VERSION_1_2 is
   defined: returns a cl_mem for mip level miplevel of the GL texture object
   texture bound to target. errcode_ret is an output pointer for the error
   code. */
#ifdef CL_VERSION_1_2
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromGLTexture(cl_context context,
cl_mem_flags flags,
cl_GLenum target,
cl_GLint miplevel,
cl_GLuint texture,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
#endif
/* Declares clCreateFromGLRenderbuffer (tagged as an OpenCL 1.0 API): returns
   a cl_mem for the GL renderbuffer object named by renderbuffer.
   errcode_ret is an output pointer for the error code. */
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromGLRenderbuffer(cl_context context,
cl_mem_flags flags,
cl_GLuint renderbuffer,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
/* Declares clGetGLObjectInfo: for memobj, reports the GL object type (one of
   the CL_GL_OBJECT_* values) and the GL object name through the two output
   pointers. Returns a cl_int status code. */
extern CL_API_ENTRY cl_int CL_API_CALL
clGetGLObjectInfo(cl_mem memobj,
cl_gl_object_type * gl_object_type,
cl_GLuint * gl_object_name) CL_API_SUFFIX__VERSION_1_0;
/* Declares clGetGLTextureInfo: queries the cl_gl_texture_info item
   param_name for memobj. The result is written to param_value (a buffer of
   param_value_size bytes) and its size to param_value_size_ret. Returns a
   cl_int status code. */
extern CL_API_ENTRY cl_int CL_API_CALL
clGetGLTextureInfo(cl_mem memobj,
cl_gl_texture_info param_name,
size_t param_value_size,
void * param_value,
size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
/* Declares clEnqueueAcquireGLObjects: enqueues, on command_queue, an acquire
   of the num_objects memory objects in mem_objects. event_wait_list (with
   num_events_in_wait_list entries) lists events to wait on; event is an
   output for the event of this command. Returns a cl_int status code. */
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueAcquireGLObjects(cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
/* Declares clEnqueueReleaseGLObjects: the release counterpart of
   clEnqueueAcquireGLObjects, with an identical parameter list. */
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReleaseGLObjects(cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
/* Deprecated OpenCL 1.1 APIs */
/* Both declarations carry the CL_EXT_PREFIX/SUFFIX__VERSION_1_1_DEPRECATED
   markers and take the same parameters as clCreateFromGLTexture. */
/* 2D-texture variant. */
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
clCreateFromGLTexture2D(cl_context context,
cl_mem_flags flags,
cl_GLenum target,
cl_GLint miplevel,
cl_GLuint texture,
cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
/* 3D-texture variant. */
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
clCreateFromGLTexture3D(cl_context context,
cl_mem_flags flags,
cl_GLenum target,
cl_GLint miplevel,
cl_GLuint texture,
cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
/* cl_khr_gl_sharing extension */
/* Extension presence macro, defined to 1. */
#define cl_khr_gl_sharing 1
/* Type of the param_name argument of clGetGLContextInfoKHR. */
typedef cl_uint cl_gl_context_info;
/* Additional Error Codes */
#define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR -1000
/* cl_gl_context_info */
#define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006
#define CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007
/* Additional cl_context_properties */
/* One token per windowing/GL binding named in the macro (EGL, GLX, WGL, CGL),
   plus the GL context itself. */
#define CL_GL_CONTEXT_KHR 0x2008
#define CL_EGL_DISPLAY_KHR 0x2009
#define CL_GLX_DISPLAY_KHR 0x200A
#define CL_WGL_HDC_KHR 0x200B
#define CL_CGL_SHAREGROUP_KHR 0x200C
/* Declares clGetGLContextInfoKHR: queries the cl_gl_context_info item
   param_name for the context described by properties. The result is written
   to param_value (a buffer of param_value_size bytes) and its size to
   param_value_size_ret. Returns a cl_int status code. */
extern CL_API_ENTRY cl_int CL_API_CALL
clGetGLContextInfoKHR(const cl_context_properties * properties,
cl_gl_context_info param_name,
size_t param_value_size,
void * param_value,
size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
/* Function-pointer type with the same parameter list and return type as
   clGetGLContextInfoKHR (presumably for entry points looked up at run time). */
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
const cl_context_properties * properties,
cl_gl_context_info param_name,
size_t param_value_size,
void * param_value,
size_t * param_value_size_ret);
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_GL_H */

View File

@@ -0,0 +1,52 @@
/**********************************************************************************
* Copyright (c) 2008-2019 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/
#ifndef __OPENCL_CL_GL_EXT_H
#define __OPENCL_CL_GL_EXT_H
#ifdef __cplusplus
extern "C" {
#endif
#include <CL/cl_gl.h>
/*
 * cl_khr_gl_event extension
 */
/* Command-type token introduced by this extension. */
#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D
/*
 * Declares clCreateEventFromGLsyncKHR: returns a cl_event in `context` that
 * is associated with the GL sync object `sync`; `errcode_ret` is an output
 * pointer for the error code.
 *
 * The second parameter is named `sync` rather than `cl_GLsync` so that it no
 * longer shadows the cl_GLsync typedef inside the prototype. Parameter names
 * in a declaration are not part of the interface, so callers are unaffected.
 */
extern CL_API_ENTRY cl_event CL_API_CALL
clCreateEventFromGLsyncKHR(cl_context context,
                           cl_GLsync sync,
                           cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1;
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_GL_EXT_H */

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,172 @@
/**********************************************************************************
* Copyright (c) 2008-2019 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/
/*****************************************************************************\
Copyright (c) 2013-2019 Intel Corporation All Rights Reserved.
THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE
MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
File Name: cl_va_api_media_sharing_intel.h
Abstract:
Notes:
\*****************************************************************************/
#ifndef __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H
#define __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H
#include <CL/cl.h>
#include <CL/cl_platform.h>
#include <va/va.h>
#ifdef __cplusplus
extern "C" {
#endif
/******************************************
* cl_intel_va_api_media_sharing extension *
*******************************************/
/* Extension presence macro, defined to 1. */
#define cl_intel_va_api_media_sharing 1
/* error codes */
#define CL_INVALID_VA_API_MEDIA_ADAPTER_INTEL -1098
#define CL_INVALID_VA_API_MEDIA_SURFACE_INTEL -1099
#define CL_VA_API_MEDIA_SURFACE_ALREADY_ACQUIRED_INTEL -1100
#define CL_VA_API_MEDIA_SURFACE_NOT_ACQUIRED_INTEL -1101
/* cl_va_api_device_source_intel */
#define CL_VA_API_DISPLAY_INTEL 0x4094
/* cl_va_api_device_set_intel */
#define CL_PREFERRED_DEVICES_FOR_VA_API_INTEL 0x4095
#define CL_ALL_DEVICES_FOR_VA_API_INTEL 0x4096
/* cl_context_info */
#define CL_CONTEXT_VA_API_DISPLAY_INTEL 0x4097
/* cl_mem_info */
#define CL_MEM_VA_API_MEDIA_SURFACE_INTEL 0x4098
/* cl_image_info */
#define CL_IMAGE_VA_API_PLANE_INTEL 0x4099
/* cl_command_type */
#define CL_COMMAND_ACQUIRE_VA_API_MEDIA_SURFACES_INTEL 0x409A
#define CL_COMMAND_RELEASE_VA_API_MEDIA_SURFACES_INTEL 0x409B
/* Types of the media_adapter_type and media_adapter_set arguments of
   clGetDeviceIDsFromVA_APIMediaAdapterINTEL; both are plain cl_uint. */
typedef cl_uint cl_va_api_device_source_intel;
typedef cl_uint cl_va_api_device_set_intel;
/* Declares clGetDeviceIDsFromVA_APIMediaAdapterINTEL: for platform and the
   media adapter given by media_adapter_type / media_adapter, writes up to
   num_entries device ids to devices and the count to num_devices.
   media_adapter_set takes a cl_va_api_device_set_intel value.
   Returns a cl_int status code. */
extern CL_API_ENTRY cl_int CL_API_CALL
clGetDeviceIDsFromVA_APIMediaAdapterINTEL(
cl_platform_id platform,
cl_va_api_device_source_intel media_adapter_type,
void* media_adapter,
cl_va_api_device_set_intel media_adapter_set,
cl_uint num_entries,
cl_device_id* devices,
cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_2;
/* Function-pointer type with the same parameter list and return type as the
   function declared above. */
typedef CL_API_ENTRY cl_int (CL_API_CALL * clGetDeviceIDsFromVA_APIMediaAdapterINTEL_fn)(
cl_platform_id platform,
cl_va_api_device_source_intel media_adapter_type,
void* media_adapter,
cl_va_api_device_set_intel media_adapter_set,
cl_uint num_entries,
cl_device_id* devices,
cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_2;
/* Declares clCreateFromVA_APIMediaSurfaceINTEL: returns a cl_mem for plane
   `plane` of the VA surface pointed to by surface. errcode_ret is an output
   pointer for the error code. Note that surface is passed as a pointer to a
   VASurfaceID, not by value. */
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromVA_APIMediaSurfaceINTEL(
cl_context context,
cl_mem_flags flags,
VASurfaceID* surface,
cl_uint plane,
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
/* Function-pointer type with the same parameter list and return type as the
   function declared above. */
typedef CL_API_ENTRY cl_mem (CL_API_CALL * clCreateFromVA_APIMediaSurfaceINTEL_fn)(
cl_context context,
cl_mem_flags flags,
VASurfaceID* surface,
cl_uint plane,
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
/* Declares clEnqueueAcquireVA_APIMediaSurfacesINTEL: enqueues, on
   command_queue, an acquire of the num_objects memory objects in mem_objects.
   event_wait_list (with num_events_in_wait_list entries) lists events to wait
   on; event is an output for the event of this command. Returns a cl_int
   status code. */
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueAcquireVA_APIMediaSurfacesINTEL(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem* mem_objects,
cl_uint num_events_in_wait_list,
const cl_event* event_wait_list,
cl_event* event) CL_EXT_SUFFIX__VERSION_1_2;
/* Function-pointer type with the same parameter list and return type as the
   function declared above. */
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireVA_APIMediaSurfacesINTEL_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem* mem_objects,
cl_uint num_events_in_wait_list,
const cl_event* event_wait_list,
cl_event* event) CL_EXT_SUFFIX__VERSION_1_2;
/* Declares clEnqueueReleaseVA_APIMediaSurfacesINTEL: the release counterpart
   of clEnqueueAcquireVA_APIMediaSurfacesINTEL, with an identical parameter
   list. Returns a cl_int status code. */
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReleaseVA_APIMediaSurfacesINTEL(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem* mem_objects,
cl_uint num_events_in_wait_list,
const cl_event* event_wait_list,
cl_event* event) CL_EXT_SUFFIX__VERSION_1_2;
/* Function-pointer type with the same parameter list and return type as the
   function declared above. */
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseVA_APIMediaSurfacesINTEL_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem* mem_objects,
cl_uint num_events_in_wait_list,
const cl_event* event_wait_list,
cl_event* event) CL_EXT_SUFFIX__VERSION_1_2;
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H */

View File

@@ -0,0 +1,86 @@
/*******************************************************************************
* Copyright (c) 2018 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
******************************************************************************/
#ifndef __CL_VERSION_H
#define __CL_VERSION_H
/* Detect which version to target */
/* No target chosen by the includer: emit a compile-time message and default
   to 220. */
#if !defined(CL_TARGET_OPENCL_VERSION)
#pragma message("cl_version.h: CL_TARGET_OPENCL_VERSION is not defined. Defaulting to 220 (OpenCL 2.2)")
#define CL_TARGET_OPENCL_VERSION 220
#endif
/* Target is set but is not one of the six accepted values: emit a
   compile-time message and override it with 220. */
#if CL_TARGET_OPENCL_VERSION != 100 && \
CL_TARGET_OPENCL_VERSION != 110 && \
CL_TARGET_OPENCL_VERSION != 120 && \
CL_TARGET_OPENCL_VERSION != 200 && \
CL_TARGET_OPENCL_VERSION != 210 && \
CL_TARGET_OPENCL_VERSION != 220
#pragma message("cl_version: CL_TARGET_OPENCL_VERSION is not a valid value (100, 110, 120, 200, 210, 220). Defaulting to 220 (OpenCL 2.2)")
#undef CL_TARGET_OPENCL_VERSION
#define CL_TARGET_OPENCL_VERSION 220
#endif
/* OpenCL Version */
/* Define CL_VERSION_x_y as 1 for every version at or below the target, so a
   target of N enables all versions <= N. Each macro is left untouched if the
   includer already defined it. */
#if CL_TARGET_OPENCL_VERSION >= 220 && !defined(CL_VERSION_2_2)
#define CL_VERSION_2_2 1
#endif
#if CL_TARGET_OPENCL_VERSION >= 210 && !defined(CL_VERSION_2_1)
#define CL_VERSION_2_1 1
#endif
#if CL_TARGET_OPENCL_VERSION >= 200 && !defined(CL_VERSION_2_0)
#define CL_VERSION_2_0 1
#endif
#if CL_TARGET_OPENCL_VERSION >= 120 && !defined(CL_VERSION_1_2)
#define CL_VERSION_1_2 1
#endif
#if CL_TARGET_OPENCL_VERSION >= 110 && !defined(CL_VERSION_1_1)
#define CL_VERSION_1_1 1
#endif
#if CL_TARGET_OPENCL_VERSION >= 100 && !defined(CL_VERSION_1_0)
#define CL_VERSION_1_0 1
#endif
/* Allow deprecated APIs for older OpenCL versions. */
/* Define CL_USE_DEPRECATED_OPENCL_x_y_APIS for every version at or above the
   target (up to 2.1), unless the includer already defined it. A target of
   220 therefore defines none of these. */
#if CL_TARGET_OPENCL_VERSION <= 210 && !defined(CL_USE_DEPRECATED_OPENCL_2_1_APIS)
#define CL_USE_DEPRECATED_OPENCL_2_1_APIS
#endif
#if CL_TARGET_OPENCL_VERSION <= 200 && !defined(CL_USE_DEPRECATED_OPENCL_2_0_APIS)
#define CL_USE_DEPRECATED_OPENCL_2_0_APIS
#endif
#if CL_TARGET_OPENCL_VERSION <= 120 && !defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS)
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
#endif
#if CL_TARGET_OPENCL_VERSION <= 110 && !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS)
#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
#endif
#if CL_TARGET_OPENCL_VERSION <= 100 && !defined(CL_USE_DEPRECATED_OPENCL_1_0_APIS)
#define CL_USE_DEPRECATED_OPENCL_1_0_APIS
#endif
#endif /* __CL_VERSION_H */

View File

@@ -0,0 +1,47 @@
/*******************************************************************************
* Copyright (c) 2008-2015 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
******************************************************************************/
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
#ifndef __OPENCL_H
#define __OPENCL_H
#ifdef __cplusplus
extern "C" {
#endif
#include <CL/cl.h>
#include <CL/cl_gl.h>
#include <CL/cl_gl_ext.h>
#include <CL/cl_ext.h>
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_H */

View File

@@ -0,0 +1,35 @@
#=============================================================================
# CMake build system files
#
# Copyright (c) 2014 pocl developers
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
#=============================================================================
# Process the CL/ subdirectory's own CMakeLists.txt.
add_subdirectory("CL")
# Headers from this directory that get installed as private headers.
set(PRIVATE_HEADERS _enable_all_exts.h _builtin_renames.h
_kernel.h _clang_opencl.h
_kernel_c.h _kernel_constants.h
pocl_types.h pocl_device.h pocl.h pocl_spir.h
pocl_image_types.h)
# Install them into POCL_INSTALL_PRIVATE_HEADER_DIR (set outside this file).
install(FILES ${PRIVATE_HEADERS}
DESTINATION ${POCL_INSTALL_PRIVATE_HEADER_DIR})

View File

@@ -0,0 +1 @@
#include "../CL/cl.h"

View File

@@ -0,0 +1 @@
#include "../CL/cl.hpp"

View File

@@ -0,0 +1 @@
#include "../CL/cl_ext.h"

View File

@@ -0,0 +1 @@
#include "../CL/cl_gl.h"

View File

@@ -0,0 +1 @@
#include "../CL/cl_gl_ext.h"

View File

@@ -0,0 +1 @@
#include "../CL/cl_platform.h"

View File

@@ -0,0 +1 @@
#include "../CL/opencl.h"

View File

@@ -0,0 +1,193 @@
/* pocl/_kernel_renames.h - Rename OpenCL builtin functions to avoid name
clashes with libm functions which are called in implementation.
Copyright (c) 2011-2013 Erik Schnetter <eschnetter@perimeterinstitute.ca>
Perimeter Institute for Theoretical Physics
Copyright (c) 2011-2017 Pekka Jääskeläinen / TUT
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef _KERNEL_RENAMES_H
#define _KERNEL_RENAMES_H
/* Move built-in declarations and libm functions out of the way.
(There should be a better way of doing so.) These functions are
built-in math functions for OpenCL (see Clang's "Builtins.def").
Functions defined in libc or libm may also
interfere with OpenCL's functions, since their prototypes will be
wrong.
Every macro below maps a plain builtin name to the same name with a
_cl_ prefix. */
#define abs _cl_abs
#define abs_diff _cl_abs_diff
#define acos _cl_acos
#define acosh _cl_acosh
#define acospi _cl_acospi
#define add_sat _cl_add_sat
#define all _cl_all
#define any _cl_any
#define asin _cl_asin
#define asinh _cl_asinh
#define asinpi _cl_asinpi
#define atan _cl_atan
#define atan2 _cl_atan2
#define atan2pi _cl_atan2pi
#define atanh _cl_atanh
#define atanpi _cl_atanpi
#define bitselect _cl_bitselect
#define cbrt _cl_cbrt
#define ceil _cl_ceil
#define clamp _cl_clamp
#define clz _cl_clz
#define copysign _cl_copysign
#define cos _cl_cos
#define cosh _cl_cosh
#define cospi _cl_cospi
#define cross _cl_cross
#define degrees _cl_degrees
#define distance _cl_distance
#define dot _cl_dot
#define erf _cl_erf
#define erfc _cl_erfc
#define exp _cl_exp
#define exp10 _cl_exp10
#define exp2 _cl_exp2
#define expm1 _cl_expm1
#define fabs _cl_fabs
#define fast_distance _cl_fast_distance
#define fast_length _cl_fast_length
#define fast_normalize _cl_fast_normalize
#define fdim _cl_fdim
#define floor _cl_floor
#define fma _cl_fma
#define fmax _cl_fmax
#define fmin _cl_fmin
#define fmod _cl_fmod
#define fract _cl_fract
#define frexp _cl_frexp
#define hadd _cl_hadd
#define half_cos _cl_half_cos
#define half_divide _cl_half_divide
#define half_exp _cl_half_exp
#define half_exp10 _cl_half_exp10
#define half_exp2 _cl_half_exp2
#define half_log _cl_half_log
#define half_log10 _cl_half_log10
#define half_log2 _cl_half_log2
#define half_powr _cl_half_powr
#define half_recip _cl_half_recip
#define half_rsqrt _cl_half_rsqrt
#define half_sin _cl_half_sin
#define half_sqrt _cl_half_sqrt
#define half_tan _cl_half_tan
#define hypot _cl_hypot
#define ilogb _cl_ilogb
#define isequal _cl_isequal
#define isfinite _cl_isfinite
#define isgreater _cl_isgreater
#define isgreaterequal _cl_isgreaterequal
#define isinf _cl_isinf
#define isless _cl_isless
#define islessequal _cl_islessequal
#define islessgreater _cl_islessgreater
#define isnan _cl_isnan
#define isnormal _cl_isnormal
#define isnotequal _cl_isnotequal
#define isordered _cl_isordered
#define isunordered _cl_isunordered
#define ldexp _cl_ldexp
#define length _cl_length
#define lgamma _cl_lgamma
#define lgamma_r _cl_lgamma_r
#define log _cl_log
#define log10 _cl_log10
#define log1p _cl_log1p
#define log2 _cl_log2
#define logb _cl_logb
#define mad _cl_mad
#define mad24 _cl_mad24
#define mad_hi _cl_mad_hi
#define mad_sat _cl_mad_sat
#define max _cl_max
#define maxmag _cl_maxmag
#define min _cl_min
#define minmag _cl_minmag
#define mix _cl_mix
#define modf _cl_modf
#define mul24 _cl_mul24
#define mul_hi _cl_mul_hi
#define nan _cl_nan
#define native_cos _cl_native_cos
#define native_divide _cl_native_divide
#define native_exp _cl_native_exp
#define native_exp10 _cl_native_exp10
#define native_exp2 _cl_native_exp2
#define native_log _cl_native_log
#define native_log10 _cl_native_log10
#define native_log2 _cl_native_log2
#define native_powr _cl_native_powr
#define native_recip _cl_native_recip
#define native_rsqrt _cl_native_rsqrt
#define native_sin _cl_native_sin
#define native_sqrt _cl_native_sqrt
#define native_tan _cl_native_tan
#define nextafter _cl_nextafter
#define normalize _cl_normalize
#define popcount _cl_popcount
#define pow _cl_pow
#define pown _cl_pown
#define powr _cl_powr
#define radians _cl_radians
#define remainder _cl_remainder
#define remquo _cl_remquo
#define rhadd _cl_rhadd
#define rint _cl_rint
#define rootn _cl_rootn
#define rotate _cl_rotate
#define round _cl_round
#define rsqrt _cl_rsqrt
#define select _cl_select
#define sign _cl_sign
#define signbit _cl_signbit
#define sin _cl_sin
#define sincos _cl_sincos
#define sinh _cl_sinh
#define sinpi _cl_sinpi
#define smoothstep _cl_smoothstep
#define sqrt _cl_sqrt
#define step _cl_step
#define sub_sat _cl_sub_sat
#define tan _cl_tan
#define tanh _cl_tanh
#define tanpi _cl_tanpi
#define tgamma _cl_tgamma
#define trunc _cl_trunc
#define upsample _cl_upsample
/* Map the atom_* spellings onto the corresponding atomic_* names, so both
   spellings resolve to the same functions. */
#define atom_add atomic_add
#define atom_sub atomic_sub
#define atom_xchg atomic_xchg
#define atom_inc atomic_inc
#define atom_dec atomic_dec
#define atom_cmpxchg atomic_cmpxchg
#define atom_min atomic_min
#define atom_max atomic_max
#define atom_and atomic_and
#define atom_or atomic_or
#define atom_xor atomic_xor
#endif

View File

@@ -0,0 +1,91 @@
/* This file includes opencl-c.h from Clang and fixes a few pocl extras.
Copyright (c) 2011-2017 Pekka Jääskeläinen / TUT
Copyright (c) 2017 Michal Babej / Tampere University of Technology
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef _OPENCL_H_
/* Use the declarations shipped with Clang. */
/* Check for _OPENCL_H_ already here because the kernel compiler loads the
header beforehand, but cannot find the file because the include paths are
not set up. */
#include <opencl-c.h>
/* Missing declarations from opencl-c.h. Some of the geometric builtins are
defined only up to 4 vectors, but we implement them all: */
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
half _CL_OVERLOADABLE _CL_READNONE length (half8 p);
half _CL_OVERLOADABLE _CL_READNONE length (half16 p);
half _CL_OVERLOADABLE _CL_READNONE fast_length (half8 p);
half _CL_OVERLOADABLE _CL_READNONE fast_length (half16 p);
half8 _CL_OVERLOADABLE _CL_READNONE normalize (half8 p);
half16 _CL_OVERLOADABLE _CL_READNONE normalize (half16 p);
half8 _CL_OVERLOADABLE _CL_READNONE fast_normalize (half8 p);
half16 _CL_OVERLOADABLE _CL_READNONE fast_normalize (half16 p);
half _CL_OVERLOADABLE _CL_READNONE dot (half8 p0, half8 p1);
half _CL_OVERLOADABLE _CL_READNONE dot (half16 p0, half16 p1);
#endif
float _CL_OVERLOADABLE _CL_READNONE length (float8 p);
float _CL_OVERLOADABLE _CL_READNONE length (float16 p);
float _CL_OVERLOADABLE _CL_READNONE fast_length (float8 p);
float _CL_OVERLOADABLE _CL_READNONE fast_length (float16 p);
float8 _CL_OVERLOADABLE _CL_READNONE normalize (float8 p);
float16 _CL_OVERLOADABLE _CL_READNONE normalize (float16 p);
float8 _CL_OVERLOADABLE _CL_READNONE fast_normalize (float8 p);
float16 _CL_OVERLOADABLE _CL_READNONE fast_normalize (float16 p);
float _CL_OVERLOADABLE _CL_READNONE dot (float8 p0, float8 p1);
float _CL_OVERLOADABLE _CL_READNONE dot (float16 p0, float16 p1);
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
double _CL_OVERLOADABLE _CL_READNONE length (double8 p);
double _CL_OVERLOADABLE _CL_READNONE length (double16 p);
double _CL_OVERLOADABLE _CL_READNONE fast_length (double p);
double _CL_OVERLOADABLE _CL_READNONE fast_length (double2 p);
double _CL_OVERLOADABLE _CL_READNONE fast_length (double3 p);
double _CL_OVERLOADABLE _CL_READNONE fast_length (double4 p);
double _CL_OVERLOADABLE _CL_READNONE fast_length (double8 p);
double _CL_OVERLOADABLE _CL_READNONE fast_length (double16 p);
double8 _CL_OVERLOADABLE _CL_READNONE normalize (double8 p);
double16 _CL_OVERLOADABLE _CL_READNONE normalize (double16 p);
double8 _CL_OVERLOADABLE _CL_READNONE fast_normalize (double8 p);
double16 _CL_OVERLOADABLE _CL_READNONE fast_normalize (double16 p);
double _CL_OVERLOADABLE _CL_READNONE dot (double8 p0, double8 p1);
double _CL_OVERLOADABLE _CL_READNONE dot (double16 p0, double16 p1);
#endif
#endif

View File

@@ -0,0 +1,58 @@
/* Enable all extensions known to pocl, which a device supports.
* This is required at the start of include/_kernel.h for prototypes,
* then at kernel lib compilation phase (because _kernel.h disables
* everything at the end).
*/
/* OpenCL 1.0-only extensions: these pragmas are emitted only when compiling
 * as an OpenCL C version older than 1.1 (__OPENCL_C_VERSION__ < 110), and
 * each one only if the matching extension macro is defined. */
#if (__OPENCL_C_VERSION__ < 110)
#ifdef cl_khr_global_int32_base_atomics
# pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
#endif
#ifdef cl_khr_global_int32_extended_atomics
# pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable
#endif
#ifdef cl_khr_local_int32_base_atomics
# pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
#endif
#ifdef cl_khr_local_int32_extended_atomics
# pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable
#endif
#ifdef cl_khr_byte_addressable_store
# pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
#endif
#endif
/* All versions: enabled regardless of __OPENCL_C_VERSION__, whenever the
 * matching extension macro is defined. */
#ifdef cl_khr_fp16
# pragma OPENCL EXTENSION cl_khr_fp16: enable
#endif
#ifdef cl_khr_fp64
# pragma OPENCL EXTENSION cl_khr_fp64: enable
#endif
#ifdef cl_khr_int64_base_atomics
# pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
#endif
#ifdef cl_khr_int64_extended_atomics
# pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable
#endif
/* cl_khr_3d_image_writes is only enabled when the Clang major version is
 * greater than 4. */
#if (__clang_major__ > 4)
#ifdef cl_khr_3d_image_writes
# pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable
#endif
#endif

View File

@@ -0,0 +1,233 @@
/* pocl/_kernel.h - OpenCL types and runtime library
functions declarations. This should be included only from OpenCL C files.
Copyright (c) 2011 Universidad Rey Juan Carlos
Copyright (c) 2011-2017 Pekka Jääskeläinen / TUT
Copyright (c) 2011-2013 Erik Schnetter <eschnetter@perimeterinstitute.ca>
Perimeter Institute for Theoretical Physics
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/* Without a -cl-std build option, the highest OpenCL C 1.x language version
 * supported by each device is used when compiling the program for that
 * device. If the compiler did not provide __OPENCL_C_VERSION__, fall back to
 * OpenCL C 1.2 here.
 */
#if !defined(__OPENCL_C_VERSION__)
#  define __OPENCL_C_VERSION__ 120
#endif

/* Define a CL_VERSION_x_y marker for every OpenCL version up to and
 * including the language version being compiled. */
#if __OPENCL_C_VERSION__ >= 100
#  define CL_VERSION_1_0 100
#endif
#if __OPENCL_C_VERSION__ >= 110
#  define CL_VERSION_1_1 110
#endif
#if __OPENCL_C_VERSION__ >= 120
#  define CL_VERSION_1_2 120
#endif
#if __OPENCL_C_VERSION__ >= 200
#  define CL_VERSION_2_0 200
#endif
#include "_enable_all_exts.h"
#include "_builtin_renames.h"
/* Feature-test wrappers that help write generic code; they are used mostly
 * in the _pocl_opencl.h header and in some .cl files of the kernel library.
 * __IF_<FEATURE>(x) expands to x when the matching extension macro is
 * defined and to nothing otherwise. */

/* 64-bit integers (cl_khr_int64) */
#if !defined(cl_khr_int64)
#  define __IF_INT64(x)
#else
#  define __IF_INT64(x) x
#endif

/* half precision floats (cl_khr_fp16) */
#if !defined(cl_khr_fp16)
#  define __IF_FP16(x)
#else
#  define __IF_FP16(x) x
#endif

/* double precision floats (cl_khr_fp64) */
#if !defined(cl_khr_fp64)
#  define __IF_FP64(x)
#else
#  define __IF_FP64(x) x
#endif

/* 64-bit base atomics (cl_khr_int64_base_atomics) */
#if !defined(cl_khr_int64_base_atomics)
#  define __IF_BA64(x)
#else
#  define __IF_BA64(x) x
#endif

/* 64-bit extended atomics (cl_khr_int64_extended_atomics) */
#if !defined(cl_khr_int64_extended_atomics)
#  define __IF_EA64(x)
#else
#  define __IF_EA64(x) x
#endif
/****************************************************************************/
/* Function/type attribute shorthands for Clang/SPIR. Each _CL_* macro expands
 * to the named __attribute__ when __has_attribute reports support for it and
 * to nothing otherwise. */

/* always_inline */
#if !__has_attribute(__always_inline__)
#  define _CL_ALWAYSINLINE
#else
#  define _CL_ALWAYSINLINE __attribute__((__always_inline__))
#endif

/* noinline */
#if !__has_attribute(__noinline__)
#  define _CL_NOINLINE
#else
#  define _CL_NOINLINE __attribute__((__noinline__))
#endif

/* overloadable */
#if !__has_attribute(__overloadable__)
#  define _CL_OVERLOADABLE
#else
#  define _CL_OVERLOADABLE __attribute__((__overloadable__))
#endif

/* _CL_READONLY maps to the "pure" attribute */
#if !__has_attribute(__pure__)
#  define _CL_READONLY
#else
#  define _CL_READONLY __attribute__((__pure__))
#endif

/* _CL_READNONE maps to the "const" attribute */
#if !__has_attribute(__const__)
#  define _CL_READNONE
#else
#  define _CL_READNONE __attribute__((__const__))
#endif

/* convergent */
#if !__has_attribute(convergent)
#  define _CL_CONVERGENT
#else
#  define _CL_CONVERGENT __attribute__((convergent))
#endif
/************************ setup Clang version macros ******************/
/* Define the LLVM_<major>_0 marker that matches the Clang major version
 * compiling this header. Only majors 6 through 10 are accepted; any other
 * value of __clang_major__ stops the build with #error. Each marker is
 * #undef'ed first, so it always ends up defined with an empty body even if
 * it had been defined before. */
#if (__clang_major__ == 6)
# undef LLVM_6_0
# define LLVM_6_0
#elif (__clang_major__ == 7)
# undef LLVM_7_0
# define LLVM_7_0
#elif (__clang_major__ == 8)
# undef LLVM_8_0
# define LLVM_8_0
#elif (__clang_major__ == 9)
# undef LLVM_9_0
# define LLVM_9_0
#elif (__clang_major__ == 10)
# undef LLVM_10_0
# define LLVM_10_0
#else
#error Unsupported Clang/LLVM version.
#endif
/* LLVM_OLDER_THAN_<n>_0 is set to 1 when none of the LLVM_<m>_0 markers with
 * m >= n is defined, i.e. when the detected version is older than n. */
#if !defined(LLVM_10_0)
#  define LLVM_OLDER_THAN_10_0 1
#endif
#if !defined(LLVM_10_0) && !defined(LLVM_9_0)
#  define LLVM_OLDER_THAN_9_0 1
#endif
#if !defined(LLVM_10_0) && !defined(LLVM_9_0) && !defined(LLVM_8_0)
#  define LLVM_OLDER_THAN_8_0 1
#endif
#if !defined(LLVM_10_0) && !defined(LLVM_9_0) && !defined(LLVM_8_0) \
    && !defined(LLVM_7_0)
#  define LLVM_OLDER_THAN_7_0 1
#endif
#if !defined(LLVM_10_0) && !defined(LLVM_9_0) && !defined(LLVM_8_0) \
    && !defined(LLVM_7_0) && !defined(LLVM_6_0)
#  define LLVM_OLDER_THAN_6_0 1
#endif
/****************************************************************************/
/* _CL_STATIC_ASSERT(_t, _x): a static assert statement to catch
 * inconsistencies at build time.
 *   _t  identifier naming the check; it is stringified into the diagnostic
 *       message, or (in the fallback) pasted into a typedef name, so it must
 *       be usable as an identifier suffix.
 *   _x  integer constant expression that must evaluate to non-zero.
 * When the compiler offers _Static_assert it is used directly. Otherwise the
 * fallback declares an array typedef whose size is -1 when _x is false,
 * which forces a compile error.
 */
/* Compilers without __has_extension are treated as having no extensions, so
 * the #if below stays well-formed and selects the fallback. */
#ifndef __has_extension
# define __has_extension(_e) 0
#endif
#if __has_extension(__c_static_assert__)
# define _CL_STATIC_ASSERT(_t, _x) _Static_assert(_x, #_t)
#else
/* The condition must refer to the macro parameter _x (not a stray "x"). */
# define _CL_STATIC_ASSERT(_t, _x) typedef int __cl_ai##_t[(_x) ? 1 : -1];
#endif
/****************************************************************************/
/* Image access qualifier shorthands. Read-only and write-only qualifiers are
 * always available; the read-write qualifier is only mapped to __read_write
 * for OpenCL C 2.0 and newer. Before that, IMG_RW_AQ expands to a marker
 * identifier and CLANG_HAS_RW_IMAGES is left undefined. */
#define IMG_RO_AQ __read_only
#define IMG_WO_AQ __write_only
#if (__OPENCL_C_VERSION__ < 200)
#  undef CLANG_HAS_RW_IMAGES
#  define IMG_RW_AQ __RW_IMAGES_UNSUPPORTED_BEFORE_CL_20
#else
#  define CLANG_HAS_RW_IMAGES
#  define IMG_RW_AQ __read_write
#endif
/****************************************************************************/
/* use Clang opencl header for definitions. */
/* Nothing below takes effect unless POCL_DEVICE_ADDRESS_BITS is defined. */
#ifdef POCL_DEVICE_ADDRESS_BITS
/* If we wish to override the Clang set __SIZE_TYPE__ for this target,
let's do it here so the opencl-c.h sets size_t to the wanted type.
This has to happen before the Clang OpenCL header is included. */
#ifdef __SIZE_TYPE__
#undef __SIZE_TYPE__
#endif
/* 32 address bits select uint, 64 select ulong; anything else is an error. */
#if POCL_DEVICE_ADDRESS_BITS == 32
#define __SIZE_TYPE__ uint
#elif POCL_DEVICE_ADDRESS_BITS == 64
#define __SIZE_TYPE__ ulong
#else
#error Unsupported POCL_DEVICE_ADDRESS_BITS value.
#endif
#endif
#include "_clang_opencl.h"
/****************************************************************************/
/* GNU's libm seems to use INT_MIN for these two constants while Clang's
   header uses INT_MAX. The OpenCL specs allow both, but the values are
   unified to INT_MIN here to avoid failing tests. Any earlier definition
   is dropped first. */
#undef FP_ILOGB0
#define FP_ILOGB0 INT_MIN
#undef FP_ILOGBNAN
#define FP_ILOGBNAN INT_MIN
/****************************************************************************/
#include "pocl_image_types.h"
/* Turn every OpenCL extension off again at the end of this header; code that
   needs an extension afterwards has to enable it explicitly. */
#pragma OPENCL EXTENSION all : disable

View File

@@ -0,0 +1,189 @@
/* pocl/_kernel_c.h - C compatible OpenCL types and runtime library
functions declarations for kernel builtin implementations using C.
Copyright (c) 2011 Universidad Rey Juan Carlos
Copyright (c) 2011-2017 Pekka Jääskeläinen / TUT
Copyright (c) 2011-2013 Erik Schnetter <eschnetter@perimeterinstitute.ca>
Perimeter Institute for Theoretical Physics
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/**
* Header that can be included in C-compiled implementations of
* built-in functions to introduce the OpenCL C compatible types etc.
*/
#ifndef _KERNEL_C_H
#define _KERNEL_C_H
#include "pocl_types.h"
#include "_kernel_constants.h"
/* Function/type attribute shorthands for Clang/SPIR. Each _CL_* macro expands
 * to the named __attribute__ when __has_attribute reports support for it and
 * to nothing otherwise. */

/* always_inline */
#if !__has_attribute(__always_inline__)
#  define _CL_ALWAYSINLINE
#else
#  define _CL_ALWAYSINLINE __attribute__((__always_inline__))
#endif

/* noinline */
#if !__has_attribute(__noinline__)
#  define _CL_NOINLINE
#else
#  define _CL_NOINLINE __attribute__((__noinline__))
#endif

/* overloadable */
#if !__has_attribute(__overloadable__)
#  define _CL_OVERLOADABLE
#else
#  define _CL_OVERLOADABLE __attribute__((__overloadable__))
#endif

/* _CL_READONLY maps to the "pure" attribute */
#if !__has_attribute(__pure__)
#  define _CL_READONLY
#else
#  define _CL_READONLY __attribute__((__pure__))
#endif

/* _CL_READNONE maps to the "const" attribute */
#if !__has_attribute(__const__)
#  define _CL_READNONE
#else
#  define _CL_READNONE __attribute__((__const__))
#endif

/* convergent */
#if !__has_attribute(convergent)
#  define _CL_CONVERGENT
#else
#  define _CL_CONVERGENT __attribute__((convergent))
#endif
/* OpenCL C vector types with 2, 3, 4, 8 and 16 elements, built with the
   __ext_vector_type__ attribute. The unsigned element types (uchar, ushort,
   uint, ulong) are not defined in this header; presumably they come from
   pocl_types.h -- confirm. */
/* 8-bit integer vectors */
typedef char char2 __attribute__((__ext_vector_type__(2)));
typedef char char3 __attribute__((__ext_vector_type__(3)));
typedef char char4 __attribute__((__ext_vector_type__(4)));
typedef char char8 __attribute__((__ext_vector_type__(8)));
typedef char char16 __attribute__((__ext_vector_type__(16)));
typedef uchar uchar2 __attribute__((__ext_vector_type__(2)));
typedef uchar uchar3 __attribute__((__ext_vector_type__(3)));
typedef uchar uchar4 __attribute__((__ext_vector_type__(4)));
typedef uchar uchar8 __attribute__((__ext_vector_type__(8)));
typedef uchar uchar16 __attribute__((__ext_vector_type__(16)));
/* short / ushort vectors */
typedef short short2 __attribute__((__ext_vector_type__(2)));
typedef short short3 __attribute__((__ext_vector_type__(3)));
typedef short short4 __attribute__((__ext_vector_type__(4)));
typedef short short8 __attribute__((__ext_vector_type__(8)));
typedef short short16 __attribute__((__ext_vector_type__(16)));
typedef ushort ushort2 __attribute__((__ext_vector_type__(2)));
typedef ushort ushort3 __attribute__((__ext_vector_type__(3)));
typedef ushort ushort4 __attribute__((__ext_vector_type__(4)));
typedef ushort ushort8 __attribute__((__ext_vector_type__(8)));
typedef ushort ushort16 __attribute__((__ext_vector_type__(16)));
/* int / uint vectors */
typedef int int2 __attribute__((__ext_vector_type__(2)));
typedef int int3 __attribute__((__ext_vector_type__(3)));
typedef int int4 __attribute__((__ext_vector_type__(4)));
typedef int int8 __attribute__((__ext_vector_type__(8)));
typedef int int16 __attribute__((__ext_vector_type__(16)));
typedef uint uint2 __attribute__((__ext_vector_type__(2)));
typedef uint uint3 __attribute__((__ext_vector_type__(3)));
typedef uint uint4 __attribute__((__ext_vector_type__(4)));
typedef uint uint8 __attribute__((__ext_vector_type__(8)));
typedef uint uint16 __attribute__((__ext_vector_type__(16)));
/* In C builds (__CBUILD__) with cl_khr_fp16, 'half' is mapped to __fp16. */
#if defined(__CBUILD__) && defined(cl_khr_fp16)
/* NOTE: the Clang's __fp16 does not work robustly in C mode,
it might produce invalid code at least with half vectors.
Using the native 'half' type in OpenCL C mode works better. */
typedef __fp16 half;
#endif
/* NOTE(review): the half vector typedefs below are not guarded by
   cl_khr_fp16, so they need 'half' to be known even when the typedef above
   is skipped -- confirm this holds for every C build configuration. */
typedef half half2 __attribute__((__ext_vector_type__(2)));
typedef half half3 __attribute__((__ext_vector_type__(3)));
typedef half half4 __attribute__((__ext_vector_type__(4)));
typedef half half8 __attribute__((__ext_vector_type__(8)));
typedef half half16 __attribute__((__ext_vector_type__(16)));
/* float vectors */
typedef float float2 __attribute__((__ext_vector_type__(2)));
typedef float float3 __attribute__((__ext_vector_type__(3)));
typedef float float4 __attribute__((__ext_vector_type__(4)));
typedef float float8 __attribute__((__ext_vector_type__(8)));
typedef float float16 __attribute__((__ext_vector_type__(16)));
/* double vectors: only when cl_khr_fp64 is defined. The extension pragma is
   emitted only outside C builds. */
#ifdef cl_khr_fp64
# ifndef __CBUILD__
# pragma OPENCL EXTENSION cl_khr_fp64 : enable
# endif
typedef double double2 __attribute__((__ext_vector_type__(2)));
typedef double double3 __attribute__((__ext_vector_type__(3)));
typedef double double4 __attribute__((__ext_vector_type__(4)));
typedef double double8 __attribute__((__ext_vector_type__(8)));
typedef double double16 __attribute__((__ext_vector_type__(16)));
#endif
/* long / ulong vectors: only when cl_khr_int64 is defined. */
#ifdef cl_khr_int64
typedef long long2 __attribute__((__ext_vector_type__(2)));
typedef long long3 __attribute__((__ext_vector_type__(3)));
typedef long long4 __attribute__((__ext_vector_type__(4)));
typedef long long8 __attribute__((__ext_vector_type__(8)));
typedef long long16 __attribute__((__ext_vector_type__(16)));
typedef ulong ulong2 __attribute__((__ext_vector_type__(2)));
typedef ulong ulong3 __attribute__((__ext_vector_type__(3)));
typedef ulong ulong4 __attribute__((__ext_vector_type__(4)));
typedef ulong ulong8 __attribute__((__ext_vector_type__(8)));
typedef ulong ulong16 __attribute__((__ext_vector_type__(16)));
#endif
/* Address space identifiers, defined here only for TCE targets (__TCE__).
   NOTE(review): the numbering is not sequential (local is 3, constant is 2,
   generic is 6); presumably it has to match the numbering used by the TCE
   backend -- confirm before changing. */
#if defined(__TCE__)
#define POCL_ADDRESS_SPACE_PRIVATE 0
#define POCL_ADDRESS_SPACE_GLOBAL 1
#define POCL_ADDRESS_SPACE_LOCAL 3
#define POCL_ADDRESS_SPACE_CONSTANT 2
#define POCL_ADDRESS_SPACE_GENERIC 6
#endif
/* Type for memory fence flag values (presumably the CLK_*_MEM_FENCE
   constants -- confirm). */
typedef uint cl_mem_fence_flags;
/* Integer Constants */
/* Limit macros for C builds (__CBUILD__) only. The values correspond to an
   8-bit char, 16-bit short, 32-bit int and, when cl_khr_int64 is defined,
   a 64-bit long. */
#if defined(__CBUILD__)
#define CHAR_BIT 8
/* plain char uses the signed char limits */
#define CHAR_MAX SCHAR_MAX
#define CHAR_MIN SCHAR_MIN
#define INT_MAX 2147483647
/* written as (-MAX - 1) because the positive literal 2147483648 would not
   fit in a 32-bit int */
#define INT_MIN (-2147483647 - 1)
#ifdef cl_khr_int64
#define LONG_MAX 0x7fffffffffffffffL
#define LONG_MIN (-0x7fffffffffffffffL - 1)
#endif
#define SCHAR_MAX 127
#define SCHAR_MIN (-127 - 1)
#define SHRT_MAX 32767
#define SHRT_MIN (-32767 - 1)
#define UCHAR_MAX 255
#define USHRT_MAX 65535
#define UINT_MAX 0xffffffff
#ifdef cl_khr_int64
#define ULONG_MAX 0xffffffffffffffffUL
#endif
#endif /* __CBUILD__ */
#endif

View File

@@ -0,0 +1,93 @@
/* pocl/_kernel_constants.h - C compatible OpenCL types and runtime library
constants declarations.
Copyright (c) 2011 Universidad Rey Juan Carlos
Copyright (c) 2011-2013 Pekka Jääskeläinen / TUT
Copyright (c) 2011-2013 Erik Schnetter <eschnetter@perimeterinstitute.ca>
Perimeter Institute for Theoretical Physics
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/**
* Header that can be included in C-compiled implementations of
* built-in functions to introduce the OpenCL C compatible constants.
*/
#ifndef _KERNEL_CONSTANTS_H
#define _KERNEL_CONSTANTS_H
/* clang's header defines these, so they are only defined here when that
   header has not been included (_OPENCL_H_ is not defined). */
#ifndef _OPENCL_H_
/* cl_channel_order
   NOTE(review): the 0x10Bx / 0x10Dx values presumably mirror the host-side
   CL_* channel order / channel type enums -- confirm against CL/cl.h. */
#define CLK_R 0x10B0
#define CLK_A 0x10B1
#define CLK_RG 0x10B2
#define CLK_RA 0x10B3
#define CLK_RGB 0x10B4
#define CLK_RGBA 0x10B5
#define CLK_BGRA 0x10B6
#define CLK_ARGB 0x10B7
#define CLK_INTENSITY 0x10B8
#define CLK_LUMINANCE 0x10B9
#define CLK_Rx 0x10BA
#define CLK_RGx 0x10BB
#define CLK_RGBx 0x10BC
#define CLK_DEPTH 0x10BD
#define CLK_DEPTH_STENCIL 0x10BE
/* cl_channel_type */
#define CLK_SNORM_INT8 0x10D0
#define CLK_SNORM_INT16 0x10D1
#define CLK_UNORM_INT8 0x10D2
#define CLK_UNORM_INT16 0x10D3
#define CLK_UNORM_SHORT_565 0x10D4
#define CLK_UNORM_SHORT_555 0x10D5
#define CLK_UNORM_INT_101010 0x10D6
#define CLK_SIGNED_INT8 0x10D7
#define CLK_SIGNED_INT16 0x10D8
#define CLK_SIGNED_INT32 0x10D9
#define CLK_UNSIGNED_INT8 0x10DA
#define CLK_UNSIGNED_INT16 0x10DB
#define CLK_UNSIGNED_INT32 0x10DC
#define CLK_HALF_FLOAT 0x10DD
#define CLK_FLOAT 0x10DE
#define CLK_UNORM_INT24 0x10DF
/* cl_addressing_mode
   The addressing mode, normalized-coords and filter mode values below use
   disjoint bits (0x0E, 0x01 and 0x30 respectively), so presumably they are
   OR-ed together into one sampler value -- confirm at the use sites. */
#define CLK_ADDRESS_NONE 0x00
#define CLK_ADDRESS_CLAMP_TO_EDGE 0x02
#define CLK_ADDRESS_CLAMP 0x04
#define CLK_ADDRESS_REPEAT 0x06
#define CLK_ADDRESS_MIRRORED_REPEAT 0x08
/* cl_sampler_info */
#define CLK_NORMALIZED_COORDS_FALSE 0x00
#define CLK_NORMALIZED_COORDS_TRUE 0x01
/* filter_mode */
#define CLK_FILTER_NEAREST 0x10
#define CLK_FILTER_LINEAR 0x20
/* barrier() flags; separate bits, so both fences can be requested at once */
#define CLK_LOCAL_MEM_FENCE 0x01
#define CLK_GLOBAL_MEM_FENCE 0x02
#endif
#endif

View File

@@ -0,0 +1,395 @@
/* pocl.h - global pocl declarations for the host side runtime.
Copyright (c) 2011 Universidad Rey Juan Carlos
2011-2019 Pekka Jääskeläinen
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/**
* @file pocl.h
*
* This file holds the declarations that are used by both the libpocl CL
* implementation and the kernel compiler. Declarations needed by only one
* of them should be moved to pocl_cl.h of lib/CL or under the kernel
* compiler dir.
* @todo Check if there are extra declarations here that could be moved.
*/
#ifndef POCL_H
#define POCL_H
/* Default to 220 unless the build already chose a CL_TARGET_OPENCL_VERSION;
   this has to be set before <CL/opencl.h> is included. */
#if !defined(CL_TARGET_OPENCL_VERSION)
#  define CL_TARGET_OPENCL_VERSION 220
#endif
#include <CL/opencl.h>
#include "config.h"
#include "pocl_context.h"
/* detects restrict, variadic macros etc */
#include "pocl_compiler_features.h"
/* Fixed lengths; presumably the buffer sizes used for file names and
   work-group strings -- confirm at the use sites. */
#define POCL_FILENAME_LENGTH 1024
#define WORKGROUP_STRING_LENGTH 1024
typedef struct _mem_mapping mem_mapping_t;
/* represents a single buffer to host memory mapping */
struct _mem_mapping {
void *host_ptr; /* the location of the mapped buffer chunk in the host memory */
size_t offset; /* offset to the beginning of the buffer */
size_t size; /* size of the mapped chunk (presumably in bytes) */
mem_mapping_t *prev, *next; /* neighbours in the list of mappings (see below) */
/* This is required, because two clEnqueueMap() with the same buffer+size+offset,
will create two identical mappings in the buffer->mappings LL.
Without this flag, both corresponding clEnqUnmap()s will find
the same mapping (the first one in mappings LL), which will lead
to memory double-free corruption later. */
long unmap_requested;
cl_map_flags map_flags; /* map flags of this mapping (presumably as passed to the map call) */
/* image mapping data */
size_t origin[3];
size_t region[3];
size_t row_pitch;
size_t slice_pitch;
};
/* memory identifier: id to point the global memory where memory resides
+ pointer to actual data */
typedef struct _pocl_mem_identifier
{
int available; /* ... in this mem objs context */
int global_mem_id; /* id of the global memory where the data resides */
void *mem_ptr; /* pointer to the actual data */
void *image_data; /* NOTE(review): presumably device-specific image data -- confirm */
} pocl_mem_identifier;
typedef struct _mem_destructor_callback mem_destructor_callback_t;
/* represents a memory object destructor callback; entries are chained
   through 'next' into a singly linked list */
struct _mem_destructor_callback
{
void (CL_CALLBACK * pfn_notify) (cl_mem, void*); /* callback function */
void *user_data; /* user supplied data passed to callback function */
mem_destructor_callback_t *next; /* next registered callback */
};
typedef struct _build_program_callback build_program_callback_t;
/* represents a program build callback (presumably invoked when a program
   build finishes -- confirm) */
struct _build_program_callback
{
void (CL_CALLBACK * callback_function) (cl_program, void*); /* callback function */
void *user_data; /* user supplied data passed to callback function */
};
/* Command Queue datatypes: one struct per command kind; they are combined
   in the _cl_command_t union further below. */
/* Size in bytes of a kernel digest, and the byte array type holding one. */
#define POCL_KERNEL_DIGEST_SIZE 16
typedef uint8_t pocl_kernel_hash_t[POCL_KERNEL_DIGEST_SIZE];
/* clEnqueueNDRangeKernel */
typedef struct
{
void *hash; /* NOTE(review): presumably points to a pocl_kernel_hash_t -- confirm */
void *wg; /* The work group function ptr. Device specific. */
cl_kernel kernel; /* the kernel to run */
/* The launch data that can be passed to the kernel execution environment. */
struct pocl_context pc;
struct pocl_argument *arguments; /* kernel arguments of this launch */
/* Can be used to store/cache arbitrary device-specific data. */
void *device_data;
/* If set to 1, disallow any work-group function specialization. */
int force_generic_wg_func;
/* If set to 1, disallow "small grid" WG function specialization. */
int force_large_grid_wg_func;
unsigned device_i; /* device index (presumably into the program's device list -- confirm) */
} _cl_command_run;
/* clEnqueueNativeKernel */
typedef struct
{
void *args; /* argument block handed to user_func */
size_t cb_args; /* size of the argument block (presumably in bytes) */
void (*user_func)(void *); /* host function to call */
} _cl_command_native;
/* clEnqueueReadBuffer: buffer -> host memory */
typedef struct
{
void *__restrict__ dst_host_ptr; /* host destination */
pocl_mem_identifier *src_mem_id; /* source buffer */
size_t offset; /* offset into the source buffer */
size_t size; /* amount of data to transfer */
} _cl_command_read;
/* clEnqueueWriteBuffer: host memory -> buffer */
typedef struct
{
const void *__restrict__ src_host_ptr; /* host source */
pocl_mem_identifier *dst_mem_id; /* destination buffer */
size_t offset; /* offset into the destination buffer */
size_t size; /* amount of data to transfer */
} _cl_command_write;
/* clEnqueueCopyBuffer: buffer -> buffer */
typedef struct
{
pocl_mem_identifier *src_mem_id;
pocl_mem_identifier *dst_mem_id;
size_t src_offset;
size_t dst_offset;
size_t size;
} _cl_command_copy;
/* The rectangular transfers below describe a region through a 3-element
   origin per side, a shared 3-element region, and row/slice pitches for
   each side. */
/* clEnqueueReadBufferRect: buffer -> host memory */
typedef struct
{
void *__restrict__ dst_host_ptr;
pocl_mem_identifier *src_mem_id;
size_t buffer_origin[3];
size_t host_origin[3];
size_t region[3];
size_t buffer_row_pitch;
size_t buffer_slice_pitch;
size_t host_row_pitch;
size_t host_slice_pitch;
} _cl_command_read_rect;
/* clEnqueueWriteBufferRect: host memory -> buffer */
typedef struct
{
const void *__restrict__ src_host_ptr;
pocl_mem_identifier *dst_mem_id;
size_t buffer_origin[3];
size_t host_origin[3];
size_t region[3];
size_t buffer_row_pitch;
size_t buffer_slice_pitch;
size_t host_row_pitch;
size_t host_slice_pitch;
} _cl_command_write_rect;
/* clEnqueueCopyBufferRect: buffer -> buffer.
   Note that dst_origin is declared before src_origin here. */
typedef struct
{
pocl_mem_identifier *src_mem_id;
pocl_mem_identifier *dst_mem_id;
size_t dst_origin[3];
size_t src_origin[3];
size_t region[3];
size_t src_row_pitch;
size_t src_slice_pitch;
size_t dst_row_pitch;
size_t dst_slice_pitch;
} _cl_command_copy_rect;
/* clEnqueueMapBuffer */
typedef struct
{
pocl_mem_identifier *mem_id; /* memory object being mapped */
mem_mapping_t *mapping; /* description of the mapped chunk */
} _cl_command_map;
/* clEnqueueUnMapMemObject */
typedef struct
{
pocl_mem_identifier *mem_id; /* memory object being unmapped */
mem_mapping_t *mapping; /* the mapping to release */
} _cl_command_unmap;
/* clEnqueueFillBuffer */
typedef struct
{
pocl_mem_identifier *dst_mem_id; /* buffer to fill */
size_t size; /* size of the filled range */
size_t offset; /* start of the filled range within the buffer */
void *__restrict__ pattern; /* fill pattern */
size_t pattern_size; /* size of the pattern */
} _cl_command_fill_mem;
/* clEnqueue(Write/Read)Image */
/* Read: the source is an image. Both a host pointer and a memory object are
   present as destination (NOTE(review): presumably only one of them is used
   per command -- confirm). */
typedef struct
{
pocl_mem_identifier *src_mem_id;
void *__restrict__ dst_host_ptr;
pocl_mem_identifier *dst_mem_id;
size_t dst_offset;
size_t origin[3];
size_t region[3];
size_t dst_row_pitch;
size_t dst_slice_pitch;
} _cl_command_read_image;
/* Write: mirror image of the read command, with a host pointer or a memory
   object as the source (NOTE(review): presumably only one is used per
   command -- confirm). */
typedef struct
{
pocl_mem_identifier *dst_mem_id;
const void *__restrict__ src_host_ptr;
pocl_mem_identifier *src_mem_id;
size_t src_offset;
size_t origin[3];
size_t region[3];
size_t src_row_pitch;
size_t src_slice_pitch;
} _cl_command_write_image;
/* Image-to-image copy (presumably clEnqueueCopyImage -- confirm). */
typedef struct
{
pocl_mem_identifier *src_mem_id;
pocl_mem_identifier *dst_mem_id;
size_t dst_origin[3];
size_t src_origin[3];
size_t region[3];
} _cl_command_copy_image;
/* clEnqueueFillImage */
typedef struct
{
pocl_mem_identifier *mem_id; /* image to fill */
size_t origin[3];
size_t region[3];
void *__restrict__ fill_pixel; /* pixel value to write */
size_t pixel_size; /* size of fill_pixel */
} _cl_command_fill_image;
/* clEnqueueMarkerWithWaitlist */
typedef struct
{
void *data; /* opaque pointer; its meaning is not visible in this header */
int has_wait_list; /* presumably non-zero when the command had a wait list -- confirm */
} _cl_command_marker;
/* clEnqueueBarrierWithWaitlist: shares the marker layout */
typedef _cl_command_marker _cl_command_barrier;
/* clEnqueueMigrateMemObjects */
typedef struct
{
void *data; /* opaque pointer; its meaning is not visible in this header */
size_t num_mem_objects; /* number of entries in mem_objects */
cl_mem *mem_objects; /* memory objects to migrate */
cl_device_id *source_devices; /* presumably the device each object migrates from -- confirm */
} _cl_command_migrate;
/* SVM free command (presumably clEnqueueSVMFree -- confirm). pfn_free_func
   receives the queue, the pointer count, the pointer array and user data. */
typedef struct
{
void* data; /* presumably the user_data passed to pfn_free_func -- confirm */
void* queue;
unsigned num_svm_pointers; /* number of entries in svm_pointers */
void **svm_pointers;
void (CL_CALLBACK *pfn_free_func) ( cl_command_queue queue,
cl_uint num_svm_pointers,
void *svm_pointers[],
void *user_data);
} _cl_command_svm_free;
/* SVM map command (presumably clEnqueueSVMMap -- confirm) */
typedef struct
{
void* svm_ptr;
size_t size;
cl_map_flags flags;
} _cl_command_svm_map;
/* SVM unmap command (presumably clEnqueueSVMUnmap -- confirm) */
typedef struct
{
void* svm_ptr;
} _cl_command_svm_unmap;
/* SVM copy command (presumably clEnqueueSVMMemcpy -- confirm) */
typedef struct
{
const void *__restrict__ src;
void *__restrict__ dst;
size_t size;
} _cl_command_svm_cpy;
/* SVM fill command (presumably clEnqueueSVMMemFill -- confirm) */
typedef struct
{
void *__restrict__ svm_ptr;
size_t size;
void *__restrict__ pattern;
size_t pattern_size;
} _cl_command_svm_fill;
/* Union of all command payloads: one member per command struct declared
   above. It is stored in _cl_command_node next to a cl_command_type field,
   which presumably tells which member is valid -- confirm. */
typedef union
{
_cl_command_run run;
_cl_command_native native;
_cl_command_read read;
_cl_command_write write;
_cl_command_copy copy;
_cl_command_read_rect read_rect;
_cl_command_write_rect write_rect;
_cl_command_copy_rect copy_rect;
_cl_command_fill_mem memfill;
_cl_command_read_image read_image;
_cl_command_write_image write_image;
_cl_command_copy_image copy_image;
_cl_command_fill_image fill_image;
_cl_command_map map;
_cl_command_unmap unmap;
_cl_command_marker marker;
_cl_command_barrier barrier;
_cl_command_migrate migrate;
_cl_command_svm_free svm_free;
_cl_command_svm_map svm_map;
_cl_command_svm_unmap svm_unmap;
_cl_command_svm_cpy svm_memcpy;
_cl_command_svm_fill svm_fill;
} _cl_command_t;
/* one item in the command queue */
typedef struct _cl_command_node _cl_command_node;
struct _cl_command_node
{
_cl_command_t command; /* command payload */
cl_command_type type; /* kind of command stored in this node */
_cl_command_node *next; // for linked-list storage
_cl_command_node *prev;
cl_event event; /* event associated with this command */
const cl_event *event_wait_list; /* events this command waits on (no count is stored here) */
cl_device_id device; /* targeted device */
/* The index of the targeted device in the platform's device list. */
unsigned device_i;
cl_int ready; /* NOTE(review): presumably non-zero once the command may run -- confirm */
};
/* LLVM_OLDER_THAN_<n>_0 is set to 1 when none of the LLVM_<m>_0 markers with
 * m >= n is defined, i.e. when the configured version is older than n. */
#if !defined(LLVM_10_0)
#  define LLVM_OLDER_THAN_10_0 1
#endif
#if !defined(LLVM_10_0) && !defined(LLVM_9_0)
#  define LLVM_OLDER_THAN_9_0 1
#endif
#if !defined(LLVM_10_0) && !defined(LLVM_9_0) && !defined(LLVM_8_0)
#  define LLVM_OLDER_THAN_8_0 1
#endif
#if !defined(LLVM_10_0) && !defined(LLVM_9_0) && !defined(LLVM_8_0) \
    && !defined(LLVM_7_0)
#  define LLVM_OLDER_THAN_7_0 1
#endif
#if !defined(LLVM_10_0) && !defined(LLVM_9_0) && !defined(LLVM_8_0) \
    && !defined(LLVM_7_0) && !defined(LLVM_6_0)
#  define LLVM_OLDER_THAN_6_0 1
#endif
#endif /* POCL_H */

View File

@@ -0,0 +1,137 @@
/* pocl_cache.h: global declarations of caching functions used mostly in runtime
Copyright (c) 2015 pocl developers
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef POCL_CACHE_H
#define POCL_CACHE_H
#include "pocl_cl.h"
/* The filename in which the work group (parallelizable) kernel LLVM bc is stored in
the kernel's temp dir. */
#define POCL_PARALLEL_BC_FILENAME "/parallel.bc"
#ifdef __cplusplus
extern "C" {
#endif
#include <CL/cl.h>
#ifdef __GNUC__
#pragma GCC visibility push(hidden)
#endif
/* Takes no arguments and returns an int (presumably a status code; going by
   the name it sets up the top-level cache directory -- the definition is
   not visible in this header, confirm there).
   The parameter list is spelled (void) because this header is also compiled
   as C, where an empty "()" declares a function without a prototype and
   lets calls with stray arguments go unchecked. */
int pocl_cache_init_topdir (void);
/* Cache helper prototypes. The definitions are not visible in this header,
   so the notes below are inferred from the function and parameter names
   only -- confirm against the implementation before relying on them. */
/* Presumably creates the cache directory of a program for one device and
   writes the program bitcode path into program_bc_path. */
int
pocl_cache_create_program_cachedir(cl_program program, unsigned device_i,
const char* preprocessed_source, size_t source_len,
char *program_bc_path);
void pocl_cache_cleanup_cachedir(cl_program program);
/* Presumably maps a device to its index within the program. */
int pocl_cl_device_to_index(cl_program program,
cl_device_id device);
/* Temporary file / directory helpers; 'fd' is an output parameter. */
int pocl_cache_tempname (char *path_template, const char *suffix, int *fd);
int pocl_cache_create_tempdir(char* path);
/* Writers for program source, kernel object files and SPIR-V. */
int pocl_cache_write_program_source(char *program_cl_path,
cl_program program);
int pocl_cache_write_kernel_objfile (char *objfile_path,
const char *objfile_content,
uint64_t objfile_size);
int pocl_cache_write_spirv (char *spirv_path,
const char *spirv_content,
uint64_t file_size);
int pocl_cache_update_program_last_access(cl_program program,
unsigned device_i);
/* Build log access. NOTE(review): ownership of the returned string is not
   stated here -- confirm whether the caller has to free it. */
char* pocl_cache_read_buildlog(cl_program program, unsigned device_i);
int pocl_cache_append_to_buildlog(cl_program program,
unsigned device_i,
const char *content,
size_t size);
int pocl_cache_device_cachedir_exists(cl_program program,
unsigned device_i);
int pocl_cache_write_descriptor(cl_program program,
unsigned device_i,
const char* kernel_name,
const char* content,
size_t size);
/* Path builders: each writes its result into the first (char *) argument.
   NOTE(review): no buffer size is passed, so callers presumably have to
   supply a buffer of at least POCL_FILENAME_LENGTH chars -- confirm. */
void pocl_cache_kernel_cachedir_path (char *kernel_cachedir_path,
cl_program program, unsigned device_i,
cl_kernel kernel, const char *append_str,
_cl_command_node *command,
int specialize);
/* NOTE(review): device_i is a plain int here while every other prototype in
   this header uses unsigned -- confirm whether this is intentional. */
int pocl_cache_write_kernel_parallel_bc (void *bc, cl_program program,
int device_i, cl_kernel kernel,
_cl_command_node *command,
int specialize);
/* required by pocl_binary.c */
void pocl_cache_program_path (char *path, cl_program program,
unsigned device_i);
void pocl_cache_kernel_cachedir (char *kernel_cachedir_path,
cl_program program, unsigned device_i,
const char *kernel_name);
/* these two required by llvm API */
void pocl_cache_program_bc_path(char* program_bc_path,
cl_program program,
unsigned device_i);
void pocl_cache_work_group_function_path (char *parallel_bc_path,
cl_program program,
unsigned device_i, cl_kernel kernel,
_cl_command_node *command,
int specialize);
void pocl_cache_final_binary_path (char *final_binary_path, cl_program program,
unsigned device_i, cl_kernel kernel,
_cl_command_node *command, int specialize);
#ifdef __GNUC__
#pragma GCC visibility pop
#endif
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,222 @@
/* autogenerated by CMake, but edited by hand to not
* stop with #error when the compiler isn't gcc/clang */
#ifndef POCL_COMPILER_DETECTION_H
#define POCL_COMPILER_DETECTION_H
#ifndef __cplusplus
/* Compiler identification.
   Step 1: every POCL_COMPILER_IS_<id> macro is defined to 0, so each one can
   be used safely in #if expressions regardless of the compiler in use. */
# define POCL_COMPILER_IS_Intel 0
# define POCL_COMPILER_IS_PathScale 0
# define POCL_COMPILER_IS_Embarcadero 0
# define POCL_COMPILER_IS_Borland 0
# define POCL_COMPILER_IS_Watcom 0
# define POCL_COMPILER_IS_OpenWatcom 0
# define POCL_COMPILER_IS_SunPro 0
# define POCL_COMPILER_IS_HP 0
# define POCL_COMPILER_IS_Compaq 0
# define POCL_COMPILER_IS_zOS 0
# define POCL_COMPILER_IS_XL 0
# define POCL_COMPILER_IS_VisualAge 0
# define POCL_COMPILER_IS_PGI 0
# define POCL_COMPILER_IS_Cray 0
# define POCL_COMPILER_IS_TI 0
# define POCL_COMPILER_IS_Fujitsu 0
# define POCL_COMPILER_IS_TinyCC 0
# define POCL_COMPILER_IS_Bruce 0
# define POCL_COMPILER_IS_SCO 0
# define POCL_COMPILER_IS_AppleClang 0
# define POCL_COMPILER_IS_Clang 0
# define POCL_COMPILER_IS_GNU 0
# define POCL_COMPILER_IS_MSVC 0
# define POCL_COMPILER_IS_ADSP 0
# define POCL_COMPILER_IS_IAR 0
# define POCL_COMPILER_IS_ARMCC 0
# define POCL_COMPILER_IS_SDCC 0
# define POCL_COMPILER_IS_MIPSpro 0
/* Step 2: a single #if/#elif chain redefines at most one of the macros to 1.
   The first matching test wins, so the order is significant: more specific
   tests come before more general ones (e.g. AppleClang before Clang, and
   Clang before GNU, since Clang also defines __GNUC__). If no test matches,
   all macros stay 0 and no #error is raised. */
#if defined(__INTEL_COMPILER) || defined(__ICC)
# undef POCL_COMPILER_IS_Intel
# define POCL_COMPILER_IS_Intel 1
#elif defined(__PATHCC__)
# undef POCL_COMPILER_IS_PathScale
# define POCL_COMPILER_IS_PathScale 1
#elif defined(__BORLANDC__) && defined(__CODEGEARC_VERSION__)
# undef POCL_COMPILER_IS_Embarcadero
# define POCL_COMPILER_IS_Embarcadero 1
#elif defined(__BORLANDC__)
# undef POCL_COMPILER_IS_Borland
# define POCL_COMPILER_IS_Borland 1
#elif defined(__WATCOMC__) && __WATCOMC__ < 1200
# undef POCL_COMPILER_IS_Watcom
# define POCL_COMPILER_IS_Watcom 1
#elif defined(__WATCOMC__)
# undef POCL_COMPILER_IS_OpenWatcom
# define POCL_COMPILER_IS_OpenWatcom 1
#elif defined(__SUNPRO_C)
# undef POCL_COMPILER_IS_SunPro
# define POCL_COMPILER_IS_SunPro 1
#elif defined(__HP_cc)
# undef POCL_COMPILER_IS_HP
# define POCL_COMPILER_IS_HP 1
#elif defined(__DECC)
# undef POCL_COMPILER_IS_Compaq
# define POCL_COMPILER_IS_Compaq 1
#elif defined(__IBMC__) && defined(__COMPILER_VER__)
# undef POCL_COMPILER_IS_zOS
# define POCL_COMPILER_IS_zOS 1
#elif defined(__IBMC__) && !defined(__COMPILER_VER__) && __IBMC__ >= 800
# undef POCL_COMPILER_IS_XL
# define POCL_COMPILER_IS_XL 1
#elif defined(__IBMC__) && !defined(__COMPILER_VER__) && __IBMC__ < 800
# undef POCL_COMPILER_IS_VisualAge
# define POCL_COMPILER_IS_VisualAge 1
#elif defined(__PGI)
# undef POCL_COMPILER_IS_PGI
# define POCL_COMPILER_IS_PGI 1
#elif defined(_CRAYC)
# undef POCL_COMPILER_IS_Cray
# define POCL_COMPILER_IS_Cray 1
#elif defined(__TI_COMPILER_VERSION__)
# undef POCL_COMPILER_IS_TI
# define POCL_COMPILER_IS_TI 1
#elif defined(__FUJITSU) || defined(__FCC_VERSION) || defined(__fcc_version)
# undef POCL_COMPILER_IS_Fujitsu
# define POCL_COMPILER_IS_Fujitsu 1
#elif defined(__TINYC__)
# undef POCL_COMPILER_IS_TinyCC
# define POCL_COMPILER_IS_TinyCC 1
#elif defined(__BCC__)
# undef POCL_COMPILER_IS_Bruce
# define POCL_COMPILER_IS_Bruce 1
#elif defined(__SCO_VERSION__)
# undef POCL_COMPILER_IS_SCO
# define POCL_COMPILER_IS_SCO 1
#elif defined(__clang__) && defined(__apple_build_version__)
# undef POCL_COMPILER_IS_AppleClang
# define POCL_COMPILER_IS_AppleClang 1
#elif defined(__clang__)
# undef POCL_COMPILER_IS_Clang
# define POCL_COMPILER_IS_Clang 1
#elif defined(__GNUC__)
# undef POCL_COMPILER_IS_GNU
# define POCL_COMPILER_IS_GNU 1
#elif defined(_MSC_VER)
# undef POCL_COMPILER_IS_MSVC
# define POCL_COMPILER_IS_MSVC 1
#elif defined(__VISUALDSPVERSION__) || defined(__ADSPBLACKFIN__) || defined(__ADSPTS__) || defined(__ADSP21000__)
# undef POCL_COMPILER_IS_ADSP
# define POCL_COMPILER_IS_ADSP 1
#elif defined(__IAR_SYSTEMS_ICC__ ) || defined(__IAR_SYSTEMS_ICC)
# undef POCL_COMPILER_IS_IAR
# define POCL_COMPILER_IS_IAR 1
#elif defined(__ARMCC_VERSION)
# undef POCL_COMPILER_IS_ARMCC
# define POCL_COMPILER_IS_ARMCC 1
#elif defined(SDCC)
# undef POCL_COMPILER_IS_SDCC
# define POCL_COMPILER_IS_SDCC 1
#elif defined(_SGI_COMPILER_VERSION) || defined(_COMPILER_VERSION)
# undef POCL_COMPILER_IS_MIPSpro
# define POCL_COMPILER_IS_MIPSpro 1
#endif
/* Per-compiler C feature flags. Each POCL_COMPILER_C_<feature> macro is
   always defined, to 1 or 0. Features are only probed for GNU and Clang;
   every other compiler (including AppleClang, which has its own
   POCL_COMPILER_IS_ macro) takes the final #else and gets all flags 0.

   Versions are encoded as major * 100 + minor, so 404 means GCC 4.4,
   406 means GCC 4.6 and 304 means Clang 3.4. Besides the compiler version,
   most features also require a minimum __STDC_VERSION__. */
# if POCL_COMPILER_IS_GNU
# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404
# define POCL_COMPILER_C_FUNCTION_PROTOTYPES 1
# else
# define POCL_COMPILER_C_FUNCTION_PROTOTYPES 0
# endif
# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
# define POCL_COMPILER_C_RESTRICT 1
# else
# define POCL_COMPILER_C_RESTRICT 0
# endif
/* Note: the GNU branch tests __STDC_VERSION__ >= 201000L for static
   assertions, while the Clang branch below tests >= 201112L. */
# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 406 && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201000L
# define POCL_COMPILER_C_STATIC_ASSERT 1
# else
# define POCL_COMPILER_C_STATIC_ASSERT 0
# endif
# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
# define POCL_COMPILER_C_VARIADIC_MACROS 1
# else
# define POCL_COMPILER_C_VARIADIC_MACROS 0
# endif
# elif POCL_COMPILER_IS_Clang
# if ((__clang_major__ * 100) + __clang_minor__) >= 304
# define POCL_COMPILER_C_FUNCTION_PROTOTYPES 1
# else
# define POCL_COMPILER_C_FUNCTION_PROTOTYPES 0
# endif
# if ((__clang_major__ * 100) + __clang_minor__) >= 304 && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
# define POCL_COMPILER_C_RESTRICT 1
# else
# define POCL_COMPILER_C_RESTRICT 0
# endif
# if ((__clang_major__ * 100) + __clang_minor__) >= 304 && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
# define POCL_COMPILER_C_STATIC_ASSERT 1
# else
# define POCL_COMPILER_C_STATIC_ASSERT 0
# endif
# if ((__clang_major__ * 100) + __clang_minor__) >= 304 && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
# define POCL_COMPILER_C_VARIADIC_MACROS 1
# else
# define POCL_COMPILER_C_VARIADIC_MACROS 0
# endif
# else
/* Unknown or unprobed compiler: conservatively report no optional features. */
# define POCL_COMPILER_C_FUNCTION_PROTOTYPES 0
# define POCL_COMPILER_C_RESTRICT 0
# define POCL_COMPILER_C_STATIC_ASSERT 0
# define POCL_COMPILER_C_VARIADIC_MACROS 0
# endif
/* POCL_RESTRICT: the C99 `restrict` qualifier where the feature flag
   POCL_COMPILER_C_RESTRICT is non-zero, and an empty expansion otherwise. */
#if !POCL_COMPILER_C_RESTRICT
#  define POCL_RESTRICT
#else
#  define POCL_RESTRICT restrict
#endif
#endif
#endif

View File

@@ -0,0 +1,86 @@
/* pocl_context.h - The 32b and 64b versions of the "context struct" that can be
passed as a hidden kernel argument for kernels to fetch their WG/WI ID and
dimension data.
Copyright (c) 2018 Pekka Jääskeläinen / Tampere University of Technology
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/* This header can be included both from device and host sources. */
#ifndef POCL_CONTEXT_H
#define POCL_CONTEXT_H
#include "pocl_types.h"
/* Context struct in the native width of the current compilation target.
   The three per-dimension arrays (3 entries each) use ulong when
   __INTPTR_WIDTH__ is 64 and uint when it is 32; any other width is rejected
   at compile time. The remaining members have the same types in both cases.
   The member order and types define the layout, so do not reorder them. */
struct pocl_context {
#if __INTPTR_WIDTH__ == 64
ulong num_groups[3];
ulong global_offset[3];
ulong local_size[3];
#elif __INTPTR_WIDTH__ == 32
uint num_groups[3];
uint global_offset[3];
uint local_size[3];
#else
#error unsupported architecture
#endif
/* printf support: buffer, a pointer to its position counter, and its
   capacity (units not visible here -- presumably bytes, TODO confirm). */
uchar *printf_buffer;
uint *printf_buffer_position;
uint printf_buffer_capacity;
/* Number of work dimensions in use. */
uint work_dim;
};
/* Variant of struct pocl_context whose per-dimension arrays are always uint,
   independent of __INTPTR_WIDTH__. Members have the same names and order as
   in struct pocl_context.
   NOTE(review): printf_buffer and printf_buffer_position are still declared
   as native pointers, so on a 64-bit build this struct is not made up of
   32-bit fields only -- confirm this matches what the device side expects. */
struct pocl_context32 {
uint num_groups[3];
uint global_offset[3];
uint local_size[3];
uchar *printf_buffer;
uint *printf_buffer_position;
uint printf_buffer_capacity;
uint work_dim;
};
/* Copy a 64b context struct to a 32b one.

   __DST: pointer to the destination, treated as struct pocl_context32 *.
   __SRC: pointer to the source, treated as struct pocl_context *.

   Every member is copied individually; the per-dimension values are
   implicitly converted to the destination's uint element type.

   Both arguments are parenthesized before being cast, so that expression
   arguments such as `base + offset` are converted as a whole instead of the
   cast binding to the first operand only. Each argument is evaluated exactly
   once. */
#define POCL_CONTEXT_COPY64TO32(__DST, __SRC) \
  do { \
    struct pocl_context *__src = (struct pocl_context *)(__SRC); \
    struct pocl_context32 *__dst = (struct pocl_context32 *)(__DST); \
    __dst->work_dim = __src->work_dim; \
    __dst->num_groups[0] = __src->num_groups[0]; \
    __dst->num_groups[1] = __src->num_groups[1]; \
    __dst->num_groups[2] = __src->num_groups[2]; \
    __dst->global_offset[0] = __src->global_offset[0]; \
    __dst->global_offset[1] = __src->global_offset[1]; \
    __dst->global_offset[2] = __src->global_offset[2]; \
    __dst->local_size[0] = __src->local_size[0]; \
    __dst->local_size[1] = __src->local_size[1]; \
    __dst->local_size[2] = __src->local_size[2]; \
    __dst->printf_buffer = __src->printf_buffer; \
    __dst->printf_buffer_position = __src->printf_buffer_position; \
    __dst->printf_buffer_capacity = __src->printf_buffer_capacity; \
  } while (0)
/* Size in bytes of the context struct for the given bitness: 64 selects
   struct pocl_context, any other value selects struct pocl_context32.
   The argument is parenthesized so that expression arguments (for example a
   conditional expression) are compared against 64 as a whole. */
#define POCL_CONTEXT_SIZE(__BITNESS) \
  ((__BITNESS) == 64 ? \
     sizeof (struct pocl_context) : \
     sizeof (struct pocl_context32))
#endif

View File

@@ -0,0 +1,80 @@
/* pocl_device.h - global pocl declarations to be used in the device binaries in
case applicable by the target
Copyright (c) 2012-2018 Pekka Jääskeläinen / Tampere University of Technology
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef POCL_DEVICE_H
#define POCL_DEVICE_H
#include "pocl_types.h"
/* Capacity of the args[] array in __kernel_exec_cmd. */
#define MAX_KERNEL_ARGS 64
/* Size in bytes of the name[] array in __kernel_metadata. Whether the name
   must be NUL-terminated within this size is not visible here. */
#define MAX_KERNEL_NAME_LENGTH 64
/* Metadata of a single kernel stored in the device. */
typedef struct {
/* Kernel name, stored inline in a fixed-size array. */
const uchar name[MAX_KERNEL_NAME_LENGTH];
/* Argument and local counts of the kernel (as suggested by the names). */
ushort num_args;
ushort num_locals;
/* Untyped pointer to the kernel's work-group function. */
void *work_group_func;
} __kernel_metadata;
/* ALIGN4(x) / ALIGN8(x): declare `x` with 4-byte / 8-byte alignment.
   MSVC spells the alignment as a leading __declspec(align(N)); all other
   compilers are assumed to accept the GCC-style trailing
   __attribute__ ((aligned (N))).
   The MSVC ALIGN8 requests align(8), matching the non-MSVC branch; align(4)
   there would give the ALIGN8 members of __kernel_exec_cmd a different
   layout depending on the compiler. */
#ifdef _MSC_VER
#define ALIGN4(x) __declspec(align(4)) x
#define ALIGN8(x) __declspec(align(8)) x
#else
#define ALIGN4(x) x __attribute__ ((aligned (4)))
#define ALIGN8(x) x __attribute__ ((aligned (8)))
#endif
/* A kernel invocation command.
   All members are uint (or uint arrays) with explicit alignment requested
   through ALIGN4/ALIGN8, so member order and alignment define the layout;
   do not reorder. */
typedef struct {
/* The execution status of this queue slot (see the POCL_KST_* values). */
ALIGN8(uint status);
/* The kernel to execute. Points to the metadata in the device global
memory. It will be cast to a __kernel_metadata*. */
ALIGN8(uint kernel);
/* Pointers to the kernel arguments in the global memory. Will be
cast to 32-bit void*. At most MAX_KERNEL_ARGS entries. */
ALIGN8(uint args[MAX_KERNEL_ARGS]);
/* Sizes of the dynamically allocated local buffers (currently disabled). */
/* uint32_t dynamic_local_arg_sizes[MAX_KERNEL_ARGS] ALIGN4; */
/* Number of dimensions in the work space. */
ALIGN4(uint work_dim);
/* Per-dimension values, 3 entries each. */
ALIGN4(uint num_groups[3]);
ALIGN4(uint global_offset[3]);
} __kernel_exec_cmd;
/* Kernel execution statuses: the values stored in the `status` member of
   __kernel_exec_cmd. Values start at 1; 0 is not assigned a meaning here. */
/* The invocation entry is free to use. */
#define POCL_KST_FREE 1
/* The kernel structure has been populated and is waiting to be
executed. */
#define POCL_KST_READY 2
/* The kernel is currently running in the device. */
#define POCL_KST_RUNNING 3
/* The kernel has finished execution. The results can be collected and the
execution entry be freed (by writing POCL_KST_FREE to the status). */
#define POCL_KST_FINISHED 4
#endif

View File

@@ -0,0 +1,83 @@
/* pocl_file_util.h: global declarations of portable file utility functions
defined in lib/llvmopencl, due to using llvm::sys::fs & other llvm APIs
Copyright (c) 2015 pocl developers
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef POCL_FILE_UTIL_H
#define POCL_FILE_UTIL_H
#ifdef __cplusplus
extern "C" {
#endif
#ifdef __GNUC__
#pragma GCC visibility push(hidden)
#endif
/* Portable file utility prototypes. Only declarations are visible here;
   unless a comment says otherwise, the meaning of the int return values is
   not documented in this header -- TODO confirm against the implementation.
   NOTE(review): uint64_t is used below but no <stdint.h> include is visible
   in this header; it presumably relies on the including file. */
/* Remove a directory, recursively */
int pocl_rm_rf(const char* path);
/* Make a directory, including all directories along path */
int pocl_mkdir_p(const char* path);
/* Remove a file or empty directory */
int pocl_remove(const char* path);
/* Rename oldpath to newpath (as suggested by the name and parameters). */
int pocl_rename(const char *oldpath, const char *newpath);
/* Existence check for path; the exact return convention is not visible
   here. */
int pocl_exists(const char* path);
/* Touch file to change last modified time. For portability, this
* removes & creates the file. */
int pocl_touch_file(const char* path);
/* Writes or appends data to a file. `count` is the size of `content`;
   `append` and `dont_rewrite` are int flags. */
int pocl_write_file(const char* path, const char* content,
uint64_t count, int append, int dont_rewrite);
/* Temp-file variant of pocl_write_file: takes a prefix and suffix instead of
   a path. `output_path` and `ret_fd` are non-const out-parameters;
   presumably they receive the generated path and an open descriptor --
   TODO confirm. */
int pocl_write_tempfile (char *output_path, const char *prefix,
const char *suffix, const char *content,
uint64_t count, int *ret_fd);
/* Allocates memory and places file contents in it.
* Returns negative errno on error, zero otherwise. */
int pocl_read_file(const char* path, char** content, uint64_t *filesize);
/* Writes `module` (an untyped pointer; presumably an LLVM module, given the
   file header) to path. */
int pocl_write_module(void *module, const char* path, int dont_rewrite);
/* Temporary directory / name creation from a prefix (and suffix). `output`
   is a caller-provided buffer with no size parameter -- TODO confirm the
   required capacity. */
int pocl_mk_tempdir (char *output, const char *prefix);
int pocl_mk_tempname (char *output, const char *prefix, const char *suffix,
int *ret_fd);
#ifdef __GNUC__
#pragma GCC visibility pop
#endif
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,52 @@
/* pocl_image_types.h - image data structure used by device implementations
Copyright (c) 2013 Ville Korhonen
Copyright (c) 2017 Michal Babej / Tampere University of Technology
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef __X86_IMAGE_H__
#define __X86_IMAGE_H__
/* INTTYPE: integer type used for the dev_image_t fields. It is plain `int`
   unless __CBUILD__ is defined, in which case it is `cl_int`. */
#if !defined(__CBUILD__)
#  define INTTYPE int
#else
#  define INTTYPE cl_int
#endif
/* Sampler handle, represented as a pointer-sized unsigned integer.
   NOTE(review): uintptr_t needs <stdint.h>; no include is visible in this
   header, so it presumably relies on the including file. */
typedef uintptr_t dev_sampler_t;
/* Image descriptor used by device implementations. Apart from the data
   pointer, every field is an INTTYPE (int, or cl_int under __CBUILD__).
   Member order defines the layout; do not reorder. */
typedef struct dev_image_t {
/* Untyped pointer to the image data. */
void *_data;
/* Image dimensions (units not stated here -- presumably pixels). */
INTTYPE _width;
INTTYPE _height;
INTTYPE _depth;
INTTYPE _image_array_size;
/* Pitches (units not stated here -- presumably bytes, TODO confirm). */
INTTYPE _row_pitch;
INTTYPE _slice_pitch;
INTTYPE _num_mip_levels; /* maybe not needed */
INTTYPE _num_samples; /* maybe not needed */
/* Channel order / channel data type identifiers and derived sizes. */
INTTYPE _order;
INTTYPE _data_type;
INTTYPE _num_channels;
INTTYPE _elem_size;
} dev_image_t;
#endif

Some files were not shown because too many files have changed in this diff Show More