project tests refactoring

This commit is contained in:
Blaise Tine
2021-06-13 17:42:04 -07:00
parent 47c3234659
commit 03406c0a3f
631 changed files with 394471 additions and 653511 deletions

1
tests/opencl/convolution/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
convolution

View File

@@ -0,0 +1,59 @@
LLVM_PREFIX ?= /opt/llvm-riscv
RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain
SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/riscv32-unknown-elf
POCL_CC_PATH ?= /opt/pocl/compiler
POCL_RT_PATH ?= /opt/pocl/runtime
VORTEX_DRV_PATH ?= $(realpath ../../../driver)
VORTEX_RT_PATH ?= $(realpath ../../../runtime)
K_LLCFLAGS += "-O3 -march=riscv32 -target-abi=ilp32f -mcpu=generic-rv32 -mattr=+m,+f -float-abi=hard -code-model=small"
K_CFLAGS += "-v -O3 --sysroot=$(SYSROOT) --gcc-toolchain=$(RISCV_TOOLCHAIN_PATH) -march=rv32imf -mabi=ilp32f -I$(VORTEX_RT_PATH)/include -fno-rtti -fno-exceptions -ffreestanding -nostartfiles -fdata-sections -ffunction-sections"
K_LDFLAGS += "-Wl,-Bstatic,-T$(VORTEX_RT_PATH)/linker/vx_link.ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a -lm"
CXXFLAGS += -std=c++11 -O2 -Wall -Wextra -pedantic -Wfatal-errors
#CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -I$(POCL_RT_PATH)/include
LDFLAGS += -L$(POCL_RT_PATH)/lib -L$(VORTEX_DRV_PATH)/simx -lOpenCL -lvortex
PROJECT = convolution
SRCS = main.cpp utils.cpp
all: $(PROJECT) kernel.pocl
kernel.pocl: kernel.cl
LLVM_PREFIX=$(LLVM_PREFIX) POCL_DEBUG=all LD_LIBRARY_PATH=$(LLVM_PREFIX)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -LLCFLAGS $(K_LLCFLAGS) -CFLAGS $(K_CFLAGS) -LDFLAGS $(K_LDFLAGS) -o kernel.pocl kernel.cl
$(PROJECT): $(SRCS)
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
run-fpga: $(PROJECT) kernel.pocl
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT)
run-asesim: $(PROJECT) kernel.pocl
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT)
run-vlsim: $(PROJECT) kernel.pocl
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT)
run-simx: $(PROJECT) kernel.pocl
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT)
run-rtlsim: $(PROJECT) kernel.pocl
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT)
.depend: $(SRCS)
$(CXX) $(CXXFLAGS) -MM $^ > .depend;
clean:
rm -rf $(PROJECT) *.o .depend
clean-all: clean
rm -rf *.pocl *.dump
ifneq ($(MAKECMDGOALS),clean)
-include .depend
endif

File diff suppressed because it is too large Load Diff

Binary file not shown.

After

Width:  |  Height:  |  Size: 44 KiB

View File

@@ -0,0 +1,54 @@
__kernel
void convolution(
__read_only image2d_t sourceImage,
__write_only image2d_t outputImage,
int rows,
int cols,
__constant float* filter,
int filterWidth,
sampler_t sampler)
{
// Store each work-items unique row and column
int column = get_global_id(0);
int row = get_global_id(1);
// Half the width of the filter is needed for indexing
// memory later
int halfWidth = (int)(filterWidth/2);
// All accesses to images return data as four-element vector
// (i.e., float4), although only the 'x' component will contain
// meaningful data in this code
float4 sum = {0.0f, 0.0f, 0.0f, 0.0f};
// Iterator for the filter
int filterIdx = 0;
// Each work-item iterates around its local area based on the
// size of the filter
int2 coords; // Coordinates for accessing the image
// Iterate the filter rows
for(int i = -halfWidth; i <= halfWidth; i++) {
coords.y = row + i;
// Iterate over the filter columns
for(int j = -halfWidth; j <= halfWidth; j++) {
coords.x = column + j;
float4 pixel;
// Read a pixel from the image. A single channel image
// stores the pixel in the 'x' coordinate of the returned
// vector.
pixel = read_imagef(sourceImage, sampler, coords);
sum.x += pixel.x * filter[filterIdx++];
}
}
// Copy the data to the output image if the
// work-item is in bounds
if(row < rows && column < cols) {
coords.x = column;
coords.y = row;
write_imagef(outputImage, coords, sum);
}
}

261
tests/opencl/convolution/main.cpp Executable file
View File

@@ -0,0 +1,261 @@
#include <stdio.h>
#include <stdlib.h>
#include <CL/cl.h>
#include "utils.h"
// This function takes a positive integer and rounds it up to
// the nearest multiple of another provided integer
unsigned int roundUp(unsigned int value, unsigned int multiple) {
// Determine how far past the nearest multiple the value is
unsigned int remainder = value % multiple;
// Add the difference to make the value a multiple
if(remainder != 0) {
value += (multiple-remainder);
}
return value;
}
// This function reads in a text file and stores it as a char pointer
char* readSource(char* kernelPath) {
cl_int status;
FILE *fp;
char *source;
long int size;
printf("Program file is: %s\n", kernelPath);
fp = fopen(kernelPath, "rb");
if(!fp) {
printf("Could not open kernel file\n");
exit(-1);
}
status = fseek(fp, 0, SEEK_END);
if(status != 0) {
printf("Error seeking to end of file\n");
exit(-1);
}
size = ftell(fp);
if(size < 0) {
printf("Error getting file position\n");
exit(-1);
}
rewind(fp);
source = (char *)malloc(size + 1);
int i;
for (i = 0; i < size+1; i++) {
source[i]='\0';
}
if(source == NULL) {
printf("Error allocating space for the kernel source\n");
exit(-1);
}
fread(source, 1, size, fp);
source[size] = '\0';
return source;
}
void chk(cl_int status, const char* cmd) {
if(status != CL_SUCCESS) {
printf("%s failed (%d)\n", cmd, status);
exit(-1);
}
}
int main() {
int i, j, k, l;
// Rows and columns in the input image
int imageHeight;
int imageWidth;
const char* inputFile = "input.bmp";
const char* outputFile = "output.bmp";
// Homegrown function to read a BMP from file
float* inputImage = readImage(inputFile, &imageWidth,
&imageHeight);
// Size of the input and output images on the host
int dataSize = imageHeight*imageWidth*sizeof(float);
// Output image on the host
float* outputImage = NULL;
outputImage = (float*)malloc(dataSize);
float* refImage = NULL;
refImage = (float*)malloc(dataSize);
// 45 degree motion blur
float filter[49] =
{0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0,
0, 0, -1, 0, 1, 0, 0,
0, 0, -2, 0, 2, 0, 0,
0, 0, -1, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0};
// The convolution filter is 7x7
int filterWidth = 7;
int filterSize = filterWidth*filterWidth; // Assume a square kernel
// Set up the OpenCL environment
cl_int status;
// Discovery platform
cl_platform_id platform;
status = clGetPlatformIDs(1, &platform, NULL);
chk(status, "clGetPlatformIDs");
// Discover device
cl_device_id device;
clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, NULL);
chk(status, "clGetDeviceIDs");
// Create context
cl_context_properties props[3] = {CL_CONTEXT_PLATFORM,
(cl_context_properties)(platform), 0};
cl_context context;
context = clCreateContext(props, 1, &device, NULL, NULL, &status);
chk(status, "clCreateContext");
// Create command queue
cl_command_queue queue;
queue = clCreateCommandQueue(context, device, 0, &status);
chk(status, "clCreateCommandQueue");
// The image format describes how the data will be stored in memory
cl_image_format format;
format.image_channel_order = CL_R; // single channel
format.image_channel_data_type = CL_FLOAT; // float data type
// Create space for the source image on the device
cl_mem d_inputImage = clCreateImage2D(context, 0, &format, imageWidth,
imageHeight, 0, NULL, &status);
chk(status, "clCreateImage2D");
// Create space for the output image on the device
cl_mem d_outputImage = clCreateImage2D(context, 0, &format, imageWidth,
imageHeight, 0, NULL, &status);
chk(status, "clCreateImage2D");
// Create space for the 7x7 filter on the device
cl_mem d_filter = clCreateBuffer(context, 0, filterSize*sizeof(float),
NULL, &status);
chk(status, "clCreateBuffer");
// Copy the source image to the device
size_t origin[3] = {0, 0, 0}; // Offset within the image to copy from
size_t region[3] = {imageWidth, imageHeight, 1}; // Elements to per dimension
status = clEnqueueWriteImage(queue, d_inputImage, CL_FALSE, origin, region,
0, 0, inputImage, 0, NULL, NULL);
chk(status, "clEnqueueWriteImage");
// Copy the 7x7 filter to the device
status = clEnqueueWriteBuffer(queue, d_filter, CL_FALSE, 0,
filterSize*sizeof(float), filter, 0, NULL, NULL);
chk(status, "clEnqueueWriteBuffer");
// Create the image sampler
cl_sampler sampler = clCreateSampler(context, CL_FALSE,
CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &status);
chk(status, "clCreateSampler");
const char* source = readSource("kernel.cl");
// Create a program object with source and build it
cl_program program;
program = clCreateProgramWithSource(context, 1, &source, NULL, NULL);
chk(status, "clCreateProgramWithSource");
status = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
chk(status, "clBuildProgram");
// Create the kernel object
cl_kernel kernel;
kernel = clCreateKernel(program, "convolution", &status);
chk(status, "clCreateKernel");
// Set the kernel arguments
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_inputImage);
status |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_outputImage);
status |= clSetKernelArg(kernel, 2, sizeof(int), &imageHeight);
status |= clSetKernelArg(kernel, 3, sizeof(int), &imageWidth);
status |= clSetKernelArg(kernel, 4, sizeof(cl_mem), &d_filter);
status |= clSetKernelArg(kernel, 5, sizeof(int), &filterWidth);
status |= clSetKernelArg(kernel, 6, sizeof(cl_sampler), &sampler);
chk(status, "clSetKernelArg");
// Set the work item dimensions
size_t globalSize[2] = {imageWidth, imageHeight};
status = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, globalSize, NULL, 0,
NULL, NULL);
chk(status, "clEnqueueNDRange");
// Read the image back to the host
status = clEnqueueReadImage(queue, d_outputImage, CL_TRUE, origin,
region, 0, 0, outputImage, 0, NULL, NULL);
chk(status, "clEnqueueReadImage");
// Write the output image to file
storeImage(outputImage, outputFile, imageHeight, imageWidth, inputFile);
// Compute the reference image
for(i = 0; i < imageHeight; i++) {
for(j = 0; j < imageWidth; j++) {
refImage[i*imageWidth+j] = 0;
}
}
// Iterate over the rows of the source image
int halfFilterWidth = filterWidth/2;
float sum;
for(i = 0; i < imageHeight; i++) {
// Iterate over the columns of the source image
for(j = 0; j < imageWidth; j++) {
sum = 0; // Reset sum for new source pixel
// Apply the filter to the neighborhood
for(k = - halfFilterWidth; k <= halfFilterWidth; k++) {
for(l = - halfFilterWidth; l <= halfFilterWidth; l++) {
if(i+k >= 0 && i+k < imageHeight &&
j+l >= 0 && j+l < imageWidth) {
sum += inputImage[(i+k)*imageWidth + j+l] *
filter[(k+halfFilterWidth)*filterWidth +
l+halfFilterWidth];
}
}
}
refImage[i*imageWidth+j] = sum;
}
}
int failed = 0;
for(i = 0; i < imageHeight; i++) {
for(j = 0; j < imageWidth; j++) {
if(abs(outputImage[i*imageWidth+j]-refImage[i*imageWidth+j]) > 0.01) {
printf("Results are INCORRECT\n");
printf("Pixel mismatch at <%d,%d> (%f vs. %f)\n", i, j,
outputImage[i*imageWidth+j], refImage[i*imageWidth+j]);
failed = 1;
}
if(failed) break;
}
if(failed) break;
}
if(!failed) {
printf("Results are correct\n");
}
return 0;
}

View File

@@ -0,0 +1,180 @@
#include <stdio.h>
#include <stdlib.h>
#include "utils.h"
void storeImage(float *imageOut,
const char *filename,
int rows,
int cols,
const char* refFilename) {
FILE *ifp, *ofp;
unsigned char tmp;
int offset;
unsigned char *buffer;
int i, j;
int bytes;
int height, width;
ifp = fopen(refFilename, "rb");
if(ifp == NULL) {
perror(filename);
exit(-1);
}
fseek(ifp, 10, SEEK_SET);
fread(&offset, 4, 1, ifp);
fseek(ifp, 18, SEEK_SET);
fread(&width, 4, 1, ifp);
fread(&height, 4, 1, ifp);
fseek(ifp, 0, SEEK_SET);
buffer = (unsigned char *)malloc(offset);
if(buffer == NULL) {
perror("malloc");
exit(-1);
}
fread(buffer, 1, offset, ifp);
printf("Writing output image to %s\n", filename);
ofp = fopen(filename, "wb");
if(ofp == NULL) {
perror("opening output file");
exit(-1);
}
bytes = fwrite(buffer, 1, offset, ofp);
if(bytes != offset) {
printf("error writing header!\n");
exit(-1);
}
// NOTE bmp formats store data in reverse raster order (see comment in
// readImage function), so we need to flip it upside down here.
int mod = width % 4;
if(mod != 0) {
mod = 4 - mod;
}
// printf("mod = %d\n", mod);
for(i = height-1; i >= 0; i--) {
for(j = 0; j < width; j++) {
tmp = (unsigned char)imageOut[i*cols+j];
fwrite(&tmp, sizeof(char), 1, ofp);
}
// In bmp format, rows must be a multiple of 4-bytes.
// So if we're not at a multiple of 4, add junk padding.
for(j = 0; j < mod; j++) {
fwrite(&tmp, sizeof(char), 1, ofp);
}
}
fclose(ofp);
fclose(ifp);
free(buffer);
}
/*
* Read bmp image and convert to byte array. Also output the width and height
*/
float* readImage(const char *filename, int* widthOut, int* heightOut) {
uchar* imageData;
int height, width;
uchar tmp;
int offset;
int i, j;
printf("Reading input image from %s\n", filename);
FILE *fp = fopen(filename, "rb");
if(fp == NULL) {
perror(filename);
exit(-1);
}
fseek(fp, 10, SEEK_SET);
fread(&offset, 4, 1, fp);
fseek(fp, 18, SEEK_SET);
fread(&width, 4, 1, fp);
fread(&height, 4, 1, fp);
printf("width = %d\n", width);
printf("height = %d\n", height);
*widthOut = width;
*heightOut = height;
imageData = (uchar*)malloc(width*height);
if(imageData == NULL) {
perror("malloc");
exit(-1);
}
fseek(fp, offset, SEEK_SET);
fflush(NULL);
int mod = width % 4;
if(mod != 0) {
mod = 4 - mod;
}
// NOTE bitmaps are stored in upside-down raster order. So we begin
// reading from the bottom left pixel, then going from left-to-right,
// read from the bottom to the top of the image. For image analysis,
// we want the image to be right-side up, so we'll modify it here.
// First we read the image in upside-down
// Read in the actual image
for(i = 0; i < height; i++) {
// add actual data to the image
for(j = 0; j < width; j++) {
fread(&tmp, sizeof(char), 1, fp);
imageData[i*width + j] = tmp;
}
// For the bmp format, each row has to be a multiple of 4,
// so I need to read in the junk data and throw it away
for(j = 0; j < mod; j++) {
fread(&tmp, sizeof(char), 1, fp);
}
}
// Then we flip it over
int flipRow;
for(i = 0; i < height/2; i++) {
flipRow = height - (i+1);
for(j = 0; j < width; j++) {
tmp = imageData[i*width+j];
imageData[i*width+j] = imageData[flipRow*width+j];
imageData[flipRow*width+j] = tmp;
}
}
fclose(fp);
// Input image on the host
float* floatImage = NULL;
floatImage = (float*)malloc(sizeof(float)*width*height);
if(floatImage == NULL) {
perror("malloc");
exit(-1);
}
// Convert the BMP image to float (not required)
for(i = 0; i < height; i++) {
for(j = 0; j < width; j++) {
floatImage[i*width+j] = (float)imageData[i*width+j];
}
}
free(imageData);
return floatImage;
}

View File

@@ -0,0 +1,11 @@
#ifndef __UTILS__
#define __UTILS__
typedef unsigned char uchar;
float* readImage(const char *filename, int* widthOut, int* heightOut);
void storeImage(float *imageOut, const char *filename, int rows, int cols,
const char* refFilename);
#endif