update
This commit is contained in:
@@ -31,18 +31,20 @@ CXXFLAGS += -I$(POCL_INC_PATH)
|
|||||||
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
||||||
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
|
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
|
||||||
|
|
||||||
PROJECT=bfs
|
PROJECT = bfs
|
||||||
|
|
||||||
|
SRCS = main.cc
|
||||||
|
|
||||||
all: $(PROJECT).dump $(PROJECT).hex
|
all: $(PROJECT).dump $(PROJECT).hex
|
||||||
|
|
||||||
lib$(PROJECT).a: kernel.cl
|
lib$(PROJECT).a: kernel.cl
|
||||||
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
|
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
|
||||||
|
|
||||||
$(PROJECT).elf: main.cc lib$(PROJECT).a
|
$(PROJECT).elf: $(SRCS) lib$(PROJECT).a
|
||||||
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) main.cc $(VX_LIBS) -o $(PROJECT).elf
|
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf
|
||||||
|
|
||||||
$(PROJECT).qemu: main.cc lib$(PROJECT).a
|
$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a
|
||||||
$(CXX) $(CXXFLAGS) main.cc $(QEMU_LIBS) -o $(PROJECT).qemu
|
$(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu
|
||||||
|
|
||||||
$(PROJECT).hex: $(PROJECT).elf
|
$(PROJECT).hex: $(PROJECT).elf
|
||||||
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
|
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
|
||||||
@@ -63,4 +65,4 @@ gdb-c: $(PROJECT).qemu
|
|||||||
$(GDB) $(PROJECT).qemu
|
$(GDB) $(PROJECT).qemu
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf *.o *.elf *.dump *.hex *.a *.pocl *.qemu
|
rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug
|
||||||
68
benchmarks/opencl/guassian/Makefile
Normal file
68
benchmarks/opencl/guassian/Makefile
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
RISCV_TOOL_PATH = $(wildcard ~/dev/riscv-gnu-toolchain/drops)
|
||||||
|
POCL_CC_PATH = $(wildcard ~/dev/pocl/drops_riscv_cc)
|
||||||
|
POCL_INC_PATH = $(wildcard ../include)
|
||||||
|
POCL_LIB_PATH = $(wildcard ../lib)
|
||||||
|
VX_RT_PATH = $(wildcard ../../../runtime)
|
||||||
|
VX_SIMX_PATH = $(wildcard ../../../simX/obj_dir)
|
||||||
|
|
||||||
|
CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
|
||||||
|
CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
|
||||||
|
DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
|
||||||
|
HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
|
||||||
|
GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
|
||||||
|
|
||||||
|
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
|
||||||
|
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.s
|
||||||
|
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
|
||||||
|
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
|
||||||
|
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
|
||||||
|
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
|
||||||
|
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c
|
||||||
|
VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST)
|
||||||
|
|
||||||
|
VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld
|
||||||
|
|
||||||
|
CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32
|
||||||
|
CXXFLAGS += -ffreestanding # program may not begin at main()
|
||||||
|
CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections
|
||||||
|
CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
|
||||||
|
CXXFLAGS += -I$(POCL_INC_PATH)
|
||||||
|
|
||||||
|
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
||||||
|
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
|
||||||
|
|
||||||
|
PROJECT = gaussian
|
||||||
|
|
||||||
|
SRCS = main.cc clutils.cpp utils.cpp
|
||||||
|
|
||||||
|
all: $(PROJECT).dump $(PROJECT).hex
|
||||||
|
|
||||||
|
lib$(PROJECT).a: kernel.cl
|
||||||
|
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
|
||||||
|
|
||||||
|
$(PROJECT).elf: $(SRCS) lib$(PROJECT).a
|
||||||
|
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf
|
||||||
|
|
||||||
|
$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a
|
||||||
|
$(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu
|
||||||
|
|
||||||
|
$(PROJECT).hex: $(PROJECT).elf
|
||||||
|
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
|
||||||
|
|
||||||
|
$(PROJECT).dump: $(PROJECT).elf
|
||||||
|
$(DMP) -D $(PROJECT).elf > $(PROJECT).dump
|
||||||
|
|
||||||
|
run: $(PROJECT).hex
|
||||||
|
POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug
|
||||||
|
|
||||||
|
qemu: $(PROJECT).qemu
|
||||||
|
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -d in_asm -D debug.log $(PROJECT).qemu
|
||||||
|
|
||||||
|
gdb-s: $(PROJECT).qemu
|
||||||
|
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu
|
||||||
|
|
||||||
|
gdb-c: $(PROJECT).qemu
|
||||||
|
$(GDB) $(PROJECT).qemu
|
||||||
|
|
||||||
|
clean:
|
||||||
|
rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug
|
||||||
241
benchmarks/opencl/guassian/OriginalParallel.c
Executable file
241
benchmarks/opencl/guassian/OriginalParallel.c
Executable file
@@ -0,0 +1,241 @@
|
|||||||
|
/*-----------------------------------------------------------
|
||||||
|
** ge_p.c -- The program is to solve a linear system Ax = b
|
||||||
|
** by using Gaussian Elimination. The algorithm on page 101
|
||||||
|
** ("Foundations of Parallel Programming") is used.
|
||||||
|
** The sequential version is ge_s.c. This parallel
|
||||||
|
** implementation converts three independent for() loops
|
||||||
|
** into three Fans. Use the data file ge_3.dat to verify
|
||||||
|
** the correction of the output.
|
||||||
|
**
|
||||||
|
** Written by Andreas Kura, 02/15/95
|
||||||
|
** Modified by Chong-wei Xu, /04/20/95
|
||||||
|
**-----------------------------------------------------------
|
||||||
|
*/
|
||||||
|
#include <us.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
int Size, t;
|
||||||
|
float **a, *b;
|
||||||
|
BEGIN_SHARED_DECL
|
||||||
|
float **m;
|
||||||
|
END_SHARED_DECL;
|
||||||
|
FILE *fp;
|
||||||
|
|
||||||
|
void InitProblemOnce();
|
||||||
|
void InitPerRun();
|
||||||
|
void ForwardSub();
|
||||||
|
void Fan1();
|
||||||
|
void Fan2();
|
||||||
|
void Fan3();
|
||||||
|
void InitMat();
|
||||||
|
void InitAry();
|
||||||
|
void PrintMat();
|
||||||
|
void PrintAry();
|
||||||
|
|
||||||
|
main ()
|
||||||
|
{
|
||||||
|
InitializeUs();
|
||||||
|
MakeSharedVariables; /* to make SHARED m */
|
||||||
|
|
||||||
|
InitProblemOnce();
|
||||||
|
InitPerRun();
|
||||||
|
ForwardSub();
|
||||||
|
|
||||||
|
printf("The result of matrix m is: \n");
|
||||||
|
PrintMat(SHARED m, Size, Size);
|
||||||
|
printf("The result of matrix a is: \n");
|
||||||
|
PrintMat(a, Size, Size);
|
||||||
|
printf("The result of array b is: \n");
|
||||||
|
PrintAry(b, Size);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*------------------------------------------------------
|
||||||
|
** InitProblemOnce -- Initialize all of matrices and
|
||||||
|
** vectors by opening a data file specified by the user.
|
||||||
|
**
|
||||||
|
** We used dynamic array **a, *b, and **m to allocate
|
||||||
|
** the memory storages.
|
||||||
|
**------------------------------------------------------
|
||||||
|
*/
|
||||||
|
void InitProblemOnce()
|
||||||
|
{
|
||||||
|
char filename[30];
|
||||||
|
|
||||||
|
printf("Enter the data file name: ");
|
||||||
|
scanf("%s", filename);
|
||||||
|
printf("The file name is: %s\n", filename);
|
||||||
|
|
||||||
|
fp = fopen(filename, "r");
|
||||||
|
|
||||||
|
fscanf(fp, "%d", &Size);
|
||||||
|
a = (float **) UsAllocScatterMatrix(Size, Size, sizeof(float));
|
||||||
|
/*
|
||||||
|
a = (float **) malloc(Size * sizeof(float *));
|
||||||
|
for (i=0; i<Size; i++) {
|
||||||
|
a[i] = (float *) malloc(Size * sizeof(float));
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
InitMat(a, Size, Size);
|
||||||
|
printf("The input matrix a is:\n");
|
||||||
|
PrintMat(a, Size, Size);
|
||||||
|
|
||||||
|
b = (float *) UsAlloc(Size * sizeof(float));
|
||||||
|
/*
|
||||||
|
b = (float *) malloc(Size * sizeof(float));
|
||||||
|
*/
|
||||||
|
InitAry(b, Size);
|
||||||
|
printf("The input array b is:\n");
|
||||||
|
PrintAry(b, Size);
|
||||||
|
|
||||||
|
SHARED m = (float **) UsAllocScatterMatrix(Size, Size, sizeof(float));
|
||||||
|
/*
|
||||||
|
m = (float **) malloc(Size * sizeof(float *));
|
||||||
|
for (i=0; i<Size; i++) {
|
||||||
|
m[i] = (float *) malloc(Size * sizeof(float));
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
Share(&Size);
|
||||||
|
Share(&a);
|
||||||
|
Share(&b);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*------------------------------------------------------
|
||||||
|
** InitPerRun() -- Initialize the contents of the
|
||||||
|
** multipier matrix **m
|
||||||
|
**------------------------------------------------------
|
||||||
|
*/
|
||||||
|
void InitPerRun()
|
||||||
|
{
|
||||||
|
int i, j;
|
||||||
|
|
||||||
|
for (i=0; i<Size; i++)
|
||||||
|
for (j=0; j<Size; j++)
|
||||||
|
SHARED m[i][j] = 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*------------------------------------------------------
|
||||||
|
** ForwardSub() -- Forward substitution of Gaussian
|
||||||
|
** elimination.
|
||||||
|
**------------------------------------------------------
|
||||||
|
*/
|
||||||
|
void ForwardSub()
|
||||||
|
{
|
||||||
|
for (t=0; t<(Size-1); t++) {
|
||||||
|
Share(&t);
|
||||||
|
GenOnI(Fan1, Size-1-t); /* t=0 to (Size-2), the range is
|
||||||
|
** Size-2-t+1 = Size-1-t
|
||||||
|
*/
|
||||||
|
GenOnA(Fan2, Size-1-t, Size-t);
|
||||||
|
GenOnI(Fan3, Size-1-t);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*-------------------------------------------------------
|
||||||
|
** Fan1() -- Calculate multiplier matrix
|
||||||
|
** Pay attention to the index. Index i give the range
|
||||||
|
** which starts from 0 to range-1. The real values of
|
||||||
|
** the index should be adjust and related with the value
|
||||||
|
** of t which is defined on the ForwardSub().
|
||||||
|
**-------------------------------------------------------
|
||||||
|
*/
|
||||||
|
void Fan1(dummy, i)
|
||||||
|
int dummy, i;
|
||||||
|
{
|
||||||
|
/* Use these printf() to display the nodes and index */
|
||||||
|
printf("from node #%d\n", PhysProcToUsProc(Proc_Node));
|
||||||
|
SHARED m[i+t+1][t] = a[i+t+1][t] / a[t][t];
|
||||||
|
printf("i=%d, a[%d][%d]=%.2f, a[%d][%d]=%.2f, m[%d][%d]=%.2f\n",
|
||||||
|
(i+t+1),t,t,a[t][t],(i+t+1),t,a[i+t+1][t],(i+t+1),t,
|
||||||
|
SHARED m[i+t+1][t]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*-------------------------------------------------------
|
||||||
|
** Fan2() -- Modify the matrix A into LUD
|
||||||
|
**-------------------------------------------------------
|
||||||
|
*/
|
||||||
|
void Fan2(dummy, i, j)
|
||||||
|
int dummy, i, j;
|
||||||
|
{
|
||||||
|
a[i+1+t][j+t] -= SHARED m[i+1+t][t] * a[t][j+t];
|
||||||
|
Share (&a);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*-------------------------------------------------------
|
||||||
|
** Fan3() -- Modify the array b
|
||||||
|
**-------------------------------------------------------
|
||||||
|
*/
|
||||||
|
void Fan3(dummy, i)
|
||||||
|
int dummy, i;
|
||||||
|
{
|
||||||
|
b[i+1+t] -= SHARED m[i+1+t][t] * b[t];
|
||||||
|
}
|
||||||
|
|
||||||
|
/*------------------------------------------------------
|
||||||
|
** InitMat() -- Initialize the matrix by reading data
|
||||||
|
** from the data file
|
||||||
|
**------------------------------------------------------
|
||||||
|
*/
|
||||||
|
void InitMat(ary, nrow, ncol)
|
||||||
|
float **ary;
|
||||||
|
int nrow, ncol;
|
||||||
|
{
|
||||||
|
int i, j;
|
||||||
|
|
||||||
|
for (i=0; i<nrow; i++) {
|
||||||
|
for (j=0; j<ncol; j++) {
|
||||||
|
fscanf(fp, "%f", &ary[i][j]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*------------------------------------------------------
|
||||||
|
** PrintMat() -- Print the contents of the matrix
|
||||||
|
**------------------------------------------------------
|
||||||
|
*/
|
||||||
|
void PrintMat(ary, nrow, ncol)
|
||||||
|
float **ary;
|
||||||
|
int nrow, ncol;
|
||||||
|
{
|
||||||
|
int i, j;
|
||||||
|
|
||||||
|
for (i=0; i<nrow; i++) {
|
||||||
|
for (j=0; j<ncol; j++) {
|
||||||
|
printf("%8.2f ", ary[i][j]);
|
||||||
|
}
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
/*------------------------------------------------------
|
||||||
|
** InitAry() -- Initialize the array (vector) by reading
|
||||||
|
** data from the data file
|
||||||
|
**------------------------------------------------------
|
||||||
|
*/
|
||||||
|
void InitAry(ary, ary_size)
|
||||||
|
float *ary;
|
||||||
|
int ary_size;
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i=0; i<ary_size; i++) {
|
||||||
|
fscanf(fp, "%f", &ary[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*------------------------------------------------------
|
||||||
|
** PrintAry() -- Print the contents of the array (vector)
|
||||||
|
**------------------------------------------------------
|
||||||
|
*/
|
||||||
|
void PrintAry(ary, ary_size)
|
||||||
|
float *ary;
|
||||||
|
int ary_size;
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i=0; i<ary_size; i++) {
|
||||||
|
printf("%.2f ", ary[i]);
|
||||||
|
}
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
60
benchmarks/opencl/guassian/README.txt
Executable file
60
benchmarks/opencl/guassian/README.txt
Executable file
@@ -0,0 +1,60 @@
|
|||||||
|
The Gaussian Elimination application solves systems of equations using the
|
||||||
|
gaussian elimination method.
|
||||||
|
|
||||||
|
The application analyzes an n x n matrix and an associated 1 x n vector to solve a
|
||||||
|
set of equations with n variables and n unknowns. The matrix and vector describe equations
|
||||||
|
of the form:
|
||||||
|
|
||||||
|
a0x + b0y + c0z + d0w = e0
|
||||||
|
a1x + b1y + c1z + d1w = e1
|
||||||
|
a2x + b2y + c2z + d2w = e2
|
||||||
|
a3x + b3y + c3z + d3w = e3
|
||||||
|
|
||||||
|
where in this case n=4. The matrix for the above equations would be as follows:
|
||||||
|
|
||||||
|
[a0 b0 c0 d0]
|
||||||
|
[a1 b1 c1 d1]
|
||||||
|
[a2 b2 c2 d2]
|
||||||
|
[a3 b3 c3 d3]
|
||||||
|
|
||||||
|
and the vector would be:
|
||||||
|
|
||||||
|
[e0]
|
||||||
|
[e1]
|
||||||
|
[e2]
|
||||||
|
[e3]
|
||||||
|
|
||||||
|
The application creates a solution vector:
|
||||||
|
|
||||||
|
[x]
|
||||||
|
[y]
|
||||||
|
[z]
|
||||||
|
[w]
|
||||||
|
|
||||||
|
|
||||||
|
The Makefile may need to be adjusted for different machines, but it was written for Mac OS X and
|
||||||
|
Linux with either NVIDIA or AMD OpenCL SDKs.
|
||||||
|
|
||||||
|
Additional input files can be created with the matrixGenerator.py file in the data folder.
|
||||||
|
|
||||||
|
Gaussian Elimination Usage
|
||||||
|
|
||||||
|
gaussianElimination [filename] [-hqt] [-p [int] -d [int]]
|
||||||
|
|
||||||
|
example:
|
||||||
|
$ ./gaussianElimination matrix4.txt
|
||||||
|
|
||||||
|
filename the filename that holds the matrix data
|
||||||
|
|
||||||
|
-h, --help Display the help file
|
||||||
|
-q Quiet mode. Suppress all text output.
|
||||||
|
-t Print timing information.
|
||||||
|
|
||||||
|
-p [int] Choose the platform (must choose both platform and device)
|
||||||
|
-d [int] Choose the device (must choose both platform and device)
|
||||||
|
|
||||||
|
|
||||||
|
Notes: 1. The filename is required as the first parameter.
|
||||||
|
2. If you declare either the device or the platform,
|
||||||
|
you must declare both.
|
||||||
|
|
||||||
1413
benchmarks/opencl/guassian/clutils.cpp
Executable file
1413
benchmarks/opencl/guassian/clutils.cpp
Executable file
File diff suppressed because it is too large
Load Diff
281
benchmarks/opencl/guassian/clutils.h
Executable file
281
benchmarks/opencl/guassian/clutils.h
Executable file
@@ -0,0 +1,281 @@
|
|||||||
|
/****************************************************************************\
|
||||||
|
* Copyright (c) 2011, Advanced Micro Devices, Inc. *
|
||||||
|
* All rights reserved. *
|
||||||
|
* *
|
||||||
|
* Redistribution and use in source and binary forms, with or without *
|
||||||
|
* modification, are permitted provided that the following conditions *
|
||||||
|
* are met: *
|
||||||
|
* *
|
||||||
|
* Redistributions of source code must retain the above copyright notice, *
|
||||||
|
* this list of conditions and the following disclaimer. *
|
||||||
|
* *
|
||||||
|
* Redistributions in binary form must reproduce the above copyright notice, *
|
||||||
|
* this list of conditions and the following disclaimer in the documentation *
|
||||||
|
* and/or other materials provided with the distribution. *
|
||||||
|
* *
|
||||||
|
* Neither the name of the copyright holder nor the names of its contributors *
|
||||||
|
* may be used to endorse or promote products derived from this software *
|
||||||
|
* without specific prior written permission. *
|
||||||
|
* *
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS *
|
||||||
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED *
|
||||||
|
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR *
|
||||||
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR *
|
||||||
|
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, *
|
||||||
|
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, *
|
||||||
|
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR *
|
||||||
|
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF *
|
||||||
|
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING *
|
||||||
|
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
|
||||||
|
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
|
||||||
|
* *
|
||||||
|
* If you use the software (in whole or in part), you shall adhere to all *
|
||||||
|
* applicable U.S., European, and other export laws, including but not *
|
||||||
|
* limited to the U.S. Export Administration Regulations (EAR), (15 C.F.R. *
|
||||||
|
* Sections 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 *
|
||||||
|
* of 22 June 2000. Further, pursuant to Section 740.6 of the EAR, you *
|
||||||
|
* hereby certify that, except pursuant to a license granted by the United *
|
||||||
|
* States Department of Commerce Bureau of Industry and Security or as *
|
||||||
|
* otherwise permitted pursuant to a License Exception under the U.S. Export *
|
||||||
|
* Administration Regulations ("EAR"), you will not (1) export, re-export or *
|
||||||
|
* release to a national of a country in Country Groups D:1, E:1 or E:2 any *
|
||||||
|
* restricted technology, software, or source code you receive hereunder, *
|
||||||
|
* or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such *
|
||||||
|
* technology or software, if such foreign produced direct product is subject *
|
||||||
|
* to national security controls as identified on the Commerce Control List *
|
||||||
|
*(currently found in Supplement 1 to Part 774 of EAR). For the most current *
|
||||||
|
* Country Group listings, or for additional information about the EAR or *
|
||||||
|
* your obligations under those regulations, please refer to the U.S. Bureau *
|
||||||
|
* of Industry and Securitys website at http://www.bis.doc.gov/. *
|
||||||
|
\****************************************************************************/
|
||||||
|
|
||||||
|
#ifndef __CL_UTILS_H__
|
||||||
|
#define __CL_UTILS_H__
|
||||||
|
|
||||||
|
#include <CL/cl.h>
|
||||||
|
|
||||||
|
// The cl_time type is OS specific
|
||||||
|
#ifdef _WIN32
|
||||||
|
#include <tchar.h>
|
||||||
|
#include <Windows.h>
|
||||||
|
typedef __int64 cl_time;
|
||||||
|
#else
|
||||||
|
#include <sys/time.h>
|
||||||
|
typedef double cl_time;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
//-------------------------------------------------------
|
||||||
|
// Initialization and Cleanup
|
||||||
|
//-------------------------------------------------------
|
||||||
|
|
||||||
|
// Detects platforms and devices, creates context and command queue
|
||||||
|
cl_context cl_init(char devicePreference='\0');
|
||||||
|
|
||||||
|
// Creates a context given a platform and a device
|
||||||
|
cl_context cl_init_context(int platform,int dev,int quiet=0);
|
||||||
|
|
||||||
|
// Releases resources used by clutils
|
||||||
|
void cl_cleanup();
|
||||||
|
|
||||||
|
// Releases a kernel object
|
||||||
|
void cl_freeKernel(cl_kernel kernel);
|
||||||
|
|
||||||
|
// Releases a memory object
|
||||||
|
void cl_freeMem(cl_mem mem);
|
||||||
|
|
||||||
|
// Releases a program object
|
||||||
|
void cl_freeProgram(cl_program program);
|
||||||
|
|
||||||
|
// Returns the global command queue
|
||||||
|
cl_command_queue cl_getCommandQueue();
|
||||||
|
|
||||||
|
|
||||||
|
//-------------------------------------------------------
|
||||||
|
// Synchronization functions
|
||||||
|
//-------------------------------------------------------
|
||||||
|
|
||||||
|
// Performs a clFinish on the command queue
|
||||||
|
void cl_sync();
|
||||||
|
|
||||||
|
|
||||||
|
//-------------------------------------------------------
|
||||||
|
// Memory allocation
|
||||||
|
//-------------------------------------------------------
|
||||||
|
|
||||||
|
// Allocates a regular buffer on the device
|
||||||
|
cl_mem cl_allocBuffer(size_t mem_size,
|
||||||
|
cl_mem_flags flags = CL_MEM_READ_WRITE);
|
||||||
|
|
||||||
|
// XXX I don't think this does exactly what we want it to do
|
||||||
|
// Allocates a read-only buffer and transfers the data
|
||||||
|
cl_mem cl_allocBufferConst(size_t mem_size, void* host_ptr);
|
||||||
|
|
||||||
|
// Allocates pinned memory on the host
|
||||||
|
cl_mem cl_allocBufferPinned(size_t mem_size);
|
||||||
|
|
||||||
|
// Allocates an image on the device
|
||||||
|
cl_mem cl_allocImage(size_t height, size_t width, char type,
|
||||||
|
cl_mem_flags flags = CL_MEM_READ_WRITE);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//-------------------------------------------------------
|
||||||
|
// Data transfers
|
||||||
|
//-------------------------------------------------------
|
||||||
|
|
||||||
|
// Copies a buffer from the device to pinned memory on the host and
|
||||||
|
// maps it so it can be read
|
||||||
|
void* cl_copyAndMapBuffer(cl_mem dst, cl_mem src, size_t size);
|
||||||
|
|
||||||
|
// Copies from one buffer to another
|
||||||
|
void cl_copyBufferToBuffer(cl_mem dst, cl_mem src, size_t size);
|
||||||
|
|
||||||
|
// Copies data to a buffer on the device
|
||||||
|
void cl_copyBufferToDevice(cl_mem dst, void *src, size_t mem_size,
|
||||||
|
cl_bool blocking = CL_TRUE);
|
||||||
|
|
||||||
|
// Copies data to an image on the device
|
||||||
|
void cl_copyImageToDevice(cl_mem dst, void* src, size_t height, size_t width);
|
||||||
|
|
||||||
|
// Copies an image from the device to the host
|
||||||
|
void cl_copyImageToHost(void* dst, cl_mem src, size_t height, size_t width);
|
||||||
|
|
||||||
|
// Copies data from a device buffer to the host
|
||||||
|
void cl_copyBufferToHost(void *dst, cl_mem src, size_t mem_size,
|
||||||
|
cl_bool blocking = CL_TRUE);
|
||||||
|
|
||||||
|
// Copies data from a buffer on the device to an image on the device
|
||||||
|
void cl_copyBufferToImage(cl_mem src, cl_mem dst, int height, int width);
|
||||||
|
|
||||||
|
// Maps a buffer
|
||||||
|
void* cl_mapBuffer(cl_mem mem, size_t mem_size, cl_mem_flags flags);
|
||||||
|
|
||||||
|
// Unmaps a buffer
|
||||||
|
void cl_unmapBuffer(cl_mem mem, void *ptr);
|
||||||
|
|
||||||
|
// Writes data to a zero-copy buffer on the device
|
||||||
|
void cl_writeToZCBuffer(cl_mem mem, void* data, size_t size);
|
||||||
|
|
||||||
|
//-------------------------------------------------------
|
||||||
|
// Program and kernels
|
||||||
|
//-------------------------------------------------------
|
||||||
|
|
||||||
|
// Compiles a program
|
||||||
|
cl_program cl_compileProgram(char* kernelPath, char* compileoptions,
|
||||||
|
bool verboseoptions = 0);
|
||||||
|
|
||||||
|
// Creates a kernel
|
||||||
|
cl_kernel cl_createKernel(cl_program program, const char* kernelName);
|
||||||
|
|
||||||
|
|
||||||
|
// Sets a kernel argument
|
||||||
|
void cl_setKernelArg(cl_kernel kernel, unsigned int index, size_t size,
|
||||||
|
void* data);
|
||||||
|
|
||||||
|
|
||||||
|
//-------------------------------------------------------
|
||||||
|
// Profiling/events
|
||||||
|
//-------------------------------------------------------
|
||||||
|
|
||||||
|
// Computes the execution time (start to end) for an event
|
||||||
|
double cl_computeExecTime(cl_event);
|
||||||
|
|
||||||
|
// Compute the elapsed time between two CPU timer values
|
||||||
|
double cl_computeTime(cl_time start, cl_time end);
|
||||||
|
|
||||||
|
// Creates an event from CPU timers
|
||||||
|
void cl_createUserEvent(cl_time start, cl_time end, char* desc);
|
||||||
|
|
||||||
|
// Disable logging of events
|
||||||
|
void cl_disableEvents();
|
||||||
|
|
||||||
|
// Enable logging of events
|
||||||
|
void cl_enableEvents();
|
||||||
|
|
||||||
|
// Query the current system time
|
||||||
|
void cl_getTime(cl_time* time);
|
||||||
|
|
||||||
|
// Calls a function which prints events to the terminal
|
||||||
|
void cl_printEvents();
|
||||||
|
|
||||||
|
// Calls a function which writes the events to a file
|
||||||
|
void cl_writeEventsToFile(char* path);
|
||||||
|
|
||||||
|
|
||||||
|
//-------------------------------------------------------
|
||||||
|
// Error handling
|
||||||
|
//-------------------------------------------------------
|
||||||
|
|
||||||
|
// Compare a status value to CL_SUCCESS and optionally exit on error
|
||||||
|
int cl_errChk(const cl_int status, const char *msg, bool exitOnErr);
|
||||||
|
|
||||||
|
// Queries the supported image formats for the device and prints
|
||||||
|
// them to the screen
|
||||||
|
void printSupportedImageFormats();
|
||||||
|
|
||||||
|
//-------------------------------------------------------
|
||||||
|
// Platform and device information
|
||||||
|
//-------------------------------------------------------
|
||||||
|
|
||||||
|
bool cl_deviceIsAMD(cl_device_id dev=NULL);
|
||||||
|
bool cl_deviceIsNVIDIA(cl_device_id dev=NULL);
|
||||||
|
bool cl_platformIsNVIDIA(cl_platform_id plat=NULL);
|
||||||
|
char* cl_getDeviceDriverVersion(cl_device_id dev=NULL);
|
||||||
|
char* cl_getDeviceName(cl_device_id dev=NULL);
|
||||||
|
char* cl_getDeviceVendor(cl_device_id dev=NULL);
|
||||||
|
char* cl_getDeviceVersion(cl_device_id dev=NULL);
|
||||||
|
char* cl_getPlatformName(cl_platform_id platform);
|
||||||
|
char* cl_getPlatformVendor(cl_platform_id platform);
|
||||||
|
|
||||||
|
//-------------------------------------------------------
|
||||||
|
// Utility functions
|
||||||
|
//-------------------------------------------------------
|
||||||
|
|
||||||
|
char* catStringWithInt(const char* str, int integer);
|
||||||
|
|
||||||
|
char* itoa_portable(int value, char* result, int base);
|
||||||
|
|
||||||
|
//-------------------------------------------------------
|
||||||
|
// Data types
|
||||||
|
//-------------------------------------------------------
|
||||||
|
typedef struct{
|
||||||
|
int x;
|
||||||
|
int y;
|
||||||
|
} int2;
|
||||||
|
|
||||||
|
typedef struct{
|
||||||
|
float x;
|
||||||
|
float y;
|
||||||
|
}float2;
|
||||||
|
|
||||||
|
typedef struct{
|
||||||
|
float x;
|
||||||
|
float y;
|
||||||
|
float z;
|
||||||
|
float w;
|
||||||
|
}float4;
|
||||||
|
|
||||||
|
//-------------------------------------------------------
|
||||||
|
// Defines
|
||||||
|
//-------------------------------------------------------
|
||||||
|
|
||||||
|
#define MAX_ERR_VAL 64
|
||||||
|
|
||||||
|
#define NUM_PROGRAMS 7
|
||||||
|
|
||||||
|
#define NUM_KERNELS 13
|
||||||
|
#define KERNEL_INIT_DET 0
|
||||||
|
#define KERNEL_BUILD_DET 1
|
||||||
|
#define KERNEL_SURF_DESC 2
|
||||||
|
#define KERNEL_NORM_DESC 3
|
||||||
|
#define KERNEL_NON_MAX_SUP 4
|
||||||
|
#define KERNEL_GET_ORIENT1 5
|
||||||
|
#define KERNEL_GET_ORIENT2 6
|
||||||
|
#define KERNEL_NN 7
|
||||||
|
#define KERNEL_SCAN 8
|
||||||
|
#define KERNEL_SCAN4 9
|
||||||
|
#define KERNEL_TRANSPOSE 10
|
||||||
|
#define KERNEL_SCANIMAGE 11
|
||||||
|
#define KERNEL_TRANSPOSEIMAGE 12
|
||||||
|
|
||||||
|
#endif
|
||||||
40
benchmarks/opencl/guassian/gaussianElim.h
Executable file
40
benchmarks/opencl/guassian/gaussianElim.h
Executable file
@@ -0,0 +1,40 @@
|
|||||||
|
#ifndef _GAUSSIANELIM
|
||||||
|
#define _GAUSSIANELIM
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <vector>
|
||||||
|
#include <float.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
|
#include "clutils.h"
|
||||||
|
|
||||||
|
// All OpenCL headers
|
||||||
|
#if defined (__APPLE__) || defined(MACOSX)
|
||||||
|
#include <OpenCL/opencl.h>
|
||||||
|
#else
|
||||||
|
#include <CL/opencl.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
float *OpenClGaussianElimination(
|
||||||
|
cl_context context,
|
||||||
|
int timing);
|
||||||
|
|
||||||
|
void printUsage();
|
||||||
|
int parseCommandline(int argc, char *argv[], char* filename,
|
||||||
|
int *q, int *t, int *p, int *d);
|
||||||
|
|
||||||
|
void InitPerRun(int size,float *m);
|
||||||
|
void ForwardSub(cl_context context, float *a, float *b, float *m, int size,int timing);
|
||||||
|
void BackSub(float *a, float *b, float *finalVec, int size);
|
||||||
|
void Fan1(float *m, float *a, int Size, int t);
|
||||||
|
void Fan2(float *m, float *a, float *b,int Size, int j1, int t);
|
||||||
|
//void Fan3(float *m, float *b, int Size, int t);
|
||||||
|
void InitMat(FILE *fp, int size, float *ary, int nrow, int ncol);
|
||||||
|
void InitAry(FILE *fp, float *ary, int ary_size);
|
||||||
|
void PrintMat(float *ary, int size, int nrow, int ncolumn);
|
||||||
|
void PrintAry(float *ary, int ary_size);
|
||||||
|
float eventTime(cl_event event,cl_command_queue command_queue);
|
||||||
|
#endif
|
||||||
74
benchmarks/opencl/guassian/gettimeofday.cpp
Executable file
74
benchmarks/opencl/guassian/gettimeofday.cpp
Executable file
@@ -0,0 +1,74 @@
|
|||||||
|
#include "stdio.h"
|
||||||
|
#include <time.h>
|
||||||
|
#include <windows.h>
|
||||||
|
#include <iostream>
|
||||||
|
//using namespace System;
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
#if defined(_MSC_VER) || defined(_MSC_EXTENSIONS)
|
||||||
|
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000Ui64
|
||||||
|
#else
|
||||||
|
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||||
|
#endif
|
||||||
|
|
||||||
|
struct timezone
|
||||||
|
{
|
||||||
|
int tz_minuteswest; /* minutes W of Greenwich */
|
||||||
|
int tz_dsttime; /* type of dst correction */
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// Definition of a gettimeofday function
|
||||||
|
int gettimeofday(struct timeval *tv, struct timezone *tz)
|
||||||
|
{
|
||||||
|
// Define a structure to receive the current Windows filetime
|
||||||
|
FILETIME ft;
|
||||||
|
|
||||||
|
// Initialize the present time to 0 and the timezone to UTC
|
||||||
|
unsigned __int64 tmpres = 0;
|
||||||
|
static int tzflag = 0;
|
||||||
|
|
||||||
|
if (NULL != tv)
|
||||||
|
{
|
||||||
|
GetSystemTimeAsFileTime(&ft);
|
||||||
|
|
||||||
|
// The GetSystemTimeAsFileTime returns the number of 100 nanosecond
|
||||||
|
// intervals since Jan 1, 1601 in a structure. Copy the high bits to
|
||||||
|
// the 64 bit tmpres, shift it left by 32 then or in the low 32 bits.
|
||||||
|
tmpres |= ft.dwHighDateTime;
|
||||||
|
tmpres <<= 32;
|
||||||
|
tmpres |= ft.dwLowDateTime;
|
||||||
|
|
||||||
|
// Convert to microseconds by dividing by 10
|
||||||
|
tmpres /= 10;
|
||||||
|
|
||||||
|
// The Unix epoch starts on Jan 1 1970. Need to subtract the difference
|
||||||
|
// in seconds from Jan 1 1601.
|
||||||
|
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||||
|
|
||||||
|
// Finally change microseconds to seconds and place in the seconds value.
|
||||||
|
// The modulus picks up the microseconds.
|
||||||
|
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||||
|
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (NULL != tz)
|
||||||
|
{
|
||||||
|
if (!tzflag)
|
||||||
|
{
|
||||||
|
_tzset();
|
||||||
|
tzflag++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Adjust for the timezone west of Greenwich
|
||||||
|
long seconds_diff;
|
||||||
|
_get_timezone(&seconds_diff);
|
||||||
|
tz->tz_minuteswest = seconds_diff / 60;
|
||||||
|
int hours_offset;
|
||||||
|
_get_daylight(&hours_offset);
|
||||||
|
tz->tz_dsttime = hours_offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
17
benchmarks/opencl/guassian/gettimeofday.h
Executable file
17
benchmarks/opencl/guassian/gettimeofday.h
Executable file
@@ -0,0 +1,17 @@
|
|||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
#include <WinSock.h>
|
||||||
|
/**
|
||||||
|
Based on code seen at.
|
||||||
|
|
||||||
|
http://www.winehq.org/pipermail/wine-devel/2003-June/018082.html
|
||||||
|
|
||||||
|
http://msdn.microsoft.com/en-us/library/ms740560
|
||||||
|
|
||||||
|
*/
|
||||||
|
int gettimeofday(struct timeval *tv, struct timezone *tz);
|
||||||
|
#else
|
||||||
|
#include <sys/time.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
49
benchmarks/opencl/guassian/kernel.cl
Executable file
49
benchmarks/opencl/guassian/kernel.cl
Executable file
@@ -0,0 +1,49 @@
|
|||||||
|
//#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
|
||||||
|
|
||||||
|
typedef struct latLong
|
||||||
|
{
|
||||||
|
float lat;
|
||||||
|
float lng;
|
||||||
|
} LatLong;
|
||||||
|
|
||||||
|
__kernel void Fan1(__global float *m_dev,
|
||||||
|
__global float *a_dev,
|
||||||
|
__global float *b_dev,
|
||||||
|
const int size,
|
||||||
|
const int t) {
|
||||||
|
int globalId = get_global_id(0);
|
||||||
|
|
||||||
|
if (globalId < size-1-t) {
|
||||||
|
*(m_dev + size * (globalId + t + 1)+t) = *(a_dev + size * (globalId + t + 1) + t) / *(a_dev + size * t + t);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
__kernel void Fan2(__global float *m_dev,
|
||||||
|
__global float *a_dev,
|
||||||
|
__global float *b_dev,
|
||||||
|
const int size,
|
||||||
|
const int t) {
|
||||||
|
int globalId = get_global_id(0);
|
||||||
|
|
||||||
|
int globalIdx = get_global_id(0);
|
||||||
|
int globalIdy = get_global_id(1);
|
||||||
|
if (globalIdx < size-1-t && globalIdy < size-t) {
|
||||||
|
a_dev[size*(globalIdx+1+t)+(globalIdy+t)] -= m_dev[size*(globalIdx+1+t)+t] * a_dev[size*t+(globalIdy+t)];
|
||||||
|
|
||||||
|
if(globalIdy == 0){
|
||||||
|
b_dev[globalIdx+1+t] -= m_dev[size*(globalIdx+1+t)+(globalIdy+t)] * b_dev[t];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// One dimensional
|
||||||
|
// int globalIdx = globalId % size;
|
||||||
|
// int globalIdy = globalId / size;
|
||||||
|
//
|
||||||
|
// if (globalIdx < size-1-t && globalIdy < size-t) {
|
||||||
|
// a_dev[size*(globalIdx+1+t)+(globalIdy+t)] -= m_dev[size*(globalIdx+1+t)+t] * a_dev[size*t+(globalIdy+t)];
|
||||||
|
// }
|
||||||
|
// if(globalIdy == 0){
|
||||||
|
// b_dev[globalIdx+1+t] -= m_dev[size*(globalIdx+1+t)+(globalIdy+t)] * b_dev[t];
|
||||||
|
// }
|
||||||
|
|
||||||
|
}
|
||||||
BIN
benchmarks/opencl/guassian/libgaussian.a
Normal file
BIN
benchmarks/opencl/guassian/libgaussian.a
Normal file
Binary file not shown.
412
benchmarks/opencl/guassian/main.cc
Executable file
412
benchmarks/opencl/guassian/main.cc
Executable file
@@ -0,0 +1,412 @@
|
|||||||
|
#ifndef __GAUSSIAN_ELIMINATION__
|
||||||
|
#define __GAUSSIAN_ELIMINATION__
|
||||||
|
|
||||||
|
#include "gaussianElim.h"
|
||||||
|
|
||||||
|
cl_context context = NULL;
|
||||||
|
|
||||||
|
int main(int argc, char *argv[]) {
|
||||||
|
printf("enter demo main\n");
|
||||||
|
float *a = NULL, *b = NULL, *finalVec = NULL;
|
||||||
|
float *m = NULL;
|
||||||
|
int size;
|
||||||
|
|
||||||
|
FILE *fp;
|
||||||
|
|
||||||
|
// args
|
||||||
|
char filename[100];
|
||||||
|
int quiet = 0, timing = 0, platform = -1, device = -1;
|
||||||
|
|
||||||
|
// parse command line
|
||||||
|
if (parseCommandline(argc, argv, filename, &quiet, &timing, &platform,
|
||||||
|
&device)) {
|
||||||
|
printUsage();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
context = cl_init_context(platform, device, quiet);
|
||||||
|
|
||||||
|
fp = fopen(filename, "r");
|
||||||
|
fscanf(fp, "%d", &size);
|
||||||
|
|
||||||
|
a = (float *)malloc(size * size * sizeof(float));
|
||||||
|
|
||||||
|
printf("OK\n");
|
||||||
|
|
||||||
|
InitMat(fp, size, a, size, size);
|
||||||
|
// printf("The input matrix a is:\n");
|
||||||
|
// PrintMat(a, size, size, size);
|
||||||
|
b = (float *)malloc(size * sizeof(float));
|
||||||
|
|
||||||
|
InitAry(fp, b, size);
|
||||||
|
// printf("The input array b is:\n");
|
||||||
|
// PrintAry(b, size);
|
||||||
|
|
||||||
|
// create the solution matrix
|
||||||
|
m = (float *)malloc(size * size * sizeof(float));
|
||||||
|
|
||||||
|
// create a new vector to hold the final answer
|
||||||
|
finalVec = (float *)malloc(size * sizeof(float));
|
||||||
|
|
||||||
|
InitPerRun(size, m);
|
||||||
|
|
||||||
|
// begin timing
|
||||||
|
|
||||||
|
// run kernels
|
||||||
|
ForwardSub(context, a, b, m, size, timing);
|
||||||
|
|
||||||
|
// end timing
|
||||||
|
if (!quiet) {
|
||||||
|
printf("The result of matrix m is: \n");
|
||||||
|
|
||||||
|
PrintMat(m, size, size, size);
|
||||||
|
printf("The result of matrix a is: \n");
|
||||||
|
PrintMat(a, size, size, size);
|
||||||
|
printf("The result of array b is: \n");
|
||||||
|
PrintAry(b, size);
|
||||||
|
|
||||||
|
BackSub(a, b, finalVec, size);
|
||||||
|
printf("The final solution is: \n");
|
||||||
|
PrintAry(finalVec, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
fclose(fp);
|
||||||
|
free(m);
|
||||||
|
free(a);
|
||||||
|
free(b);
|
||||||
|
free(finalVec);
|
||||||
|
// OpenClGaussianElimination(context,timing);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*------------------------------------------------------
|
||||||
|
** ForwardSub() -- Forward substitution of Gaussian
|
||||||
|
** elimination.
|
||||||
|
**------------------------------------------------------
|
||||||
|
*/
|
||||||
|
void ForwardSub(cl_context context, float *a, float *b, float *m, int size,
|
||||||
|
int timing) {
|
||||||
|
// 1. set up kernels
|
||||||
|
cl_kernel fan1_kernel, fan2_kernel;
|
||||||
|
cl_int status = 0;
|
||||||
|
cl_program gaussianElim_program;
|
||||||
|
cl_event writeEvent, kernelEvent, readEvent;
|
||||||
|
float writeTime = 0, readTime = 0, kernelTime = 0;
|
||||||
|
float writeMB = 0, readMB = 0;
|
||||||
|
|
||||||
|
gaussianElim_program =
|
||||||
|
cl_compileProgram((char *)"gaussianElim_kernels.cl", NULL);
|
||||||
|
|
||||||
|
fan1_kernel = clCreateKernel(gaussianElim_program, "Fan1", &status);
|
||||||
|
status = cl_errChk(status, (char *)"Error Creating Fan1 kernel", true);
|
||||||
|
if (status)
|
||||||
|
exit(1);
|
||||||
|
|
||||||
|
fan2_kernel = clCreateKernel(gaussianElim_program, "Fan2", &status);
|
||||||
|
status = cl_errChk(status, (char *)"Error Creating Fan2 kernel", true);
|
||||||
|
if (status)
|
||||||
|
exit(1);
|
||||||
|
|
||||||
|
// 2. set up memory on device and send ipts data to device
|
||||||
|
|
||||||
|
cl_mem a_dev, b_dev, m_dev;
|
||||||
|
|
||||||
|
cl_int error = 0;
|
||||||
|
|
||||||
|
a_dev = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||||
|
sizeof(float) * size * size, NULL, &error);
|
||||||
|
|
||||||
|
b_dev = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float) * size, NULL,
|
||||||
|
&error);
|
||||||
|
|
||||||
|
m_dev = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||||
|
sizeof(float) * size * size, NULL, &error);
|
||||||
|
|
||||||
|
cl_command_queue command_queue = cl_getCommandQueue();
|
||||||
|
|
||||||
|
error = clEnqueueWriteBuffer(command_queue, a_dev,
|
||||||
|
1, // change to 0 for nonblocking write
|
||||||
|
0, // offset
|
||||||
|
sizeof(float) * size * size, a, 0, NULL,
|
||||||
|
&writeEvent);
|
||||||
|
|
||||||
|
if (timing)
|
||||||
|
writeTime += eventTime(writeEvent, command_queue);
|
||||||
|
clReleaseEvent(writeEvent);
|
||||||
|
|
||||||
|
error = clEnqueueWriteBuffer(command_queue, b_dev,
|
||||||
|
1, // change to 0 for nonblocking write
|
||||||
|
0, // offset
|
||||||
|
sizeof(float) * size, b, 0, NULL, &writeEvent);
|
||||||
|
if (timing)
|
||||||
|
writeTime += eventTime(writeEvent, command_queue);
|
||||||
|
clReleaseEvent(writeEvent);
|
||||||
|
|
||||||
|
error = clEnqueueWriteBuffer(command_queue, m_dev,
|
||||||
|
1, // change to 0 for nonblocking write
|
||||||
|
0, // offset
|
||||||
|
sizeof(float) * size * size, m, 0, NULL,
|
||||||
|
&writeEvent);
|
||||||
|
if (timing)
|
||||||
|
writeTime += eventTime(writeEvent, command_queue);
|
||||||
|
clReleaseEvent(writeEvent);
|
||||||
|
writeMB = (float)(sizeof(float) * size * (size + size + 1) / 1e6);
|
||||||
|
|
||||||
|
// 3. Determine block sizes
|
||||||
|
size_t globalWorksizeFan1[1];
|
||||||
|
size_t globalWorksizeFan2[2];
|
||||||
|
|
||||||
|
globalWorksizeFan1[0] = size;
|
||||||
|
globalWorksizeFan2[0] = size;
|
||||||
|
globalWorksizeFan2[1] = size;
|
||||||
|
|
||||||
|
int t;
|
||||||
|
// 4. Setup and Run kernels
|
||||||
|
for (t = 0; t < (size - 1); t++) {
|
||||||
|
// kernel args
|
||||||
|
cl_int argchk;
|
||||||
|
argchk = clSetKernelArg(fan1_kernel, 0, sizeof(cl_mem), (void *)&m_dev);
|
||||||
|
argchk |= clSetKernelArg(fan1_kernel, 1, sizeof(cl_mem), (void *)&a_dev);
|
||||||
|
argchk |= clSetKernelArg(fan1_kernel, 2, sizeof(cl_mem), (void *)&b_dev);
|
||||||
|
argchk |= clSetKernelArg(fan1_kernel, 3, sizeof(int), (void *)&size);
|
||||||
|
argchk |= clSetKernelArg(fan1_kernel, 4, sizeof(int), (void *)&t);
|
||||||
|
|
||||||
|
cl_errChk(argchk, "ERROR in Setting Fan1 kernel args", true);
|
||||||
|
|
||||||
|
// launch kernel
|
||||||
|
error =
|
||||||
|
clEnqueueNDRangeKernel(command_queue, fan1_kernel, 1, 0,
|
||||||
|
globalWorksizeFan1, NULL, 0, NULL, &kernelEvent);
|
||||||
|
|
||||||
|
cl_errChk(error, "ERROR in Executing Fan1 Kernel", true);
|
||||||
|
if (timing) {
|
||||||
|
// printf("here1a\n");
|
||||||
|
kernelTime += eventTime(kernelEvent, command_queue);
|
||||||
|
// printf("here1b\n");
|
||||||
|
}
|
||||||
|
clReleaseEvent(kernelEvent);
|
||||||
|
// Fan1<<<dimGrid,dimBlock>>>(m_cuda,a_cuda,Size,t);
|
||||||
|
// cudaThreadSynchronize();
|
||||||
|
|
||||||
|
// kernel args
|
||||||
|
argchk = clSetKernelArg(fan2_kernel, 0, sizeof(cl_mem), (void *)&m_dev);
|
||||||
|
argchk |= clSetKernelArg(fan2_kernel, 1, sizeof(cl_mem), (void *)&a_dev);
|
||||||
|
argchk |= clSetKernelArg(fan2_kernel, 2, sizeof(cl_mem), (void *)&b_dev);
|
||||||
|
argchk |= clSetKernelArg(fan2_kernel, 3, sizeof(int), (void *)&size);
|
||||||
|
argchk |= clSetKernelArg(fan2_kernel, 4, sizeof(int), (void *)&t);
|
||||||
|
|
||||||
|
cl_errChk(argchk, "ERROR in Setting Fan2 kernel args", true);
|
||||||
|
|
||||||
|
// launch kernel
|
||||||
|
error =
|
||||||
|
clEnqueueNDRangeKernel(command_queue, fan2_kernel, 2, 0,
|
||||||
|
globalWorksizeFan2, NULL, 0, NULL, &kernelEvent);
|
||||||
|
|
||||||
|
cl_errChk(error, "ERROR in Executing Fan1 Kernel", true);
|
||||||
|
if (timing) {
|
||||||
|
// printf("here2a\n");
|
||||||
|
kernelTime += eventTime(kernelEvent, command_queue);
|
||||||
|
// printf("here2b\n");
|
||||||
|
}
|
||||||
|
clReleaseEvent(kernelEvent);
|
||||||
|
// Fan2<<<dimGridXY,dimBlockXY>>>(m_cuda,a_cuda,b_cuda,Size,Size-t,t);
|
||||||
|
// cudaThreadSynchronize();
|
||||||
|
}
|
||||||
|
// 5. transfer data off of device
|
||||||
|
error =
|
||||||
|
clEnqueueReadBuffer(command_queue, a_dev,
|
||||||
|
1, // change to 0 for nonblocking write
|
||||||
|
0, // offset
|
||||||
|
sizeof(float) * size * size, a, 0, NULL, &readEvent);
|
||||||
|
|
||||||
|
cl_errChk(error, "ERROR with clEnqueueReadBuffer", true);
|
||||||
|
if (timing)
|
||||||
|
readTime += eventTime(readEvent, command_queue);
|
||||||
|
clReleaseEvent(readEvent);
|
||||||
|
|
||||||
|
error = clEnqueueReadBuffer(command_queue, b_dev,
|
||||||
|
1, // change to 0 for nonblocking write
|
||||||
|
0, // offset
|
||||||
|
sizeof(float) * size, b, 0, NULL, &readEvent);
|
||||||
|
cl_errChk(error, "ERROR with clEnqueueReadBuffer", true);
|
||||||
|
if (timing)
|
||||||
|
readTime += eventTime(readEvent, command_queue);
|
||||||
|
clReleaseEvent(readEvent);
|
||||||
|
|
||||||
|
error =
|
||||||
|
clEnqueueReadBuffer(command_queue, m_dev,
|
||||||
|
1, // change to 0 for nonblocking write
|
||||||
|
0, // offset
|
||||||
|
sizeof(float) * size * size, m, 0, NULL, &readEvent);
|
||||||
|
|
||||||
|
cl_errChk(error, "ERROR with clEnqueueReadBuffer", true);
|
||||||
|
if (timing)
|
||||||
|
readTime += eventTime(readEvent, command_queue);
|
||||||
|
clReleaseEvent(readEvent);
|
||||||
|
readMB = (float)(sizeof(float) * size * (size + size + 1) / 1e6);
|
||||||
|
|
||||||
|
if (timing) {
|
||||||
|
printf("Matrix Size\tWrite(s) [size]\t\tKernel(s)\tRead(s) "
|
||||||
|
"[size]\t\tTotal(s)\n");
|
||||||
|
printf("%dx%d \t", size, size);
|
||||||
|
|
||||||
|
printf("%f [%.2fMB]\t", writeTime, writeMB);
|
||||||
|
|
||||||
|
printf("%f\t", kernelTime);
|
||||||
|
|
||||||
|
printf("%f [%.2fMB]\t", readTime, readMB);
|
||||||
|
|
||||||
|
printf("%f\n\n", writeTime + kernelTime + readTime);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
float eventTime(cl_event event, cl_command_queue command_queue) {
|
||||||
|
cl_int error = 0;
|
||||||
|
cl_ulong eventStart, eventEnd;
|
||||||
|
clFinish(command_queue);
|
||||||
|
error = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START,
|
||||||
|
sizeof(cl_ulong), &eventStart, NULL);
|
||||||
|
cl_errChk(error, "ERROR in Event Profiling.", true);
|
||||||
|
error = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END,
|
||||||
|
sizeof(cl_ulong), &eventEnd, NULL);
|
||||||
|
cl_errChk(error, "ERROR in Event Profiling.", true);
|
||||||
|
|
||||||
|
return (float)((eventEnd - eventStart) / 1e9);
|
||||||
|
}
|
||||||
|
|
||||||
|
int parseCommandline(int argc, char *argv[], char *filename, int *q, int *t,
|
||||||
|
int *p, int *d) {
|
||||||
|
int i;
|
||||||
|
// if (argc < 2) return 1; // error
|
||||||
|
strncpy(filename, "matrix4.txt", 100);
|
||||||
|
char flag;
|
||||||
|
|
||||||
|
for (i = 1; i < argc; i++) {
|
||||||
|
if (argv[i][0] == '-') { // flag
|
||||||
|
flag = argv[i][1];
|
||||||
|
switch (flag) {
|
||||||
|
case 'h': // help
|
||||||
|
return 1;
|
||||||
|
break;
|
||||||
|
case 'q': // quiet
|
||||||
|
*q = 1;
|
||||||
|
break;
|
||||||
|
case 't': // timing
|
||||||
|
*t = 1;
|
||||||
|
break;
|
||||||
|
case 'p': // platform
|
||||||
|
i++;
|
||||||
|
*p = atoi(argv[i]);
|
||||||
|
break;
|
||||||
|
case 'd': // device
|
||||||
|
i++;
|
||||||
|
*d = atoi(argv[i]);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ((*d >= 0 && *p < 0) ||
|
||||||
|
(*p >= 0 &&
|
||||||
|
*d < 0)) // both p and d must be specified if either are specified
|
||||||
|
return 1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void printUsage() {
|
||||||
|
printf("Gaussian Elimination Usage\n");
|
||||||
|
printf("\n");
|
||||||
|
printf("gaussianElimination [filename] [-hqt] [-p [int] -d [int]]\n");
|
||||||
|
printf("\n");
|
||||||
|
printf("example:\n");
|
||||||
|
printf("$ ./gaussianElimination matrix4.txt\n");
|
||||||
|
printf("\n");
|
||||||
|
printf("filename the filename that holds the matrix data\n");
|
||||||
|
printf("\n");
|
||||||
|
printf("-h Display the help file\n");
|
||||||
|
printf("-q Quiet mode. Suppress all text output.\n");
|
||||||
|
printf("-t Print timing information.\n");
|
||||||
|
printf("\n");
|
||||||
|
printf("-p [int] Choose the platform (must choose both platform and "
|
||||||
|
"device)\n");
|
||||||
|
printf("-d [int] Choose the device (must choose both platform and "
|
||||||
|
"device)\n");
|
||||||
|
printf("\n");
|
||||||
|
printf("\n");
|
||||||
|
printf("Notes: 1. The filename is required as the first parameter.\n");
|
||||||
|
printf(" 2. If you declare either the device or the platform,\n");
|
||||||
|
printf(" you must declare both.\n\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
/*------------------------------------------------------
|
||||||
|
** InitPerRun() -- Initialize the contents of the
|
||||||
|
** multipier matrix **m
|
||||||
|
**------------------------------------------------------
|
||||||
|
*/
|
||||||
|
void InitPerRun(int size, float *m) {
|
||||||
|
int i;
|
||||||
|
for (i = 0; i < size * size; i++)
|
||||||
|
*(m + i) = 0.0;
|
||||||
|
}
|
||||||
|
void BackSub(float *a, float *b, float *finalVec, int size) {
|
||||||
|
// solve "bottom up"
|
||||||
|
int i, j;
|
||||||
|
for (i = 0; i < size; i++) {
|
||||||
|
finalVec[size - i - 1] = b[size - i - 1];
|
||||||
|
for (j = 0; j < i; j++) {
|
||||||
|
finalVec[size - i - 1] -= *(a + size * (size - i - 1) + (size - j - 1)) *
|
||||||
|
finalVec[size - j - 1];
|
||||||
|
}
|
||||||
|
finalVec[size - i - 1] =
|
||||||
|
finalVec[size - i - 1] / *(a + size * (size - i - 1) + (size - i - 1));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
void InitMat(FILE *fp, int size, float *ary, int nrow, int ncol) {
|
||||||
|
int i, j;
|
||||||
|
|
||||||
|
for (i = 0; i < nrow; i++) {
|
||||||
|
for (j = 0; j < ncol; j++) {
|
||||||
|
fscanf(fp, "%f", ary + size * i + j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/*------------------------------------------------------
|
||||||
|
** InitAry() -- Initialize the array (vector) by reading
|
||||||
|
** data from the data file
|
||||||
|
**------------------------------------------------------
|
||||||
|
*/
|
||||||
|
void InitAry(FILE *fp, float *ary, int ary_size) {
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = 0; i < ary_size; i++) {
|
||||||
|
fscanf(fp, "%f", &ary[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/*------------------------------------------------------
|
||||||
|
** PrintMat() -- Print the contents of the matrix
|
||||||
|
**------------------------------------------------------
|
||||||
|
*/
|
||||||
|
void PrintMat(float *ary, int size, int nrow, int ncol) {
|
||||||
|
int i, j;
|
||||||
|
|
||||||
|
for (i = 0; i < nrow; i++) {
|
||||||
|
for (j = 0; j < ncol; j++) {
|
||||||
|
printf("%8.2f ", *(ary + size * i + j));
|
||||||
|
}
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
/*------------------------------------------------------
|
||||||
|
** PrintAry() -- Print the contents of the array (vector)
|
||||||
|
**------------------------------------------------------
|
||||||
|
*/
|
||||||
|
void PrintAry(float *ary, int ary_size) {
|
||||||
|
int i;
|
||||||
|
for (i = 0; i < ary_size; i++) {
|
||||||
|
printf("%.2f ", ary[i]);
|
||||||
|
}
|
||||||
|
printf("\n\n");
|
||||||
|
}
|
||||||
|
#endif
|
||||||
11
benchmarks/opencl/guassian/matrix4.txt
Executable file
11
benchmarks/opencl/guassian/matrix4.txt
Executable file
@@ -0,0 +1,11 @@
|
|||||||
|
4
|
||||||
|
|
||||||
|
-0.6 -0.5 0.7 0.3
|
||||||
|
-0.3 -0.9 0.3 0.7
|
||||||
|
-0.4 -0.5 -0.3 -0.8
|
||||||
|
0.0 -0.1 0.2 0.9
|
||||||
|
|
||||||
|
-0.85 -0.68 0.24 -0.53
|
||||||
|
|
||||||
|
0.7 0.0 -0.4 -0.5
|
||||||
|
|
||||||
1
benchmarks/opencl/guassian/run
Executable file
1
benchmarks/opencl/guassian/run
Executable file
@@ -0,0 +1 @@
|
|||||||
|
./gaussian ../../data/gaussian/matrix4.txt
|
||||||
204
benchmarks/opencl/guassian/utils.cpp
Executable file
204
benchmarks/opencl/guassian/utils.cpp
Executable file
@@ -0,0 +1,204 @@
|
|||||||
|
/****************************************************************************\
|
||||||
|
* Copyright (c) 2011, Advanced Micro Devices, Inc. *
|
||||||
|
* All rights reserved. *
|
||||||
|
* *
|
||||||
|
* Redistribution and use in source and binary forms, with or without *
|
||||||
|
* modification, are permitted provided that the following conditions *
|
||||||
|
* are met: *
|
||||||
|
* *
|
||||||
|
* Redistributions of source code must retain the above copyright notice, *
|
||||||
|
* this list of conditions and the following disclaimer. *
|
||||||
|
* *
|
||||||
|
* Redistributions in binary form must reproduce the above copyright notice, *
|
||||||
|
* this list of conditions and the following disclaimer in the documentation *
|
||||||
|
* and/or other materials provided with the distribution. *
|
||||||
|
* *
|
||||||
|
* Neither the name of the copyright holder nor the names of its contributors *
|
||||||
|
* may be used to endorse or promote products derived from this software *
|
||||||
|
* without specific prior written permission. *
|
||||||
|
* *
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS *
|
||||||
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED *
|
||||||
|
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR *
|
||||||
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR *
|
||||||
|
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, *
|
||||||
|
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, *
|
||||||
|
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR *
|
||||||
|
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF *
|
||||||
|
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING *
|
||||||
|
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
|
||||||
|
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
|
||||||
|
* *
|
||||||
|
* If you use the software (in whole or in part), you shall adhere to all *
|
||||||
|
* applicable U.S., European, and other export laws, including but not *
|
||||||
|
* limited to the U.S. Export Administration Regulations (EAR), (15 C.F.R. *
|
||||||
|
* Sections 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 *
|
||||||
|
* of 22 June 2000. Further, pursuant to Section 740.6 of the EAR, you *
|
||||||
|
* hereby certify that, except pursuant to a license granted by the United *
|
||||||
|
* States Department of Commerce Bureau of Industry and Security or as *
|
||||||
|
* otherwise permitted pursuant to a License Exception under the U.S. Export *
|
||||||
|
* Administration Regulations ("EAR"), you will not (1) export, re-export or *
|
||||||
|
* release to a national of a country in Country Groups D:1, E:1 or E:2 any *
|
||||||
|
* restricted technology, software, or source code you receive hereunder, *
|
||||||
|
* or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such *
|
||||||
|
* technology or software, if such foreign produced direct product is subject *
|
||||||
|
* to national security controls as identified on the Commerce Control List *
|
||||||
|
*(currently found in Supplement 1 to Part 774 of EAR). For the most current *
|
||||||
|
* Country Group listings, or for additional information about the EAR or *
|
||||||
|
* your obligations under those regulations, please refer to the U.S. Bureau *
|
||||||
|
* of Industry and Securitys website at http://www.bis.doc.gov/. *
|
||||||
|
\****************************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#include "utils.h"
|
||||||
|
|
||||||
|
static bool usingImages = true;
|
||||||
|
|
||||||
|
//! A wrapper for malloc that checks the return value
|
||||||
|
void* alloc(size_t size) {
|
||||||
|
|
||||||
|
void* ptr = NULL;
|
||||||
|
ptr = malloc(size);
|
||||||
|
if(ptr == NULL) {
|
||||||
|
perror("malloc");
|
||||||
|
exit(-1);
|
||||||
|
}
|
||||||
|
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
// This function checks to make sure a file exists before we open it
|
||||||
|
void checkFile(char* filename)
|
||||||
|
{
|
||||||
|
|
||||||
|
struct stat fileStatus;
|
||||||
|
if(stat(filename, &fileStatus) != 0) {
|
||||||
|
printf("Error opening file: %s\n", filename);
|
||||||
|
exit(-1);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if(!(S_IFREG & fileStatus.st_mode)) {
|
||||||
|
printf("File %s is not a regular file\n", filename);
|
||||||
|
exit(-1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// This function checks to make sure a directory exists
|
||||||
|
void checkDir(char* dirpath)
|
||||||
|
{
|
||||||
|
|
||||||
|
struct stat fileStatus;
|
||||||
|
if(stat(dirpath, &fileStatus) != 0) {
|
||||||
|
printf("Directory does not exist: %s\n", dirpath);
|
||||||
|
exit(-1);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if(!(S_IFDIR & fileStatus.st_mode)) {
|
||||||
|
printf("Directory was not provided: %s\n", dirpath);
|
||||||
|
exit(-1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse the command line arguments
|
||||||
|
void parseArguments(int argc, char** argv, char** input, char** events,
|
||||||
|
char** ipts, char* devicePref, bool* verifyResults)
|
||||||
|
{
|
||||||
|
|
||||||
|
for(int i = 2; i < argc; i++) {
|
||||||
|
if(strcmp(argv[i], "-d") == 0) { // Event dump found
|
||||||
|
if(i == argc-1) {
|
||||||
|
printf("Usage: -e Needs directory path\n");
|
||||||
|
exit(-1);
|
||||||
|
}
|
||||||
|
devicePref[0] = argv[i+1][0];
|
||||||
|
i++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if(strcmp(argv[i], "-e") == 0) { // Event dump found
|
||||||
|
if(i == argc-1) {
|
||||||
|
printf("Usage: -e Needs directory path\n");
|
||||||
|
exit(-1);
|
||||||
|
}
|
||||||
|
*events = argv[i+1];
|
||||||
|
i++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if(strcmp(argv[i], "-i") == 0) { // Input found
|
||||||
|
if(i == argc-1) {
|
||||||
|
printf("Usage: -i Needs directory path\n");
|
||||||
|
exit(-1);
|
||||||
|
}
|
||||||
|
*input = argv[i+1];
|
||||||
|
i++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if(strcmp(argv[i], "-l") == 0) { // Ipts dump found
|
||||||
|
if(i == argc-1) {
|
||||||
|
printf("Usage: -l Needs directory path\n");
|
||||||
|
exit(-1);
|
||||||
|
}
|
||||||
|
*ipts = argv[i+1];
|
||||||
|
i++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if(strcmp(argv[i], "-n") == 0) { // Don't use OpenCL images
|
||||||
|
setUsingImages(false);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if(strcmp(argv[i], "-v") == 0) { // Verify results
|
||||||
|
*verifyResults = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// This function that takes a positive integer 'value' and returns
|
||||||
|
// the nearest multiple of 'multiple' (used for padding columns)
|
||||||
|
unsigned int roundUp(unsigned int value, unsigned int multiple) {
|
||||||
|
|
||||||
|
unsigned int remainder = value % multiple;
|
||||||
|
|
||||||
|
// Make the value a multiple of multiple
|
||||||
|
if(remainder != 0) {
|
||||||
|
value += (multiple-remainder);
|
||||||
|
}
|
||||||
|
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Concatenate two strings and return a pointer to the new string
|
||||||
|
char* smartStrcat(char* str1, char* str2)
|
||||||
|
{
|
||||||
|
char* newStr = NULL;
|
||||||
|
|
||||||
|
newStr = (char*)alloc((strlen(str1)+strlen(str2)+1)*sizeof(char));
|
||||||
|
|
||||||
|
strcpy(newStr, str1);
|
||||||
|
strcat(newStr, str2);
|
||||||
|
|
||||||
|
return newStr;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Set the value of using images to true if they are being
|
||||||
|
// used, or false if they are not
|
||||||
|
void setUsingImages(bool val)
|
||||||
|
{
|
||||||
|
usingImages = val;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Return whether or not images are being used
|
||||||
|
bool isUsingImages()
|
||||||
|
{
|
||||||
|
return usingImages;
|
||||||
|
}
|
||||||
84
benchmarks/opencl/guassian/utils.h
Executable file
84
benchmarks/opencl/guassian/utils.h
Executable file
@@ -0,0 +1,84 @@
|
|||||||
|
/****************************************************************************\
|
||||||
|
* Copyright (c) 2011, Advanced Micro Devices, Inc. *
|
||||||
|
* All rights reserved. *
|
||||||
|
* *
|
||||||
|
* Redistribution and use in source and binary forms, with or without *
|
||||||
|
* modification, are permitted provided that the following conditions *
|
||||||
|
* are met: *
|
||||||
|
* *
|
||||||
|
* Redistributions of source code must retain the above copyright notice, *
|
||||||
|
* this list of conditions and the following disclaimer. *
|
||||||
|
* *
|
||||||
|
* Redistributions in binary form must reproduce the above copyright notice, *
|
||||||
|
* this list of conditions and the following disclaimer in the documentation *
|
||||||
|
* and/or other materials provided with the distribution. *
|
||||||
|
* *
|
||||||
|
* Neither the name of the copyright holder nor the names of its contributors *
|
||||||
|
* may be used to endorse or promote products derived from this software *
|
||||||
|
* without specific prior written permission. *
|
||||||
|
* *
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS *
|
||||||
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED *
|
||||||
|
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR *
|
||||||
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR *
|
||||||
|
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, *
|
||||||
|
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, *
|
||||||
|
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR *
|
||||||
|
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF *
|
||||||
|
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING *
|
||||||
|
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
|
||||||
|
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
|
||||||
|
* *
|
||||||
|
* If you use the software (in whole or in part), you shall adhere to all *
|
||||||
|
* applicable U.S., European, and other export laws, including but not *
|
||||||
|
* limited to the U.S. Export Administration Regulations (EAR), (15 C.F.R. *
|
||||||
|
* Sections 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 *
|
||||||
|
* of 22 June 2000. Further, pursuant to Section 740.6 of the EAR, you *
|
||||||
|
* hereby certify that, except pursuant to a license granted by the United *
|
||||||
|
* States Department of Commerce Bureau of Industry and Security or as *
|
||||||
|
* otherwise permitted pursuant to a License Exception under the U.S. Export *
|
||||||
|
* Administration Regulations ("EAR"), you will not (1) export, re-export or *
|
||||||
|
* release to a national of a country in Country Groups D:1, E:1 or E:2 any *
|
||||||
|
* restricted technology, software, or source code you receive hereunder, *
|
||||||
|
* or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such *
|
||||||
|
* technology or software, if such foreign produced direct product is subject *
|
||||||
|
* to national security controls as identified on the Commerce Control List *
|
||||||
|
*(currently found in Supplement 1 to Part 774 of EAR). For the most current *
|
||||||
|
* Country Group listings, or for additional information about the EAR or *
|
||||||
|
* your obligations under those regulations, please refer to the U.S. Bureau *
|
||||||
|
* of Industry and Securitys website at http://www.bis.doc.gov/. *
|
||||||
|
\****************************************************************************/
|
||||||
|
|
||||||
|
#ifndef _UTILS_
|
||||||
|
#define _UTILS_
|
||||||
|
|
||||||
|
// Wrapper for malloc
|
||||||
|
void* alloc(size_t size);
|
||||||
|
|
||||||
|
// Checks for existence of directory
|
||||||
|
void checkDir(char* dirpath);
|
||||||
|
|
||||||
|
// Check for existence of file
|
||||||
|
void checkFile(char* filename);
|
||||||
|
|
||||||
|
// Parse the input command line options to the program
|
||||||
|
void parseArguments(int argc, char** argv, char** input, char** events,
|
||||||
|
char** ipts, char* devicePref, bool* verifyResults);
|
||||||
|
|
||||||
|
|
||||||
|
// Print the program usage information
|
||||||
|
void printUsage();
|
||||||
|
|
||||||
|
// Rounds up size to the nearest multiple of multiple
|
||||||
|
unsigned int roundUp(unsigned int value, unsigned int multiple);
|
||||||
|
|
||||||
|
// Concatenate two strings, creating a new one
|
||||||
|
char* smartStrcat(char* str1, char* str2);
|
||||||
|
|
||||||
|
// Set the value of usingImages
|
||||||
|
void setUsingImages(bool val);
|
||||||
|
|
||||||
|
// Return whether or not images are being used
|
||||||
|
bool isUsingImages();
|
||||||
|
|
||||||
|
#endif
|
||||||
@@ -31,7 +31,8 @@ CXXFLAGS += -I$(POCL_INC_PATH)
|
|||||||
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
||||||
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
|
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
|
||||||
|
|
||||||
PROJECT=kmeans
|
PROJECT = kmeans
|
||||||
|
SRCS = main.cc read_input.c rmse.c cluster.c kmeans_clustering.c
|
||||||
|
|
||||||
all: $(PROJECT).dump $(PROJECT).hex
|
all: $(PROJECT).dump $(PROJECT).hex
|
||||||
|
|
||||||
@@ -50,11 +51,11 @@ read_input.o: read_input.c
|
|||||||
rmse.o: rmse.c
|
rmse.o: rmse.c
|
||||||
$(CC) $(CXXFLAGS) -c rmse.c
|
$(CC) $(CXXFLAGS) -c rmse.c
|
||||||
|
|
||||||
$(PROJECT).elf: main.cc lib$(PROJECT).a read_input.o rmse.o cluster.o kmeans_clustering.o
|
$(PROJECT).elf: $(SRCS) lib$(PROJECT).a
|
||||||
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) main.cc read_input.o rmse.o cluster.o kmeans_clustering.o $(VX_LIBS) -o $(PROJECT).elf
|
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf
|
||||||
|
|
||||||
$(PROJECT).qemu: main.cc lib$(PROJECT).a read_input.o rmse.o cluster.o kmeans_clustering.o
|
$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a
|
||||||
$(CXX) $(CXXFLAGS) main.cc read_input.o rmse.o cluster.o kmeans_clustering.o $(QEMU_LIBS) -o $(PROJECT).qemu
|
$(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu
|
||||||
|
|
||||||
$(PROJECT).hex: $(PROJECT).elf
|
$(PROJECT).hex: $(PROJECT).elf
|
||||||
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
|
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
|
||||||
@@ -75,4 +76,4 @@ gdb-c: $(PROJECT).qemu
|
|||||||
$(GDB) $(PROJECT).qemu
|
$(GDB) $(PROJECT).qemu
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf *.o *.elf *.dump *.hex *.a *.pocl *.qemu
|
rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug
|
||||||
155
benchmarks/opencl/kmeans/cluster.c
Executable file
155
benchmarks/opencl/kmeans/cluster.c
Executable file
@@ -0,0 +1,155 @@
|
|||||||
|
/*****************************************************************************/
|
||||||
|
/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. */
|
||||||
|
/*By downloading, copying, installing or using the software you agree */
|
||||||
|
/*to this license. If you do not agree to this license, do not download, */
|
||||||
|
/*install, copy or use the software. */
|
||||||
|
/* */
|
||||||
|
/* */
|
||||||
|
/*Copyright (c) 2005 Northwestern University */
|
||||||
|
/*All rights reserved. */
|
||||||
|
|
||||||
|
/*Redistribution of the software in source and binary forms, */
|
||||||
|
/*with or without modification, is permitted provided that the */
|
||||||
|
/*following conditions are met: */
|
||||||
|
/* */
|
||||||
|
/*1 Redistributions of source code must retain the above copyright */
|
||||||
|
/* notice, this list of conditions and the following disclaimer. */
|
||||||
|
/* */
|
||||||
|
/*2 Redistributions in binary form must reproduce the above copyright */
|
||||||
|
/* notice, this list of conditions and the following disclaimer in the */
|
||||||
|
/* documentation and/or other materials provided with the distribution.*/
|
||||||
|
/* */
|
||||||
|
/*3 Neither the name of Northwestern University nor the names of its */
|
||||||
|
/* contributors may be used to endorse or promote products derived */
|
||||||
|
/* from this software without specific prior written permission. */
|
||||||
|
/* */
|
||||||
|
/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */
|
||||||
|
/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||||
|
/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */
|
||||||
|
/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL */
|
||||||
|
/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */
|
||||||
|
/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||||
|
/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */
|
||||||
|
/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
|
||||||
|
/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */
|
||||||
|
/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */
|
||||||
|
/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||||
|
/*POSSIBILITY OF SUCH DAMAGE. */
|
||||||
|
/******************************************************************************/
|
||||||
|
|
||||||
|
/*************************************************************************/
|
||||||
|
/** File: cluster.c **/
|
||||||
|
/** Description: Takes as input a file, containing 1 data point per **/
|
||||||
|
/** per line, and performs a fuzzy c-means clustering **/
|
||||||
|
/** on the data. Fuzzy clustering is performed using **/
|
||||||
|
/** min to max clusters and the clustering that gets **/
|
||||||
|
/** the best score according to a compactness and **/
|
||||||
|
/** separation criterion are returned. **/
|
||||||
|
/** Author: Brendan McCane **/
|
||||||
|
/** James Cook University of North Queensland. **/
|
||||||
|
/** Australia. email: mccane@cs.jcu.edu.au **/
|
||||||
|
/** **/
|
||||||
|
/** Edited by: Jay Pisharath, Wei-keng Liao **/
|
||||||
|
/** Northwestern University. **/
|
||||||
|
/** **/
|
||||||
|
/** ================================================================ **/
|
||||||
|
/** **/
|
||||||
|
/** Edited by: Shuai Che, David Tarjan, Sang-Ha Lee **/
|
||||||
|
/** University of Virginia **/
|
||||||
|
/** **/
|
||||||
|
/** Description: No longer supports fuzzy c-means clustering; **/
|
||||||
|
/** only regular k-means clustering. **/
|
||||||
|
/** No longer performs "validity" function to analyze **/
|
||||||
|
/** compactness and separation crietria; instead **/
|
||||||
|
/** calculate root mean squared error. **/
|
||||||
|
/** **/
|
||||||
|
/*************************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <limits.h>
|
||||||
|
#include <math.h>
|
||||||
|
#include <float.h>
|
||||||
|
#include "kmeans.h"
|
||||||
|
|
||||||
|
float min_rmse_ref = FLT_MAX;
|
||||||
|
extern double wtime(void);
|
||||||
|
/* reference min_rmse value */
|
||||||
|
|
||||||
|
/*---< cluster() >-----------------------------------------------------------*/
|
||||||
|
int cluster(int npoints, /* number of data points */
|
||||||
|
int nfeatures, /* number of attributes for each point */
|
||||||
|
float **features, /* array: [npoints][nfeatures] */
|
||||||
|
int min_nclusters, /* range of min to max number of clusters */
|
||||||
|
int max_nclusters,
|
||||||
|
float threshold, /* loop terminating factor */
|
||||||
|
int *best_nclusters, /* out: number between min and max with lowest RMSE */
|
||||||
|
float ***cluster_centres, /* out: [best_nclusters][nfeatures] */
|
||||||
|
float *min_rmse, /* out: minimum RMSE */
|
||||||
|
int isRMSE, /* calculate RMSE */
|
||||||
|
int nloops /* number of iteration for each number of clusters */
|
||||||
|
)
|
||||||
|
{
|
||||||
|
int nclusters; /* number of clusters k */
|
||||||
|
int index =0; /* number of iteration to reach the best RMSE */
|
||||||
|
int rmse; /* RMSE for each clustering */
|
||||||
|
int *membership; /* which cluster a data point belongs to */
|
||||||
|
float **tmp_cluster_centres; /* hold coordinates of cluster centers */
|
||||||
|
int i;
|
||||||
|
|
||||||
|
/* allocate memory for membership */
|
||||||
|
membership = (int*) malloc(npoints * sizeof(int));
|
||||||
|
|
||||||
|
/* sweep k from min to max_nclusters to find the best number of clusters */
|
||||||
|
for(nclusters = min_nclusters; nclusters <= max_nclusters; nclusters++)
|
||||||
|
{
|
||||||
|
if (nclusters > npoints) break; /* cannot have more clusters than points */
|
||||||
|
|
||||||
|
/* allocate device memory, invert data array (@ kmeans_cuda.cu) */
|
||||||
|
allocate(npoints, nfeatures, nclusters, features);
|
||||||
|
|
||||||
|
/* iterate nloops times for each number of clusters */
|
||||||
|
for(i = 0; i < nloops; i++)
|
||||||
|
{
|
||||||
|
/* initialize initial cluster centers, CUDA calls (@ kmeans_cuda.cu) */
|
||||||
|
tmp_cluster_centres = kmeans_clustering(features,
|
||||||
|
nfeatures,
|
||||||
|
npoints,
|
||||||
|
nclusters,
|
||||||
|
threshold,
|
||||||
|
membership);
|
||||||
|
|
||||||
|
if (*cluster_centres) {
|
||||||
|
free((*cluster_centres)[0]);
|
||||||
|
free(*cluster_centres);
|
||||||
|
}
|
||||||
|
*cluster_centres = tmp_cluster_centres;
|
||||||
|
|
||||||
|
|
||||||
|
/* find the number of clusters with the best RMSE */
|
||||||
|
if(isRMSE)
|
||||||
|
{
|
||||||
|
rmse = rms_err(features,
|
||||||
|
nfeatures,
|
||||||
|
npoints,
|
||||||
|
tmp_cluster_centres,
|
||||||
|
nclusters);
|
||||||
|
|
||||||
|
if(rmse < min_rmse_ref){
|
||||||
|
min_rmse_ref = rmse; //update reference min RMSE
|
||||||
|
*min_rmse = min_rmse_ref; //update return min RMSE
|
||||||
|
*best_nclusters = nclusters; //update optimum number of clusters
|
||||||
|
index = i; //update number of iteration to reach best RMSE
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
deallocateMemory(); /* free device memory (@ kmeans_cuda.cu) */
|
||||||
|
}
|
||||||
|
|
||||||
|
free(membership);
|
||||||
|
|
||||||
|
return index;
|
||||||
|
}
|
||||||
|
|
||||||
1184
benchmarks/opencl/kmeans/getopt.c
Executable file
1184
benchmarks/opencl/kmeans/getopt.c
Executable file
File diff suppressed because it is too large
Load Diff
191
benchmarks/opencl/kmeans/getopt.h
Executable file
191
benchmarks/opencl/kmeans/getopt.h
Executable file
@@ -0,0 +1,191 @@
|
|||||||
|
|
||||||
|
|
||||||
|
/* getopt.h */
|
||||||
|
/* Declarations for getopt.
|
||||||
|
Copyright (C) 1989-1994, 1996-1999, 2001 Free Software
|
||||||
|
Foundation, Inc. This file is part of the GNU C Library.
|
||||||
|
|
||||||
|
The GNU C Library is free software; you can redistribute
|
||||||
|
it and/or modify it under the terms of the GNU Lesser
|
||||||
|
General Public License as published by the Free Software
|
||||||
|
Foundation; either version 2.1 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
The GNU C Library is distributed in the hope that it will
|
||||||
|
be useful, but WITHOUT ANY WARRANTY; without even the
|
||||||
|
implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||||
|
License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General
|
||||||
|
Public License along with the GNU C Library; if not, write
|
||||||
|
to the Free Software Foundation, Inc., 59 Temple Place,
|
||||||
|
Suite 330, Boston, MA 02111-1307 USA. */
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef _GETOPT_H
|
||||||
|
|
||||||
|
#ifndef __need_getopt
|
||||||
|
# define _GETOPT_H 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* If __GNU_LIBRARY__ is not already defined, either we are being used
|
||||||
|
standalone, or this is the first header included in the source file.
|
||||||
|
If we are being used with glibc, we need to include <features.h>, but
|
||||||
|
that does not exist if we are standalone. So: if __GNU_LIBRARY__ is
|
||||||
|
not defined, include <ctype.h>, which will pull in <features.h> for us
|
||||||
|
if it's from glibc. (Why ctype.h? It's guaranteed to exist and it
|
||||||
|
doesn't flood the namespace with stuff the way some other headers do.) */
|
||||||
|
#if !defined __GNU_LIBRARY__
|
||||||
|
# include <ctype.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* For communication from `getopt' to the caller.
|
||||||
|
When `getopt' finds an option that takes an argument,
|
||||||
|
the argument value is returned here.
|
||||||
|
Also, when `ordering' is RETURN_IN_ORDER,
|
||||||
|
each non-option ARGV-element is returned here. */
|
||||||
|
|
||||||
|
extern char *optarg;
|
||||||
|
|
||||||
|
/* Index in ARGV of the next element to be scanned.
|
||||||
|
This is used for communication to and from the caller
|
||||||
|
and for communication between successive calls to `getopt'.
|
||||||
|
|
||||||
|
On entry to `getopt', zero means this is the first call; initialize.
|
||||||
|
|
||||||
|
When `getopt' returns -1, this is the index of the first of the
|
||||||
|
non-option elements that the caller should itself scan.
|
||||||
|
|
||||||
|
Otherwise, `optind' communicates from one call to the next
|
||||||
|
how much of ARGV has been scanned so far. */
|
||||||
|
|
||||||
|
extern int optind;
|
||||||
|
|
||||||
|
/* Callers store zero here to inhibit the error message `getopt' prints
|
||||||
|
for unrecognized options. */
|
||||||
|
|
||||||
|
extern int opterr;
|
||||||
|
|
||||||
|
/* Set to an option character which was unrecognized. */
|
||||||
|
|
||||||
|
extern int optopt;
|
||||||
|
|
||||||
|
#ifndef __need_getopt
|
||||||
|
/* Describe the long-named options requested by the application.
|
||||||
|
The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector
|
||||||
|
of `struct option' terminated by an element containing a name which is
|
||||||
|
zero.
|
||||||
|
|
||||||
|
The field `has_arg' is:
|
||||||
|
no_argument (or 0) if the option does not take an argument,
|
||||||
|
required_argument (or 1) if the option requires an argument,
|
||||||
|
optional_argument (or 2) if the option takes an optional argument.
|
||||||
|
|
||||||
|
If the field `flag' is not NULL, it points to a variable that is set
|
||||||
|
to the value given in the field `val' when the option is found, but
|
||||||
|
left unchanged if the option is not found.
|
||||||
|
|
||||||
|
To have a long-named option do something other than set an `int' to
|
||||||
|
a compiled-in constant, such as set a value from `optarg', set the
|
||||||
|
option's `flag' field to zero and its `val' field to a nonzero
|
||||||
|
value (the equivalent single-letter option character, if there is
|
||||||
|
one). For long options that have a zero `flag' field, `getopt'
|
||||||
|
returns the contents of the `val' field. */
|
||||||
|
|
||||||
|
struct option
|
||||||
|
{
|
||||||
|
# if (defined __STDC__ && __STDC__) || defined __cplusplus
|
||||||
|
const char *name;
|
||||||
|
# else
|
||||||
|
char *name;
|
||||||
|
# endif
|
||||||
|
/* has_arg can't be an enum because some compilers complain about
|
||||||
|
type mismatches in all the code that assumes it is an int. */
|
||||||
|
int has_arg;
|
||||||
|
int *flag;
|
||||||
|
int val;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Names for the values of the `has_arg' field of `struct option'. */
|
||||||
|
|
||||||
|
# define no_argument 0
|
||||||
|
# define required_argument 1
|
||||||
|
# define optional_argument 2
|
||||||
|
#endif /* need getopt */
|
||||||
|
|
||||||
|
|
||||||
|
/* Get definitions and prototypes for functions to process the
|
||||||
|
arguments in ARGV (ARGC of them, minus the program name) for
|
||||||
|
options given in OPTS.
|
||||||
|
|
||||||
|
Return the option character from OPTS just read. Return -1 when
|
||||||
|
there are no more options. For unrecognized options, or options
|
||||||
|
missing arguments, `optopt' is set to the option letter, and '?' is
|
||||||
|
returned.
|
||||||
|
|
||||||
|
The OPTS string is a list of characters which are recognized option
|
||||||
|
letters, optionally followed by colons, specifying that that letter
|
||||||
|
takes an argument, to be placed in `optarg'.
|
||||||
|
|
||||||
|
If a letter in OPTS is followed by two colons, its argument is
|
||||||
|
optional. This behavior is specific to the GNU `getopt'.
|
||||||
|
|
||||||
|
The argument `--' causes premature termination of argument
|
||||||
|
scanning, explicitly telling `getopt' that there are no more
|
||||||
|
options.
|
||||||
|
|
||||||
|
If OPTS begins with `--', then non-option arguments are treated as
|
||||||
|
arguments to the option '\0'. This behavior is specific to the GNU
|
||||||
|
`getopt'. */
|
||||||
|
|
||||||
|
#if (defined __STDC__ && __STDC__) || defined __cplusplus
|
||||||
|
# ifdef __GNU_LIBRARY__
|
||||||
|
/* Many other libraries have conflicting prototypes for getopt, with
|
||||||
|
differences in the consts, in stdlib.h. To avoid compilation
|
||||||
|
errors, only prototype getopt for the GNU C library. */
|
||||||
|
extern int getopt (int ___argc, char *const *___argv, const char *__shortopts);
|
||||||
|
# else /* not __GNU_LIBRARY__ */
|
||||||
|
extern int getopt ();
|
||||||
|
# endif /* __GNU_LIBRARY__ */
|
||||||
|
|
||||||
|
# ifndef __need_getopt
|
||||||
|
extern int getopt_long (int ___argc, char *const *___argv,
|
||||||
|
const char *__shortopts,
|
||||||
|
const struct option *__longopts, int *__longind);
|
||||||
|
extern int getopt_long_only (int ___argc, char *const *___argv,
|
||||||
|
const char *__shortopts,
|
||||||
|
const struct option *__longopts, int *__longind);
|
||||||
|
|
||||||
|
/* Internal only. Users should not call this directly. */
|
||||||
|
extern int _getopt_internal (int ___argc, char *const *___argv,
|
||||||
|
const char *__shortopts,
|
||||||
|
const struct option *__longopts, int *__longind,
|
||||||
|
int __long_only);
|
||||||
|
# endif
|
||||||
|
#else /* not __STDC__ */
|
||||||
|
extern int getopt ();
|
||||||
|
# ifndef __need_getopt
|
||||||
|
extern int getopt_long ();
|
||||||
|
extern int getopt_long_only ();
|
||||||
|
|
||||||
|
extern int _getopt_internal ();
|
||||||
|
# endif
|
||||||
|
#endif /* __STDC__ */
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Make sure we later can get all the definitions and declarations. */
|
||||||
|
#undef __need_getopt
|
||||||
|
|
||||||
|
#endif /* getopt.h */
|
||||||
|
|
||||||
61
benchmarks/opencl/kmeans/kernel.cl
Executable file
61
benchmarks/opencl/kmeans/kernel.cl
Executable file
@@ -0,0 +1,61 @@
|
|||||||
|
#ifndef FLT_MAX
|
||||||
|
#define FLT_MAX 3.40282347e+38
|
||||||
|
#endif
|
||||||
|
|
||||||
|
__kernel void
|
||||||
|
kmeans_kernel_c(__global float *feature,
|
||||||
|
__global float *clusters,
|
||||||
|
__global int *membership,
|
||||||
|
int npoints,
|
||||||
|
int nclusters,
|
||||||
|
int nfeatures,
|
||||||
|
int offset,
|
||||||
|
int size
|
||||||
|
)
|
||||||
|
{
|
||||||
|
unsigned int point_id = get_global_id(0);
|
||||||
|
int index = 0;
|
||||||
|
//const unsigned int point_id = get_global_id(0);
|
||||||
|
if (point_id < npoints)
|
||||||
|
{
|
||||||
|
float min_dist=FLT_MAX;
|
||||||
|
for (int i=0; i < nclusters; i++) {
|
||||||
|
|
||||||
|
float dist = 0;
|
||||||
|
float ans = 0;
|
||||||
|
for (int l=0; l<nfeatures; l++){
|
||||||
|
ans += (feature[l * npoints + point_id]-clusters[i*nfeatures+l])*
|
||||||
|
(feature[l * npoints + point_id]-clusters[i*nfeatures+l]);
|
||||||
|
}
|
||||||
|
|
||||||
|
dist = ans;
|
||||||
|
if (dist < min_dist) {
|
||||||
|
min_dist = dist;
|
||||||
|
index = i;
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
//printf("%d\n", index);
|
||||||
|
membership[point_id] = index;
|
||||||
|
}
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
__kernel void
|
||||||
|
kmeans_swap(__global float *feature,
|
||||||
|
__global float *feature_swap,
|
||||||
|
int npoints,
|
||||||
|
int nfeatures
|
||||||
|
){
|
||||||
|
|
||||||
|
unsigned int tid = get_global_id(0);
|
||||||
|
//for(int i = 0; i < nfeatures; i++)
|
||||||
|
// feature_swap[i * npoints + tid] = feature[tid * nfeatures + i];
|
||||||
|
//Lingjie Zhang modificated at 11/05/2015
|
||||||
|
if (tid < npoints){
|
||||||
|
for(int i = 0; i < nfeatures; i++)
|
||||||
|
feature_swap[i * npoints + tid] = feature[tid * nfeatures + i];
|
||||||
|
}
|
||||||
|
// end of Lingjie Zhang's modification
|
||||||
|
}
|
||||||
BIN
benchmarks/opencl/kmeans/kmeans
Executable file
BIN
benchmarks/opencl/kmeans/kmeans
Executable file
Binary file not shown.
65
benchmarks/opencl/kmeans/kmeans.h
Executable file
65
benchmarks/opencl/kmeans/kmeans.h
Executable file
@@ -0,0 +1,65 @@
|
|||||||
|
/*****************************************************************************/
|
||||||
|
/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. */
|
||||||
|
/*By downloading, copying, installing or using the software you agree */
|
||||||
|
/*to this license. If you do not agree to this license, do not download, */
|
||||||
|
/*install, copy or use the software. */
|
||||||
|
/* */
|
||||||
|
/* */
|
||||||
|
/*Copyright (c) 2005 Northwestern University */
|
||||||
|
/*All rights reserved. */
|
||||||
|
|
||||||
|
/*Redistribution of the software in source and binary forms, */
|
||||||
|
/*with or without modification, is permitted provided that the */
|
||||||
|
/*following conditions are met: */
|
||||||
|
/* */
|
||||||
|
/*1 Redistributions of source code must retain the above copyright */
|
||||||
|
/* notice, this list of conditions and the following disclaimer. */
|
||||||
|
/* */
|
||||||
|
/*2 Redistributions in binary form must reproduce the above copyright */
|
||||||
|
/* notice, this list of conditions and the following disclaimer in the */
|
||||||
|
/* documentation and/or other materials provided with the distribution.*/
|
||||||
|
/* */
|
||||||
|
/*3 Neither the name of Northwestern University nor the names of its */
|
||||||
|
/* contributors may be used to endorse or promote products derived */
|
||||||
|
/* from this software without specific prior written permission. */
|
||||||
|
/* */
|
||||||
|
/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */
|
||||||
|
/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||||
|
/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */
|
||||||
|
/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL */
|
||||||
|
/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */
|
||||||
|
/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||||
|
/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */
|
||||||
|
/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
|
||||||
|
/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */
|
||||||
|
/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */
|
||||||
|
/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||||
|
/*POSSIBILITY OF SUCH DAMAGE. */
|
||||||
|
/******************************************************************************/
|
||||||
|
|
||||||
|
#ifndef _H_FUZZY_KMEANS
|
||||||
|
#define _H_FUZZY_KMEANS
|
||||||
|
|
||||||
|
#ifndef FLT_MAX
|
||||||
|
#define FLT_MAX 3.40282347e+38
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
float euclid_dist_2 (float*, float*, int);
|
||||||
|
int find_nearest_point (float* , int, float**, int);
|
||||||
|
float rms_err(float**, int, int, float**, int);
|
||||||
|
int cluster(int, int, float**, int, int, float, int*, float***, float*, int, int);
|
||||||
|
int setup(int argc, char** argv);
|
||||||
|
int allocate(int npoints, int nfeatures, int nclusters, float **feature);
|
||||||
|
void deallocateMemory();
|
||||||
|
int kmeansOCL(float **feature, int nfeatures, int npoints, int nclusters, int *membership, float **clusters, int *new_centers_len, float **new_centers);
|
||||||
|
float** kmeans_clustering(float **feature, int nfeatures, int npoints, int nclusters, float threshold, int *membership);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
176
benchmarks/opencl/kmeans/kmeans_clustering.c
Executable file
176
benchmarks/opencl/kmeans/kmeans_clustering.c
Executable file
@@ -0,0 +1,176 @@
|
|||||||
|
/*****************************************************************************/
|
||||||
|
/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. */
|
||||||
|
/*By downloading, copying, installing or using the software you agree */
|
||||||
|
/*to this license. If you do not agree to this license, do not download, */
|
||||||
|
/*install, copy or use the software. */
|
||||||
|
/* */
|
||||||
|
/* */
|
||||||
|
/*Copyright (c) 2005 Northwestern University */
|
||||||
|
/*All rights reserved. */
|
||||||
|
|
||||||
|
/*Redistribution of the software in source and binary forms, */
|
||||||
|
/*with or without modification, is permitted provided that the */
|
||||||
|
/*following conditions are met: */
|
||||||
|
/* */
|
||||||
|
/*1 Redistributions of source code must retain the above copyright */
|
||||||
|
/* notice, this list of conditions and the following disclaimer. */
|
||||||
|
/* */
|
||||||
|
/*2 Redistributions in binary form must reproduce the above copyright */
|
||||||
|
/* notice, this list of conditions and the following disclaimer in the */
|
||||||
|
/* documentation and/or other materials provided with the distribution.*/
|
||||||
|
/* */
|
||||||
|
/*3 Neither the name of Northwestern University nor the names of its */
|
||||||
|
/* contributors may be used to endorse or promote products derived */
|
||||||
|
/* from this software without specific prior written permission. */
|
||||||
|
/* */
|
||||||
|
/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */
|
||||||
|
/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||||
|
/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */
|
||||||
|
/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL */
|
||||||
|
/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */
|
||||||
|
/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||||
|
/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */
|
||||||
|
/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
|
||||||
|
/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */
|
||||||
|
/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */
|
||||||
|
/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||||
|
/*POSSIBILITY OF SUCH DAMAGE. */
|
||||||
|
/******************************************************************************/
|
||||||
|
|
||||||
|
/*************************************************************************/
|
||||||
|
/** File: kmeans_clustering.c **/
|
||||||
|
/** Description: Implementation of regular k-means clustering **/
|
||||||
|
/** algorithm **/
|
||||||
|
/** Author: Wei-keng Liao **/
|
||||||
|
/** ECE Department, Northwestern University **/
|
||||||
|
/** email: wkliao@ece.northwestern.edu **/
|
||||||
|
/** **/
|
||||||
|
/** Edited by: Jay Pisharath **/
|
||||||
|
/** Northwestern University. **/
|
||||||
|
/** **/
|
||||||
|
/** ================================================================ **/
|
||||||
|
/** **/
|
||||||
|
/** Edited by: Shuai Che, David Tarjan, Sang-Ha Lee **/
|
||||||
|
/** University of Virginia **/
|
||||||
|
/** **/
|
||||||
|
/** Description: No longer supports fuzzy c-means clustering; **/
|
||||||
|
/** only regular k-means clustering. **/
|
||||||
|
/** No longer performs "validity" function to analyze **/
|
||||||
|
/** compactness and separation crietria; instead **/
|
||||||
|
/** calculate root mean squared error. **/
|
||||||
|
/** **/
|
||||||
|
/*************************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <float.h>
|
||||||
|
#include <math.h>
|
||||||
|
#include "kmeans.h"
|
||||||
|
|
||||||
|
#define RANDOM_MAX 2147483647
|
||||||
|
|
||||||
|
extern double wtime(void);
|
||||||
|
|
||||||
|
/*----< kmeans_clustering() >---------------------------------------------*/
|
||||||
|
float** kmeans_clustering(float **feature, /* in: [npoints][nfeatures] */
|
||||||
|
int nfeatures,
|
||||||
|
int npoints,
|
||||||
|
int nclusters,
|
||||||
|
float threshold,
|
||||||
|
int *membership) /* out: [npoints] */
|
||||||
|
{
|
||||||
|
int i, j, n = 0; /* counters */
|
||||||
|
int loop=0, temp;
|
||||||
|
int *new_centers_len; /* [nclusters]: no. of points in each cluster */
|
||||||
|
float delta; /* if the point moved */
|
||||||
|
float **clusters; /* out: [nclusters][nfeatures] */
|
||||||
|
float **new_centers; /* [nclusters][nfeatures] */
|
||||||
|
|
||||||
|
int *initial; /* used to hold the index of points not yet selected
|
||||||
|
prevents the "birthday problem" of dual selection (?)
|
||||||
|
considered holding initial cluster indices, but changed due to
|
||||||
|
possible, though unlikely, infinite loops */
|
||||||
|
int initial_points;
|
||||||
|
int c = 0;
|
||||||
|
|
||||||
|
/* nclusters should never be > npoints
|
||||||
|
that would guarantee a cluster without points */
|
||||||
|
if (nclusters > npoints)
|
||||||
|
nclusters = npoints;
|
||||||
|
|
||||||
|
/* allocate space for and initialize returning variable clusters[] */
|
||||||
|
clusters = (float**) malloc(nclusters * sizeof(float*));
|
||||||
|
clusters[0] = (float*) malloc(nclusters * nfeatures * sizeof(float));
|
||||||
|
for (i=1; i<nclusters; i++)
|
||||||
|
clusters[i] = clusters[i-1] + nfeatures;
|
||||||
|
|
||||||
|
/* initialize the random clusters */
|
||||||
|
initial = (int *) malloc (npoints * sizeof(int));
|
||||||
|
for (i = 0; i < npoints; i++)
|
||||||
|
{
|
||||||
|
initial[i] = i;
|
||||||
|
}
|
||||||
|
initial_points = npoints;
|
||||||
|
|
||||||
|
/* randomly pick cluster centers */
|
||||||
|
for (i=0; i<nclusters && initial_points >= 0; i++) {
|
||||||
|
//n = (int)rand() % initial_points;
|
||||||
|
|
||||||
|
for (j=0; j<nfeatures; j++)
|
||||||
|
clusters[i][j] = feature[initial[n]][j]; // remapped
|
||||||
|
|
||||||
|
/* swap the selected index to the end (not really necessary,
|
||||||
|
could just move the end up) */
|
||||||
|
temp = initial[n];
|
||||||
|
initial[n] = initial[initial_points-1];
|
||||||
|
initial[initial_points-1] = temp;
|
||||||
|
initial_points--;
|
||||||
|
n++;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* initialize the membership to -1 for all */
|
||||||
|
for (i=0; i < npoints; i++)
|
||||||
|
membership[i] = -1;
|
||||||
|
|
||||||
|
/* allocate space for and initialize new_centers_len and new_centers */
|
||||||
|
new_centers_len = (int*) calloc(nclusters, sizeof(int));
|
||||||
|
|
||||||
|
new_centers = (float**) malloc(nclusters * sizeof(float*));
|
||||||
|
new_centers[0] = (float*) calloc(nclusters * nfeatures, sizeof(float));
|
||||||
|
for (i=1; i<nclusters; i++)
|
||||||
|
new_centers[i] = new_centers[i-1] + nfeatures;
|
||||||
|
|
||||||
|
/* iterate until convergence */
|
||||||
|
do {
|
||||||
|
delta = 0.0;
|
||||||
|
// CUDA
|
||||||
|
delta = (float) kmeansOCL(feature, /* in: [npoints][nfeatures] */
|
||||||
|
nfeatures, /* number of attributes for each point */
|
||||||
|
npoints, /* number of data points */
|
||||||
|
nclusters, /* number of clusters */
|
||||||
|
membership, /* which cluster the point belongs to */
|
||||||
|
clusters, /* out: [nclusters][nfeatures] */
|
||||||
|
new_centers_len, /* out: number of points in each cluster */
|
||||||
|
new_centers /* sum of points in each cluster */
|
||||||
|
);
|
||||||
|
|
||||||
|
/* replace old cluster centers with new_centers */
|
||||||
|
/* CPU side of reduction */
|
||||||
|
for (i=0; i<nclusters; i++) {
|
||||||
|
for (j=0; j<nfeatures; j++) {
|
||||||
|
if (new_centers_len[i] > 0)
|
||||||
|
clusters[i][j] = new_centers[i][j] / new_centers_len[i]; /* take average i.e. sum/n */
|
||||||
|
new_centers[i][j] = 0.0; /* set back to 0 */
|
||||||
|
}
|
||||||
|
new_centers_len[i] = 0; /* set back to 0 */
|
||||||
|
}
|
||||||
|
c++;
|
||||||
|
} while ((delta > threshold) && (loop++ < 500)); /* makes sure loop terminates */
|
||||||
|
printf("iterated %d times\n", c);
|
||||||
|
free(new_centers[0]);
|
||||||
|
free(new_centers);
|
||||||
|
free(new_centers_len);
|
||||||
|
|
||||||
|
return clusters;
|
||||||
|
}
|
||||||
|
|
||||||
BIN
benchmarks/opencl/kmeans/libkmeans.a
Normal file
BIN
benchmarks/opencl/kmeans/libkmeans.a
Normal file
Binary file not shown.
359
benchmarks/opencl/kmeans/main.cc
Executable file
359
benchmarks/opencl/kmeans/main.cc
Executable file
@@ -0,0 +1,359 @@
|
|||||||
|
#include "kmeans.h"
|
||||||
|
#include <iostream>
|
||||||
|
#include <math.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#ifdef WIN
|
||||||
|
#include <windows.h>
|
||||||
|
#else
|
||||||
|
#include <pthread.h>
|
||||||
|
#include <sys/time.h>
|
||||||
|
double gettime() {
|
||||||
|
struct timeval t;
|
||||||
|
gettimeofday(&t, NULL);
|
||||||
|
return t.tv_sec + t.tv_usec * 1e-6;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef NV
|
||||||
|
#include <oclUtils.h>
|
||||||
|
#else
|
||||||
|
#include <CL/cl.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef FLT_MAX
|
||||||
|
#define FLT_MAX 3.40282347e+38
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef RD_WG_SIZE_0_0
|
||||||
|
#define BLOCK_SIZE RD_WG_SIZE_0_0
|
||||||
|
#elif defined(RD_WG_SIZE_0)
|
||||||
|
#define BLOCK_SIZE RD_WG_SIZE_0
|
||||||
|
#elif defined(RD_WG_SIZE)
|
||||||
|
#define BLOCK_SIZE RD_WG_SIZE
|
||||||
|
#else
|
||||||
|
#define BLOCK_SIZE 256
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef RD_WG_SIZE_1_0
|
||||||
|
#define BLOCK_SIZE2 RD_WG_SIZE_1_0
|
||||||
|
#elif defined(RD_WG_SIZE_1)
|
||||||
|
#define BLOCK_SIZE2 RD_WG_SIZE_1
|
||||||
|
#elif defined(RD_WG_SIZE)
|
||||||
|
#define BLOCK_SIZE2 RD_WG_SIZE
|
||||||
|
#else
|
||||||
|
#define BLOCK_SIZE2 256
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// local variables
|
||||||
|
static cl_context context;
|
||||||
|
static cl_command_queue cmd_queue;
|
||||||
|
static cl_device_type device_type;
|
||||||
|
static cl_device_id *device_list;
|
||||||
|
static cl_int num_devices;
|
||||||
|
|
||||||
|
static int initialize(int use_gpu) {
|
||||||
|
cl_int result;
|
||||||
|
size_t size;
|
||||||
|
|
||||||
|
/*// create OpenCL context
|
||||||
|
cl_platform_id platform_id;
|
||||||
|
if (clGetPlatformIDs(1, &platform_id, NULL) != CL_SUCCESS) {
|
||||||
|
printf("ERROR: clGetPlatformIDs(1,*,0) failed\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
cl_context_properties ctxprop[] = {CL_CONTEXT_PLATFORM,
|
||||||
|
(cl_context_properties)platform_id, 0};
|
||||||
|
device_type = use_gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU;
|
||||||
|
context = clCreateContextFromType(ctxprop, device_type, NULL, NULL, NULL);
|
||||||
|
if (!context) {
|
||||||
|
printf("ERROR: clCreateContextFromType(%s) failed\n",
|
||||||
|
use_gpu ? "GPU" : "CPU");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// get the list of GPUs
|
||||||
|
result = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &size);
|
||||||
|
num_devices = (int)(size / sizeof(cl_device_id));
|
||||||
|
|
||||||
|
if (result != CL_SUCCESS || num_devices < 1) {
|
||||||
|
printf("ERROR: clGetContextInfo() failed\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
device_list = new cl_device_id[num_devices];
|
||||||
|
if (!device_list) {
|
||||||
|
printf("ERROR: new cl_device_id[] failed\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
result =
|
||||||
|
clGetContextInfo(context, CL_CONTEXT_DEVICES, size, device_list, NULL);
|
||||||
|
if (result != CL_SUCCESS) {
|
||||||
|
printf("ERROR: clGetContextInfo() failed\n");
|
||||||
|
return -1;
|
||||||
|
}*/
|
||||||
|
|
||||||
|
cl_platform_id platform_id;
|
||||||
|
num_devices = 1;
|
||||||
|
device_list = new cl_device_id[num_devices];
|
||||||
|
|
||||||
|
result = clGetPlatformIDs(1, &platform_id, NULL);
|
||||||
|
result = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, device_list, NULL);
|
||||||
|
context = clCreateContext(NULL, 1, device_list, NULL, NULL, &result);
|
||||||
|
|
||||||
|
// create command queue for the first device
|
||||||
|
cmd_queue = clCreateCommandQueue(context, device_list[0], 0, NULL);
|
||||||
|
if (!cmd_queue) {
|
||||||
|
printf("ERROR: clCreateCommandQueue() failed\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int shutdown() {
|
||||||
|
// release resources
|
||||||
|
if (cmd_queue)
|
||||||
|
clReleaseCommandQueue(cmd_queue);
|
||||||
|
if (context)
|
||||||
|
clReleaseContext(context);
|
||||||
|
if (device_list)
|
||||||
|
delete device_list;
|
||||||
|
|
||||||
|
// reset all variables
|
||||||
|
cmd_queue = 0;
|
||||||
|
context = 0;
|
||||||
|
device_list = 0;
|
||||||
|
num_devices = 0;
|
||||||
|
device_type = 0;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
cl_mem d_feature;
|
||||||
|
cl_mem d_feature_swap;
|
||||||
|
cl_mem d_cluster;
|
||||||
|
cl_mem d_membership;
|
||||||
|
|
||||||
|
cl_kernel kernel;
|
||||||
|
cl_kernel kernel_s;
|
||||||
|
cl_kernel kernel2;
|
||||||
|
|
||||||
|
int *membership_OCL;
|
||||||
|
int *membership_d;
|
||||||
|
float *feature_d;
|
||||||
|
float *clusters_d;
|
||||||
|
float *center_d;
|
||||||
|
|
||||||
|
int allocate(int n_points, int n_features, int n_clusters, float **feature) {
|
||||||
|
/*int sourcesize = 1024 * 1024;
|
||||||
|
char *source = (char *)calloc(sourcesize, sizeof(char));
|
||||||
|
if (!source) {
|
||||||
|
printf("ERROR: calloc(%d) failed\n", sourcesize);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// read the kernel core source
|
||||||
|
char *tempchar = "./kmeans.cl";
|
||||||
|
FILE *fp = fopen(tempchar, "rb");
|
||||||
|
if (!fp) {
|
||||||
|
printf("ERROR: unable to open '%s'\n", tempchar);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
fread(source + strlen(source), sourcesize, 1, fp);
|
||||||
|
fclose(fp);*/
|
||||||
|
|
||||||
|
// OpenCL initialization
|
||||||
|
int use_gpu = 1;
|
||||||
|
if (initialize(use_gpu))
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
// compile kernel
|
||||||
|
cl_int err = 0;
|
||||||
|
//const char *slist[2] = {source, 0};
|
||||||
|
//cl_program prog = clCreateProgramWithSource(context, 1, slist, NULL, &err);
|
||||||
|
cl_program prog = clCreateProgramWithBuiltInKernels(context, 1, device_list, "kmeans_kernel_c;kmeans_swap", &err);
|
||||||
|
if (err != CL_SUCCESS) {
|
||||||
|
printf("ERROR: clCreateProgramWithSource() => %d\n", err);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
err = clBuildProgram(prog, 0, NULL, NULL, NULL, NULL);
|
||||||
|
{ // show warnings/errors
|
||||||
|
// static char log[65536]; memset(log, 0, sizeof(log));
|
||||||
|
// cl_device_id device_id = 0;
|
||||||
|
// err = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(device_id),
|
||||||
|
//&device_id, NULL);
|
||||||
|
// clGetProgramBuildInfo(prog, device_id, CL_PROGRAM_BUILD_LOG,
|
||||||
|
// sizeof(log)-1, log, NULL);
|
||||||
|
// if(err || strstr(log,"warning:") || strstr(log, "error:"))
|
||||||
|
// printf("<<<<\n%s\n>>>>\n", log);
|
||||||
|
}
|
||||||
|
if (err != CL_SUCCESS) {
|
||||||
|
printf("ERROR: clBuildProgram() => %d\n", err);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
char *kernel_kmeans_c = "kmeans_kernel_c";
|
||||||
|
char *kernel_swap = "kmeans_swap";
|
||||||
|
|
||||||
|
kernel_s = clCreateKernel(prog, kernel_kmeans_c, &err);
|
||||||
|
if (err != CL_SUCCESS) {
|
||||||
|
printf("ERROR: clCreateKernel() 0 => %d\n", err);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
kernel2 = clCreateKernel(prog, kernel_swap, &err);
|
||||||
|
if (err != CL_SUCCESS) {
|
||||||
|
printf("ERROR: clCreateKernel() 0 => %d\n", err);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
clReleaseProgram(prog);
|
||||||
|
|
||||||
|
d_feature = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||||
|
n_points * n_features * sizeof(float), NULL, &err);
|
||||||
|
if (err != CL_SUCCESS) {
|
||||||
|
printf("ERROR: clCreateBuffer d_feature (size:%d) => %d\n",
|
||||||
|
n_points * n_features, err);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
d_feature_swap =
|
||||||
|
clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||||
|
n_points * n_features * sizeof(float), NULL, &err);
|
||||||
|
if (err != CL_SUCCESS) {
|
||||||
|
printf("ERROR: clCreateBuffer d_feature_swap (size:%d) => %d\n",
|
||||||
|
n_points * n_features, err);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
d_cluster =
|
||||||
|
clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||||
|
n_clusters * n_features * sizeof(float), NULL, &err);
|
||||||
|
if (err != CL_SUCCESS) {
|
||||||
|
printf("ERROR: clCreateBuffer d_cluster (size:%d) => %d\n",
|
||||||
|
n_clusters * n_features, err);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
d_membership = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||||
|
n_points * sizeof(int), NULL, &err);
|
||||||
|
if (err != CL_SUCCESS) {
|
||||||
|
printf("ERROR: clCreateBuffer d_membership (size:%d) => %d\n", n_points,
|
||||||
|
err);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// write buffers
|
||||||
|
err = clEnqueueWriteBuffer(cmd_queue, d_feature, 1, 0,
|
||||||
|
n_points * n_features * sizeof(float), feature[0],
|
||||||
|
0, 0, 0);
|
||||||
|
if (err != CL_SUCCESS) {
|
||||||
|
printf("ERROR: clEnqueueWriteBuffer d_feature (size:%d) => %d\n",
|
||||||
|
n_points * n_features, err);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
clSetKernelArg(kernel2, 0, sizeof(void *), (void *)&d_feature);
|
||||||
|
clSetKernelArg(kernel2, 1, sizeof(void *), (void *)&d_feature_swap);
|
||||||
|
clSetKernelArg(kernel2, 2, sizeof(cl_int), (void *)&n_points);
|
||||||
|
clSetKernelArg(kernel2, 3, sizeof(cl_int), (void *)&n_features);
|
||||||
|
|
||||||
|
size_t global_work[3] = {n_points, 1, 1};
|
||||||
|
/// Ke Wang adjustable local group size 2013/08/07 10:37:33
|
||||||
|
size_t local_work_size = BLOCK_SIZE; // work group size is defined by
|
||||||
|
// RD_WG_SIZE_0 or RD_WG_SIZE_0_0
|
||||||
|
// 2014/06/10 17:00:51
|
||||||
|
if (global_work[0] % local_work_size != 0)
|
||||||
|
global_work[0] = (global_work[0] / local_work_size + 1) * local_work_size;
|
||||||
|
|
||||||
|
err = clEnqueueNDRangeKernel(cmd_queue, kernel2, 1, NULL, global_work,
|
||||||
|
&local_work_size, 0, 0, 0);
|
||||||
|
if (err != CL_SUCCESS) {
|
||||||
|
printf("ERROR: clEnqueueNDRangeKernel()=>%d failed\n", err);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
membership_OCL = (int *)malloc(n_points * sizeof(int));
|
||||||
|
}
|
||||||
|
|
||||||
|
void deallocateMemory() {
|
||||||
|
clReleaseMemObject(d_feature);
|
||||||
|
clReleaseMemObject(d_feature_swap);
|
||||||
|
clReleaseMemObject(d_cluster);
|
||||||
|
clReleaseMemObject(d_membership);
|
||||||
|
free(membership_OCL);
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char **argv) {
|
||||||
|
printf("WG size of kernel_swap = %d, WG size of kernel_kmeans = %d \n",
|
||||||
|
BLOCK_SIZE, BLOCK_SIZE2);
|
||||||
|
setup(argc, argv);
|
||||||
|
shutdown();
|
||||||
|
}
|
||||||
|
|
||||||
|
int kmeansOCL(float **feature, /* in: [npoints][nfeatures] */
|
||||||
|
int n_features, int n_points, int n_clusters, int *membership,
|
||||||
|
float **clusters, int *new_centers_len, float **new_centers) {
|
||||||
|
|
||||||
|
int delta = 0;
|
||||||
|
int i, j, k;
|
||||||
|
cl_int err = 0;
|
||||||
|
|
||||||
|
size_t global_work[3] = {n_points, 1, 1};
|
||||||
|
|
||||||
|
/// Ke Wang adjustable local group size 2013/08/07 10:37:33
|
||||||
|
size_t local_work_size = BLOCK_SIZE2; // work group size is defined by
|
||||||
|
// RD_WG_SIZE_1 or RD_WG_SIZE_1_0
|
||||||
|
// 2014/06/10 17:00:41
|
||||||
|
if (global_work[0] % local_work_size != 0)
|
||||||
|
global_work[0] = (global_work[0] / local_work_size + 1) * local_work_size;
|
||||||
|
|
||||||
|
err = clEnqueueWriteBuffer(cmd_queue, d_cluster, 1, 0,
|
||||||
|
n_clusters * n_features * sizeof(float),
|
||||||
|
clusters[0], 0, 0, 0);
|
||||||
|
if (err != CL_SUCCESS) {
|
||||||
|
printf("ERROR: clEnqueueWriteBuffer d_cluster (size:%d) => %d\n", n_points,
|
||||||
|
err);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int size = 0;
|
||||||
|
int offset = 0;
|
||||||
|
|
||||||
|
clSetKernelArg(kernel_s, 0, sizeof(void *), (void *)&d_feature_swap);
|
||||||
|
clSetKernelArg(kernel_s, 1, sizeof(void *), (void *)&d_cluster);
|
||||||
|
clSetKernelArg(kernel_s, 2, sizeof(void *), (void *)&d_membership);
|
||||||
|
clSetKernelArg(kernel_s, 3, sizeof(cl_int), (void *)&n_points);
|
||||||
|
clSetKernelArg(kernel_s, 4, sizeof(cl_int), (void *)&n_clusters);
|
||||||
|
clSetKernelArg(kernel_s, 5, sizeof(cl_int), (void *)&n_features);
|
||||||
|
clSetKernelArg(kernel_s, 6, sizeof(cl_int), (void *)&offset);
|
||||||
|
clSetKernelArg(kernel_s, 7, sizeof(cl_int), (void *)&size);
|
||||||
|
|
||||||
|
err = clEnqueueNDRangeKernel(cmd_queue, kernel_s, 1, NULL, global_work,
|
||||||
|
&local_work_size, 0, 0, 0);
|
||||||
|
if (err != CL_SUCCESS) {
|
||||||
|
printf("ERROR: clEnqueueNDRangeKernel()=>%d failed\n", err);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
clFinish(cmd_queue);
|
||||||
|
err = clEnqueueReadBuffer(cmd_queue, d_membership, 1, 0,
|
||||||
|
n_points * sizeof(int), membership_OCL, 0, 0, 0);
|
||||||
|
if (err != CL_SUCCESS) {
|
||||||
|
printf("ERROR: Memcopy Out\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
delta = 0;
|
||||||
|
for (i = 0; i < n_points; i++) {
|
||||||
|
int cluster_id = membership_OCL[i];
|
||||||
|
new_centers_len[cluster_id]++;
|
||||||
|
if (membership_OCL[i] != membership[i]) {
|
||||||
|
delta++;
|
||||||
|
membership[i] = membership_OCL[i];
|
||||||
|
}
|
||||||
|
for (j = 0; j < n_features; j++) {
|
||||||
|
new_centers[cluster_id][j] += feature[i][j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return delta;
|
||||||
|
}
|
||||||
338
benchmarks/opencl/kmeans/read_input.c
Executable file
338
benchmarks/opencl/kmeans/read_input.c
Executable file
@@ -0,0 +1,338 @@
|
|||||||
|
/*****************************************************************************/
|
||||||
|
/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. */
|
||||||
|
/*By downloading, copying, installing or using the software you agree */
|
||||||
|
/*to this license. If you do not agree to this license, do not download, */
|
||||||
|
/*install, copy or use the software. */
|
||||||
|
/* */
|
||||||
|
/* */
|
||||||
|
/*Copyright (c) 2005 Northwestern University */
|
||||||
|
/*All rights reserved. */
|
||||||
|
|
||||||
|
/*Redistribution of the software in source and binary forms, */
|
||||||
|
/*with or without modification, is permitted provided that the */
|
||||||
|
/*following conditions are met: */
|
||||||
|
/* */
|
||||||
|
/*1 Redistributions of source code must retain the above copyright */
|
||||||
|
/* notice, this list of conditions and the following disclaimer. */
|
||||||
|
/* */
|
||||||
|
/*2 Redistributions in binary form must reproduce the above copyright */
|
||||||
|
/* notice, this list of conditions and the following disclaimer in the */
|
||||||
|
/* documentation and/or other materials provided with the distribution.*/
|
||||||
|
/* */
|
||||||
|
/*3 Neither the name of Northwestern University nor the names of its */
|
||||||
|
/* contributors may be used to endorse or promote products derived */
|
||||||
|
/* from this software without specific prior written permission. */
|
||||||
|
/* */
|
||||||
|
/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */
|
||||||
|
/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||||
|
/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */
|
||||||
|
/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL */
|
||||||
|
/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */
|
||||||
|
/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||||
|
/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */
|
||||||
|
/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
|
||||||
|
/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */
|
||||||
|
/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */
|
||||||
|
/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||||
|
/*POSSIBILITY OF SUCH DAMAGE. */
|
||||||
|
/******************************************************************************/
|
||||||
|
|
||||||
|
/*************************************************************************/
|
||||||
|
/** File: example.c **/
|
||||||
|
/** Description: Takes as input a file: **/
|
||||||
|
/** ascii file: containing 1 data point per line **/
|
||||||
|
/** binary file: first int is the number of objects **/
|
||||||
|
/** 2nd int is the no. of features of each **/
|
||||||
|
/** object **/
|
||||||
|
/** This example performs a fuzzy c-means clustering **/
|
||||||
|
/** on the data. Fuzzy clustering is performed using **/
|
||||||
|
/** min to max clusters and the clustering that gets **/
|
||||||
|
/** the best score according to a compactness and **/
|
||||||
|
/** separation criterion are returned. **/
|
||||||
|
/** Author: Wei-keng Liao **/
|
||||||
|
/** ECE Department Northwestern University **/
|
||||||
|
/** email: wkliao@ece.northwestern.edu **/
|
||||||
|
/** **/
|
||||||
|
/** Edited by: Jay Pisharath **/
|
||||||
|
/** Northwestern University. **/
|
||||||
|
/** **/
|
||||||
|
/** ================================================================ **/
|
||||||
|
/**
|
||||||
|
* **/
|
||||||
|
/** Edited by: Shuai Che, David Tarjan, Sang-Ha Lee
|
||||||
|
* **/
|
||||||
|
/** University of Virginia
|
||||||
|
* **/
|
||||||
|
/**
|
||||||
|
* **/
|
||||||
|
/** Description: No longer supports fuzzy c-means clustering;
|
||||||
|
* **/
|
||||||
|
/** only regular k-means clustering.
|
||||||
|
* **/
|
||||||
|
/** No longer performs "validity" function to
|
||||||
|
* analyze **/
|
||||||
|
/** compactness and separation crietria; instead
|
||||||
|
* **/
|
||||||
|
/** calculate root mean squared error.
|
||||||
|
* **/
|
||||||
|
/** **/
|
||||||
|
/*************************************************************************/
|
||||||
|
#define _CRT_SECURE_NO_DEPRECATE 1
|
||||||
|
|
||||||
|
#include "kmeans.h"
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <limits.h>
|
||||||
|
#include <math.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
extern double wtime(void);
|
||||||
|
|
||||||
|
/*---< usage() >------------------------------------------------------------*/
|
||||||
|
void usage(char *argv0) {
|
||||||
|
char *help = "\nUsage: %s [switches] -i filename\n\n"
|
||||||
|
" -i filename :file containing data to be clustered\n"
|
||||||
|
" -m max_nclusters :maximum number of clusters allowed "
|
||||||
|
"[default=5]\n"
|
||||||
|
" -n min_nclusters :minimum number of clusters allowed "
|
||||||
|
"[default=5]\n"
|
||||||
|
" -t threshold :threshold value "
|
||||||
|
"[default=0.001]\n"
|
||||||
|
" -l nloops :iteration for each number of clusters "
|
||||||
|
"[default=1]\n"
|
||||||
|
" -b :input file is in binary format\n"
|
||||||
|
" -r :calculate RMSE "
|
||||||
|
"[default=off]\n"
|
||||||
|
" -o :output cluster center coordinates "
|
||||||
|
"[default=off]\n";
|
||||||
|
fprintf(stderr, help, argv0);
|
||||||
|
exit(-1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*---< main() >-------------------------------------------------------------*/
|
||||||
|
int setup(int argc, char **argv) {
|
||||||
|
int opt;
|
||||||
|
extern char *optarg;
|
||||||
|
char *filename = 0;
|
||||||
|
float *buf;
|
||||||
|
char line[1024];
|
||||||
|
int isBinaryFile = 0;
|
||||||
|
|
||||||
|
float threshold = 0.001; /* default value */
|
||||||
|
int max_nclusters = 5; /* default value */
|
||||||
|
int min_nclusters = 5; /* default value */
|
||||||
|
int best_nclusters = 0;
|
||||||
|
int nfeatures = 0;
|
||||||
|
int npoints = 0;
|
||||||
|
float len;
|
||||||
|
|
||||||
|
float **features;
|
||||||
|
float **cluster_centres = NULL;
|
||||||
|
int i, j, index;
|
||||||
|
int nloops = 1; /* default value */
|
||||||
|
|
||||||
|
int isRMSE = 0;
|
||||||
|
float rmse;
|
||||||
|
|
||||||
|
int isOutput = 0;
|
||||||
|
// float cluster_timing, io_timing;
|
||||||
|
|
||||||
|
/* obtain command line arguments and change appropriate options */
|
||||||
|
while ((opt = getopt(argc, argv, "i:t:m:n:l:bro")) != EOF) {
|
||||||
|
switch (opt) {
|
||||||
|
case 'i':
|
||||||
|
filename = optarg;
|
||||||
|
break;
|
||||||
|
case 'b':
|
||||||
|
isBinaryFile = 1;
|
||||||
|
break;
|
||||||
|
case 't':
|
||||||
|
threshold = atof(optarg);
|
||||||
|
break;
|
||||||
|
case 'm':
|
||||||
|
max_nclusters = atoi(optarg);
|
||||||
|
break;
|
||||||
|
case 'n':
|
||||||
|
min_nclusters = atoi(optarg);
|
||||||
|
break;
|
||||||
|
case 'r':
|
||||||
|
isRMSE = 1;
|
||||||
|
break;
|
||||||
|
case 'o':
|
||||||
|
isOutput = 1;
|
||||||
|
break;
|
||||||
|
case 'l':
|
||||||
|
nloops = atoi(optarg);
|
||||||
|
break;
|
||||||
|
case '?':
|
||||||
|
usage(argv[0]);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
usage(argv[0]);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ============== I/O begin ==============*/
|
||||||
|
/* get nfeatures and npoints */
|
||||||
|
// io_timing = omp_get_wtime();
|
||||||
|
|
||||||
|
/*if (isBinaryFile) { // Binary file input
|
||||||
|
FILE *infile;
|
||||||
|
if ((infile = fopen("100", "r")) == NULL) {
|
||||||
|
fprintf(stderr, "Error: no such file (%s)\n", filename);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
fread(&npoints, 1, sizeof(int), infile);
|
||||||
|
fread(&nfeatures, 1, sizeof(int), infile);
|
||||||
|
|
||||||
|
// allocate space for features[][] and read attributes of all objects
|
||||||
|
buf = (float *)malloc(npoints * nfeatures * sizeof(float));
|
||||||
|
features = (float **)malloc(npoints * sizeof(float *));
|
||||||
|
features[0] = (float *)malloc(npoints * nfeatures * sizeof(float));
|
||||||
|
for (i = 1; i < npoints; i++) {
|
||||||
|
features[i] = features[i - 1] + nfeatures;
|
||||||
|
}
|
||||||
|
fread(buf, 1, npoints * nfeatures * sizeof(float), infile);
|
||||||
|
fclose(infile);
|
||||||
|
} else {
|
||||||
|
FILE *infile;
|
||||||
|
if ((infile = fopen("100", "r")) == NULL) {
|
||||||
|
fprintf(stderr, "Error: no such file (%s)\n", filename);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
while (fgets(line, 1024, infile) != NULL)
|
||||||
|
if (strtok(line, " \t\n") != 0) {
|
||||||
|
npoints++;
|
||||||
|
}
|
||||||
|
rewind(infile);
|
||||||
|
while (fgets(line, 1024, infile) != NULL) {
|
||||||
|
if (strtok(line, " \t\n") != 0) {
|
||||||
|
// ignore the id (first attribute): nfeatures = 1;
|
||||||
|
while (strtok(NULL, " ,\t\n") != NULL)
|
||||||
|
nfeatures++;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// allocate space for features[] and read attributes of all objects
|
||||||
|
buf = (float *)malloc(npoints * nfeatures * sizeof(float));
|
||||||
|
features = (float **)malloc(npoints * sizeof(float *));
|
||||||
|
features[0] = (float *)malloc(npoints * nfeatures * sizeof(float));
|
||||||
|
for (i = 1; i < npoints; i++)
|
||||||
|
features[i] = features[i - 1] + nfeatures;
|
||||||
|
rewind(infile);
|
||||||
|
i = 0;
|
||||||
|
while (fgets(line, 1024, infile) != NULL) {
|
||||||
|
if (strtok(line, " \t\n") == NULL)
|
||||||
|
continue;
|
||||||
|
for (j = 0; j < nfeatures; j++) {
|
||||||
|
buf[i] = atof(strtok(NULL, " ,\t\n"));
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fclose(infile);
|
||||||
|
}*/
|
||||||
|
|
||||||
|
npoints = 100;
|
||||||
|
nfeatures = 100;
|
||||||
|
buf = (float *)malloc(npoints * nfeatures * sizeof(float));
|
||||||
|
features = (float **)malloc(npoints * sizeof(float *));
|
||||||
|
features[0] = (float *)malloc(npoints * nfeatures * sizeof(float));
|
||||||
|
for (i = 1; i < npoints; i++) {
|
||||||
|
features[i] = features[i - 1] + nfeatures;
|
||||||
|
}
|
||||||
|
for (i = 0; i < npoints * nfeatures; ++i) {
|
||||||
|
buf[i] = (i % 64);
|
||||||
|
}
|
||||||
|
|
||||||
|
// io_timing = omp_get_wtime() - io_timing;
|
||||||
|
|
||||||
|
printf("\nI/O completed\n");
|
||||||
|
printf("\nNumber of objects: %d\n", npoints);
|
||||||
|
printf("Number of features: %d\n", nfeatures);
|
||||||
|
/* ============== I/O end ==============*/
|
||||||
|
|
||||||
|
// error check for clusters
|
||||||
|
if (npoints < min_nclusters) {
|
||||||
|
printf("Error: min_nclusters(%d) > npoints(%d) -- cannot proceed\n",
|
||||||
|
min_nclusters, npoints);
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
srand(7); /* seed for future random number generator */
|
||||||
|
memcpy(
|
||||||
|
features[0], buf,
|
||||||
|
npoints * nfeatures *
|
||||||
|
sizeof(
|
||||||
|
float)); /* now features holds 2-dimensional array of features */
|
||||||
|
free(buf);
|
||||||
|
|
||||||
|
/* ======================= core of the clustering ===================*/
|
||||||
|
|
||||||
|
// cluster_timing = omp_get_wtime(); /* Total clustering time */
|
||||||
|
cluster_centres = NULL;
|
||||||
|
index = cluster(npoints, /* number of data points */
|
||||||
|
nfeatures, /* number of features for each point */
|
||||||
|
features, /* array: [npoints][nfeatures] */
|
||||||
|
min_nclusters, /* range of min to max number of clusters */
|
||||||
|
max_nclusters, threshold, /* loop termination factor */
|
||||||
|
&best_nclusters, /* return: number between min and max */
|
||||||
|
&cluster_centres, /* return: [best_nclusters][nfeatures] */
|
||||||
|
&rmse, /* Root Mean Squared Error */
|
||||||
|
isRMSE, /* calculate RMSE */
|
||||||
|
nloops); /* number of iteration for each number of clusters */
|
||||||
|
|
||||||
|
// cluster_timing = omp_get_wtime() - cluster_timing;
|
||||||
|
|
||||||
|
/* =============== Command Line Output =============== */
|
||||||
|
|
||||||
|
/* cluster center coordinates
|
||||||
|
:displayed only for when k=1*/
|
||||||
|
if ((min_nclusters == max_nclusters) && (isOutput == 1)) {
|
||||||
|
printf("\n================= Centroid Coordinates =================\n");
|
||||||
|
for (i = 0; i < max_nclusters; i++) {
|
||||||
|
printf("%d:", i);
|
||||||
|
for (j = 0; j < nfeatures; j++) {
|
||||||
|
printf(" %.2f", cluster_centres[i][j]);
|
||||||
|
}
|
||||||
|
printf("\n\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
len = (float)((max_nclusters - min_nclusters + 1) * nloops);
|
||||||
|
|
||||||
|
printf("Number of Iteration: %d\n", nloops);
|
||||||
|
// printf("Time for I/O: %.5fsec\n", io_timing);
|
||||||
|
// printf("Time for Entire Clustering: %.5fsec\n", cluster_timing);
|
||||||
|
|
||||||
|
if (min_nclusters != max_nclusters) {
|
||||||
|
if (nloops != 1) { // range of k, multiple iteration
|
||||||
|
// printf("Average Clustering Time: %fsec\n",
|
||||||
|
// cluster_timing / len);
|
||||||
|
printf("Best number of clusters is %d\n", best_nclusters);
|
||||||
|
} else { // range of k, single iteration
|
||||||
|
// printf("Average Clustering Time: %fsec\n",
|
||||||
|
// cluster_timing / len);
|
||||||
|
printf("Best number of clusters is %d\n", best_nclusters);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (nloops != 1) { // single k, multiple iteration
|
||||||
|
// printf("Average Clustering Time: %.5fsec\n",
|
||||||
|
// cluster_timing / nloops);
|
||||||
|
if (isRMSE) // if calculated RMSE
|
||||||
|
printf("Number of trials to approach the best RMSE of %.3f is %d\n",
|
||||||
|
rmse, index + 1);
|
||||||
|
} else { // single k, single iteration
|
||||||
|
if (isRMSE) // if calculated RMSE
|
||||||
|
printf("Root Mean Squared Error: %.3f\n", rmse);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* free up memory */
|
||||||
|
free(features[0]);
|
||||||
|
free(features);
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
94
benchmarks/opencl/kmeans/rmse.c
Executable file
94
benchmarks/opencl/kmeans/rmse.c
Executable file
@@ -0,0 +1,94 @@
|
|||||||
|
/*************************************************************************/
|
||||||
|
/** File: rmse.c **/
|
||||||
|
/** Description: calculate root mean squared error of particular **/
|
||||||
|
/** clustering. **/
|
||||||
|
/** Author: Sang-Ha Lee **/
|
||||||
|
/** University of Virginia. **/
|
||||||
|
/** **/
|
||||||
|
/** Note: euclid_dist_2() and find_nearest_point() adopted from **/
|
||||||
|
/** Minebench code. **/
|
||||||
|
/** **/
|
||||||
|
/*************************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <float.h>
|
||||||
|
#include <math.h>
|
||||||
|
|
||||||
|
#include "kmeans.h"
|
||||||
|
|
||||||
|
extern double wtime(void);
|
||||||
|
|
||||||
|
/*----< euclid_dist_2() >----------------------------------------------------*/
|
||||||
|
/* multi-dimensional spatial Euclid distance square */
|
||||||
|
__inline
|
||||||
|
float euclid_dist_2(float *pt1,
|
||||||
|
float *pt2,
|
||||||
|
int numdims)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
float ans=0.0;
|
||||||
|
|
||||||
|
for (i=0; i<numdims; i++)
|
||||||
|
ans += (pt1[i]-pt2[i]) * (pt1[i]-pt2[i]);
|
||||||
|
|
||||||
|
return(ans);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*----< find_nearest_point() >-----------------------------------------------*/
|
||||||
|
__inline
|
||||||
|
int find_nearest_point(float *pt, /* [nfeatures] */
|
||||||
|
int nfeatures,
|
||||||
|
float **pts, /* [npts][nfeatures] */
|
||||||
|
int npts)
|
||||||
|
{
|
||||||
|
int index, i;
|
||||||
|
float max_dist=FLT_MAX;
|
||||||
|
|
||||||
|
/* find the cluster center id with min distance to pt */
|
||||||
|
for (i=0; i<npts; i++) {
|
||||||
|
float dist;
|
||||||
|
dist = euclid_dist_2(pt, pts[i], nfeatures); /* no need square root */
|
||||||
|
if (dist < max_dist) {
|
||||||
|
max_dist = dist;
|
||||||
|
index = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return(index);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*----< rms_err(): calculates RMSE of clustering >-------------------------------------*/
|
||||||
|
float rms_err (float **feature, /* [npoints][nfeatures] */
|
||||||
|
int nfeatures,
|
||||||
|
int npoints,
|
||||||
|
float **cluster_centres, /* [nclusters][nfeatures] */
|
||||||
|
int nclusters)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
int nearest_cluster_index; /* cluster center id with min distance to pt */
|
||||||
|
float sum_euclid = 0.0; /* sum of Euclidean distance squares */
|
||||||
|
float ret; /* return value */
|
||||||
|
|
||||||
|
/* calculate and sum the sqaure of euclidean distance*/
|
||||||
|
#pragma omp parallel for \
|
||||||
|
shared(feature,cluster_centres) \
|
||||||
|
firstprivate(npoints,nfeatures,nclusters) \
|
||||||
|
private(i, nearest_cluster_index) \
|
||||||
|
schedule (static)
|
||||||
|
for (i=0; i<npoints; i++) {
|
||||||
|
nearest_cluster_index = find_nearest_point(feature[i],
|
||||||
|
nfeatures,
|
||||||
|
cluster_centres,
|
||||||
|
nclusters);
|
||||||
|
|
||||||
|
sum_euclid += euclid_dist_2(feature[i],
|
||||||
|
cluster_centres[nearest_cluster_index],
|
||||||
|
nfeatures);
|
||||||
|
|
||||||
|
}
|
||||||
|
/* divide by n, then take sqrt */
|
||||||
|
ret = sqrt(sum_euclid / npoints);
|
||||||
|
|
||||||
|
return(ret);
|
||||||
|
}
|
||||||
|
|
||||||
1
benchmarks/opencl/kmeans/run
Executable file
1
benchmarks/opencl/kmeans/run
Executable file
@@ -0,0 +1 @@
|
|||||||
|
./kmeans -o -i ../../data/kmeans/kdd_cup
|
||||||
@@ -31,18 +31,20 @@ CXXFLAGS += -I$(POCL_INC_PATH)
|
|||||||
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
||||||
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
|
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
|
||||||
|
|
||||||
PROJECT=saxpy
|
PROJECT = saxpy
|
||||||
|
|
||||||
|
SRCS = main.cc
|
||||||
|
|
||||||
all: $(PROJECT).dump $(PROJECT).hex
|
all: $(PROJECT).dump $(PROJECT).hex
|
||||||
|
|
||||||
lib$(PROJECT).a: kernel.cl
|
lib$(PROJECT).a: kernel.cl
|
||||||
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
|
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
|
||||||
|
|
||||||
$(PROJECT).elf: main.cc lib$(PROJECT).a
|
$(PROJECT).elf: $(SRCS) lib$(PROJECT).a
|
||||||
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) main.cc $(VX_LIBS) -o $(PROJECT).elf
|
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf
|
||||||
|
|
||||||
$(PROJECT).qemu: main.cc lib$(PROJECT).a
|
$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a
|
||||||
$(CXX) $(CXXFLAGS) main.cc $(QEMU_LIBS) -o $(PROJECT).qemu
|
$(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu
|
||||||
|
|
||||||
$(PROJECT).hex: $(PROJECT).elf
|
$(PROJECT).hex: $(PROJECT).elf
|
||||||
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
|
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
|
||||||
@@ -63,4 +65,4 @@ gdb-c: $(PROJECT).qemu
|
|||||||
$(GDB) $(PROJECT).qemu
|
$(GDB) $(PROJECT).qemu
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf *.o *.elf *.dump *.hex *.a *.pocl *.qemu
|
rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug
|
||||||
@@ -153,7 +153,7 @@ int main(int argc, char **argv) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
cl_event kernel_completion;
|
cl_event kernel_completion;
|
||||||
size_t global_work_size[1] = {NUM_DATA};
|
size_t global_work_size[1] = {NUM_DATA/2,NUM_DATA/2};
|
||||||
printf("attempting to enqueue kernel\n");
|
printf("attempting to enqueue kernel\n");
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size,
|
CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size,
|
||||||
|
|||||||
@@ -31,18 +31,20 @@ CXXFLAGS += -I$(POCL_INC_PATH)
|
|||||||
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
||||||
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
|
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
|
||||||
|
|
||||||
PROJECT=sfilter
|
PROJECT = sfilter
|
||||||
|
|
||||||
|
SRCS = main.cc
|
||||||
|
|
||||||
all: $(PROJECT).dump $(PROJECT).hex
|
all: $(PROJECT).dump $(PROJECT).hex
|
||||||
|
|
||||||
lib$(PROJECT).a: kernel.cl
|
lib$(PROJECT).a: kernel.cl
|
||||||
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
|
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
|
||||||
|
|
||||||
$(PROJECT).elf: main.cc lib$(PROJECT).a
|
$(PROJECT).elf: $(SRCS) lib$(PROJECT).a
|
||||||
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) main.cc $(VX_LIBS) -o $(PROJECT).elf
|
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf
|
||||||
|
|
||||||
$(PROJECT).qemu: main.cc lib$(PROJECT).a
|
$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a
|
||||||
$(CXX) $(CXXFLAGS) main.cc $(QEMU_LIBS) -o $(PROJECT).qemu
|
$(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu
|
||||||
|
|
||||||
$(PROJECT).hex: $(PROJECT).elf
|
$(PROJECT).hex: $(PROJECT).elf
|
||||||
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
|
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
|
||||||
@@ -63,4 +65,4 @@ gdb-c: $(PROJECT).qemu
|
|||||||
$(GDB) $(PROJECT).qemu
|
$(GDB) $(PROJECT).qemu
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf *.o *.elf *.dump *.hex *.a *.pocl *.qemu
|
rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug
|
||||||
@@ -31,18 +31,20 @@ CXXFLAGS += -I$(POCL_INC_PATH)
|
|||||||
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
||||||
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
|
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
|
||||||
|
|
||||||
PROJECT=sgemm
|
PROJECT = sgemm
|
||||||
|
|
||||||
|
SRCS = main.cc
|
||||||
|
|
||||||
all: $(PROJECT).dump $(PROJECT).hex
|
all: $(PROJECT).dump $(PROJECT).hex
|
||||||
|
|
||||||
lib$(PROJECT).a: kernel.cl
|
lib$(PROJECT).a: kernel.cl
|
||||||
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
|
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
|
||||||
|
|
||||||
$(PROJECT).elf: main.cc lib$(PROJECT).a
|
$(PROJECT).elf: $(SRCS) lib$(PROJECT).a
|
||||||
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) main.cc $(VX_LIBS) -o $(PROJECT).elf
|
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf
|
||||||
|
|
||||||
$(PROJECT).qemu: main.cc lib$(PROJECT).a
|
$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a
|
||||||
$(CXX) $(CXXFLAGS) main.cc $(QEMU_LIBS) -o $(PROJECT).qemu
|
$(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu
|
||||||
|
|
||||||
$(PROJECT).hex: $(PROJECT).elf
|
$(PROJECT).hex: $(PROJECT).elf
|
||||||
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
|
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
|
||||||
@@ -63,4 +65,4 @@ gdb-c: $(PROJECT).qemu
|
|||||||
$(GDB) $(PROJECT).qemu
|
$(GDB) $(PROJECT).qemu
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf *.elf *.dump *.hex
|
rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug
|
||||||
|
|||||||
@@ -31,18 +31,20 @@ CXXFLAGS += -I$(POCL_INC_PATH)
|
|||||||
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
||||||
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
|
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
|
||||||
|
|
||||||
PROJECT=vecadd
|
PROJECT = vecadd
|
||||||
|
|
||||||
|
SRCS = main.cc
|
||||||
|
|
||||||
all: $(PROJECT).dump $(PROJECT).hex
|
all: $(PROJECT).dump $(PROJECT).hex
|
||||||
|
|
||||||
lib$(PROJECT).a: kernel.cl
|
lib$(PROJECT).a: kernel.cl
|
||||||
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
|
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
|
||||||
|
|
||||||
$(PROJECT).elf: main.cc lib$(PROJECT).a
|
$(PROJECT).elf: $(SRCS) lib$(PROJECT).a
|
||||||
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) main.cc $(VX_LIBS) -o $(PROJECT).elf
|
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf
|
||||||
|
|
||||||
$(PROJECT).qemu: main.cc lib$(PROJECT).a
|
$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a
|
||||||
$(CXX) $(CXXFLAGS) main.cc $(QEMU_LIBS) -o $(PROJECT).qemu
|
$(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu
|
||||||
|
|
||||||
$(PROJECT).hex: $(PROJECT).elf
|
$(PROJECT).hex: $(PROJECT).elf
|
||||||
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
|
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
|
||||||
@@ -63,4 +65,4 @@ gdb-c: $(PROJECT).qemu
|
|||||||
$(GDB) $(PROJECT).qemu
|
$(GDB) $(PROJECT).qemu
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf *.o *.elf *.dump *.hex *.a *.pocl *.qemu
|
rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug
|
||||||
Reference in New Issue
Block a user