Merge branch 'fpga_synthesis' of https://github.gatech.edu/casl/Vortex into fpga_synthesis
This commit is contained in:
44
benchmarks/new_opencl/kmeans/Makefile
Normal file
44
benchmarks/new_opencl/kmeans/Makefile
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
LLVM_LIB_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops/lib)
|
||||||
|
POCLCC_PATH ?= $(wildcard ../compiler)
|
||||||
|
POCLRT_PATH ?= $(wildcard ../runtime)
|
||||||
|
DRIVER_PATH ?= $(wildcard ../../../driver/sw)
|
||||||
|
|
||||||
|
CXXFLAGS += -std=c++11 -O0 -g -fpermissive -Wall -Wextra -pedantic -Wfatal-errors
|
||||||
|
|
||||||
|
CXXFLAGS += -I$(POCLRT_PATH)/include
|
||||||
|
|
||||||
|
LDFLAGS += -L$(POCLRT_PATH)/lib -L$(DRIVER_PATH)/simx -lOpenCL -lvortex
|
||||||
|
|
||||||
|
PROJECT = kmeans
|
||||||
|
|
||||||
|
SRCS = main.cc read_input.c rmse.c kmeans_clustering.c cluster.c getopt.c
|
||||||
|
|
||||||
|
all: $(PROJECT)
|
||||||
|
|
||||||
|
kernel.pocl: kernel.cl
|
||||||
|
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(LLVM_LIB_PATH):$(POCLCC_PATH)/lib:$(DRIVER_PATH)/simx $(POCLCC_PATH)/bin/poclcc -o kernel.pocl kernel.cl
|
||||||
|
|
||||||
|
$(PROJECT): $(SRCS)
|
||||||
|
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
|
||||||
|
|
||||||
|
run-fpga: $(PROJECT) kernel.pocl
|
||||||
|
LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||||
|
|
||||||
|
run-ase: $(PROJECT) kernel.pocl
|
||||||
|
LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||||
|
|
||||||
|
run-simx: $(PROJECT) kernel.pocl
|
||||||
|
LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||||
|
|
||||||
|
run-rtlsim: $(PROJECT) kernel.pocl
|
||||||
|
LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||||
|
|
||||||
|
.depend: $(SRCS)
|
||||||
|
$(CXX) $(CXXFLAGS) -MM $^ > .depend;
|
||||||
|
|
||||||
|
clean:
|
||||||
|
rm -rf $(PROJECT) *.o *.dump .depend
|
||||||
|
|
||||||
|
ifneq ($(MAKECMDGOALS),clean)
|
||||||
|
-include .depend
|
||||||
|
endif
|
||||||
0
benchmarks/new_opencl/kmeans/README
Normal file
0
benchmarks/new_opencl/kmeans/README
Normal file
155
benchmarks/new_opencl/kmeans/cluster.c
Executable file
155
benchmarks/new_opencl/kmeans/cluster.c
Executable file
@@ -0,0 +1,155 @@
|
|||||||
|
/*****************************************************************************/
|
||||||
|
/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. */
|
||||||
|
/*By downloading, copying, installing or using the software you agree */
|
||||||
|
/*to this license. If you do not agree to this license, do not download, */
|
||||||
|
/*install, copy or use the software. */
|
||||||
|
/* */
|
||||||
|
/* */
|
||||||
|
/*Copyright (c) 2005 Northwestern University */
|
||||||
|
/*All rights reserved. */
|
||||||
|
|
||||||
|
/*Redistribution of the software in source and binary forms, */
|
||||||
|
/*with or without modification, is permitted provided that the */
|
||||||
|
/*following conditions are met: */
|
||||||
|
/* */
|
||||||
|
/*1 Redistributions of source code must retain the above copyright */
|
||||||
|
/* notice, this list of conditions and the following disclaimer. */
|
||||||
|
/* */
|
||||||
|
/*2 Redistributions in binary form must reproduce the above copyright */
|
||||||
|
/* notice, this list of conditions and the following disclaimer in the */
|
||||||
|
/* documentation and/or other materials provided with the distribution.*/
|
||||||
|
/* */
|
||||||
|
/*3 Neither the name of Northwestern University nor the names of its */
|
||||||
|
/* contributors may be used to endorse or promote products derived */
|
||||||
|
/* from this software without specific prior written permission. */
|
||||||
|
/* */
|
||||||
|
/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */
|
||||||
|
/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||||
|
/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */
|
||||||
|
/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL */
|
||||||
|
/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */
|
||||||
|
/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||||
|
/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */
|
||||||
|
/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
|
||||||
|
/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */
|
||||||
|
/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */
|
||||||
|
/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||||
|
/*POSSIBILITY OF SUCH DAMAGE. */
|
||||||
|
/******************************************************************************/
|
||||||
|
|
||||||
|
/*************************************************************************/
|
||||||
|
/** File: cluster.c **/
|
||||||
|
/** Description: Takes as input a file, containing 1 data point per **/
|
||||||
|
/** per line, and performs a fuzzy c-means clustering **/
|
||||||
|
/** on the data. Fuzzy clustering is performed using **/
|
||||||
|
/** min to max clusters and the clustering that gets **/
|
||||||
|
/** the best score according to a compactness and **/
|
||||||
|
/** separation criterion are returned. **/
|
||||||
|
/** Author: Brendan McCane **/
|
||||||
|
/** James Cook University of North Queensland. **/
|
||||||
|
/** Australia. email: mccane@cs.jcu.edu.au **/
|
||||||
|
/** **/
|
||||||
|
/** Edited by: Jay Pisharath, Wei-keng Liao **/
|
||||||
|
/** Northwestern University. **/
|
||||||
|
/** **/
|
||||||
|
/** ================================================================ **/
|
||||||
|
/** **/
|
||||||
|
/** Edited by: Shuai Che, David Tarjan, Sang-Ha Lee **/
|
||||||
|
/** University of Virginia **/
|
||||||
|
/** **/
|
||||||
|
/** Description: No longer supports fuzzy c-means clustering; **/
|
||||||
|
/** only regular k-means clustering. **/
|
||||||
|
/** No longer performs "validity" function to analyze **/
|
||||||
|
/** compactness and separation crietria; instead **/
|
||||||
|
/** calculate root mean squared error. **/
|
||||||
|
/** **/
|
||||||
|
/*************************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <limits.h>
|
||||||
|
#include <math.h>
|
||||||
|
#include <float.h>
|
||||||
|
#include "kmeans.h"
|
||||||
|
|
||||||
|
float min_rmse_ref = FLT_MAX;
|
||||||
|
extern double wtime(void);
|
||||||
|
/* reference min_rmse value */
|
||||||
|
|
||||||
|
/*---< cluster() >-----------------------------------------------------------*/
|
||||||
|
int cluster(int npoints, /* number of data points */
|
||||||
|
int nfeatures, /* number of attributes for each point */
|
||||||
|
float **features, /* array: [npoints][nfeatures] */
|
||||||
|
int min_nclusters, /* range of min to max number of clusters */
|
||||||
|
int max_nclusters,
|
||||||
|
float threshold, /* loop terminating factor */
|
||||||
|
int *best_nclusters, /* out: number between min and max with lowest RMSE */
|
||||||
|
float ***cluster_centres, /* out: [best_nclusters][nfeatures] */
|
||||||
|
float *min_rmse, /* out: minimum RMSE */
|
||||||
|
int isRMSE, /* calculate RMSE */
|
||||||
|
int nloops /* number of iteration for each number of clusters */
|
||||||
|
)
|
||||||
|
{
|
||||||
|
int nclusters; /* number of clusters k */
|
||||||
|
int index =0; /* number of iteration to reach the best RMSE */
|
||||||
|
int rmse; /* RMSE for each clustering */
|
||||||
|
int *membership; /* which cluster a data point belongs to */
|
||||||
|
float **tmp_cluster_centres; /* hold coordinates of cluster centers */
|
||||||
|
int i;
|
||||||
|
|
||||||
|
/* allocate memory for membership */
|
||||||
|
membership = (int*) malloc(npoints * sizeof(int));
|
||||||
|
|
||||||
|
/* sweep k from min to max_nclusters to find the best number of clusters */
|
||||||
|
for(nclusters = min_nclusters; nclusters <= max_nclusters; nclusters++)
|
||||||
|
{
|
||||||
|
if (nclusters > npoints) break; /* cannot have more clusters than points */
|
||||||
|
|
||||||
|
/* allocate device memory, invert data array (@ kmeans_cuda.cu) */
|
||||||
|
allocate(npoints, nfeatures, nclusters, features);
|
||||||
|
|
||||||
|
/* iterate nloops times for each number of clusters */
|
||||||
|
for(i = 0; i < nloops; i++)
|
||||||
|
{
|
||||||
|
/* initialize initial cluster centers, CUDA calls (@ kmeans_cuda.cu) */
|
||||||
|
tmp_cluster_centres = kmeans_clustering(features,
|
||||||
|
nfeatures,
|
||||||
|
npoints,
|
||||||
|
nclusters,
|
||||||
|
threshold,
|
||||||
|
membership);
|
||||||
|
|
||||||
|
if (*cluster_centres) {
|
||||||
|
free((*cluster_centres)[0]);
|
||||||
|
free(*cluster_centres);
|
||||||
|
}
|
||||||
|
*cluster_centres = tmp_cluster_centres;
|
||||||
|
|
||||||
|
|
||||||
|
/* find the number of clusters with the best RMSE */
|
||||||
|
if(isRMSE)
|
||||||
|
{
|
||||||
|
rmse = rms_err(features,
|
||||||
|
nfeatures,
|
||||||
|
npoints,
|
||||||
|
tmp_cluster_centres,
|
||||||
|
nclusters);
|
||||||
|
|
||||||
|
if(rmse < min_rmse_ref){
|
||||||
|
min_rmse_ref = rmse; //update reference min RMSE
|
||||||
|
*min_rmse = min_rmse_ref; //update return min RMSE
|
||||||
|
*best_nclusters = nclusters; //update optimum number of clusters
|
||||||
|
index = i; //update number of iteration to reach best RMSE
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
deallocateMemory(); /* free device memory (@ kmeans_cuda.cu) */
|
||||||
|
}
|
||||||
|
|
||||||
|
free(membership);
|
||||||
|
|
||||||
|
return index;
|
||||||
|
}
|
||||||
|
|
||||||
1184
benchmarks/new_opencl/kmeans/getopt.c
Executable file
1184
benchmarks/new_opencl/kmeans/getopt.c
Executable file
File diff suppressed because it is too large
Load Diff
191
benchmarks/new_opencl/kmeans/getopt.h
Executable file
191
benchmarks/new_opencl/kmeans/getopt.h
Executable file
@@ -0,0 +1,191 @@
|
|||||||
|
|
||||||
|
|
||||||
|
/* getopt.h */
|
||||||
|
/* Declarations for getopt.
|
||||||
|
Copyright (C) 1989-1994, 1996-1999, 2001 Free Software
|
||||||
|
Foundation, Inc. This file is part of the GNU C Library.
|
||||||
|
|
||||||
|
The GNU C Library is free software; you can redistribute
|
||||||
|
it and/or modify it under the terms of the GNU Lesser
|
||||||
|
General Public License as published by the Free Software
|
||||||
|
Foundation; either version 2.1 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
The GNU C Library is distributed in the hope that it will
|
||||||
|
be useful, but WITHOUT ANY WARRANTY; without even the
|
||||||
|
implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||||
|
License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General
|
||||||
|
Public License along with the GNU C Library; if not, write
|
||||||
|
to the Free Software Foundation, Inc., 59 Temple Place,
|
||||||
|
Suite 330, Boston, MA 02111-1307 USA. */
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef _GETOPT_H
|
||||||
|
|
||||||
|
#ifndef __need_getopt
|
||||||
|
# define _GETOPT_H 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* If __GNU_LIBRARY__ is not already defined, either we are being used
|
||||||
|
standalone, or this is the first header included in the source file.
|
||||||
|
If we are being used with glibc, we need to include <features.h>, but
|
||||||
|
that does not exist if we are standalone. So: if __GNU_LIBRARY__ is
|
||||||
|
not defined, include <ctype.h>, which will pull in <features.h> for us
|
||||||
|
if it's from glibc. (Why ctype.h? It's guaranteed to exist and it
|
||||||
|
doesn't flood the namespace with stuff the way some other headers do.) */
|
||||||
|
#if !defined __GNU_LIBRARY__
|
||||||
|
# include <ctype.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* For communication from `getopt' to the caller.
|
||||||
|
When `getopt' finds an option that takes an argument,
|
||||||
|
the argument value is returned here.
|
||||||
|
Also, when `ordering' is RETURN_IN_ORDER,
|
||||||
|
each non-option ARGV-element is returned here. */
|
||||||
|
|
||||||
|
extern char *optarg;
|
||||||
|
|
||||||
|
/* Index in ARGV of the next element to be scanned.
|
||||||
|
This is used for communication to and from the caller
|
||||||
|
and for communication between successive calls to `getopt'.
|
||||||
|
|
||||||
|
On entry to `getopt', zero means this is the first call; initialize.
|
||||||
|
|
||||||
|
When `getopt' returns -1, this is the index of the first of the
|
||||||
|
non-option elements that the caller should itself scan.
|
||||||
|
|
||||||
|
Otherwise, `optind' communicates from one call to the next
|
||||||
|
how much of ARGV has been scanned so far. */
|
||||||
|
|
||||||
|
extern int optind;
|
||||||
|
|
||||||
|
/* Callers store zero here to inhibit the error message `getopt' prints
|
||||||
|
for unrecognized options. */
|
||||||
|
|
||||||
|
extern int opterr;
|
||||||
|
|
||||||
|
/* Set to an option character which was unrecognized. */
|
||||||
|
|
||||||
|
extern int optopt;
|
||||||
|
|
||||||
|
#ifndef __need_getopt
|
||||||
|
/* Describe the long-named options requested by the application.
|
||||||
|
The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector
|
||||||
|
of `struct option' terminated by an element containing a name which is
|
||||||
|
zero.
|
||||||
|
|
||||||
|
The field `has_arg' is:
|
||||||
|
no_argument (or 0) if the option does not take an argument,
|
||||||
|
required_argument (or 1) if the option requires an argument,
|
||||||
|
optional_argument (or 2) if the option takes an optional argument.
|
||||||
|
|
||||||
|
If the field `flag' is not NULL, it points to a variable that is set
|
||||||
|
to the value given in the field `val' when the option is found, but
|
||||||
|
left unchanged if the option is not found.
|
||||||
|
|
||||||
|
To have a long-named option do something other than set an `int' to
|
||||||
|
a compiled-in constant, such as set a value from `optarg', set the
|
||||||
|
option's `flag' field to zero and its `val' field to a nonzero
|
||||||
|
value (the equivalent single-letter option character, if there is
|
||||||
|
one). For long options that have a zero `flag' field, `getopt'
|
||||||
|
returns the contents of the `val' field. */
|
||||||
|
|
||||||
|
struct option
|
||||||
|
{
|
||||||
|
# if (defined __STDC__ && __STDC__) || defined __cplusplus
|
||||||
|
const char *name;
|
||||||
|
# else
|
||||||
|
char *name;
|
||||||
|
# endif
|
||||||
|
/* has_arg can't be an enum because some compilers complain about
|
||||||
|
type mismatches in all the code that assumes it is an int. */
|
||||||
|
int has_arg;
|
||||||
|
int *flag;
|
||||||
|
int val;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Names for the values of the `has_arg' field of `struct option'. */
|
||||||
|
|
||||||
|
# define no_argument 0
|
||||||
|
# define required_argument 1
|
||||||
|
# define optional_argument 2
|
||||||
|
#endif /* need getopt */
|
||||||
|
|
||||||
|
|
||||||
|
/* Get definitions and prototypes for functions to process the
|
||||||
|
arguments in ARGV (ARGC of them, minus the program name) for
|
||||||
|
options given in OPTS.
|
||||||
|
|
||||||
|
Return the option character from OPTS just read. Return -1 when
|
||||||
|
there are no more options. For unrecognized options, or options
|
||||||
|
missing arguments, `optopt' is set to the option letter, and '?' is
|
||||||
|
returned.
|
||||||
|
|
||||||
|
The OPTS string is a list of characters which are recognized option
|
||||||
|
letters, optionally followed by colons, specifying that that letter
|
||||||
|
takes an argument, to be placed in `optarg'.
|
||||||
|
|
||||||
|
If a letter in OPTS is followed by two colons, its argument is
|
||||||
|
optional. This behavior is specific to the GNU `getopt'.
|
||||||
|
|
||||||
|
The argument `--' causes premature termination of argument
|
||||||
|
scanning, explicitly telling `getopt' that there are no more
|
||||||
|
options.
|
||||||
|
|
||||||
|
If OPTS begins with `--', then non-option arguments are treated as
|
||||||
|
arguments to the option '\0'. This behavior is specific to the GNU
|
||||||
|
`getopt'. */
|
||||||
|
|
||||||
|
#if (defined __STDC__ && __STDC__) || defined __cplusplus
|
||||||
|
# ifdef __GNU_LIBRARY__
|
||||||
|
/* Many other libraries have conflicting prototypes for getopt, with
|
||||||
|
differences in the consts, in stdlib.h. To avoid compilation
|
||||||
|
errors, only prototype getopt for the GNU C library. */
|
||||||
|
extern int getopt (int ___argc, char *const *___argv, const char *__shortopts);
|
||||||
|
# else /* not __GNU_LIBRARY__ */
|
||||||
|
extern int getopt ();
|
||||||
|
# endif /* __GNU_LIBRARY__ */
|
||||||
|
|
||||||
|
# ifndef __need_getopt
|
||||||
|
extern int getopt_long (int ___argc, char *const *___argv,
|
||||||
|
const char *__shortopts,
|
||||||
|
const struct option *__longopts, int *__longind);
|
||||||
|
extern int getopt_long_only (int ___argc, char *const *___argv,
|
||||||
|
const char *__shortopts,
|
||||||
|
const struct option *__longopts, int *__longind);
|
||||||
|
|
||||||
|
/* Internal only. Users should not call this directly. */
|
||||||
|
extern int _getopt_internal (int ___argc, char *const *___argv,
|
||||||
|
const char *__shortopts,
|
||||||
|
const struct option *__longopts, int *__longind,
|
||||||
|
int __long_only);
|
||||||
|
# endif
|
||||||
|
#else /* not __STDC__ */
|
||||||
|
extern int getopt ();
|
||||||
|
# ifndef __need_getopt
|
||||||
|
extern int getopt_long ();
|
||||||
|
extern int getopt_long_only ();
|
||||||
|
|
||||||
|
extern int _getopt_internal ();
|
||||||
|
# endif
|
||||||
|
#endif /* __STDC__ */
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Make sure we later can get all the definitions and declarations. */
|
||||||
|
#undef __need_getopt
|
||||||
|
|
||||||
|
#endif /* getopt.h */
|
||||||
|
|
||||||
61
benchmarks/new_opencl/kmeans/kernel.cl
Executable file
61
benchmarks/new_opencl/kmeans/kernel.cl
Executable file
@@ -0,0 +1,61 @@
|
|||||||
|
#ifndef FLT_MAX
|
||||||
|
#define FLT_MAX 3.40282347e+38
|
||||||
|
#endif
|
||||||
|
|
||||||
|
__kernel void
|
||||||
|
kmeans_kernel_c(__global float *feature,
|
||||||
|
__global float *clusters,
|
||||||
|
__global int *membership,
|
||||||
|
int npoints,
|
||||||
|
int nclusters,
|
||||||
|
int nfeatures,
|
||||||
|
int offset,
|
||||||
|
int size
|
||||||
|
)
|
||||||
|
{
|
||||||
|
unsigned int point_id = get_global_id(0);
|
||||||
|
int index = 0;
|
||||||
|
//const unsigned int point_id = get_global_id(0);
|
||||||
|
if (point_id < npoints)
|
||||||
|
{
|
||||||
|
float min_dist=FLT_MAX;
|
||||||
|
for (int i=0; i < nclusters; i++) {
|
||||||
|
|
||||||
|
float dist = 0;
|
||||||
|
float ans = 0;
|
||||||
|
for (int l=0; l<nfeatures; l++){
|
||||||
|
ans += (feature[l * npoints + point_id]-clusters[i*nfeatures+l])*
|
||||||
|
(feature[l * npoints + point_id]-clusters[i*nfeatures+l]);
|
||||||
|
}
|
||||||
|
|
||||||
|
dist = ans;
|
||||||
|
if (dist < min_dist) {
|
||||||
|
min_dist = dist;
|
||||||
|
index = i;
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
//printf("%d\n", index);
|
||||||
|
membership[point_id] = index;
|
||||||
|
}
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
__kernel void
|
||||||
|
kmeans_swap(__global float *feature,
|
||||||
|
__global float *feature_swap,
|
||||||
|
int npoints,
|
||||||
|
int nfeatures
|
||||||
|
){
|
||||||
|
|
||||||
|
unsigned int tid = get_global_id(0);
|
||||||
|
//for(int i = 0; i < nfeatures; i++)
|
||||||
|
// feature_swap[i * npoints + tid] = feature[tid * nfeatures + i];
|
||||||
|
//Lingjie Zhang modificated at 11/05/2015
|
||||||
|
if (tid < npoints){
|
||||||
|
for(int i = 0; i < nfeatures; i++)
|
||||||
|
feature_swap[i * npoints + tid] = feature[tid * nfeatures + i];
|
||||||
|
}
|
||||||
|
// end of Lingjie Zhang's modification
|
||||||
|
}
|
||||||
BIN
benchmarks/new_opencl/kmeans/kmeans
Executable file
BIN
benchmarks/new_opencl/kmeans/kmeans
Executable file
Binary file not shown.
65
benchmarks/new_opencl/kmeans/kmeans.h
Executable file
65
benchmarks/new_opencl/kmeans/kmeans.h
Executable file
@@ -0,0 +1,65 @@
|
|||||||
|
/*****************************************************************************/
|
||||||
|
/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. */
|
||||||
|
/*By downloading, copying, installing or using the software you agree */
|
||||||
|
/*to this license. If you do not agree to this license, do not download, */
|
||||||
|
/*install, copy or use the software. */
|
||||||
|
/* */
|
||||||
|
/* */
|
||||||
|
/*Copyright (c) 2005 Northwestern University */
|
||||||
|
/*All rights reserved. */
|
||||||
|
|
||||||
|
/*Redistribution of the software in source and binary forms, */
|
||||||
|
/*with or without modification, is permitted provided that the */
|
||||||
|
/*following conditions are met: */
|
||||||
|
/* */
|
||||||
|
/*1 Redistributions of source code must retain the above copyright */
|
||||||
|
/* notice, this list of conditions and the following disclaimer. */
|
||||||
|
/* */
|
||||||
|
/*2 Redistributions in binary form must reproduce the above copyright */
|
||||||
|
/* notice, this list of conditions and the following disclaimer in the */
|
||||||
|
/* documentation and/or other materials provided with the distribution.*/
|
||||||
|
/* */
|
||||||
|
/*3 Neither the name of Northwestern University nor the names of its */
|
||||||
|
/* contributors may be used to endorse or promote products derived */
|
||||||
|
/* from this software without specific prior written permission. */
|
||||||
|
/* */
|
||||||
|
/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */
|
||||||
|
/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||||
|
/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */
|
||||||
|
/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL */
|
||||||
|
/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */
|
||||||
|
/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||||
|
/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */
|
||||||
|
/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
|
||||||
|
/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */
|
||||||
|
/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */
|
||||||
|
/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||||
|
/*POSSIBILITY OF SUCH DAMAGE. */
|
||||||
|
/******************************************************************************/
|
||||||
|
|
||||||
|
#ifndef _H_FUZZY_KMEANS
|
||||||
|
#define _H_FUZZY_KMEANS
|
||||||
|
|
||||||
|
#ifndef FLT_MAX
|
||||||
|
#define FLT_MAX 3.40282347e+38
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
float euclid_dist_2 (float*, float*, int);
|
||||||
|
int find_nearest_point (float* , int, float**, int);
|
||||||
|
float rms_err(float**, int, int, float**, int);
|
||||||
|
int cluster(int, int, float**, int, int, float, int*, float***, float*, int, int);
|
||||||
|
int setup(int argc, char** argv);
|
||||||
|
int allocate(int npoints, int nfeatures, int nclusters, float **feature);
|
||||||
|
void deallocateMemory();
|
||||||
|
int kmeansOCL(float **feature, int nfeatures, int npoints, int nclusters, int *membership, float **clusters, int *new_centers_len, float **new_centers);
|
||||||
|
float** kmeans_clustering(float **feature, int nfeatures, int npoints, int nclusters, float threshold, int *membership);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
176
benchmarks/new_opencl/kmeans/kmeans_clustering.c
Executable file
176
benchmarks/new_opencl/kmeans/kmeans_clustering.c
Executable file
@@ -0,0 +1,176 @@
|
|||||||
|
/*****************************************************************************/
|
||||||
|
/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. */
|
||||||
|
/*By downloading, copying, installing or using the software you agree */
|
||||||
|
/*to this license. If you do not agree to this license, do not download, */
|
||||||
|
/*install, copy or use the software. */
|
||||||
|
/* */
|
||||||
|
/* */
|
||||||
|
/*Copyright (c) 2005 Northwestern University */
|
||||||
|
/*All rights reserved. */
|
||||||
|
|
||||||
|
/*Redistribution of the software in source and binary forms, */
|
||||||
|
/*with or without modification, is permitted provided that the */
|
||||||
|
/*following conditions are met: */
|
||||||
|
/* */
|
||||||
|
/*1 Redistributions of source code must retain the above copyright */
|
||||||
|
/* notice, this list of conditions and the following disclaimer. */
|
||||||
|
/* */
|
||||||
|
/*2 Redistributions in binary form must reproduce the above copyright */
|
||||||
|
/* notice, this list of conditions and the following disclaimer in the */
|
||||||
|
/* documentation and/or other materials provided with the distribution.*/
|
||||||
|
/* */
|
||||||
|
/*3 Neither the name of Northwestern University nor the names of its */
|
||||||
|
/* contributors may be used to endorse or promote products derived */
|
||||||
|
/* from this software without specific prior written permission. */
|
||||||
|
/* */
|
||||||
|
/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */
|
||||||
|
/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||||
|
/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */
|
||||||
|
/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL */
|
||||||
|
/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */
|
||||||
|
/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||||
|
/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */
|
||||||
|
/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
|
||||||
|
/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */
|
||||||
|
/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */
|
||||||
|
/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||||
|
/*POSSIBILITY OF SUCH DAMAGE. */
|
||||||
|
/******************************************************************************/
|
||||||
|
|
||||||
|
/*************************************************************************/
|
||||||
|
/** File: kmeans_clustering.c **/
|
||||||
|
/** Description: Implementation of regular k-means clustering **/
|
||||||
|
/** algorithm **/
|
||||||
|
/** Author: Wei-keng Liao **/
|
||||||
|
/** ECE Department, Northwestern University **/
|
||||||
|
/** email: wkliao@ece.northwestern.edu **/
|
||||||
|
/** **/
|
||||||
|
/** Edited by: Jay Pisharath **/
|
||||||
|
/** Northwestern University. **/
|
||||||
|
/** **/
|
||||||
|
/** ================================================================ **/
|
||||||
|
/** **/
|
||||||
|
/** Edited by: Shuai Che, David Tarjan, Sang-Ha Lee **/
|
||||||
|
/** University of Virginia **/
|
||||||
|
/** **/
|
||||||
|
/** Description: No longer supports fuzzy c-means clustering; **/
|
||||||
|
/** only regular k-means clustering. **/
|
||||||
|
/** No longer performs "validity" function to analyze **/
|
||||||
|
/** compactness and separation crietria; instead **/
|
||||||
|
/** calculate root mean squared error. **/
|
||||||
|
/** **/
|
||||||
|
/*************************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <float.h>
|
||||||
|
#include <math.h>
|
||||||
|
#include "kmeans.h"
|
||||||
|
|
||||||
|
#define RANDOM_MAX 2147483647
|
||||||
|
|
||||||
|
extern double wtime(void);
|
||||||
|
|
||||||
|
/*----< kmeans_clustering() >---------------------------------------------*/
|
||||||
|
float** kmeans_clustering(float **feature, /* in: [npoints][nfeatures] */
|
||||||
|
int nfeatures,
|
||||||
|
int npoints,
|
||||||
|
int nclusters,
|
||||||
|
float threshold,
|
||||||
|
int *membership) /* out: [npoints] */
|
||||||
|
{
|
||||||
|
int i, j, n = 0; /* counters */
|
||||||
|
int loop=0, temp;
|
||||||
|
int *new_centers_len; /* [nclusters]: no. of points in each cluster */
|
||||||
|
float delta; /* if the point moved */
|
||||||
|
float **clusters; /* out: [nclusters][nfeatures] */
|
||||||
|
float **new_centers; /* [nclusters][nfeatures] */
|
||||||
|
|
||||||
|
int *initial; /* used to hold the index of points not yet selected
|
||||||
|
prevents the "birthday problem" of dual selection (?)
|
||||||
|
considered holding initial cluster indices, but changed due to
|
||||||
|
possible, though unlikely, infinite loops */
|
||||||
|
int initial_points;
|
||||||
|
int c = 0;
|
||||||
|
|
||||||
|
/* nclusters should never be > npoints
|
||||||
|
that would guarantee a cluster without points */
|
||||||
|
if (nclusters > npoints)
|
||||||
|
nclusters = npoints;
|
||||||
|
|
||||||
|
/* allocate space for and initialize returning variable clusters[] */
|
||||||
|
clusters = (float**) malloc(nclusters * sizeof(float*));
|
||||||
|
clusters[0] = (float*) malloc(nclusters * nfeatures * sizeof(float));
|
||||||
|
for (i=1; i<nclusters; i++)
|
||||||
|
clusters[i] = clusters[i-1] + nfeatures;
|
||||||
|
|
||||||
|
/* initialize the random clusters */
|
||||||
|
initial = (int *) malloc (npoints * sizeof(int));
|
||||||
|
for (i = 0; i < npoints; i++)
|
||||||
|
{
|
||||||
|
initial[i] = i;
|
||||||
|
}
|
||||||
|
initial_points = npoints;
|
||||||
|
|
||||||
|
/* randomly pick cluster centers */
|
||||||
|
for (i=0; i<nclusters && initial_points >= 0; i++) {
|
||||||
|
//n = (int)rand() % initial_points;
|
||||||
|
|
||||||
|
for (j=0; j<nfeatures; j++)
|
||||||
|
clusters[i][j] = feature[initial[n]][j]; // remapped
|
||||||
|
|
||||||
|
/* swap the selected index to the end (not really necessary,
|
||||||
|
could just move the end up) */
|
||||||
|
temp = initial[n];
|
||||||
|
initial[n] = initial[initial_points-1];
|
||||||
|
initial[initial_points-1] = temp;
|
||||||
|
initial_points--;
|
||||||
|
n++;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* initialize the membership to -1 for all */
|
||||||
|
for (i=0; i < npoints; i++)
|
||||||
|
membership[i] = -1;
|
||||||
|
|
||||||
|
/* allocate space for and initialize new_centers_len and new_centers */
|
||||||
|
new_centers_len = (int*) calloc(nclusters, sizeof(int));
|
||||||
|
|
||||||
|
new_centers = (float**) malloc(nclusters * sizeof(float*));
|
||||||
|
new_centers[0] = (float*) calloc(nclusters * nfeatures, sizeof(float));
|
||||||
|
for (i=1; i<nclusters; i++)
|
||||||
|
new_centers[i] = new_centers[i-1] + nfeatures;
|
||||||
|
|
||||||
|
/* iterate until convergence */
|
||||||
|
do {
|
||||||
|
delta = 0.0;
|
||||||
|
// CUDA
|
||||||
|
delta = (float) kmeansOCL(feature, /* in: [npoints][nfeatures] */
|
||||||
|
nfeatures, /* number of attributes for each point */
|
||||||
|
npoints, /* number of data points */
|
||||||
|
nclusters, /* number of clusters */
|
||||||
|
membership, /* which cluster the point belongs to */
|
||||||
|
clusters, /* out: [nclusters][nfeatures] */
|
||||||
|
new_centers_len, /* out: number of points in each cluster */
|
||||||
|
new_centers /* sum of points in each cluster */
|
||||||
|
);
|
||||||
|
|
||||||
|
/* replace old cluster centers with new_centers */
|
||||||
|
/* CPU side of reduction */
|
||||||
|
for (i=0; i<nclusters; i++) {
|
||||||
|
for (j=0; j<nfeatures; j++) {
|
||||||
|
if (new_centers_len[i] > 0)
|
||||||
|
clusters[i][j] = new_centers[i][j] / new_centers_len[i]; /* take average i.e. sum/n */
|
||||||
|
new_centers[i][j] = 0.0; /* set back to 0 */
|
||||||
|
}
|
||||||
|
new_centers_len[i] = 0; /* set back to 0 */
|
||||||
|
}
|
||||||
|
c++;
|
||||||
|
} while ((delta > threshold) && (loop++ < 500)); /* makes sure loop terminates */
|
||||||
|
printf("iterated %d times\n", c);
|
||||||
|
free(new_centers[0]);
|
||||||
|
free(new_centers);
|
||||||
|
free(new_centers_len);
|
||||||
|
|
||||||
|
return clusters;
|
||||||
|
}
|
||||||
|
|
||||||
BIN
benchmarks/new_opencl/kmeans/libkmeans.a
Normal file
BIN
benchmarks/new_opencl/kmeans/libkmeans.a
Normal file
Binary file not shown.
382
benchmarks/new_opencl/kmeans/main.cc
Executable file
382
benchmarks/new_opencl/kmeans/main.cc
Executable file
@@ -0,0 +1,382 @@
|
|||||||
|
#include "kmeans.h"
|
||||||
|
#include <iostream>
|
||||||
|
#include <math.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#ifdef WIN
|
||||||
|
#include <windows.h>
|
||||||
|
#else
|
||||||
|
#include <pthread.h>
|
||||||
|
#include <sys/time.h>
|
||||||
|
double gettime() {
|
||||||
|
struct timeval t;
|
||||||
|
gettimeofday(&t, NULL);
|
||||||
|
return t.tv_sec + t.tv_usec * 1e-6;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef NV
|
||||||
|
#include <oclUtils.h>
|
||||||
|
#else
|
||||||
|
#include <CL/cl.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef FLT_MAX
|
||||||
|
#define FLT_MAX 3.40282347e+38
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef RD_WG_SIZE_0_0
|
||||||
|
#define BLOCK_SIZE RD_WG_SIZE_0_0
|
||||||
|
#elif defined(RD_WG_SIZE_0)
|
||||||
|
#define BLOCK_SIZE RD_WG_SIZE_0
|
||||||
|
#elif defined(RD_WG_SIZE)
|
||||||
|
#define BLOCK_SIZE RD_WG_SIZE
|
||||||
|
#else
|
||||||
|
#define BLOCK_SIZE 256
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef RD_WG_SIZE_1_0
|
||||||
|
#define BLOCK_SIZE2 RD_WG_SIZE_1_0
|
||||||
|
#elif defined(RD_WG_SIZE_1)
|
||||||
|
#define BLOCK_SIZE2 RD_WG_SIZE_1
|
||||||
|
#elif defined(RD_WG_SIZE)
|
||||||
|
#define BLOCK_SIZE2 RD_WG_SIZE
|
||||||
|
#else
|
||||||
|
#define BLOCK_SIZE2 256
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// local variables
|
||||||
|
static cl_context context;
|
||||||
|
static cl_command_queue cmd_queue;
|
||||||
|
static cl_device_type device_type;
|
||||||
|
static cl_device_id *device_list;
|
||||||
|
static cl_int num_devices;
|
||||||
|
|
||||||
|
static int initialize(int use_gpu) {
|
||||||
|
cl_int result;
|
||||||
|
size_t size;
|
||||||
|
|
||||||
|
/*// create OpenCL context
|
||||||
|
cl_platform_id platform_id;
|
||||||
|
if (clGetPlatformIDs(1, &platform_id, NULL) != CL_SUCCESS) {
|
||||||
|
printf("ERROR: clGetPlatformIDs(1,*,0) failed\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
cl_context_properties ctxprop[] = {CL_CONTEXT_PLATFORM,
|
||||||
|
(cl_context_properties)platform_id, 0};
|
||||||
|
device_type = use_gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU;
|
||||||
|
context = clCreateContextFromType(ctxprop, device_type, NULL, NULL, NULL);
|
||||||
|
if (!context) {
|
||||||
|
printf("ERROR: clCreateContextFromType(%s) failed\n",
|
||||||
|
use_gpu ? "GPU" : "CPU");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// get the list of GPUs
|
||||||
|
result = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &size);
|
||||||
|
num_devices = (int)(size / sizeof(cl_device_id));
|
||||||
|
|
||||||
|
if (result != CL_SUCCESS || num_devices < 1) {
|
||||||
|
printf("ERROR: clGetContextInfo() failed\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
device_list = new cl_device_id[num_devices];
|
||||||
|
if (!device_list) {
|
||||||
|
printf("ERROR: new cl_device_id[] failed\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
result =
|
||||||
|
clGetContextInfo(context, CL_CONTEXT_DEVICES, size, device_list, NULL);
|
||||||
|
if (result != CL_SUCCESS) {
|
||||||
|
printf("ERROR: clGetContextInfo() failed\n");
|
||||||
|
return -1;
|
||||||
|
}*/
|
||||||
|
|
||||||
|
cl_platform_id platform_id;
|
||||||
|
num_devices = 1;
|
||||||
|
device_list = new cl_device_id[num_devices];
|
||||||
|
|
||||||
|
result = clGetPlatformIDs(1, &platform_id, NULL);
|
||||||
|
result = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, device_list, NULL);
|
||||||
|
context = clCreateContext(NULL, 1, device_list, NULL, NULL, &result);
|
||||||
|
|
||||||
|
// create command queue for the first device
|
||||||
|
cmd_queue = clCreateCommandQueue(context, device_list[0], 0, NULL);
|
||||||
|
if (!cmd_queue) {
|
||||||
|
printf("ERROR: clCreateCommandQueue() failed\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int shutdown() {
|
||||||
|
// release resources
|
||||||
|
if (cmd_queue)
|
||||||
|
clReleaseCommandQueue(cmd_queue);
|
||||||
|
if (context)
|
||||||
|
clReleaseContext(context);
|
||||||
|
if (device_list)
|
||||||
|
delete device_list;
|
||||||
|
|
||||||
|
// reset all variables
|
||||||
|
cmd_queue = 0;
|
||||||
|
context = 0;
|
||||||
|
device_list = 0;
|
||||||
|
num_devices = 0;
|
||||||
|
device_type = 0;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
cl_mem d_feature;
|
||||||
|
cl_mem d_feature_swap;
|
||||||
|
cl_mem d_cluster;
|
||||||
|
cl_mem d_membership;
|
||||||
|
|
||||||
|
cl_kernel kernel;
|
||||||
|
cl_kernel kernel_s;
|
||||||
|
cl_kernel kernel2;
|
||||||
|
|
||||||
|
int *membership_OCL;
|
||||||
|
int *membership_d;
|
||||||
|
float *feature_d;
|
||||||
|
float *clusters_d;
|
||||||
|
float *center_d;
|
||||||
|
|
||||||
|
|
||||||
|
static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) {
|
||||||
|
if (nullptr == filename || nullptr == data || 0 == size)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
FILE* fp = fopen(filename, "r");
|
||||||
|
if (NULL == fp) {
|
||||||
|
fprintf(stderr, "Failed to load kernel.");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
fseek(fp , 0 , SEEK_END);
|
||||||
|
long fsize = ftell(fp);
|
||||||
|
rewind(fp);
|
||||||
|
|
||||||
|
*data = (uint8_t*)malloc(fsize);
|
||||||
|
*size = fread(*data, 1, fsize, fp);
|
||||||
|
|
||||||
|
fclose(fp);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int allocate(int n_points, int n_features, int n_clusters, float **feature) {
|
||||||
|
/*int sourcesize = 1024 * 1024;
|
||||||
|
char *source = (char *)calloc(sourcesize, sizeof(char));
|
||||||
|
if (!source) {
|
||||||
|
printf("ERROR: calloc(%d) failed\n", sourcesize);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// read the kernel core source
|
||||||
|
char *tempchar = "./kmeans.cl";
|
||||||
|
FILE *fp = fopen(tempchar, "rb");
|
||||||
|
if (!fp) {
|
||||||
|
printf("ERROR: unable to open '%s'\n", tempchar);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
fread(source + strlen(source), sourcesize, 1, fp);
|
||||||
|
fclose(fp);*/
|
||||||
|
|
||||||
|
// OpenCL initialization
|
||||||
|
int use_gpu = 1;
|
||||||
|
if (initialize(use_gpu))
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
// compile kernel
|
||||||
|
cl_int err = 0;
|
||||||
|
//const char *slist[2] = {source, 0};
|
||||||
|
//cl_program prog = clCreateProgramWithSource(context, 1, slist, NULL, &err);
|
||||||
|
cl_program prog = clCreateProgramWithBuiltInKernels(context, 1, device_list, "kmeans_kernel_c;kmeans_swap", &err);
|
||||||
|
if (err != CL_SUCCESS) {
|
||||||
|
printf("ERROR: clCreateProgramWithSource() => %d\n", err);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
err = clBuildProgram(prog, 0, NULL, NULL, NULL, NULL);
|
||||||
|
{ // show warnings/errors
|
||||||
|
// static char log[65536]; memset(log, 0, sizeof(log));
|
||||||
|
// cl_device_id device_id = 0;
|
||||||
|
// err = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(device_id),
|
||||||
|
//&device_id, NULL);
|
||||||
|
// clGetProgramBuildInfo(prog, device_id, CL_PROGRAM_BUILD_LOG,
|
||||||
|
// sizeof(log)-1, log, NULL);
|
||||||
|
// if(err || strstr(log,"warning:") || strstr(log, "error:"))
|
||||||
|
// printf("<<<<\n%s\n>>>>\n", log);
|
||||||
|
}
|
||||||
|
if (err != CL_SUCCESS) {
|
||||||
|
printf("ERROR: clBuildProgram() => %d\n", err);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
char *kernel_kmeans_c = "kmeans_kernel_c";
|
||||||
|
char *kernel_swap = "kmeans_swap";
|
||||||
|
|
||||||
|
kernel_s = clCreateKernel(prog, kernel_kmeans_c, &err);
|
||||||
|
if (err != CL_SUCCESS) {
|
||||||
|
printf("ERROR: clCreateKernel() 0 => %d\n", err);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
kernel2 = clCreateKernel(prog, kernel_swap, &err);
|
||||||
|
if (err != CL_SUCCESS) {
|
||||||
|
printf("ERROR: clCreateKernel() 0 => %d\n", err);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
clReleaseProgram(prog);
|
||||||
|
|
||||||
|
d_feature = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||||
|
n_points * n_features * sizeof(float), NULL, &err);
|
||||||
|
if (err != CL_SUCCESS) {
|
||||||
|
printf("ERROR: clCreateBuffer d_feature (size:%d) => %d\n",
|
||||||
|
n_points * n_features, err);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
d_feature_swap =
|
||||||
|
clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||||
|
n_points * n_features * sizeof(float), NULL, &err);
|
||||||
|
if (err != CL_SUCCESS) {
|
||||||
|
printf("ERROR: clCreateBuffer d_feature_swap (size:%d) => %d\n",
|
||||||
|
n_points * n_features, err);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
d_cluster =
|
||||||
|
clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||||
|
n_clusters * n_features * sizeof(float), NULL, &err);
|
||||||
|
if (err != CL_SUCCESS) {
|
||||||
|
printf("ERROR: clCreateBuffer d_cluster (size:%d) => %d\n",
|
||||||
|
n_clusters * n_features, err);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
d_membership = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||||
|
n_points * sizeof(int), NULL, &err);
|
||||||
|
if (err != CL_SUCCESS) {
|
||||||
|
printf("ERROR: clCreateBuffer d_membership (size:%d) => %d\n", n_points,
|
||||||
|
err);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// write buffers
|
||||||
|
err = clEnqueueWriteBuffer(cmd_queue, d_feature, 1, 0,
|
||||||
|
n_points * n_features * sizeof(float), feature[0],
|
||||||
|
0, 0, 0);
|
||||||
|
if (err != CL_SUCCESS) {
|
||||||
|
printf("ERROR: clEnqueueWriteBuffer d_feature (size:%d) => %d\n",
|
||||||
|
n_points * n_features, err);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
clSetKernelArg(kernel2, 0, sizeof(void *), (void *)&d_feature);
|
||||||
|
clSetKernelArg(kernel2, 1, sizeof(void *), (void *)&d_feature_swap);
|
||||||
|
clSetKernelArg(kernel2, 2, sizeof(cl_int), (void *)&n_points);
|
||||||
|
clSetKernelArg(kernel2, 3, sizeof(cl_int), (void *)&n_features);
|
||||||
|
|
||||||
|
size_t global_work[3] = {n_points, 1, 1};
|
||||||
|
/// Ke Wang adjustable local group size 2013/08/07 10:37:33
|
||||||
|
size_t local_work_size = BLOCK_SIZE; // work group size is defined by
|
||||||
|
// RD_WG_SIZE_0 or RD_WG_SIZE_0_0
|
||||||
|
// 2014/06/10 17:00:51
|
||||||
|
if (global_work[0] % local_work_size != 0)
|
||||||
|
global_work[0] = (global_work[0] / local_work_size + 1) * local_work_size;
|
||||||
|
|
||||||
|
err = clEnqueueNDRangeKernel(cmd_queue, kernel2, 1, NULL, global_work,
|
||||||
|
&local_work_size, 0, 0, 0);
|
||||||
|
if (err != CL_SUCCESS) {
|
||||||
|
printf("ERROR: clEnqueueNDRangeKernel()=>%d failed\n", err);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
membership_OCL = (int *)malloc(n_points * sizeof(int));
|
||||||
|
}
|
||||||
|
|
||||||
|
void deallocateMemory() {
|
||||||
|
clReleaseMemObject(d_feature);
|
||||||
|
clReleaseMemObject(d_feature_swap);
|
||||||
|
clReleaseMemObject(d_cluster);
|
||||||
|
clReleaseMemObject(d_membership);
|
||||||
|
free(membership_OCL);
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char **argv) {
|
||||||
|
printf("WG size of kernel_swap = %d, WG size of kernel_kmeans = %d \n",
|
||||||
|
BLOCK_SIZE, BLOCK_SIZE2);
|
||||||
|
setup(argc, argv);
|
||||||
|
shutdown();
|
||||||
|
}
|
||||||
|
|
||||||
|
int kmeansOCL(float **feature, /* in: [npoints][nfeatures] */
|
||||||
|
int n_features, int n_points, int n_clusters, int *membership,
|
||||||
|
float **clusters, int *new_centers_len, float **new_centers) {
|
||||||
|
|
||||||
|
int delta = 0;
|
||||||
|
int i, j, k;
|
||||||
|
cl_int err = 0;
|
||||||
|
|
||||||
|
size_t global_work[3] = {n_points, 1, 1};
|
||||||
|
|
||||||
|
/// Ke Wang adjustable local group size 2013/08/07 10:37:33
|
||||||
|
size_t local_work_size = BLOCK_SIZE2; // work group size is defined by
|
||||||
|
// RD_WG_SIZE_1 or RD_WG_SIZE_1_0
|
||||||
|
// 2014/06/10 17:00:41
|
||||||
|
if (global_work[0] % local_work_size != 0)
|
||||||
|
global_work[0] = (global_work[0] / local_work_size + 1) * local_work_size;
|
||||||
|
|
||||||
|
err = clEnqueueWriteBuffer(cmd_queue, d_cluster, 1, 0,
|
||||||
|
n_clusters * n_features * sizeof(float),
|
||||||
|
clusters[0], 0, 0, 0);
|
||||||
|
if (err != CL_SUCCESS) {
|
||||||
|
printf("ERROR: clEnqueueWriteBuffer d_cluster (size:%d) => %d\n", n_points,
|
||||||
|
err);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int size = 0;
|
||||||
|
int offset = 0;
|
||||||
|
|
||||||
|
clSetKernelArg(kernel_s, 0, sizeof(void *), (void *)&d_feature_swap);
|
||||||
|
clSetKernelArg(kernel_s, 1, sizeof(void *), (void *)&d_cluster);
|
||||||
|
clSetKernelArg(kernel_s, 2, sizeof(void *), (void *)&d_membership);
|
||||||
|
clSetKernelArg(kernel_s, 3, sizeof(cl_int), (void *)&n_points);
|
||||||
|
clSetKernelArg(kernel_s, 4, sizeof(cl_int), (void *)&n_clusters);
|
||||||
|
clSetKernelArg(kernel_s, 5, sizeof(cl_int), (void *)&n_features);
|
||||||
|
clSetKernelArg(kernel_s, 6, sizeof(cl_int), (void *)&offset);
|
||||||
|
clSetKernelArg(kernel_s, 7, sizeof(cl_int), (void *)&size);
|
||||||
|
|
||||||
|
err = clEnqueueNDRangeKernel(cmd_queue, kernel_s, 1, NULL, global_work,
|
||||||
|
&local_work_size, 0, 0, 0);
|
||||||
|
if (err != CL_SUCCESS) {
|
||||||
|
printf("ERROR: clEnqueueNDRangeKernel()=>%d failed\n", err);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
clFinish(cmd_queue);
|
||||||
|
err = clEnqueueReadBuffer(cmd_queue, d_membership, 1, 0,
|
||||||
|
n_points * sizeof(int), membership_OCL, 0, 0, 0);
|
||||||
|
if (err != CL_SUCCESS) {
|
||||||
|
printf("ERROR: Memcopy Out\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
delta = 0;
|
||||||
|
for (i = 0; i < n_points; i++) {
|
||||||
|
int cluster_id = membership_OCL[i];
|
||||||
|
new_centers_len[cluster_id]++;
|
||||||
|
if (membership_OCL[i] != membership[i]) {
|
||||||
|
delta++;
|
||||||
|
membership[i] = membership_OCL[i];
|
||||||
|
}
|
||||||
|
for (j = 0; j < n_features; j++) {
|
||||||
|
new_centers[cluster_id][j] += feature[i][j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return delta;
|
||||||
|
}
|
||||||
338
benchmarks/new_opencl/kmeans/read_input.c
Executable file
338
benchmarks/new_opencl/kmeans/read_input.c
Executable file
@@ -0,0 +1,338 @@
|
|||||||
|
/*****************************************************************************/
|
||||||
|
/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. */
|
||||||
|
/*By downloading, copying, installing or using the software you agree */
|
||||||
|
/*to this license. If you do not agree to this license, do not download, */
|
||||||
|
/*install, copy or use the software. */
|
||||||
|
/* */
|
||||||
|
/* */
|
||||||
|
/*Copyright (c) 2005 Northwestern University */
|
||||||
|
/*All rights reserved. */
|
||||||
|
|
||||||
|
/*Redistribution of the software in source and binary forms, */
|
||||||
|
/*with or without modification, is permitted provided that the */
|
||||||
|
/*following conditions are met: */
|
||||||
|
/* */
|
||||||
|
/*1 Redistributions of source code must retain the above copyright */
|
||||||
|
/* notice, this list of conditions and the following disclaimer. */
|
||||||
|
/* */
|
||||||
|
/*2 Redistributions in binary form must reproduce the above copyright */
|
||||||
|
/* notice, this list of conditions and the following disclaimer in the */
|
||||||
|
/* documentation and/or other materials provided with the distribution.*/
|
||||||
|
/* */
|
||||||
|
/*3 Neither the name of Northwestern University nor the names of its */
|
||||||
|
/* contributors may be used to endorse or promote products derived */
|
||||||
|
/* from this software without specific prior written permission. */
|
||||||
|
/* */
|
||||||
|
/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */
|
||||||
|
/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||||
|
/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */
|
||||||
|
/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL */
|
||||||
|
/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */
|
||||||
|
/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||||
|
/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */
|
||||||
|
/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
|
||||||
|
/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */
|
||||||
|
/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */
|
||||||
|
/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||||
|
/*POSSIBILITY OF SUCH DAMAGE. */
|
||||||
|
/******************************************************************************/
|
||||||
|
|
||||||
|
/*************************************************************************/
|
||||||
|
/** File: example.c **/
|
||||||
|
/** Description: Takes as input a file: **/
|
||||||
|
/** ascii file: containing 1 data point per line **/
|
||||||
|
/** binary file: first int is the number of objects **/
|
||||||
|
/** 2nd int is the no. of features of each **/
|
||||||
|
/** object **/
|
||||||
|
/** This example performs a fuzzy c-means clustering **/
|
||||||
|
/** on the data. Fuzzy clustering is performed using **/
|
||||||
|
/** min to max clusters and the clustering that gets **/
|
||||||
|
/** the best score according to a compactness and **/
|
||||||
|
/** separation criterion are returned. **/
|
||||||
|
/** Author: Wei-keng Liao **/
|
||||||
|
/** ECE Department Northwestern University **/
|
||||||
|
/** email: wkliao@ece.northwestern.edu **/
|
||||||
|
/** **/
|
||||||
|
/** Edited by: Jay Pisharath **/
|
||||||
|
/** Northwestern University. **/
|
||||||
|
/** **/
|
||||||
|
/** ================================================================ **/
|
||||||
|
/**
|
||||||
|
* **/
|
||||||
|
/** Edited by: Shuai Che, David Tarjan, Sang-Ha Lee
|
||||||
|
* **/
|
||||||
|
/** University of Virginia
|
||||||
|
* **/
|
||||||
|
/**
|
||||||
|
* **/
|
||||||
|
/** Description: No longer supports fuzzy c-means clustering;
|
||||||
|
* **/
|
||||||
|
/** only regular k-means clustering.
|
||||||
|
* **/
|
||||||
|
/** No longer performs "validity" function to
|
||||||
|
* analyze **/
|
||||||
|
/** compactness and separation crietria; instead
|
||||||
|
* **/
|
||||||
|
/** calculate root mean squared error.
|
||||||
|
* **/
|
||||||
|
/** **/
|
||||||
|
/*************************************************************************/
|
||||||
|
#define _CRT_SECURE_NO_DEPRECATE 1
|
||||||
|
|
||||||
|
#include "kmeans.h"
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <limits.h>
|
||||||
|
#include <math.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
extern double wtime(void);
|
||||||
|
|
||||||
|
/*---< usage() >------------------------------------------------------------*/
|
||||||
|
void usage(char *argv0) {
|
||||||
|
char *help = "\nUsage: %s [switches] -i filename\n\n"
|
||||||
|
" -i filename :file containing data to be clustered\n"
|
||||||
|
" -m max_nclusters :maximum number of clusters allowed "
|
||||||
|
"[default=5]\n"
|
||||||
|
" -n min_nclusters :minimum number of clusters allowed "
|
||||||
|
"[default=5]\n"
|
||||||
|
" -t threshold :threshold value "
|
||||||
|
"[default=0.001]\n"
|
||||||
|
" -l nloops :iteration for each number of clusters "
|
||||||
|
"[default=1]\n"
|
||||||
|
" -b :input file is in binary format\n"
|
||||||
|
" -r :calculate RMSE "
|
||||||
|
"[default=off]\n"
|
||||||
|
" -o :output cluster center coordinates "
|
||||||
|
"[default=off]\n";
|
||||||
|
fprintf(stderr, help, argv0);
|
||||||
|
exit(-1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*---< main() >-------------------------------------------------------------*/
|
||||||
|
int setup(int argc, char **argv) {
|
||||||
|
int opt;
|
||||||
|
extern char *optarg;
|
||||||
|
char *filename = 0;
|
||||||
|
float *buf;
|
||||||
|
char line[1024];
|
||||||
|
int isBinaryFile = 0;
|
||||||
|
|
||||||
|
float threshold = 0.001; /* default value */
|
||||||
|
int max_nclusters = 5; /* default value */
|
||||||
|
int min_nclusters = 5; /* default value */
|
||||||
|
int best_nclusters = 0;
|
||||||
|
int nfeatures = 0;
|
||||||
|
int npoints = 0;
|
||||||
|
float len;
|
||||||
|
|
||||||
|
float **features;
|
||||||
|
float **cluster_centres = NULL;
|
||||||
|
int i, j, index;
|
||||||
|
int nloops = 1; /* default value */
|
||||||
|
|
||||||
|
int isRMSE = 0;
|
||||||
|
float rmse;
|
||||||
|
|
||||||
|
int isOutput = 0;
|
||||||
|
// float cluster_timing, io_timing;
|
||||||
|
|
||||||
|
/* obtain command line arguments and change appropriate options */
|
||||||
|
while ((opt = getopt(argc, argv, "i:t:m:n:l:bro")) != EOF) {
|
||||||
|
switch (opt) {
|
||||||
|
case 'i':
|
||||||
|
filename = optarg;
|
||||||
|
break;
|
||||||
|
case 'b':
|
||||||
|
isBinaryFile = 1;
|
||||||
|
break;
|
||||||
|
case 't':
|
||||||
|
threshold = atof(optarg);
|
||||||
|
break;
|
||||||
|
case 'm':
|
||||||
|
max_nclusters = atoi(optarg);
|
||||||
|
break;
|
||||||
|
case 'n':
|
||||||
|
min_nclusters = atoi(optarg);
|
||||||
|
break;
|
||||||
|
case 'r':
|
||||||
|
isRMSE = 1;
|
||||||
|
break;
|
||||||
|
case 'o':
|
||||||
|
isOutput = 1;
|
||||||
|
break;
|
||||||
|
case 'l':
|
||||||
|
nloops = atoi(optarg);
|
||||||
|
break;
|
||||||
|
case '?':
|
||||||
|
usage(argv[0]);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
usage(argv[0]);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ============== I/O begin ==============*/
|
||||||
|
/* get nfeatures and npoints */
|
||||||
|
// io_timing = omp_get_wtime();
|
||||||
|
|
||||||
|
/*if (isBinaryFile) { // Binary file input
|
||||||
|
FILE *infile;
|
||||||
|
if ((infile = fopen("100", "r")) == NULL) {
|
||||||
|
fprintf(stderr, "Error: no such file (%s)\n", filename);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
fread(&npoints, 1, sizeof(int), infile);
|
||||||
|
fread(&nfeatures, 1, sizeof(int), infile);
|
||||||
|
|
||||||
|
// allocate space for features[][] and read attributes of all objects
|
||||||
|
buf = (float *)malloc(npoints * nfeatures * sizeof(float));
|
||||||
|
features = (float **)malloc(npoints * sizeof(float *));
|
||||||
|
features[0] = (float *)malloc(npoints * nfeatures * sizeof(float));
|
||||||
|
for (i = 1; i < npoints; i++) {
|
||||||
|
features[i] = features[i - 1] + nfeatures;
|
||||||
|
}
|
||||||
|
fread(buf, 1, npoints * nfeatures * sizeof(float), infile);
|
||||||
|
fclose(infile);
|
||||||
|
} else {
|
||||||
|
FILE *infile;
|
||||||
|
if ((infile = fopen("100", "r")) == NULL) {
|
||||||
|
fprintf(stderr, "Error: no such file (%s)\n", filename);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
while (fgets(line, 1024, infile) != NULL)
|
||||||
|
if (strtok(line, " \t\n") != 0) {
|
||||||
|
npoints++;
|
||||||
|
}
|
||||||
|
rewind(infile);
|
||||||
|
while (fgets(line, 1024, infile) != NULL) {
|
||||||
|
if (strtok(line, " \t\n") != 0) {
|
||||||
|
// ignore the id (first attribute): nfeatures = 1;
|
||||||
|
while (strtok(NULL, " ,\t\n") != NULL)
|
||||||
|
nfeatures++;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// allocate space for features[] and read attributes of all objects
|
||||||
|
buf = (float *)malloc(npoints * nfeatures * sizeof(float));
|
||||||
|
features = (float **)malloc(npoints * sizeof(float *));
|
||||||
|
features[0] = (float *)malloc(npoints * nfeatures * sizeof(float));
|
||||||
|
for (i = 1; i < npoints; i++)
|
||||||
|
features[i] = features[i - 1] + nfeatures;
|
||||||
|
rewind(infile);
|
||||||
|
i = 0;
|
||||||
|
while (fgets(line, 1024, infile) != NULL) {
|
||||||
|
if (strtok(line, " \t\n") == NULL)
|
||||||
|
continue;
|
||||||
|
for (j = 0; j < nfeatures; j++) {
|
||||||
|
buf[i] = atof(strtok(NULL, " ,\t\n"));
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fclose(infile);
|
||||||
|
}*/
|
||||||
|
|
||||||
|
npoints = 100;
|
||||||
|
nfeatures = 100;
|
||||||
|
buf = (float *)malloc(npoints * nfeatures * sizeof(float));
|
||||||
|
features = (float **)malloc(npoints * sizeof(float *));
|
||||||
|
features[0] = (float *)malloc(npoints * nfeatures * sizeof(float));
|
||||||
|
for (i = 1; i < npoints; i++) {
|
||||||
|
features[i] = features[i - 1] + nfeatures;
|
||||||
|
}
|
||||||
|
for (i = 0; i < npoints * nfeatures; ++i) {
|
||||||
|
buf[i] = (i % 64);
|
||||||
|
}
|
||||||
|
|
||||||
|
// io_timing = omp_get_wtime() - io_timing;
|
||||||
|
|
||||||
|
printf("\nI/O completed\n");
|
||||||
|
printf("\nNumber of objects: %d\n", npoints);
|
||||||
|
printf("Number of features: %d\n", nfeatures);
|
||||||
|
/* ============== I/O end ==============*/
|
||||||
|
|
||||||
|
// error check for clusters
|
||||||
|
if (npoints < min_nclusters) {
|
||||||
|
printf("Error: min_nclusters(%d) > npoints(%d) -- cannot proceed\n",
|
||||||
|
min_nclusters, npoints);
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
srand(7); /* seed for future random number generator */
|
||||||
|
memcpy(
|
||||||
|
features[0], buf,
|
||||||
|
npoints * nfeatures *
|
||||||
|
sizeof(
|
||||||
|
float)); /* now features holds 2-dimensional array of features */
|
||||||
|
free(buf);
|
||||||
|
|
||||||
|
/* ======================= core of the clustering ===================*/
|
||||||
|
|
||||||
|
// cluster_timing = omp_get_wtime(); /* Total clustering time */
|
||||||
|
cluster_centres = NULL;
|
||||||
|
index = cluster(npoints, /* number of data points */
|
||||||
|
nfeatures, /* number of features for each point */
|
||||||
|
features, /* array: [npoints][nfeatures] */
|
||||||
|
min_nclusters, /* range of min to max number of clusters */
|
||||||
|
max_nclusters, threshold, /* loop termination factor */
|
||||||
|
&best_nclusters, /* return: number between min and max */
|
||||||
|
&cluster_centres, /* return: [best_nclusters][nfeatures] */
|
||||||
|
&rmse, /* Root Mean Squared Error */
|
||||||
|
isRMSE, /* calculate RMSE */
|
||||||
|
nloops); /* number of iteration for each number of clusters */
|
||||||
|
|
||||||
|
// cluster_timing = omp_get_wtime() - cluster_timing;
|
||||||
|
|
||||||
|
/* =============== Command Line Output =============== */
|
||||||
|
|
||||||
|
/* cluster center coordinates
|
||||||
|
:displayed only for when k=1*/
|
||||||
|
if ((min_nclusters == max_nclusters) && (isOutput == 1)) {
|
||||||
|
printf("\n================= Centroid Coordinates =================\n");
|
||||||
|
for (i = 0; i < max_nclusters; i++) {
|
||||||
|
printf("%d:", i);
|
||||||
|
for (j = 0; j < nfeatures; j++) {
|
||||||
|
printf(" %.2f", cluster_centres[i][j]);
|
||||||
|
}
|
||||||
|
printf("\n\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
len = (float)((max_nclusters - min_nclusters + 1) * nloops);
|
||||||
|
|
||||||
|
printf("Number of Iteration: %d\n", nloops);
|
||||||
|
// printf("Time for I/O: %.5fsec\n", io_timing);
|
||||||
|
// printf("Time for Entire Clustering: %.5fsec\n", cluster_timing);
|
||||||
|
|
||||||
|
if (min_nclusters != max_nclusters) {
|
||||||
|
if (nloops != 1) { // range of k, multiple iteration
|
||||||
|
// printf("Average Clustering Time: %fsec\n",
|
||||||
|
// cluster_timing / len);
|
||||||
|
printf("Best number of clusters is %d\n", best_nclusters);
|
||||||
|
} else { // range of k, single iteration
|
||||||
|
// printf("Average Clustering Time: %fsec\n",
|
||||||
|
// cluster_timing / len);
|
||||||
|
printf("Best number of clusters is %d\n", best_nclusters);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (nloops != 1) { // single k, multiple iteration
|
||||||
|
// printf("Average Clustering Time: %.5fsec\n",
|
||||||
|
// cluster_timing / nloops);
|
||||||
|
if (isRMSE) // if calculated RMSE
|
||||||
|
printf("Number of trials to approach the best RMSE of %.3f is %d\n",
|
||||||
|
rmse, index + 1);
|
||||||
|
} else { // single k, single iteration
|
||||||
|
if (isRMSE) // if calculated RMSE
|
||||||
|
printf("Root Mean Squared Error: %.3f\n", rmse);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* free up memory */
|
||||||
|
free(features[0]);
|
||||||
|
free(features);
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
94
benchmarks/new_opencl/kmeans/rmse.c
Executable file
94
benchmarks/new_opencl/kmeans/rmse.c
Executable file
@@ -0,0 +1,94 @@
|
|||||||
|
/*************************************************************************/
|
||||||
|
/** File: rmse.c **/
|
||||||
|
/** Description: calculate root mean squared error of particular **/
|
||||||
|
/** clustering. **/
|
||||||
|
/** Author: Sang-Ha Lee **/
|
||||||
|
/** University of Virginia. **/
|
||||||
|
/** **/
|
||||||
|
/** Note: euclid_dist_2() and find_nearest_point() adopted from **/
|
||||||
|
/** Minebench code. **/
|
||||||
|
/** **/
|
||||||
|
/*************************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <float.h>
|
||||||
|
#include <math.h>
|
||||||
|
|
||||||
|
#include "kmeans.h"
|
||||||
|
|
||||||
|
extern double wtime(void);
|
||||||
|
|
||||||
|
/*----< euclid_dist_2() >----------------------------------------------------*/
|
||||||
|
/* multi-dimensional spatial Euclid distance square */
|
||||||
|
__inline
|
||||||
|
float euclid_dist_2(float *pt1,
|
||||||
|
float *pt2,
|
||||||
|
int numdims)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
float ans=0.0;
|
||||||
|
|
||||||
|
for (i=0; i<numdims; i++)
|
||||||
|
ans += (pt1[i]-pt2[i]) * (pt1[i]-pt2[i]);
|
||||||
|
|
||||||
|
return(ans);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*----< find_nearest_point() >-----------------------------------------------*/
|
||||||
|
__inline
|
||||||
|
int find_nearest_point(float *pt, /* [nfeatures] */
|
||||||
|
int nfeatures,
|
||||||
|
float **pts, /* [npts][nfeatures] */
|
||||||
|
int npts)
|
||||||
|
{
|
||||||
|
int index, i;
|
||||||
|
float max_dist=FLT_MAX;
|
||||||
|
|
||||||
|
/* find the cluster center id with min distance to pt */
|
||||||
|
for (i=0; i<npts; i++) {
|
||||||
|
float dist;
|
||||||
|
dist = euclid_dist_2(pt, pts[i], nfeatures); /* no need square root */
|
||||||
|
if (dist < max_dist) {
|
||||||
|
max_dist = dist;
|
||||||
|
index = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return(index);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*----< rms_err(): calculates RMSE of clustering >-------------------------------------*/
|
||||||
|
float rms_err (float **feature, /* [npoints][nfeatures] */
|
||||||
|
int nfeatures,
|
||||||
|
int npoints,
|
||||||
|
float **cluster_centres, /* [nclusters][nfeatures] */
|
||||||
|
int nclusters)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
int nearest_cluster_index; /* cluster center id with min distance to pt */
|
||||||
|
float sum_euclid = 0.0; /* sum of Euclidean distance squares */
|
||||||
|
float ret; /* return value */
|
||||||
|
|
||||||
|
/* calculate and sum the sqaure of euclidean distance*/
|
||||||
|
#pragma omp parallel for \
|
||||||
|
shared(feature,cluster_centres) \
|
||||||
|
firstprivate(npoints,nfeatures,nclusters) \
|
||||||
|
private(i, nearest_cluster_index) \
|
||||||
|
schedule (static)
|
||||||
|
for (i=0; i<npoints; i++) {
|
||||||
|
nearest_cluster_index = find_nearest_point(feature[i],
|
||||||
|
nfeatures,
|
||||||
|
cluster_centres,
|
||||||
|
nclusters);
|
||||||
|
|
||||||
|
sum_euclid += euclid_dist_2(feature[i],
|
||||||
|
cluster_centres[nearest_cluster_index],
|
||||||
|
nfeatures);
|
||||||
|
|
||||||
|
}
|
||||||
|
/* divide by n, then take sqrt */
|
||||||
|
ret = sqrt(sum_euclid / npoints);
|
||||||
|
|
||||||
|
return(ret);
|
||||||
|
}
|
||||||
|
|
||||||
@@ -55,6 +55,28 @@ static cl_device_type device_type;
|
|||||||
static cl_device_id *device_list;
|
static cl_device_id *device_list;
|
||||||
static cl_int num_devices;
|
static cl_int num_devices;
|
||||||
|
|
||||||
|
|
||||||
|
static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) {
|
||||||
|
if (nullptr == filename || nullptr == data || 0 == size)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
FILE* fp = fopen(filename, "r");
|
||||||
|
if (NULL == fp) {
|
||||||
|
fprintf(stderr, "Failed to load kernel.");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
fseek(fp , 0 , SEEK_END);
|
||||||
|
long fsize = ftell(fp);
|
||||||
|
rewind(fp);
|
||||||
|
|
||||||
|
*data = (uint8_t*)malloc(fsize);
|
||||||
|
*size = fread(*data, 1, fsize, fp);
|
||||||
|
|
||||||
|
fclose(fp);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static int initialize(int use_gpu) {
|
static int initialize(int use_gpu) {
|
||||||
cl_int result;
|
cl_int result;
|
||||||
size_t size;
|
size_t size;
|
||||||
@@ -147,6 +169,11 @@ float *feature_d;
|
|||||||
float *clusters_d;
|
float *clusters_d;
|
||||||
float *center_d;
|
float *center_d;
|
||||||
|
|
||||||
|
uint8_t* kernel_bin = NULL;
|
||||||
|
size_t kernel_size = 0;
|
||||||
|
cl_int binary_status = 0;
|
||||||
|
|
||||||
|
|
||||||
int allocate(int n_points, int n_features, int n_clusters, float **feature) {
|
int allocate(int n_points, int n_features, int n_clusters, float **feature) {
|
||||||
/*int sourcesize = 1024 * 1024;
|
/*int sourcesize = 1024 * 1024;
|
||||||
char *source = (char *)calloc(sourcesize, sizeof(char));
|
char *source = (char *)calloc(sourcesize, sizeof(char));
|
||||||
@@ -170,11 +197,18 @@ int allocate(int n_points, int n_features, int n_clusters, float **feature) {
|
|||||||
if (initialize(use_gpu))
|
if (initialize(use_gpu))
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
|
// Load Kernel
|
||||||
|
if (read_kernel_file("kernel.pocl", &kernel_bin, &kernel_size)) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
// compile kernel
|
// compile kernel
|
||||||
cl_int err = 0;
|
cl_int err = 0;
|
||||||
//const char *slist[2] = {source, 0};
|
//const char *slist[2] = {source, 0};
|
||||||
//cl_program prog = clCreateProgramWithSource(context, 1, slist, NULL, &err);
|
//cl_program prog = clCreateProgramWithSource(context, 1, slist, NULL, &err);
|
||||||
cl_program prog = clCreateProgramWithBuiltInKernels(context, 1, device_list, "kmeans_kernel_c;kmeans_swap", &err);
|
cl_program prog = clCreateProgramWithBinary(
|
||||||
|
context, 1, device_list, &kernel_size, &kernel_bin, &binary_status, &err);
|
||||||
|
// cl_program prog = clCreateProgramWithBuiltInKernels(context, 1, device_list, "kmeans_kernel_c;kmeans_swap", &err);
|
||||||
if (err != CL_SUCCESS) {
|
if (err != CL_SUCCESS) {
|
||||||
printf("ERROR: clCreateProgramWithSource() => %d\n", err);
|
printf("ERROR: clCreateProgramWithSource() => %d\n", err);
|
||||||
return -1;
|
return -1;
|
||||||
@@ -280,6 +314,7 @@ void deallocateMemory() {
|
|||||||
clReleaseMemObject(d_feature_swap);
|
clReleaseMemObject(d_feature_swap);
|
||||||
clReleaseMemObject(d_cluster);
|
clReleaseMemObject(d_cluster);
|
||||||
clReleaseMemObject(d_membership);
|
clReleaseMemObject(d_membership);
|
||||||
|
if (kernel_bin) free(kernel_bin);
|
||||||
free(membership_OCL);
|
free(membership_OCL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,24 +1,35 @@
|
|||||||
|
|
||||||
BUILD_DIR=build_sim
|
BUILD_DIR=build_ase
|
||||||
|
|
||||||
all: ase fpga
|
all: ase fpga
|
||||||
|
|
||||||
ase: build-setup
|
ase: setup-ase
|
||||||
make -C $(BUILD_DIR)
|
make -C $(BUILD_DIR)
|
||||||
|
|
||||||
fpga: build-setup
|
fpga: setup-fpga
|
||||||
# TODO
|
cd build_fpga && qsub-synth
|
||||||
|
|
||||||
build-setup: $(BUILD_DIR)/Makefile
|
setup-ase: build_ase/Makefile
|
||||||
|
|
||||||
$(BUILD_DIR)/Makefile:
|
setup-fpga: build_fpga/build/dcp.qpf
|
||||||
afu_sim_setup --sources=sources.txt --platform discrete_pcie3 $(BUILD_DIR) -f
|
|
||||||
|
build_ase/Makefile:
|
||||||
|
afu_sim_setup --s sources.txt build_ase
|
||||||
|
|
||||||
|
build_fpga/build/dcp.qpf:
|
||||||
|
afu_synth_setup -s sources.txt build_fpga
|
||||||
|
|
||||||
run-ase:
|
run-ase:
|
||||||
cd $(BUILD_DIR) && MENT_VSIM_OPT="-dpicpppath /usr/bin/gcc" make sim
|
cd build_ase && make sim
|
||||||
|
|
||||||
|
wave:
|
||||||
|
vsim -view build_ase/work/vsim.wlf -do wave.do
|
||||||
|
|
||||||
run-fpga:
|
run-fpga:
|
||||||
# TODO
|
# TODO
|
||||||
|
|
||||||
clean:
|
clean-ase:
|
||||||
rm -rf $(BUILD_DIR)
|
rm -rf build_ase
|
||||||
|
|
||||||
|
clean-fpga:
|
||||||
|
rm -rf build_fpga
|
||||||
@@ -108,7 +108,7 @@ module ccip_std_afu
|
|||||||
#(
|
#(
|
||||||
.NUM_LOCAL_MEM_BANKS(NUM_LOCAL_MEM_BANKS)
|
.NUM_LOCAL_MEM_BANKS(NUM_LOCAL_MEM_BANKS)
|
||||||
)
|
)
|
||||||
hello_mem_afu_inst
|
vortex_afu_inst
|
||||||
(
|
(
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.SoftReset (reset_T1),
|
.SoftReset (reset_T1),
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
vortex_afu.json
|
vortex_afu.json
|
||||||
|
|
||||||
|
+define+GLOBAL_BLOCK_SIZE_BYTES=64
|
||||||
|
|
||||||
+incdir+.
|
+incdir+.
|
||||||
+incdir+../../rtl
|
+incdir+../../rtl
|
||||||
+incdir+../../rtl/shared_memory
|
+incdir+../../rtl/shared_memory
|
||||||
@@ -13,6 +15,7 @@ vortex_afu.json
|
|||||||
../../rtl/VX_define.v
|
../../rtl/VX_define.v
|
||||||
../../rtl/VX_cache/VX_cache_config.v
|
../../rtl/VX_cache/VX_cache_config.v
|
||||||
../../rtl/Vortex_SOC.v
|
../../rtl/Vortex_SOC.v
|
||||||
|
../../rtl/Vortex_Cluster.v
|
||||||
../../rtl/Vortex.v
|
../../rtl/Vortex.v
|
||||||
../../rtl/VX_front_end.v
|
../../rtl/VX_front_end.v
|
||||||
../../rtl/VX_back_end.v
|
../../rtl/VX_back_end.v
|
||||||
|
|||||||
@@ -3,7 +3,19 @@
|
|||||||
"afu-image": {
|
"afu-image": {
|
||||||
"power": 0,
|
"power": 0,
|
||||||
"clock-frequency-high": "auto",
|
"clock-frequency-high": "auto",
|
||||||
"clock-frequency-low": "auto",
|
"clock-frequency-low": "auto",
|
||||||
|
|
||||||
|
"mmio-csr-cmd": 10,
|
||||||
|
"mmio-csr-status": 12,
|
||||||
|
"mmio-csr-io-addr": 14,
|
||||||
|
"mmio-csr-mem-addr": 16,
|
||||||
|
"mmio-csr-data-size": 18,
|
||||||
|
|
||||||
|
"cmd-type-read": 1,
|
||||||
|
"cmd-type-write": 2,
|
||||||
|
"cmd-type-run": 3,
|
||||||
|
"cmd-type-snoop": 4,
|
||||||
|
|
||||||
"afu-top-interface":
|
"afu-top-interface":
|
||||||
{
|
{
|
||||||
"class": "ccip_std_afu_avalon_mm",
|
"class": "ccip_std_afu_avalon_mm",
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -1,3 +1,5 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
source /tools/reconfig/intel/19.3/rg_intel_fpga_end_19.3.sh
|
source /tools/reconfig/intel/19.3/rg_intel_fpga_end_19.3.sh
|
||||||
export PATH=/tools/opae/1.4.0/bin:/tools/reconfig/intel/19.3/modelsim_ase/bin:$PATH
|
export PATH=/tools/opae/1.4.0/bin:/tools/reconfig/intel/19.3/modelsim_ase/bin:$PATH
|
||||||
export LD_LIBRARY_PATH=/tools/opae/1.4.0/lib:$PATH
|
export LD_LIBRARY_PATH=/tools/opae/1.4.0/lib:$PATH
|
||||||
|
|||||||
@@ -1,6 +1,9 @@
|
|||||||
|
|
||||||
|
|
||||||
all: opae rtlsim simx
|
all: dummy
|
||||||
|
|
||||||
|
dummy:
|
||||||
|
$(MAKE) -C dummy
|
||||||
|
|
||||||
opae:
|
opae:
|
||||||
$(MAKE) -C opae
|
$(MAKE) -C opae
|
||||||
@@ -12,6 +15,7 @@ simx:
|
|||||||
$(MAKE) -C simx
|
$(MAKE) -C simx
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
|
$(MAKE) clean -C dummy
|
||||||
$(MAKE) clean -C opae
|
$(MAKE) clean -C opae
|
||||||
$(MAKE) clean -C rtlsim
|
$(MAKE) clean -C rtlsim
|
||||||
$(MAKE) clean -C simx
|
$(MAKE) clean -C simx
|
||||||
|
|||||||
20
driver/sw/dummy/Makefile
Normal file
20
driver/sw/dummy/Makefile
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
CXXFLAGS += -std=c++11 -O3 -Wall -Wextra -pedantic -Wfatal-errors
|
||||||
|
#CXXFLAGS += -std=c++11 -g -O0 -Wall -Wextra -pedantic -Wfatal-errors
|
||||||
|
|
||||||
|
CXXFLAGS += -I../include -I../../../runtime
|
||||||
|
|
||||||
|
CXXFLAGS += -fPIC
|
||||||
|
|
||||||
|
LDFLAGS += -shared -pthread
|
||||||
|
|
||||||
|
SRCS = vortex.cpp ../vx_utils.cpp
|
||||||
|
|
||||||
|
PROJECT = libvortex.so
|
||||||
|
|
||||||
|
all: $(PROJECT)
|
||||||
|
|
||||||
|
$(PROJECT): $(SRCS)
|
||||||
|
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
|
||||||
|
|
||||||
|
clean:
|
||||||
|
rm -rf $(PROJECT) obj_dir
|
||||||
45
driver/sw/dummy/vortex.cpp
Normal file
45
driver/sw/dummy/vortex.cpp
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
#include <vortex.h>
|
||||||
|
|
||||||
|
extern int vx_dev_open(vx_device_h* /*hdevice*/) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
extern int vx_dev_close(vx_device_h /*hdevice*/) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
extern int vx_alloc_dev_mem(vx_device_h /*hdevice*/, size_t /*size*/, size_t* /*dev_maddr*/) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
extern int vx_flush_caches(vx_device_h /*hdevice*/, size_t /*dev_maddr*/, size_t /*size*/) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
extern int vx_alloc_shared_mem(vx_device_h /*hdevice*/, size_t /*size*/, vx_buffer_h* /*hbuffer*/) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
extern volatile void* vx_host_ptr(vx_buffer_h /*hbuffer*/) {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
extern int vx_buf_release(vx_buffer_h /*hbuffer*/) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
extern int vx_copy_to_dev(vx_buffer_h /*hbuffer*/, size_t /*dev_maddr*/, size_t /*size*/, size_t /*src_offset*/) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
extern int vx_copy_from_dev(vx_buffer_h /*hbuffer*/, size_t /*dev_maddr*/, size_t /*size*/, size_t /*dest_offset*/) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
extern int vx_start(vx_device_h /*hdevice*/) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
extern int vx_ready_wait(vx_device_h /*hdevice*/, long long /*timeout*/) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
|
|
||||||
CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
|
CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
|
||||||
|
|
||||||
CXXFLAGS += -I../include -I/tools/opae/1.4.0/include
|
CXXFLAGS += -I../include -I/tools/opae/1.4.0/include -I../../../runtime
|
||||||
|
|
||||||
LDFLAGS += -L/tools/opae/1.4.0/lib
|
LDFLAGS += -L/tools/opae/1.4.0/lib
|
||||||
|
|
||||||
@@ -17,6 +17,8 @@ CXXFLAGS +=-fstack-protector
|
|||||||
# Position independent code
|
# Position independent code
|
||||||
CXXFLAGS += -fPIC
|
CXXFLAGS += -fPIC
|
||||||
|
|
||||||
|
CXXFLAGS += -DGLOBAL_BLOCK_SIZE_BYTES=64
|
||||||
|
|
||||||
LDFLAGS += -luuid
|
LDFLAGS += -luuid
|
||||||
|
|
||||||
LDFLAGS += -shared
|
LDFLAGS += -shared
|
||||||
@@ -50,7 +52,7 @@ $(PROJECT_ASE): $(SRCS) $(ASE_DIR)
|
|||||||
$(CXX) $(CXXFLAGS) -DUSE_ASE $(SRCS) $(LDFLAGS) $(ASE_LIBS) -o $@
|
$(CXX) $(CXXFLAGS) -DUSE_ASE $(SRCS) $(LDFLAGS) $(ASE_LIBS) -o $@
|
||||||
|
|
||||||
vortex.o: vortex.cpp $(AFU_JSON_INFO)
|
vortex.o: vortex.cpp $(AFU_JSON_INFO)
|
||||||
$(CC) $(CXXFLAGS) -c vortex.cpp -o $@
|
$(CXX) $(CXXFLAGS) -c vortex.cpp -o $@
|
||||||
|
|
||||||
$(ASE_DIR):
|
$(ASE_DIR):
|
||||||
mkdir -p ase
|
mkdir -p ase
|
||||||
|
|||||||
@@ -4,35 +4,35 @@
|
|||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <uuid/uuid.h>
|
#include <uuid/uuid.h>
|
||||||
|
|
||||||
#include <opae/fpga.h>
|
#include <opae/fpga.h>
|
||||||
#include <vortex.h>
|
#include <vortex.h>
|
||||||
#include "vortex_afu.h"
|
#include "vortex_afu.h"
|
||||||
|
|
||||||
// MMIO Address Mappings
|
|
||||||
#define MMIO_COPY_IO_ADDRESS 0X120
|
|
||||||
#define MMIO_COPY_AVM_ADDRESS 0x100
|
|
||||||
#define MMIO_COPY_DATA_SIZE 0X118
|
|
||||||
|
|
||||||
#define MMIO_CMD_TYPE 0X110
|
|
||||||
#define MMIO_READY_FOR_CMD 0X198
|
|
||||||
|
|
||||||
#define MMIO_CMD_TYPE_READ 0
|
|
||||||
#define MMIO_CMD_TYPE_WRITE 1
|
|
||||||
#define MMIO_CMD_TYPE_START 2
|
|
||||||
#define MMIO_CMD_TYPE_SNOOP 3
|
|
||||||
|
|
||||||
#define CHECK_RES(_expr) \
|
#define CHECK_RES(_expr) \
|
||||||
do { \
|
do { \
|
||||||
fpga_result res = _expr; \
|
fpga_result res = _expr; \
|
||||||
if (res == FPGA_OK) \
|
if (res == FPGA_OK) \
|
||||||
break; \
|
break; \
|
||||||
printf("OPAE Error: '%s' returned %d!\n", #_expr, (int)res); \
|
printf("OPAE Error: '%s' returned %d, %s!\n", \
|
||||||
|
#_expr, (int)res, fpgaErrStr(res)); \
|
||||||
return -1; \
|
return -1; \
|
||||||
} while (false)
|
} while (false)
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
#define CMD_TYPE_READ AFU_IMAGE_CMD_TYPE_READ
|
||||||
|
#define CMD_TYPE_WRITE AFU_IMAGE_CMD_TYPE_WRITE
|
||||||
|
#define CMD_TYPE_RUN AFU_IMAGE_CMD_TYPE_RUN
|
||||||
|
#define CMD_TYPE_SNOOP AFU_IMAGE_CMD_TYPE_SNOOP
|
||||||
|
|
||||||
|
#define MMIO_CSR_CMD (AFU_IMAGE_MMIO_CSR_CMD * 4)
|
||||||
|
#define MMIO_CSR_STATUS (AFU_IMAGE_MMIO_CSR_STATUS * 4)
|
||||||
|
#define MMIO_CSR_IO_ADDR (AFU_IMAGE_MMIO_CSR_IO_ADDR * 4)
|
||||||
|
#define MMIO_CSR_MEM_ADDR (AFU_IMAGE_MMIO_CSR_MEM_ADDR * 4)
|
||||||
|
#define MMIO_CSR_DATA_SIZE (AFU_IMAGE_MMIO_CSR_DATA_SIZE * 4)
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
typedef struct vx_device_ {
|
typedef struct vx_device_ {
|
||||||
fpga_handle fpga;
|
fpga_handle fpga;
|
||||||
size_t mem_allocation;
|
size_t mem_allocation;
|
||||||
@@ -42,21 +42,19 @@ typedef struct vx_buffer_ {
|
|||||||
uint64_t wsid;
|
uint64_t wsid;
|
||||||
volatile void* host_ptr;
|
volatile void* host_ptr;
|
||||||
uint64_t io_addr;
|
uint64_t io_addr;
|
||||||
fpga_handle fpga;
|
vx_device_h hdevice;
|
||||||
size_t size;
|
size_t size;
|
||||||
} vx_buffer_t;
|
} vx_buffer_t;
|
||||||
|
|
||||||
static size_t align_size(size_t size) {
|
static size_t align_size(size_t size) {
|
||||||
uint32_t cache_block_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
|
uint32_t cache_block_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
|
||||||
return cache_block_size * ((size + cache_block_size - 1) / cache_block_size);
|
return cache_block_size * ((size + cache_block_size - 1) / cache_block_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
// Search for an accelerator matching the requested UUID and connect to it
|
|
||||||
// Convert this to void if required as storing the fpga_handle to params variable
|
|
||||||
extern int vx_dev_open(vx_device_h* hdevice) {
|
extern int vx_dev_open(vx_device_h* hdevice) {
|
||||||
fpga_properties filter = NULL;
|
fpga_properties filter = nullptr;
|
||||||
fpga_result res;
|
fpga_result res;
|
||||||
fpga_guid guid;
|
fpga_guid guid;
|
||||||
fpga_token accel_token;
|
fpga_token accel_token;
|
||||||
@@ -64,11 +62,14 @@ extern int vx_dev_open(vx_device_h* hdevice) {
|
|||||||
fpga_handle accel_handle;
|
fpga_handle accel_handle;
|
||||||
vx_device_t* device;
|
vx_device_t* device;
|
||||||
|
|
||||||
if (NULL == hdevice)
|
if (nullptr == hdevice)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
|
// ensure that the block size 64
|
||||||
|
assert(64 == vx_dev_caps(VX_CAPS_CACHE_LINESIZE));
|
||||||
|
|
||||||
// Set up a filter that will search for an accelerator
|
// Set up a filter that will search for an accelerator
|
||||||
fpgaGetProperties(NULL, &filter);
|
fpgaGetProperties(nullptr, &filter);
|
||||||
fpgaPropertiesSetObjectType(filter, FPGA_ACCELERATOR);
|
fpgaPropertiesSetObjectType(filter, FPGA_ACCELERATOR);
|
||||||
|
|
||||||
// Add the desired UUID to the filter
|
// Add the desired UUID to the filter
|
||||||
@@ -84,13 +85,13 @@ extern int vx_dev_open(vx_device_h* hdevice) {
|
|||||||
|
|
||||||
if (num_matches < 1) {
|
if (num_matches < 1) {
|
||||||
fprintf(stderr, "Accelerator %s not found!\n", AFU_ACCEL_UUID);
|
fprintf(stderr, "Accelerator %s not found!\n", AFU_ACCEL_UUID);
|
||||||
return NULL;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Open accelerator
|
// Open accelerator
|
||||||
res = fpgaOpen(accel_token, &accel_handle, 0);
|
res = fpgaOpen(accel_token, &accel_handle, 0);
|
||||||
if (FPGA_OK != res) {
|
if (FPGA_OK != res) {
|
||||||
return NULL;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Done with token
|
// Done with token
|
||||||
@@ -98,9 +99,9 @@ extern int vx_dev_open(vx_device_h* hdevice) {
|
|||||||
|
|
||||||
// allocate device object
|
// allocate device object
|
||||||
device = (vx_device_t*)malloc(sizeof(vx_device_t));
|
device = (vx_device_t*)malloc(sizeof(vx_device_t));
|
||||||
if (NULL == device) {
|
if (nullptr == device) {
|
||||||
fpgaClose(accel_handle);
|
fpgaClose(accel_handle);
|
||||||
return NULL;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
device->fpga = accel_handle;
|
device->fpga = accel_handle;
|
||||||
@@ -111,9 +112,8 @@ extern int vx_dev_open(vx_device_h* hdevice) {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Close the fpga when all the operations are done
|
|
||||||
extern int vx_dev_close(vx_device_h hdevice) {
|
extern int vx_dev_close(vx_device_h hdevice) {
|
||||||
if (NULL == hdevice)
|
if (nullptr == hdevice)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
vx_device_t *device = ((vx_device_t*)hdevice);
|
vx_device_t *device = ((vx_device_t*)hdevice);
|
||||||
@@ -126,15 +126,15 @@ extern int vx_dev_close(vx_device_h hdevice) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr) {
|
extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr) {
|
||||||
if (NULL == hdevice
|
if (nullptr == hdevice
|
||||||
|| NULL == dev_maddr
|
|| nullptr == dev_maddr
|
||||||
|| 0 >= size)
|
|| 0 >= size)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
vx_device_t *device = ((vx_device_t*)hdevice);
|
vx_device_t *device = ((vx_device_t*)hdevice);
|
||||||
|
|
||||||
size_t asize = align_size(size);
|
size_t asize = align_size(size);
|
||||||
auto dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE);
|
size_t dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE);
|
||||||
if (device->mem_allocation + asize > dev_mem_size)
|
if (device->mem_allocation + asize > dev_mem_size)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
@@ -151,9 +151,9 @@ extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hb
|
|||||||
uint64_t io_addr;
|
uint64_t io_addr;
|
||||||
vx_buffer_t* buffer;
|
vx_buffer_t* buffer;
|
||||||
|
|
||||||
if (NULL == hdevice
|
if (nullptr == hdevice
|
||||||
|| 0 >= size
|
|| 0 >= size
|
||||||
|| NULL == hbuffer)
|
|| nullptr == hbuffer)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
vx_device_t *device = ((vx_device_t*)hdevice);
|
vx_device_t *device = ((vx_device_t*)hdevice);
|
||||||
@@ -174,7 +174,7 @@ extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hb
|
|||||||
|
|
||||||
// allocate buffer object
|
// allocate buffer object
|
||||||
buffer = (vx_buffer_t*)malloc(sizeof(vx_buffer_t));
|
buffer = (vx_buffer_t*)malloc(sizeof(vx_buffer_t));
|
||||||
if (NULL == buffer) {
|
if (nullptr == buffer) {
|
||||||
fpgaReleaseBuffer(device->fpga, wsid);
|
fpgaReleaseBuffer(device->fpga, wsid);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
@@ -182,7 +182,7 @@ extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hb
|
|||||||
buffer->wsid = wsid;
|
buffer->wsid = wsid;
|
||||||
buffer->host_ptr = host_ptr;
|
buffer->host_ptr = host_ptr;
|
||||||
buffer->io_addr = io_addr;
|
buffer->io_addr = io_addr;
|
||||||
buffer->fpga = device->fpga;
|
buffer->hdevice = hdevice;
|
||||||
buffer->size = size;
|
buffer->size = size;
|
||||||
|
|
||||||
*hbuffer = buffer;
|
*hbuffer = buffer;
|
||||||
@@ -191,136 +191,30 @@ extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hb
|
|||||||
}
|
}
|
||||||
|
|
||||||
extern volatile void* vx_host_ptr(vx_buffer_h hbuffer) {
|
extern volatile void* vx_host_ptr(vx_buffer_h hbuffer) {
|
||||||
|
if (nullptr == hbuffer)
|
||||||
|
return nullptr;
|
||||||
|
|
||||||
vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer);
|
vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer);
|
||||||
if (NULL == buffer)
|
|
||||||
return NULL;
|
|
||||||
|
|
||||||
return buffer->host_ptr;
|
return buffer->host_ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
extern int vx_buf_release(vx_buffer_h hbuffer) {
|
extern int vx_buf_release(vx_buffer_h hbuffer) {
|
||||||
vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer);
|
if (nullptr == hbuffer)
|
||||||
if (NULL == buffer)
|
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
fpgaReleaseBuffer(buffer->fpga, buffer->wsid);
|
vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer);
|
||||||
|
vx_device_t *device = ((vx_device_t*)buffer->hdevice);
|
||||||
|
|
||||||
|
fpgaReleaseBuffer(device->fpga, buffer->wsid);
|
||||||
|
|
||||||
free(buffer);
|
free(buffer);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if HW is ready for SW
|
|
||||||
static int ready_for_sw(fpga_handle hdevice) {
|
|
||||||
uint64_t data = 0;
|
|
||||||
struct timespec sleep_time;
|
|
||||||
|
|
||||||
#ifdef USE_ASE
|
|
||||||
sleep_time.tv_sec = 1;
|
|
||||||
sleep_time.tv_nsec = 0;
|
|
||||||
#else
|
|
||||||
sleep_time.tv_sec = 0;
|
|
||||||
sleep_time.tv_nsec = 1000000;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
do {
|
|
||||||
CHECK_RES(fpgaReadMMIO64(hdevice, 0, MMIO_READY_FOR_CMD, &data));
|
|
||||||
nanosleep(&sleep_time, NULL);
|
|
||||||
} while (data != 0x1);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t src_offset) {
|
|
||||||
if (NULL == hbuffer
|
|
||||||
|| 0 >= size)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer);
|
|
||||||
|
|
||||||
// bound checking
|
|
||||||
if (size + src_offset > buffer->size)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
// Ensure ready for new command
|
|
||||||
if (ready_for_sw(buffer->fpga) != 0)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_AVM_ADDRESS, dev_maddr));
|
|
||||||
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_IO_ADDRESS, buffer->io_addr + src_offset);
|
|
||||||
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_DATA_SIZE, size));
|
|
||||||
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_CMD_TYPE, MMIO_CMD_TYPE_WRITE));
|
|
||||||
|
|
||||||
// Wait for the write operation to finish
|
|
||||||
return ready_for_sw(buffer->fpga);
|
|
||||||
}
|
|
||||||
|
|
||||||
extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t dest_offset) {
|
|
||||||
if (NULL == hbuffer
|
|
||||||
|| 0 >= size)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer);
|
|
||||||
|
|
||||||
// bound checking
|
|
||||||
if (size + dest_offset > buffer->size)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
// Ensure ready for new command
|
|
||||||
if (ready_for_sw(buffer->fpga) != 0)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_AVM_ADDRESS, dev_maddr));
|
|
||||||
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_IO_ADDRESS, buffer->io_addr + dest_offset);
|
|
||||||
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_DATA_SIZE, size));
|
|
||||||
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_CMD_TYPE, MMIO_CMD_TYPE_READ));
|
|
||||||
|
|
||||||
// Wait for the write operation to finish
|
|
||||||
return ready_for_sw(buffer->fpga);
|
|
||||||
}
|
|
||||||
|
|
||||||
extern int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size) {
|
|
||||||
if (NULL == hbuffer
|
|
||||||
|| 0 >= size)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer);
|
|
||||||
|
|
||||||
// bound checking
|
|
||||||
if (size + src_offset > buffer->size)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
// Ensure ready for new command
|
|
||||||
if (ready_for_sw(buffer->fpga) != 0)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_AVM_ADDRESS, dev_maddr));
|
|
||||||
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_IO_ADDRESS, (buffer->io_addr + src_offset)/VX_CACHE_LINESIZE));
|
|
||||||
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_DATA_SIZE, size));
|
|
||||||
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_CMD_TYPE, MMIO_CMD_TYPE_SNOOP));
|
|
||||||
|
|
||||||
// Wait for the write operation to finish
|
|
||||||
return ready_for_sw(buffer->fpga);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
extern int vx_start(vx_device_h hdevice) {
|
|
||||||
if (NULL == hdevice)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
vx_device_t *device = ((vx_device_t*)hdevice);
|
|
||||||
|
|
||||||
// Ensure ready for new command
|
|
||||||
if (ready_for_sw(device->fpga) != 0)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, MMIO_CMD_TYPE_START));
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
|
extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
|
||||||
if (NULL == hdevice)
|
if (nullptr == hdevice)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
vx_device_t *device = ((vx_device_t*)hdevice);
|
vx_device_t *device = ((vx_device_t*)hdevice);
|
||||||
@@ -328,7 +222,7 @@ extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
|
|||||||
uint64_t data = 0;
|
uint64_t data = 0;
|
||||||
struct timespec sleep_time;
|
struct timespec sleep_time;
|
||||||
|
|
||||||
#ifdef USE_ASE
|
#if defined(USE_ASE)
|
||||||
sleep_time.tv_sec = 1;
|
sleep_time.tv_sec = 1;
|
||||||
sleep_time.tv_nsec = 0;
|
sleep_time.tv_nsec = 0;
|
||||||
#else
|
#else
|
||||||
@@ -339,13 +233,106 @@ extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
|
|||||||
// to milliseconds
|
// to milliseconds
|
||||||
long long sleep_time_ms = (sleep_time.tv_sec * 1000) + (sleep_time.tv_nsec / 1000000);
|
long long sleep_time_ms = (sleep_time.tv_sec * 1000) + (sleep_time.tv_nsec / 1000000);
|
||||||
|
|
||||||
do {
|
for (;;) {
|
||||||
CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_READY_FOR_CMD, &data));
|
CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_CSR_STATUS, &data));
|
||||||
nanosleep(&sleep_time, NULL);
|
if (0 == data || 0 == timeout)
|
||||||
sleep_time_ms -= sleep_time_ms;
|
break;
|
||||||
if (timeout <= sleep_time_ms)
|
nanosleep(&sleep_time, nullptr);
|
||||||
break;
|
timeout -= sleep_time_ms;
|
||||||
} while (data != 0x1);
|
};
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t src_offset) {
|
||||||
|
if (nullptr == hbuffer
|
||||||
|
|| 0 >= size)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer);
|
||||||
|
vx_device_t *device = ((vx_device_t*)buffer->hdevice);
|
||||||
|
|
||||||
|
// bound checking
|
||||||
|
if (size + src_offset > buffer->size)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
// Ensure ready for new command
|
||||||
|
if (vx_ready_wait(buffer->hdevice, -1) != 0)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_IO_ADDR, buffer->io_addr + src_offset));
|
||||||
|
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, dev_maddr));
|
||||||
|
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, size));
|
||||||
|
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_WRITE));
|
||||||
|
|
||||||
|
// Wait for the write operation to finish
|
||||||
|
if (vx_ready_wait(buffer->hdevice, -1) != 0)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t dest_offset) {
|
||||||
|
if (nullptr == hbuffer
|
||||||
|
|| 0 >= size)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer);
|
||||||
|
vx_device_t *device = ((vx_device_t*)buffer->hdevice);
|
||||||
|
|
||||||
|
// bound checking
|
||||||
|
if (size + dest_offset > buffer->size)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
// Ensure ready for new command
|
||||||
|
if (vx_ready_wait(buffer->hdevice, -1) != 0)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_IO_ADDR, buffer->io_addr + dest_offset));
|
||||||
|
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, dev_maddr));
|
||||||
|
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, size));
|
||||||
|
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_READ));
|
||||||
|
|
||||||
|
// Wait for the write operation to finish
|
||||||
|
if (vx_ready_wait(buffer->hdevice, -1) != 0)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
extern int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size) {
|
||||||
|
if (nullptr == hdevice
|
||||||
|
|| 0 >= size)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
vx_device_t* device = ((vx_device_t*)hdevice);
|
||||||
|
|
||||||
|
// Ensure ready for new command
|
||||||
|
if (vx_ready_wait(hdevice, -1) != 0)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, dev_maddr));
|
||||||
|
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, size));
|
||||||
|
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_SNOOP));
|
||||||
|
|
||||||
|
// Wait for the write operation to finish
|
||||||
|
if (vx_ready_wait(hdevice, -1) != 0)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
extern int vx_start(vx_device_h hdevice) {
|
||||||
|
if (nullptr == hdevice)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
vx_device_t *device = ((vx_device_t*)hdevice);
|
||||||
|
|
||||||
|
// Ensure ready for new command
|
||||||
|
if (vx_ready_wait(hdevice, -1) != 0)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_RUN));
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@@ -11,17 +11,6 @@
|
|||||||
#include <ram.h>
|
#include <ram.h>
|
||||||
#include <simulator.h>
|
#include <simulator.h>
|
||||||
|
|
||||||
#define PAGE_SIZE 4096
|
|
||||||
|
|
||||||
#define CHECK_RES(_expr) \
|
|
||||||
do { \
|
|
||||||
fpga_result res = _expr; \
|
|
||||||
if (res == FPGA_OK) \
|
|
||||||
break; \
|
|
||||||
printf("OPAE Error: '%s' returned %d!\n", #_expr, (int)res); \
|
|
||||||
return -1; \
|
|
||||||
} while (false)
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
static size_t align_size(size_t size) {
|
static size_t align_size(size_t size) {
|
||||||
@@ -197,7 +186,7 @@ private:
|
|||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
extern int vx_dev_open(vx_device_h* hdevice) {
|
extern int vx_dev_open(vx_device_h* hdevice) {
|
||||||
if (NULL == hdevice)
|
if (nullptr == hdevice)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
*hdevice = new vx_device();
|
*hdevice = new vx_device();
|
||||||
@@ -217,8 +206,8 @@ extern int vx_dev_close(vx_device_h hdevice) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr) {
|
extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr) {
|
||||||
if (NULL == hdevice
|
if (nullptr == hdevice
|
||||||
|| NULL == dev_maddr
|
|| nullptr == dev_maddr
|
||||||
|| 0 >= size)
|
|| 0 >= size)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
@@ -227,7 +216,7 @@ extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr)
|
|||||||
}
|
}
|
||||||
|
|
||||||
extern int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size) {
|
extern int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size) {
|
||||||
if (NULL == hdevice
|
if (nullptr == hdevice
|
||||||
|| 0 >= size)
|
|| 0 >= size)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
@@ -240,7 +229,7 @@ extern int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size) {
|
|||||||
extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer) {
|
extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer) {
|
||||||
if (nullptr == hdevice
|
if (nullptr == hdevice
|
||||||
|| 0 >= size
|
|| 0 >= size
|
||||||
|| NULL == hbuffer)
|
|| nullptr == hbuffer)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
vx_device *device = ((vx_device*)hdevice);
|
vx_device *device = ((vx_device*)hdevice);
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
CFLAGS += -std=c++11 -O3 -Wall -Wextra -pedantic -Wfatal-errors
|
CFLAGS += -std=c++11 -O3 -Wall -Wextra -pedantic -Wfatal-errors
|
||||||
#CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -pedantic -Wfatal-errors
|
#CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -pedantic -Wfatal-errors
|
||||||
|
|
||||||
CFLAGS += -I../../include -I../../../../simX/include -I../../../../runtime
|
CFLAGS += -I../../include -I../../../../simX/include -I../../../../runtime
|
||||||
|
|
||||||
CFLAGS += -fPIC
|
CFLAGS += -fPIC
|
||||||
|
|
||||||
|
|||||||
@@ -13,15 +13,6 @@
|
|||||||
|
|
||||||
#define PAGE_SIZE 4096
|
#define PAGE_SIZE 4096
|
||||||
|
|
||||||
#define CHECK_RES(_expr) \
|
|
||||||
do { \
|
|
||||||
fpga_result res = _expr; \
|
|
||||||
if (res == FPGA_OK) \
|
|
||||||
break; \
|
|
||||||
printf("OPAE Error: '%s' returned %d!\n", #_expr, (int)res); \
|
|
||||||
return -1; \
|
|
||||||
} while (false)
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
static size_t align_size(size_t size) {
|
static size_t align_size(size_t size) {
|
||||||
@@ -206,7 +197,7 @@ private:
|
|||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
extern int vx_dev_open(vx_device_h* hdevice) {
|
extern int vx_dev_open(vx_device_h* hdevice) {
|
||||||
if (NULL == hdevice)
|
if (nullptr == hdevice)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
*hdevice = new vx_device();
|
*hdevice = new vx_device();
|
||||||
@@ -226,8 +217,8 @@ extern int vx_dev_close(vx_device_h hdevice) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr) {
|
extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr) {
|
||||||
if (NULL == hdevice
|
if (nullptr == hdevice
|
||||||
|| NULL == dev_maddr
|
|| nullptr == dev_maddr
|
||||||
|| 0 >= size)
|
|| 0 >= size)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
@@ -236,7 +227,7 @@ extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr)
|
|||||||
}
|
}
|
||||||
|
|
||||||
extern int vx_flush_caches(vx_device_h hdevice, size_t /*dev_maddr*/, size_t size) {
|
extern int vx_flush_caches(vx_device_h hdevice, size_t /*dev_maddr*/, size_t size) {
|
||||||
if (NULL == hdevice
|
if (nullptr == hdevice
|
||||||
|| 0 >= size)
|
|| 0 >= size)
|
||||||
return -1;
|
return -1;
|
||||||
// this functionality is not need by simX
|
// this functionality is not need by simX
|
||||||
@@ -246,7 +237,7 @@ extern int vx_flush_caches(vx_device_h hdevice, size_t /*dev_maddr*/, size_t siz
|
|||||||
extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer) {
|
extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer) {
|
||||||
if (nullptr == hdevice
|
if (nullptr == hdevice
|
||||||
|| 0 >= size
|
|| 0 >= size
|
||||||
|| NULL == hbuffer)
|
|| nullptr == hbuffer)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
vx_device *device = ((vx_device*)hdevice);
|
vx_device *device = ((vx_device*)hdevice);
|
||||||
|
|||||||
@@ -35,7 +35,7 @@ extern int vx_upload_kernel_bytes(vx_device_h device, const void* content, size_
|
|||||||
if (NULL == content || 0 == size)
|
if (NULL == content || 0 == size)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
uint32_t buffer_transfer_size = 4096;
|
uint32_t buffer_transfer_size = 65536;
|
||||||
uint32_t kernel_base_addr = vx_dev_caps(VX_CAPS_KERNEL_BASE_ADDR);
|
uint32_t kernel_base_addr = vx_dev_caps(VX_CAPS_KERNEL_BASE_ADDR);
|
||||||
|
|
||||||
// allocate device buffer
|
// allocate device buffer
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
|
|
||||||
CXXFLAGS += -std=c++17 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
|
CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
|
||||||
|
|
||||||
CXXFLAGS += -I../../sw/include
|
CXXFLAGS += -I../../sw/include
|
||||||
|
|
||||||
@@ -12,13 +12,13 @@ SRCS = basic.cpp
|
|||||||
all: $(PROJECT)
|
all: $(PROJECT)
|
||||||
|
|
||||||
$(PROJECT): $(SRCS)
|
$(PROJECT): $(SRCS)
|
||||||
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L../../sw/simx -lvortex -o $@
|
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L../../sw/dummy -lvortex -o $@
|
||||||
|
|
||||||
run-fpga: $(PROJECT)
|
run-fpga: $(PROJECT)
|
||||||
LD_LIBRARY_PATH=../../sw/opae:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
LD_LIBRARY_PATH=../../sw/opae:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||||
|
|
||||||
run-ase: $(PROJECT)
|
run-ase: $(PROJECT)
|
||||||
LD_LIBRARY_PATH=../../sw/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
ASE_LOG=0 LD_LIBRARY_PATH=../../sw/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||||
|
|
||||||
run-rtlsim: $(PROJECT)
|
run-rtlsim: $(PROJECT)
|
||||||
LD_LIBRARY_PATH=../../sw/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
LD_LIBRARY_PATH=../../sw/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||||
|
|||||||
Binary file not shown.
@@ -1,16 +1,20 @@
|
|||||||
#include <stdio.h>
|
#include <iostream>
|
||||||
#include <stdint.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <vortex.h>
|
#include <vortex.h>
|
||||||
|
|
||||||
|
int test = -1;
|
||||||
|
|
||||||
static void parse_args(int argc, char **argv) {
|
static void parse_args(int argc, char **argv) {
|
||||||
int c;
|
int c;
|
||||||
while ((c = getopt(argc, argv, "?")) != -1) {
|
while ((c = getopt(argc, argv, "t:h?")) != -1) {
|
||||||
switch (c) {
|
switch (c) {
|
||||||
|
case 't': {
|
||||||
|
test = atoi(optarg);
|
||||||
|
} break;
|
||||||
|
case 'h':
|
||||||
case '?': {
|
case '?': {
|
||||||
printf("Test.\n");
|
std::cout << "Test." << std::endl;
|
||||||
printf("Usage: [-h: help]\n");
|
std::cout << "Usage: [-t testno][-h: help]" << std::endl;
|
||||||
exit(0);
|
exit(0);
|
||||||
} break;
|
} break;
|
||||||
default:
|
default:
|
||||||
@@ -23,9 +27,13 @@ uint64_t shuffle(int i, uint64_t value) {
|
|||||||
return (value << i) | (value & ((1 << i)-1));;
|
return (value << i) | (value & ((1 << i)-1));;
|
||||||
}
|
}
|
||||||
|
|
||||||
int run_test(vx_buffer_h sbuf, vx_buffer_h dbuf, uint32_t address, uint64_t value, int num_blocks) {
|
int run_test_0(vx_buffer_h sbuf,
|
||||||
int err;
|
vx_buffer_h dbuf,
|
||||||
int num_failures = 0;
|
uint32_t address,
|
||||||
|
uint64_t value,
|
||||||
|
int num_blocks) {
|
||||||
|
int ret;
|
||||||
|
int errors = 0;
|
||||||
|
|
||||||
// write sbuf data
|
// write sbuf data
|
||||||
for (int i = 0; i < 8 * num_blocks; ++i) {
|
for (int i = 0; i < 8 * num_blocks; ++i) {
|
||||||
@@ -33,75 +41,162 @@ int run_test(vx_buffer_h sbuf, vx_buffer_h dbuf, uint32_t address, uint64_t valu
|
|||||||
}
|
}
|
||||||
|
|
||||||
// write buffer to local memory
|
// write buffer to local memory
|
||||||
err = vx_copy_to_dev(sbuf, address, 64 * num_blocks, 0);
|
std::cout << "write buffer to local memory" << std::endl;
|
||||||
if (err != 0)
|
ret = vx_copy_to_dev(sbuf, address, 64 * num_blocks, 0);
|
||||||
return -1;
|
if (ret != 0)
|
||||||
|
return ret;
|
||||||
|
|
||||||
// read buffer from local memory
|
// read buffer from local memory
|
||||||
err = vx_copy_from_dev(dbuf, address, 64 * num_blocks, 0);
|
std::cout << "read buffer from local memory" << std::endl;
|
||||||
if (err != 0)
|
ret = vx_copy_from_dev(dbuf, address, 64 * num_blocks, 0);
|
||||||
return -1;
|
if (ret != 0)
|
||||||
|
return ret;
|
||||||
|
|
||||||
// verify result
|
// verify result
|
||||||
|
std::cout << "verify result" << std::endl;
|
||||||
for (int i = 0; i < 8 * num_blocks; ++i) {
|
for (int i = 0; i < 8 * num_blocks; ++i) {
|
||||||
auto curr = ((uint64_t*)vx_host_ptr(dbuf))[i];
|
auto curr = ((uint64_t*)vx_host_ptr(dbuf))[i];
|
||||||
auto ref = shuffle(i, value);
|
auto ref = shuffle(i, value);
|
||||||
if (curr != ref) {
|
if (curr != ref) {
|
||||||
printf("error @ %x: actual %ld, expected %ld\n", address + 64 * i, curr, ref);
|
std::cout << "error @ " << std::hex << (address + 64 * i)
|
||||||
++num_failures;
|
<< ": actual " << curr << ", expected " << ref << std::endl;
|
||||||
|
++errors;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return num_failures;
|
|
||||||
|
if (errors != 0) {
|
||||||
|
std::cout << "Found " << errors << " errors!" << std::endl;
|
||||||
|
std::cout << "FAILED!" << std::endl;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int run_test_1(vx_device_h device, const char* program) {
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
// upload program
|
||||||
|
std::cout << "upload program" << std::endl;
|
||||||
|
ret = vx_upload_kernel_file(device, program);
|
||||||
|
if (ret != 0) {
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
// start device
|
||||||
|
std::cout << "start device" << std::endl;
|
||||||
|
ret = vx_start(device);
|
||||||
|
if (ret != 0) {
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
// wait for completion
|
||||||
|
std::cout << "wait for completion" << std::endl;
|
||||||
|
ret = vx_ready_wait(device, -1);
|
||||||
|
if (ret != 0) {
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
vx_device_h device = nullptr;
|
||||||
|
vx_buffer_h sbuf = nullptr;
|
||||||
|
vx_buffer_h dbuf = nullptr;
|
||||||
|
|
||||||
|
void cleanup() {
|
||||||
|
if (sbuf) {
|
||||||
|
vx_buf_release(sbuf);
|
||||||
|
}
|
||||||
|
if (dbuf) {
|
||||||
|
vx_buf_release(dbuf);
|
||||||
|
}
|
||||||
|
if (device) {
|
||||||
|
vx_dev_close(device);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
int err;
|
int ret;
|
||||||
int num_failures = 0;
|
|
||||||
|
|
||||||
// parse command arguments
|
// parse command arguments
|
||||||
parse_args(argc, argv);
|
parse_args(argc, argv);
|
||||||
|
|
||||||
// open device connection
|
// open device connection
|
||||||
|
std::cout << "open device connection" << std::endl;
|
||||||
vx_device_h device;
|
vx_device_h device;
|
||||||
err = vx_dev_open(&device);
|
ret = vx_dev_open(&device);
|
||||||
if (err != 0)
|
if (ret != 0)
|
||||||
return -1;
|
return ret;
|
||||||
|
|
||||||
// create source buffer
|
// create source buffer
|
||||||
vx_buffer_h sbuf;
|
std::cout << "create source buffer" << std::endl;
|
||||||
err = vx_alloc_shared_mem(device, 4096, &sbuf);
|
ret = vx_alloc_shared_mem(device, 4096, &sbuf);
|
||||||
if (err != 0) {
|
if (ret != 0) {
|
||||||
vx_dev_close(device);
|
cleanup();
|
||||||
return -1;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
// create destination buffer
|
// create destination buffer
|
||||||
vx_buffer_h dbuf;
|
std::cout << "create destination buffer" << std::endl;
|
||||||
err = vx_alloc_shared_mem(device, 4096, &dbuf);
|
ret = vx_alloc_shared_mem(device, 4096, &dbuf);
|
||||||
if (err != 0) {
|
if (ret != 0) {
|
||||||
vx_buf_release(sbuf);
|
cleanup();
|
||||||
vx_dev_close(device);
|
return ret;
|
||||||
return -1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// run tests
|
// run tests
|
||||||
num_failures += run_test(sbuf, dbuf, 0x10000000, 0x0badf00d00ff00ff, 1);
|
if (0 == test || -1 == test) {
|
||||||
num_failures += run_test(sbuf, dbuf, 0x10000000, 0x0badf00d00ff00ff, 2);
|
std::cout << "run test suite 0" << std::endl;
|
||||||
num_failures += run_test(sbuf, dbuf, 0x20000000, 0xff00ff00ff00ff00, 4);
|
|
||||||
num_failures += run_test(sbuf, dbuf, 0x20000000, 0x0badf00d40ff40ff, 8);
|
|
||||||
|
|
||||||
// releae buffers
|
ret = run_test_0(sbuf, dbuf, 0x10000000, 0x0badf00d00ff00ff, 1);
|
||||||
vx_buf_release(sbuf);
|
if (ret != 0) {
|
||||||
vx_buf_release(dbuf);
|
cleanup();
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
// close device
|
ret = run_test_0(sbuf, dbuf, 0x10000000, 0x0badf00d00ff00ff, 2);
|
||||||
vx_dev_close(device);
|
if (ret != 0) {
|
||||||
|
cleanup();
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
if (0 == num_failures) {
|
ret = run_test_0(sbuf, dbuf, 0x20000000, 0xff00ff00ff00ff00, 4);
|
||||||
printf("Test PASSED\n");
|
if (ret != 0) {
|
||||||
} else {
|
cleanup();
|
||||||
printf("Test FAILED\n");
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = run_test_0(sbuf, dbuf, 0x20000000, 0x0badf00d40ff40ff, 8);
|
||||||
|
if (ret != 0) {
|
||||||
|
cleanup();
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return num_failures;
|
if (1 == test || -1 == test) {
|
||||||
|
std::cout << "run test suite 1" << std::endl;
|
||||||
|
ret = run_test_1(device, "rv32ui-p-lw.bin");
|
||||||
|
if (ret != 0) {
|
||||||
|
cleanup();
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (2 == test || -1 == test) {
|
||||||
|
std::cout << "run test suite 1" << std::endl;
|
||||||
|
ret = run_test_1(device, "rv32ui-p-sw.bin");
|
||||||
|
if (ret != 0) {
|
||||||
|
cleanup();
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// cleanup
|
||||||
|
std::cout << "cleanup" << std::endl;
|
||||||
|
cleanup();
|
||||||
|
|
||||||
|
std::cout << "Test PASSED" << std::endl;
|
||||||
|
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|||||||
BIN
driver/tests/basic/rv32ui-p-lw.bin
Normal file
BIN
driver/tests/basic/rv32ui-p-lw.bin
Normal file
Binary file not shown.
BIN
driver/tests/basic/rv32ui-p-sw.bin
Normal file
BIN
driver/tests/basic/rv32ui-p-sw.bin
Normal file
Binary file not shown.
@@ -40,13 +40,13 @@ kernel.elf: $(SRCS)
|
|||||||
$(VX_CC) $(VX_CFLAGS) $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_SRCS) -I$(VX_RT_PATH) -o kernel.elf
|
$(VX_CC) $(VX_CFLAGS) $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_SRCS) -I$(VX_RT_PATH) -o kernel.elf
|
||||||
|
|
||||||
$(PROJECT): $(SRCS)
|
$(PROJECT): $(SRCS)
|
||||||
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L../../sw/simx -lvortex -o $@
|
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L../../sw/dummy -lvortex -o $@
|
||||||
|
|
||||||
run-fpga: $(PROJECT)
|
run-fpga: $(PROJECT)
|
||||||
LD_LIBRARY_PATH=../../sw/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16
|
LD_LIBRARY_PATH=../../sw/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16
|
||||||
|
|
||||||
run-ase: $(PROJECT)
|
run-ase: $(PROJECT)
|
||||||
LD_LIBRARY_PATH=../../sw/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16
|
ASE_LOG=0 LD_LIBRARY_PATH=../../sw/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16
|
||||||
|
|
||||||
run-rtlsim: $(PROJECT)
|
run-rtlsim: $(PROJECT)
|
||||||
LD_LIBRARY_PATH=../../sw/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16
|
LD_LIBRARY_PATH=../../sw/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16
|
||||||
|
|||||||
Binary file not shown.
@@ -1,6 +1,5 @@
|
|||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <unistd.h>
|
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <vortex.h>
|
#include <vortex.h>
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
@@ -40,21 +39,77 @@ static void parse_args(int argc, char **argv) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
vx_device_h device;
|
int run_test(vx_device_h device,
|
||||||
vx_buffer_h buffer;
|
vx_buffer_h buffer,
|
||||||
|
const kernel_arg_t& kernel_arg,
|
||||||
|
uint32_t buf_size,
|
||||||
|
uint32_t num_points) {
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
// start device
|
||||||
|
std::cout << "start device" << std::endl;
|
||||||
|
ret = vx_start(device);
|
||||||
|
if (ret != 0) {
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
// wait for completion
|
||||||
|
std::cout << "wait for completion" << std::endl;
|
||||||
|
ret = vx_ready_wait(device, -1);
|
||||||
|
if (ret != 0) {
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
// flush the destination buffer caches
|
||||||
|
std::cout << "flush the destination buffer caches" << std::endl;
|
||||||
|
ret = vx_flush_caches(device, kernel_arg.dst_ptr, buf_size);
|
||||||
|
if (ret != 0) {
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
// download destination buffer
|
||||||
|
std::cout << "download destination buffer" << std::endl;
|
||||||
|
ret = vx_copy_from_dev(buffer, kernel_arg.dst_ptr, buf_size, 0);
|
||||||
|
if (ret != 0) {
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
// verify result
|
||||||
|
std::cout << "verify result" << std::endl;
|
||||||
|
{
|
||||||
|
int errors = 0;
|
||||||
|
auto buf_ptr = (int*)vx_host_ptr(buffer);
|
||||||
|
for (uint32_t i = 0; i < num_points; ++i) {
|
||||||
|
int ref = i * i;
|
||||||
|
int cur = buf_ptr[i];
|
||||||
|
if (cur != ref) {
|
||||||
|
++errors;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (errors != 0) {
|
||||||
|
std::cout << "Found " << errors << " errors!" << std::endl;
|
||||||
|
std::cout << "FAILED!" << std::endl;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
vx_device_h device = nullptr;
|
||||||
|
vx_buffer_h buffer = nullptr;
|
||||||
|
|
||||||
void cleanup() {
|
void cleanup() {
|
||||||
if (device) {
|
|
||||||
vx_dev_close(device);
|
|
||||||
}
|
|
||||||
if (buffer) {
|
if (buffer) {
|
||||||
vx_buf_release(buffer);
|
vx_buf_release(buffer);
|
||||||
}
|
}
|
||||||
|
if (device) {
|
||||||
|
vx_dev_close(device);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
int ret;
|
int ret;
|
||||||
int errors = 0;
|
|
||||||
size_t value;
|
size_t value;
|
||||||
kernel_arg_t kernel_arg;
|
kernel_arg_t kernel_arg;
|
||||||
|
|
||||||
@@ -79,14 +134,14 @@ int main(int argc, char *argv[]) {
|
|||||||
std::cout << "open device connection" << std::endl;
|
std::cout << "open device connection" << std::endl;
|
||||||
ret = vx_dev_open(&device);
|
ret = vx_dev_open(&device);
|
||||||
if (ret != 0)
|
if (ret != 0)
|
||||||
return -1;
|
return ret;
|
||||||
|
|
||||||
// upload program
|
// upload program
|
||||||
std::cout << "upload program" << std::endl;
|
std::cout << "upload program" << std::endl;
|
||||||
ret = vx_upload_kernel_file(device, program_file);
|
ret = vx_upload_kernel_file(device, program_file);
|
||||||
if (ret != 0) {
|
if (ret != 0) {
|
||||||
cleanup();
|
cleanup();
|
||||||
return -1;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
// allocate device memory
|
// allocate device memory
|
||||||
@@ -95,21 +150,21 @@ int main(int argc, char *argv[]) {
|
|||||||
ret = vx_alloc_dev_mem(device, buf_size, &value);
|
ret = vx_alloc_dev_mem(device, buf_size, &value);
|
||||||
if (ret != 0) {
|
if (ret != 0) {
|
||||||
cleanup();
|
cleanup();
|
||||||
return -1;
|
return ret;
|
||||||
}
|
}
|
||||||
kernel_arg.src0_ptr = value;
|
kernel_arg.src0_ptr = value;
|
||||||
|
|
||||||
ret = vx_alloc_dev_mem(device, buf_size, &value);
|
ret = vx_alloc_dev_mem(device, buf_size, &value);
|
||||||
if (ret != 0) {
|
if (ret != 0) {
|
||||||
cleanup();
|
cleanup();
|
||||||
return -1;
|
return ret;
|
||||||
}
|
}
|
||||||
kernel_arg.src1_ptr = value;
|
kernel_arg.src1_ptr = value;
|
||||||
|
|
||||||
ret = vx_alloc_dev_mem(device, buf_size, &value);
|
ret = vx_alloc_dev_mem(device, buf_size, &value);
|
||||||
if (ret != 0) {
|
if (ret != 0) {
|
||||||
cleanup();
|
cleanup();
|
||||||
return -1;
|
return ret;
|
||||||
}
|
}
|
||||||
kernel_arg.dst_ptr = value;
|
kernel_arg.dst_ptr = value;
|
||||||
|
|
||||||
@@ -119,7 +174,7 @@ int main(int argc, char *argv[]) {
|
|||||||
ret = vx_alloc_shared_mem(device, alloc_size, &buffer);
|
ret = vx_alloc_shared_mem(device, alloc_size, &buffer);
|
||||||
if (ret != 0) {
|
if (ret != 0) {
|
||||||
cleanup();
|
cleanup();
|
||||||
return -1;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
// populate source buffer values
|
// populate source buffer values
|
||||||
@@ -137,13 +192,13 @@ int main(int argc, char *argv[]) {
|
|||||||
ret = vx_copy_to_dev(buffer, kernel_arg.src0_ptr, buf_size, 0);
|
ret = vx_copy_to_dev(buffer, kernel_arg.src0_ptr, buf_size, 0);
|
||||||
if (ret != 0) {
|
if (ret != 0) {
|
||||||
cleanup();
|
cleanup();
|
||||||
return -1;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = vx_copy_to_dev(buffer, kernel_arg.src1_ptr, buf_size, 0);
|
ret = vx_copy_to_dev(buffer, kernel_arg.src1_ptr, buf_size, 0);
|
||||||
if (ret != 0) {
|
if (ret != 0) {
|
||||||
cleanup();
|
cleanup();
|
||||||
return -1;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
// upload kernel argument
|
// upload kernel argument
|
||||||
@@ -158,117 +213,29 @@ int main(int argc, char *argv[]) {
|
|||||||
ret = vx_copy_to_dev(buffer, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0);
|
ret = vx_copy_to_dev(buffer, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0);
|
||||||
if (ret != 0) {
|
if (ret != 0) {
|
||||||
cleanup();
|
cleanup();
|
||||||
return -1;
|
return ret;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// start device
|
// run tests
|
||||||
std::cout << "start device" << std::endl;
|
std::cout << "run tests" << std::endl;
|
||||||
ret = vx_start(device);
|
ret = run_test(device, buffer, kernel_arg, buf_size, num_points);
|
||||||
if (ret != 0) {
|
if (ret != 0) {
|
||||||
cleanup();
|
cleanup();
|
||||||
return -1;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
// wait for completion
|
ret = run_test(device, buffer, kernel_arg, buf_size, num_points);
|
||||||
std::cout << "wait for completion" << std::endl;
|
|
||||||
ret = vx_ready_wait(device, -1);
|
|
||||||
if (ret != 0) {
|
if (ret != 0) {
|
||||||
cleanup();
|
cleanup();
|
||||||
return -1;
|
return ret;
|
||||||
}
|
|
||||||
|
|
||||||
// flush the destination buffer caches
|
|
||||||
std::cout << "flush the destination buffer caches" << std::endl;
|
|
||||||
ret = vx_flush_caches(device, kernel_arg.dst_ptr, buf_size);
|
|
||||||
if (ret != 0) {
|
|
||||||
cleanup();
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// download destination buffer
|
|
||||||
std::cout << "download destination buffer" << std::endl;
|
|
||||||
ret = vx_copy_from_dev(buffer, kernel_arg.dst_ptr, buf_size, 0);
|
|
||||||
if (ret != 0) {
|
|
||||||
cleanup();
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// verify result
|
|
||||||
std::cout << "verify result" << std::endl;
|
|
||||||
{
|
|
||||||
auto buf_ptr = (int*)vx_host_ptr(buffer);
|
|
||||||
for (uint32_t i = 0; i < num_points; ++i) {
|
|
||||||
int ref = i * i;
|
|
||||||
int cur = buf_ptr[i];
|
|
||||||
if (cur != ref) {
|
|
||||||
++errors;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (errors != 0) {
|
|
||||||
printf("Found %d errors!\n", errors);
|
|
||||||
printf("FAILED!\n");
|
|
||||||
cleanup();
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// start device
|
|
||||||
std::cout << "start device" << std::endl;
|
|
||||||
ret = vx_start(device);
|
|
||||||
if (ret != 0) {
|
|
||||||
cleanup();
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// wait for completion
|
|
||||||
std::cout << "wait for completion" << std::endl;
|
|
||||||
ret = vx_ready_wait(device, -1);
|
|
||||||
if (ret != 0) {
|
|
||||||
cleanup();
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// flush the destination buffer caches
|
|
||||||
std::cout << "flush the destination buffer caches" << std::endl;
|
|
||||||
ret = vx_flush_caches(device, kernel_arg.dst_ptr, buf_size);
|
|
||||||
if (ret != 0) {
|
|
||||||
cleanup();
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// download destination buffer
|
|
||||||
std::cout << "download destination buffer" << std::endl;
|
|
||||||
ret = vx_copy_from_dev(buffer, kernel_arg.dst_ptr, buf_size, 0);
|
|
||||||
if (ret != 0) {
|
|
||||||
cleanup();
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// verify result
|
|
||||||
std::cout << "verify result" << std::endl;
|
|
||||||
{
|
|
||||||
auto buf_ptr = (int*)vx_host_ptr(buffer);
|
|
||||||
for (uint32_t i = 0; i < num_points; ++i) {
|
|
||||||
int ref = i * i;
|
|
||||||
int cur = buf_ptr[i];
|
|
||||||
if (cur != ref) {
|
|
||||||
++errors;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// cleanup
|
// cleanup
|
||||||
std::cout << "cleanup" << std::endl;
|
std::cout << "cleanup" << std::endl;
|
||||||
cleanup();
|
cleanup();
|
||||||
|
|
||||||
if (0 == errors) {
|
std::cout << "PASSED!" << std::endl;
|
||||||
printf("PASSED!\n");
|
|
||||||
} else {
|
|
||||||
printf("Found %d errors!\n", errors);
|
|
||||||
printf("FAILED!\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
return errors;
|
return 0;
|
||||||
}
|
}
|
||||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user