Merged in harp-iset and fixed instruction support.

This commit is contained in:
chad
2014-09-26 07:50:31 -04:00
9 changed files with 175 additions and 68 deletions

View File

@@ -1,7 +1,7 @@
################################################################################
# HARPtools by Chad D. Kersey, Summer 2011 #
################################################################################
CXXFLAGS ?= -fPIC -O3 # -g -DUSE_DEBUG=3
CXXFLAGS ?= -fPIC -O3 #-g -DUSE_DEBUG=3
LDLIBS ?= -pthread
PREFIX ?= /usr/local

View File

@@ -46,6 +46,8 @@ Core::Core(const ArchDef &a, Decoder &d, MemoryUnit &mem, Word id) :
for (Word i = 0; i < a.getNPRegs(); ++i) {
pred[j].push_back(Reg<bool>(id, regNum++));
}
tmask.push_back(true);
}
/* Set initial register contents. */
@@ -131,6 +133,11 @@ void Core::step() {
D_RAW(" (");
for (unsigned i = 0; i < shadowPReg.size(); ++i) D_RAW(shadowPReg[i]);
D_RAW(')' << endl);
D(3, "Thread mask:");
D_RAW(" ");
for (unsigned i = 0; i < tmask.size(); ++i) D_RAW(tmask[i] << ' ');
D_RAW(endl);
}
#endif

View File

@@ -6,6 +6,7 @@
#include <string>
#include <vector>
#include <stack>
#include "types.h"
#include "archdef.h"
@@ -26,7 +27,7 @@ namespace Harp {
Reg &operator=(T r) { val = r; doWrite(); return *this; }
operator T() { doRead(); return val; }
operator T() const { doRead(); return val; }
void trunc(Size s) {
Word mask((~0ull >> (sizeof(Word)-s)*8));
@@ -39,14 +40,36 @@ namespace Harp {
#ifdef EMU_INSTRUMENTATION
/* Access size here is 8, representing the register size of 64-bit cores. */
void doWrite() { reg_doWrite(cpuId, regNum); }
void doRead() { reg_doRead(cpuId, regNum); }
void doWrite() const { reg_doWrite(cpuId, regNum); }
void doRead() const { reg_doRead(cpuId, regNum); }
#else
void doWrite() {}
void doRead() {}
void doWrite() const {}
void doRead() const {}
#endif
};
// Entry in the IPDOM (immediate post-dominator) stack used to track thread
// divergence. Two kinds of entry exist, distinguished by `fallThrough`:
//
//  * fallThrough == false: records the threads still to be run after the
//    currently selected ones finish. `tmask` holds the mask for those threads
//    and `pc` the address the core's PC is set to when the entry is popped.
//  * fallThrough == true: records a thread mask to restore when popped; the
//    PC is left alone and `pc` is unused (kept at 0).
struct DomStackEntry {
  // Build a divergence entry from predicate register `p` of every thread.
  //   p  - index of the predicate register that selects the threads.
  //   m  - per-thread predicate register files, indexed m[thread][preg].
  //   pc - address stored in the entry for use when it is popped.
  // tmask[i] becomes the logical NOT of thread i's predicate value.
  //
  // NOTE(review): this constructor writes a trace line to stdout
  // unconditionally; it looks like leftover debugging output and probably
  // belongs behind the project's debug macros -- confirm before removing.
  DomStackEntry(
    unsigned p, const std::vector<std::vector<Reg<bool> > >& m, Word pc
  ): fallThrough(false), pc(pc)
  {
    tmask.reserve(m.size());
    std::cout << "New DomStackEntry:";
    for (unsigned i = 0; i < m.size(); ++i) {
      // Read the register exactly once so that the trace output does not
      // generate a second doRead() instrumentation event per thread.
      const bool predVal = m[i][p];
      tmask.push_back(!predVal);
      std::cout << ' ' << predVal;
    }
    std::cout << std::endl;
  }

  // Build a fall-through entry that only remembers a thread mask.
  // Deliberately not `explicit`: callers push a std::vector<bool> directly
  // onto the stack and rely on the implicit conversion. `pc` is zeroed so
  // that copying the entry never reads an indeterminate value.
  DomStackEntry(const std::vector<bool> &tmask):
    fallThrough(true), tmask(tmask), pc(0) {}

  bool fallThrough;         // true: restore tmask only; false: also use pc.
  std::vector<bool> tmask;  // Thread mask stored in this entry.
  Word pc;                  // Stored address; meaningful only if !fallThrough.
};
class Core {
public:
Core(const ArchDef &a, Decoder &d, MemoryUnit &mem, Word id=0);
@@ -67,6 +90,9 @@ namespace Harp {
std::vector<std::vector<Reg<Word> > > reg;
std::vector<std::vector<Reg<bool> > > pred;
std::vector<bool> tmask;
std::stack<DomStackEntry> domStack;
std::vector<Word> shadowReg;
std::vector<bool> shadowPReg;

View File

@@ -30,10 +30,11 @@ namespace Harp {
JALI, JALR, JMPI, JMPR, CLONE, JALIS, JALRS,
JMPRT, LD, ST, LDI, RTOP, ANDP, ORP, XORP, NOTP, ISNEG,
ISZERO, HALT, TRAP, JMPRU, SKEP, RETI, TLBRM,
ITOF, FTOI, FADD, FSUB, FMUL, FDIV, FNEG };
ITOF, FTOI, FADD, FSUB, FMUL, FDIV, FNEG, WSPAWN,
SPLIT, JOIN };
enum ArgClass {
AC_NONE, AC_2REG, AC_2IMM, AC_3REG, AC_3PREG, AC_3IMM, AC_3REGSRC,
AC_1IMM, AC_1REG, AC_3IMMSRC, AC_PREG_REG, AC_2PREG
AC_1IMM, AC_1REG, AC_3IMMSRC, AC_PREG_REG, AC_2PREG, AC_2REGSRC
};
enum InstType {
ITYPE_NULL, ITYPE_INTBASIC, ITYPE_INTMUL, ITYPE_INTDIV, ITYPE_STACK, ITYPE_BR,

View File

@@ -80,6 +80,9 @@ Instruction::InstTableEntry Instruction::instTable[] = {
{"fmul", false, false, false, false, AC_3REG, ITYPE_FPMUL },
{"fdiv", false, false, false, false, AC_3REG, ITYPE_FPDIV },
{"fneg", false, false, false, false, AC_2REG, ITYPE_FPBASIC },
{"wspawn", false, false, true, false, AC_2REGSRC, ITYPE_NULL },
{"split", false, false, true, false, AC_NONE, ITYPE_NULL },
{"join", false, false, true, false, AC_NONE, ITYPE_NULL },
{NULL,false,false,false,false,AC_NONE,ITYPE_NULL}/////// End of table.
};
@@ -118,12 +121,16 @@ void Instruction::executeOn(Core &c) {
return;
}
/* Also throw exceptions on divergent branches. */
if (predicated && instTable[op].controlFlow) {
bool p0 = c.pred[0][pred];
for (Size t = 1; t < c.activeThreads; t++) {
if (c.pred[t][pred] != p0) throw DivergentBranchException();
/* Also throw exceptions on non-masked divergent branches. */
if (instTable[op].controlFlow) {
Size t, count, active;
for (t = 0, count = 0, active = 0; t < c.activeThreads; ++t) {
if ((!predicated || c.pred[t][pred]) && c.tmask[t]) ++count;
if (c.tmask[t]) ++active;
}
if (count != 0 && count != active)
throw DivergentBranchException();
}
Size nextActiveThreads = c.activeThreads;
@@ -132,8 +139,12 @@ void Instruction::executeOn(Core &c) {
for (Size t = 0; t < c.activeThreads; t++) {
vector<Reg<Word> > &reg(c.reg[t]);
vector<Reg<bool> > &pReg(c.pred[t]);
stack<DomStackEntry> &domStack(c.domStack);
if (predicated && !pReg[pred]) continue;
// If this thread is masked out, don't execute the instruction, unless it's
// a split or join.
if (((predicated && !pReg[pred]) || !c.tmask[t]) &&
op != SPLIT && op != JOIN) continue;
Word memAddr;
switch (op) {
@@ -241,6 +252,10 @@ void Instruction::executeOn(Core &c) {
break;
case NOTP: pReg[pdest] = !(pReg[psrc[0]]);
break;
case ANDP: pReg[pdest] = pReg[psrc[0]] & pReg[psrc[1]];
break;
case ORP: pReg[pdest] = pReg[psrc[0]] | pReg[psrc[1]];
break;
case ISNEG: pReg[pdest] = (1ll<<(wordSz*8 - 1))&reg[rsrc[0]];
break;
case HALT: c.activeThreads = 0;
@@ -283,6 +298,23 @@ void Instruction::executeOn(Core &c) {
case FDIV: reg[rdest] = Float(double(Float(reg[rsrc[0]], wordSz)) /
double(Float(reg[rsrc[1]], wordSz)),wordSz);
break;
case SPLIT:if (t == 0) {
// TODO: if mask becomes all-zero, fall through
DomStackEntry e(pred, c.pred, c.pc);
c.domStack.push(c.tmask);
c.domStack.push(e);
for (unsigned i = 0; i < e.tmask.size(); ++i)
c.tmask[i] = !e.tmask[i];
}
break;
case JOIN: if (t == 0) {
// TODO: if mask becomes all-zero, fall through
if (!c.domStack.top().fallThrough)
c.pc = c.domStack.top().pc;
c.tmask = c.domStack.top().tmask;
c.domStack.pop();
}
break;
default:
cout << "ERROR: Unsupported instruction: " << *this << "\n";
exit(1);

View File

@@ -5,12 +5,14 @@ HARPDIS = ../harptool -D
4BARCH = 4b16/16/2
all: simple.bin sieve.bin 2thread.bin simple.4b.bin sieve.4b.bin 2thread.4b.bin bubble.bin bubble.4b.bin dotprod.bin dotprod.4b.bin matmul.bin matmul.4b.bin \
matmul-mt.s lfsr.bin
matmul-mt.s lfsr.bin diverge.bin
run: simple.out sieve.out 2thread.out simple.4b.out sieve.4b.out 2thread.4b.out bubble.out bubble.4b.out dotprod.out dotprod.4b.out matmul.out matmul.4b.out\
matmul-mt.out lfsr.4b.out lfsr.out
matmul-mt.out lfsr.4b.out lfsr.out diverge.out
disas: simple.d sieve.d 2thread.d simple.4b.d sieve.4b.d 2thread.4b.d bubble.d bubble.4b.d dotprod.d dotprod.4b.d matmul.d matmul.4b.d matmul-mt.d lfsr.d
disas: simple.d sieve.d 2thread.d simple.4b.d sieve.4b.d 2thread.4b.d bubble.d \
bubble.4b.d dotprod.d dotprod.4b.d matmul.d matmul.4b.d matmul-mt.d lfsr.d \
diverge.d
%.4b.out : %.4b.bin
$(HARPEM) -a $(4BARCH) -c $< > $@
@@ -18,56 +20,11 @@ disas: simple.d sieve.d 2thread.d simple.4b.d sieve.4b.d 2thread.4b.d bubble.d b
%.out : %.bin
$(HARPEM) -c $< > $@
2thread.bin : boot.HOF lib.HOF 2thread.HOF
$(HARPLD) -o 2thread.bin $^
2thread.4b.bin : boot.4b.HOF lib.4b.HOF 2thread.4b.HOF
$(HARPLD) --arch $(4BARCH) -o 2thread.4b.bin $^
bubble.bin : boot.HOF lib.HOF bubble.HOF
$(HARPLD) -o bubble.bin $^
bubble.4b.bin : boot.4b.HOF lib.4b.HOF bubble.4b.HOF
$(HARPLD) --arch $(4BARCH) -o bubble.4b.bin $^
simple.bin : boot.HOF lib.HOF simple.HOF
$(HARPLD) -o $@ $^
sieve.bin : boot.HOF lib.HOF sieve.HOF
$(HARPLD) -o $@ $^
lfsr.bin : boot.HOF lib.HOF lfsr.HOF
$(HARPLD) -o $@ $^
dotprod.bin : boot.HOF lib.HOF dotprod.HOF
$(HARPLD) -o $@ $^
matmul.bin : boot.HOF lib.HOF matmul.HOF
$(HARPLD) -o $@ $^
matmul-mt.bin : boot.HOF lib.HOF matmul-mt.HOF
$(HARPLD) -o $@ $^
simple.4b.bin : boot.4b.HOF lib.4b.HOF simple.4b.HOF
%.4b.bin : boot.4b.HOF lib.4b.HOF %.4b.HOF
$(HARPLD) --arch $(4BARCH) -o $@ $^
sieve.4b.bin : boot.4b.HOF lib.4b.HOF sieve.4b.HOF
$(HARPLD) --arch $(4BARCH) -o $@ $^
dotprod.4b.bin : boot.4b.HOF lib.4b.HOF dotprod.4b.HOF
$(HARPLD) --arch $(4BARCH) -o $@ $^
matmul.4b.bin : boot.4b.HOF lib.4b.HOF matmul.4b.HOF
$(HARPLD) --arch $(4BARCH) -o $@ $^
lfsr.4b.bin : boot.4b.HOF lib.4b.HOF lfsr.4b.HOF
$(HARPLD) --arch $(4BARCH) -o $@ $^
%.4b.bin : %.4b.HOF
$(HARPLD) --arch $(4BARCH) -o $@ $<
%.bin : %.HOF
$(HARPLD) -o $@ $<
%.bin : boot.HOF lib.HOF %.HOF
$(HARPLD) -o $@ $^
%.4b.HOF : %.s
$(HARPAS) --arch $(4BARCH) -o $@ $<

64
src/test/diverge.s Normal file
View File

@@ -0,0 +1,64 @@
/*******************************************************************************
Harptools by Chad D. Kersey, Summer 2011
********************************************************************************
Sample HARP assembly program.
*******************************************************************************/
/* Divergent branch: test immediate postdominator branch divergence support. */
/* Number of threads used by this test; also the loop bound and the number of
   results printed. */
.def THREADS 8
.align 4096
.perm x
.entry
.global
entry:
/* Start-up loop: executes `clone %r0` for %r0 = 1 .. THREADS-1, leaving when
   THREADS - %r0 reaches zero. Presumably each clone sets up the register
   state of thread number %r0 -- TODO confirm against the CLONE semantics. */
ldi %r0, #1
ldi %r1, THREADS
sloop: clone %r0
addi %r0, %r0, #1
sub %r2, %r1, %r0
rtop @p0, %r2
@p0 ? jmpi sloop
/* %r0 is reset to 0 before the call. jalis presumably calls dthread with
   %r1 (= THREADS) active threads and the return address in %r5 -- TODO
   confirm; dthread returns through `jmprt %r5`. */
ldi %r0, #0
jalis %r5, %r1, dthread;
/* Print loop: walk `array` one word at a time, loading each element into %r7
   and calling printdec, until the byte offset in %r0 equals __WORD * THREADS. */
ldi %r0, #0
ldi %r1, (__WORD * THREADS)
ploop: ld %r7, %r0, array
jali %r5, printdec
addi %r0, %r0, __WORD
sub %r7, %r1, %r0
rtop @p0, %r7
@p0 ? jmpi ploop
trap;
/* dthread: body of the divergence test. %r1 counts down 10 iterations and
   %r2 accumulates the result. %r0 is assumed to hold a distinct per-thread
   index here -- TODO confirm. */
dthread: ldi %r1, #10
ldi %r2, #0
/* @p1 is derived from the low bit of %r0, so threads with odd and even %r0
   take different paths below. */
loop: andi %r3, %r0, #1
rtop @p1, %r3
/* split/join bracket the divergent region: threads with @p1 set branch to
   `else` and subtract %r0, the others fall through and add %r0. Both paths
   meet again at `after`. */
@p1 ? split
@p1 ? jmpi else
add %r2, %r2, %r0
jmpi after
else: sub %r2, %r2, %r0
after: join
/* Loop control: decrement the iteration counter and repeat while non-zero. */
subi %r1, %r1, #1
rtop @p0, %r1
@p0 ? jmpi loop
/* Store this thread's accumulator at array + (%r0 << `__WORD). Presumably
   `__WORD is log2 of the word size, making this the %r0-th word -- TODO
   confirm. */
shli %r4, %r0, (`__WORD)
st %r2, %r4, array
jmprt %r5;
/* Result buffer written by dthread and read back by the print loop. */
.align 4096
array: .space 4096

View File

@@ -11,13 +11,22 @@
.perm x
.entry
.global
entry: ldi %r7, hello
entry: ldi %r0, wentry
ldi %r7, hello2
/* wspawn %r0, %r7 */
ldi %r0, hello1
wentry: ori %r7, %r0, #0
jali %r5, puts
trap; /* All traps currently cause a halt. */
.perm rw
hello:
hello1:
.byte 0x22
.string "Harp!\" is how a harp seal says hello!\n"
hello2:
.string "This is a string for another thread!\n"