Merge branch 'harmonica-iset' of https://github.com/cdkersey/harptool into harmonica-iset

This commit is contained in:
chad
2014-09-26 07:22:33 -04:00
8 changed files with 158 additions and 59 deletions

View File

@@ -8,7 +8,7 @@ PREFIX ?= /usr/local
LIB_OBJS=args.o obj.o mem.o core.o instruction.o enc.o util.o lex.yy.o
all: harptool libharplib.so libharplib.a libqsim-harp.so
all: harptool libharplib.so libharplib.a # libqsim-harp.so
# Use -static so we don't have to install the library in order to just run
# Harptool.

View File

@@ -46,6 +46,8 @@ Core::Core(const ArchDef &a, Decoder &d, MemoryUnit &mem, Word id) :
for (Word i = 0; i < a.getNPRegs(); ++i) {
pred[j].push_back(Reg<bool>(id, regNum++));
}
tmask.push_back(true);
}
/* Set initial register contents. */
@@ -131,6 +133,11 @@ void Core::step() {
D_RAW(" (");
for (unsigned i = 0; i < shadowPReg.size(); ++i) D_RAW(shadowPReg[i]);
D_RAW(')' << endl);
D(3, "Thread mask:");
D_RAW(" ");
for (unsigned i = 0; i < tmask.size(); ++i) D_RAW(tmask[i] << ' ');
D_RAW(endl);
}
#endif

View File

@@ -6,6 +6,7 @@
#include <string>
#include <vector>
#include <stack>
#include "types.h"
#include "archdef.h"
@@ -26,7 +27,7 @@ namespace Harp {
Reg &operator=(T r) { val = r; doWrite(); return *this; }
operator T() { doRead(); return val; }
operator T() const { doRead(); return val; }
void trunc(Size s) {
Word mask((~0ull >> (sizeof(Word)-s)*8));
@@ -39,14 +40,36 @@ namespace Harp {
#ifdef EMU_INSTRUMENTATION
/* Access size here is 8, representing the register size of 64-bit cores. */
void doWrite() { reg_doWrite(cpuId, regNum); }
void doRead() { reg_doRead(cpuId, regNum); }
void doWrite() const { reg_doWrite(cpuId, regNum); }
void doRead() const { reg_doRead(cpuId, regNum); }
#else
void doWrite() {}
void doRead() {}
void doWrite() const {}
void doRead() const {}
#endif
};
// Entry in the IPDOM (immediate post-dominator) stack.
//
// Two kinds of entry exist, distinguished by `fallThrough`:
//   * divergence entry (fallThrough == false): carries a thread mask derived
//     from a predicate register and a PC. When JOIN pops such an entry it
//     installs the mask and sets the core's PC to `pc`.
//   * fall-through entry (fallThrough == true): carries only a saved thread
//     mask. When JOIN pops it the mask is restored and the PC is left alone.
struct DomStackEntry {
  // Builds a divergence entry.
  //   p  - index of the predicate register to sample in every lane.
  //   m  - per-lane predicate register files; m[i][p] is lane i's predicate.
  //   pc - program counter stored in the entry.
  // The stored mask is the logical NOT of the sampled predicate, so lane i is
  // set in `tmask` exactly when m[i][p] is false. The predicate values (not
  // the stored mask) are echoed to std::cout as a trace line.
  // Initialisers are listed in member declaration order, which is the order
  // in which C++ actually runs them.
  DomStackEntry(
    unsigned p, const std::vector<std::vector<Reg<bool> > >& m, Word pc
  ): fallThrough(false), tmask(), pc(pc)
  {
    tmask.reserve(m.size());
    std::cout << "New DomStackEntry:";
    for (unsigned i = 0; i < m.size(); ++i) {
      tmask.push_back(!bool(m[i][p]));
      std::cout << ' ' << bool(m[i][p]);
    }
    std::cout << std::endl;
  }

  // Builds a fall-through entry that only remembers `tmask`.
  // Deliberately NOT explicit: the SPLIT handler pushes a bare
  // std::vector<bool> onto the stack and relies on this implicit conversion.
  // `pc` is unused for fall-through entries but is zeroed so that copying the
  // entry into the stack never reads an indeterminate value.
  DomStackEntry(const std::vector<bool> &tmask):
    fallThrough(true), tmask(tmask), pc(0) {}

  bool fallThrough;          // true: restore mask only; false: also set PC.
  std::vector<bool> tmask;   // thread mask installed when the entry is popped.
  Word pc;                   // PC installed on pop; meaningful only when
                             // fallThrough is false.
};
class Core {
public:
Core(const ArchDef &a, Decoder &d, MemoryUnit &mem, Word id=0);
@@ -67,6 +90,9 @@ namespace Harp {
std::vector<std::vector<Reg<Word> > > reg;
std::vector<std::vector<Reg<bool> > > pred;
std::vector<bool> tmask;
std::stack<DomStackEntry> domStack;
std::vector<Word> shadowReg;
std::vector<bool> shadowPReg;

View File

@@ -30,7 +30,8 @@ namespace Harp {
JALI, JALR, JMPI, JMPR, CLONE, JALIS, JALRS,
JMPRT, LD, ST, LDI, RTOP, ANDP, ORP, XORP, NOTP, ISNEG,
ISZERO, HALT, TRAP, JMPRU, SKEP, RETI, TLBRM,
ITOF, FTOI, FADD, FSUB, FMUL, FDIV, FNEG, WSPAWN };
ITOF, FTOI, FADD, FSUB, FMUL, FDIV, FNEG, WSPAWN,
SPLIT, JOIN };
enum ArgClass {
AC_NONE, AC_2REG, AC_2IMM, AC_3REG, AC_3PREG, AC_3IMM, AC_3REGSRC,
AC_1IMM, AC_1REG, AC_3IMMSRC, AC_PREG_REG, AC_2PREG, AC_2REGSRC

View File

@@ -81,6 +81,8 @@ Instruction::InstTableEntry Instruction::instTable[] = {
{"fdiv", false, false, false, false, AC_3REG, ITYPE_FPDIV },
{"fneg", false, false, false, false, AC_2REG, ITYPE_FPBASIC },
{"wspawn", false, false, true, false, AC_2REGSRC, ITYPE_NULL },
{"split", false, false, true, false, AC_NONE, ITYPE_NULL },
{"join", false, false, true, false, AC_NONE, ITYPE_NULL },
{NULL,false,false,false,false,AC_NONE,ITYPE_NULL}/////// End of table.
};
@@ -119,12 +121,16 @@ void Instruction::executeOn(Core &c) {
return;
}
/* Also throw exceptions on divergent branches. */
if (predicated && instTable[op].controlFlow) {
bool p0 = c.pred[0][pred];
for (Size t = 1; t < c.activeThreads; t++) {
if (c.pred[t][pred] != p0) throw DivergentBranchException();
/* Also throw exceptions on non-masked divergent branches. */
if (instTable[op].controlFlow) {
Size t, count, active;
for (t = 0, count = 0, active = 0; t < c.activeThreads; ++t) {
if ((!predicated || c.pred[t][pred]) && c.tmask[t]) ++count;
if (c.tmask[t]) ++active;
}
if (count != 0 && count != active)
throw DivergentBranchException();
}
Size nextActiveThreads = c.activeThreads;
@@ -133,8 +139,12 @@ void Instruction::executeOn(Core &c) {
for (Size t = 0; t < c.activeThreads; t++) {
vector<Reg<Word> > &reg(c.reg[t]);
vector<Reg<bool> > &pReg(c.pred[t]);
stack<DomStackEntry> &domStack(c.domStack);
if (predicated && !pReg[pred]) continue;
// If this thread is masked out, don't execute the instruction, unless it's
// a split or join.
if (((predicated && !pReg[pred]) || !c.tmask[t]) &&
op != SPLIT && op != JOIN) continue;
Word memAddr;
switch (op) {
@@ -288,6 +298,23 @@ void Instruction::executeOn(Core &c) {
case FDIV: reg[rdest] = Float(double(Float(reg[rsrc[0]], wordSz)) /
double(Float(reg[rsrc[1]], wordSz)),wordSz);
break;
case SPLIT:if (t == 0) {
// TODO: if mask becomes all-zero, fall through
DomStackEntry e(pred, c.pred, c.pc);
c.domStack.push(c.tmask);
c.domStack.push(e);
for (unsigned i = 0; i < e.tmask.size(); ++i)
c.tmask[i] = !e.tmask[i];
}
break;
case JOIN: if (t == 0) {
// TODO: if mask becomes all-zero, fall through
if (!c.domStack.top().fallThrough)
c.pc = c.domStack.top().pc;
c.tmask = c.domStack.top().tmask;
c.domStack.pop();
}
break;
default:
cout << "ERROR: Unsupported instruction: " << *this << "\n";
exit(1);

View File

@@ -5,12 +5,14 @@ HARPDIS = ../harptool -D
4BARCH = 4b16/16/2
all: simple.bin sieve.bin 2thread.bin simple.4b.bin sieve.4b.bin 2thread.4b.bin bubble.bin bubble.4b.bin dotprod.bin dotprod.4b.bin matmul.bin matmul.4b.bin \
matmul-mt.s
matmul-mt.bin diverge.bin
run: simple.out sieve.out 2thread.out simple.4b.out sieve.4b.out 2thread.4b.out bubble.out bubble.4b.out dotprod.out dotprod.4b.out matmul.out matmul.4b.out\
matmul-mt.out
matmul-mt.out diverge.out
disas: simple.d sieve.d 2thread.d simple.4b.d sieve.4b.d 2thread.4b.d bubble.d bubble.4b.d dotprod.d dotprod.4b.d matmul.d matmul.4b.d matmul-mt.d
disas: simple.d sieve.d 2thread.d simple.4b.d sieve.4b.d 2thread.4b.d bubble.d \
bubble.4b.d dotprod.d dotprod.4b.d matmul.d matmul.4b.d matmul-mt.d \
diverge.d diverge.4b.d
%.4b.out : %.4b.bin
$(HARPEM) -a $(4BARCH) -c $< > $@
@@ -18,50 +20,11 @@ disas: simple.d sieve.d 2thread.d simple.4b.d sieve.4b.d 2thread.4b.d bubble.d b
%.out : %.bin
$(HARPEM) -c $< > $@
2thread.bin : boot.HOF lib.HOF 2thread.HOF
$(HARPLD) -o 2thread.bin $^
2thread.4b.bin : boot.4b.HOF lib.4b.HOF 2thread.4b.HOF
$(HARPLD) --arch $(4BARCH) -o 2thread.4b.bin $^
bubble.bin : boot.HOF lib.HOF bubble.HOF
$(HARPLD) -o bubble.bin $^
bubble.4b.bin : boot.4b.HOF lib.4b.HOF bubble.4b.HOF
$(HARPLD) --arch $(4BARCH) -o bubble.4b.bin $^
simple.bin : boot.HOF lib.HOF simple.HOF
$(HARPLD) -o $@ $^
sieve.bin : boot.HOF lib.HOF sieve.HOF
$(HARPLD) -o $@ $^
dotprod.bin : boot.HOF lib.HOF dotprod.HOF
$(HARPLD) -o $@ $^
matmul.bin : boot.HOF lib.HOF matmul.HOF
$(HARPLD) -o $@ $^
matmul-mt.bin : boot.HOF lib.HOF matmul-mt.HOF
$(HARPLD) -o $@ $^
simple.4b.bin : boot.4b.HOF lib.4b.HOF simple.4b.HOF
%.4b.bin : boot.4b.HOF lib.4b.HOF %.4b.HOF
$(HARPLD) --arch $(4BARCH) -o $@ $^
sieve.4b.bin : boot.4b.HOF lib.4b.HOF sieve.4b.HOF
$(HARPLD) --arch $(4BARCH) -o $@ $^
dotprod.4b.bin : boot.4b.HOF lib.4b.HOF dotprod.4b.HOF
$(HARPLD) --arch $(4BARCH) -o $@ $^
matmul.4b.bin : boot.4b.HOF lib.4b.HOF matmul.4b.HOF
$(HARPLD) --arch $(4BARCH) -o $@ $^
%.4b.bin : %.4b.HOF
$(HARPLD) --arch $(4BARCH) -o $@ $<
%.bin : %.HOF
$(HARPLD) -o $@ $<
%.bin : boot.HOF lib.HOF %.HOF
$(HARPLD) -o $@ $^
%.4b.HOF : %.s
$(HARPAS) --arch $(4BARCH) -o $@ $<

64
src/test/diverge.s Normal file
View File

@@ -0,0 +1,64 @@
/*******************************************************************************
Harptools by Chad D. Kersey, Summer 2011
********************************************************************************
Sample HARP assembly program.
*******************************************************************************/
/* Divergent branch: test immediate postdominator branch divergence support. */
/* Overview: the entry code runs a clone loop, starts dthread through jalis,  */
/* then prints THREADS words of `array`. dthread runs a 10-iteration loop     */
/* whose body is a split/join-guarded if/else on bit 0 of %r0 and finally     */
/* stores its accumulator into `array`.                                       */
.def THREADS 8
.align 4096
.perm x
.entry
.global
entry:
/* Clone loop: %r0 counts from 1; `clone %r0` is issued for %r0 = 1 ..        */
/* THREADS-1, and the loop exits once THREADS - %r0 reaches zero.             */
/* NOTE(review): presumably clone copies this lane's registers into lane %r0  */
/* and rtop sets the predicate when its source is non-zero; confirm against   */
/* the ISA reference.                                                         */
ldi %r0, #1
ldi %r1, THREADS
sloop: clone %r0
addi %r0, %r0, #1
sub %r2, %r1, %r0
rtop @p0, %r2
@p0 ? jmpi sloop
/* Call dthread with %r1 = THREADS; %r5 is used as the link register (dthread */
/* returns with jmprt %r5). NOTE(review): presumably jalis activates %r1      */
/* lanes for the call; confirm.                                               */
ldi %r0, #0
jalis %r5, %r1, dthread;
/* Print loop: %r0 is a byte offset stepping by __WORD until it equals        */
/* __WORD * THREADS; each word loaded from `array` is passed to printdec in   */
/* %r7. printdec is not defined in this file.                                 */
ldi %r0, #0
ldi %r1, (__WORD * THREADS)
ploop: ld %r7, %r0, array
jali %r5, printdec
addi %r0, %r0, __WORD
sub %r7, %r1, %r0
rtop @p0, %r7
@p0 ? jmpi ploop
trap;
/* dthread: %r1 is the iteration counter (starts at 10), %r2 the accumulator  */
/* (starts at 0). %r0 is assumed to differ per lane (set up by the clone loop */
/* above); TODO confirm.                                                      */
dthread: ldi %r1, #10
ldi %r2, #0
/* Loop body: @p1 is derived from %r0 & 1. Lanes with @p1 set branch to       */
/* `else` and subtract %r0 from %r2; the others fall through and add %r0.     */
/* In the emulator, split pushes the current thread mask plus an entry        */
/* recording the PC onto the IPDOM stack; join pops one entry, restoring its  */
/* mask and, for non-fall-through entries, the PC.                            */
loop: andi %r3, %r0, #1
rtop @p1, %r3
@p1 ? split
@p1 ? jmpi else
add %r2, %r2, %r0
jmpi after
else: sub %r2, %r2, %r0
after: join
subi %r1, %r1, #1
rtop @p0, %r1
@p0 ? jmpi loop
/* Store the accumulator at array + %r4, where %r4 is %r0 shifted left.       */
/* NOTE(review): the backtick operator presumably yields log2(__WORD), making */
/* %r4 a word-sized byte offset; confirm against the assembler docs.          */
shli %r4, %r0, (`__WORD)
st %r2, %r4, array
jmprt %r5;
/* Result buffer read by the print loop and written by dthread.               */
.align 4096
array: .space 4096