Fully-functioning spawn and join instructions.
This commit is contained in:
@@ -46,6 +46,8 @@ Core::Core(const ArchDef &a, Decoder &d, MemoryUnit &mem, Word id) :
|
|||||||
for (Word i = 0; i < a.getNPRegs(); ++i) {
|
for (Word i = 0; i < a.getNPRegs(); ++i) {
|
||||||
pred[j].push_back(Reg<bool>(id, regNum++));
|
pred[j].push_back(Reg<bool>(id, regNum++));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
tmask.push_back(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Set initial register contents. */
|
/* Set initial register contents. */
|
||||||
@@ -131,6 +133,11 @@ void Core::step() {
|
|||||||
D_RAW(" (");
|
D_RAW(" (");
|
||||||
for (unsigned i = 0; i < shadowPReg.size(); ++i) D_RAW(shadowPReg[i]);
|
for (unsigned i = 0; i < shadowPReg.size(); ++i) D_RAW(shadowPReg[i]);
|
||||||
D_RAW(')' << endl);
|
D_RAW(')' << endl);
|
||||||
|
|
||||||
|
D(3, "Thread mask:");
|
||||||
|
D_RAW(" ");
|
||||||
|
for (unsigned i = 0; i < tmask.size(); ++i) D_RAW(tmask[i] << ' ');
|
||||||
|
D_RAW(endl);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|||||||
@@ -27,7 +27,7 @@ namespace Harp {
|
|||||||
|
|
||||||
Reg &operator=(T r) { val = r; doWrite(); return *this; }
|
Reg &operator=(T r) { val = r; doWrite(); return *this; }
|
||||||
|
|
||||||
operator T() { doRead(); return val; }
|
operator T() const { doRead(); return val; }
|
||||||
|
|
||||||
void trunc(Size s) {
|
void trunc(Size s) {
|
||||||
Word mask((~0ull >> (sizeof(Word)-s)*8));
|
Word mask((~0ull >> (sizeof(Word)-s)*8));
|
||||||
@@ -40,16 +40,32 @@ namespace Harp {
|
|||||||
|
|
||||||
#ifdef EMU_INSTRUMENTATION
|
#ifdef EMU_INSTRUMENTATION
|
||||||
/* Access size here is 8, representing the register size of 64-bit cores. */
|
/* Access size here is 8, representing the register size of 64-bit cores. */
|
||||||
void doWrite() { reg_doWrite(cpuId, regNum); }
|
void doWrite() const { reg_doWrite(cpuId, regNum); }
|
||||||
void doRead() { reg_doRead(cpuId, regNum); }
|
void doRead() const { reg_doRead(cpuId, regNum); }
|
||||||
#else
|
#else
|
||||||
void doWrite() {}
|
void doWrite() const {}
|
||||||
void doRead() {}
|
void doRead() const {}
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
// Entry in the IPDOM Stack
|
// Entry in the IPDOM Stack
|
||||||
struct DomStackEntry {
|
struct DomStackEntry {
|
||||||
|
DomStackEntry(
|
||||||
|
unsigned p, const std::vector<std::vector<Reg<bool> > >& m, Word pc
|
||||||
|
): pc(pc), fallThrough(false)
|
||||||
|
{
|
||||||
|
std::cout << "New DomStackEntry:";
|
||||||
|
for (unsigned i = 0; i < m.size(); ++i) {
|
||||||
|
tmask.push_back(!bool(m[i][p]));
|
||||||
|
std::cout << ' ' << bool(m[i][p]);
|
||||||
|
}
|
||||||
|
std::cout << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
DomStackEntry(const std::vector<bool> &tmask):
|
||||||
|
tmask(tmask), fallThrough(true) {}
|
||||||
|
|
||||||
|
bool fallThrough;
|
||||||
std::vector<bool> tmask;
|
std::vector<bool> tmask;
|
||||||
Word pc;
|
Word pc;
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -121,12 +121,16 @@ void Instruction::executeOn(Core &c) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Also throw exceptions on divergent branches. */
|
/* Also throw exceptions on non-masked divergent branches. */
|
||||||
if (predicated && instTable[op].controlFlow) {
|
if (instTable[op].controlFlow) {
|
||||||
bool p0 = c.pred[0][pred];
|
Size t, count, active;
|
||||||
for (Size t = 1; t < c.activeThreads; t++) {
|
for (t = 0, count = 0, active = 0; t < c.activeThreads; ++t) {
|
||||||
if (c.pred[t][pred] != p0) throw DivergentBranchException();
|
if ((!predicated || c.pred[t][pred]) && c.tmask[t]) ++count;
|
||||||
|
if (c.tmask[t]) ++active;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (count != 0 && count != active)
|
||||||
|
throw DivergentBranchException();
|
||||||
}
|
}
|
||||||
|
|
||||||
Size nextActiveThreads = c.activeThreads;
|
Size nextActiveThreads = c.activeThreads;
|
||||||
@@ -135,8 +139,12 @@ void Instruction::executeOn(Core &c) {
|
|||||||
for (Size t = 0; t < c.activeThreads; t++) {
|
for (Size t = 0; t < c.activeThreads; t++) {
|
||||||
vector<Reg<Word> > ®(c.reg[t]);
|
vector<Reg<Word> > ®(c.reg[t]);
|
||||||
vector<Reg<bool> > &pReg(c.pred[t]);
|
vector<Reg<bool> > &pReg(c.pred[t]);
|
||||||
|
stack<DomStackEntry> &domStack(c.domStack);
|
||||||
|
|
||||||
if (predicated && !pReg[pred]) continue;
|
// If this thread is masked out, don't execute the instruction, unless it's
|
||||||
|
// a split or join.
|
||||||
|
if (((predicated && !pReg[pred]) || !c.tmask[t]) &&
|
||||||
|
op != SPLIT && op != JOIN) continue;
|
||||||
|
|
||||||
Word memAddr;
|
Word memAddr;
|
||||||
switch (op) {
|
switch (op) {
|
||||||
@@ -282,6 +290,23 @@ void Instruction::executeOn(Core &c) {
|
|||||||
case FDIV: reg[rdest] = Float(double(Float(reg[rsrc[0]], wordSz)) /
|
case FDIV: reg[rdest] = Float(double(Float(reg[rsrc[0]], wordSz)) /
|
||||||
double(Float(reg[rsrc[1]], wordSz)),wordSz);
|
double(Float(reg[rsrc[1]], wordSz)),wordSz);
|
||||||
break;
|
break;
|
||||||
|
case SPLIT:if (t == 0) {
|
||||||
|
// TODO: if mask becomes all-zero, fall through
|
||||||
|
DomStackEntry e(pred, c.pred, c.pc);
|
||||||
|
c.domStack.push(c.tmask);
|
||||||
|
c.domStack.push(e);
|
||||||
|
for (unsigned i = 0; i < e.tmask.size(); ++i)
|
||||||
|
c.tmask[i] = !e.tmask[i];
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case JOIN: if (t == 0) {
|
||||||
|
// TODO: if mask becomes all-zero, fall through
|
||||||
|
if (!c.domStack.top().fallThrough)
|
||||||
|
c.pc = c.domStack.top().pc;
|
||||||
|
c.tmask = c.domStack.top().tmask;
|
||||||
|
c.domStack.pop();
|
||||||
|
}
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
cout << "ERROR: Unsupported instruction: " << *this << "\n";
|
cout << "ERROR: Unsupported instruction: " << *this << "\n";
|
||||||
exit(1);
|
exit(1);
|
||||||
|
|||||||
@@ -5,12 +5,14 @@ HARPDIS = ../harptool -D
|
|||||||
4BARCH = 4b16/16/2
|
4BARCH = 4b16/16/2
|
||||||
|
|
||||||
all: simple.bin sieve.bin 2thread.bin simple.4b.bin sieve.4b.bin 2thread.4b.bin bubble.bin bubble.4b.bin dotprod.bin dotprod.4b.bin matmul.bin matmul.4b.bin \
|
all: simple.bin sieve.bin 2thread.bin simple.4b.bin sieve.4b.bin 2thread.4b.bin bubble.bin bubble.4b.bin dotprod.bin dotprod.4b.bin matmul.bin matmul.4b.bin \
|
||||||
matmul-mt.s
|
matmul-mt.bin diverge.bin
|
||||||
|
|
||||||
run: simple.out sieve.out 2thread.out simple.4b.out sieve.4b.out 2thread.4b.out bubble.out bubble.4b.out dotprod.out dotprod.4b.out matmul.out matmul.4b.out\
|
run: simple.out sieve.out 2thread.out simple.4b.out sieve.4b.out 2thread.4b.out bubble.out bubble.4b.out dotprod.out dotprod.4b.out matmul.out matmul.4b.out\
|
||||||
matmul-mt.out
|
matmul-mt.out diverge.out
|
||||||
|
|
||||||
disas: simple.d sieve.d 2thread.d simple.4b.d sieve.4b.d 2thread.4b.d bubble.d bubble.4b.d dotprod.d dotprod.4b.d matmul.d matmul.4b.d matmul-mt.d
|
disas: simple.d sieve.d 2thread.d simple.4b.d sieve.4b.d 2thread.4b.d bubble.d \
|
||||||
|
bubble.4b.d dotprod.d dotprod.4b.d matmul.d matmul.4b.d matmul-mt.d \
|
||||||
|
diverge.d diverge.4b.d
|
||||||
|
|
||||||
%.4b.out : %.4b.bin
|
%.4b.out : %.4b.bin
|
||||||
$(HARPEM) -a $(4BARCH) -c $< > $@
|
$(HARPEM) -a $(4BARCH) -c $< > $@
|
||||||
@@ -18,50 +20,11 @@ disas: simple.d sieve.d 2thread.d simple.4b.d sieve.4b.d 2thread.4b.d bubble.d b
|
|||||||
%.out : %.bin
|
%.out : %.bin
|
||||||
$(HARPEM) -c $< > $@
|
$(HARPEM) -c $< > $@
|
||||||
|
|
||||||
2thread.bin : boot.HOF lib.HOF 2thread.HOF
|
%.4b.bin : boot.4b.HOF lib.4b.HOF %.4b.HOF
|
||||||
$(HARPLD) -o 2thread.bin $^
|
|
||||||
|
|
||||||
2thread.4b.bin : boot.4b.HOF lib.4b.HOF 2thread.4b.HOF
|
|
||||||
$(HARPLD) --arch $(4BARCH) -o 2thread.4b.bin $^
|
|
||||||
|
|
||||||
bubble.bin : boot.HOF lib.HOF bubble.HOF
|
|
||||||
$(HARPLD) -o bubble.bin $^
|
|
||||||
|
|
||||||
bubble.4b.bin : boot.4b.HOF lib.4b.HOF bubble.4b.HOF
|
|
||||||
$(HARPLD) --arch $(4BARCH) -o bubble.4b.bin $^
|
|
||||||
|
|
||||||
simple.bin : boot.HOF lib.HOF simple.HOF
|
|
||||||
$(HARPLD) -o $@ $^
|
|
||||||
|
|
||||||
sieve.bin : boot.HOF lib.HOF sieve.HOF
|
|
||||||
$(HARPLD) -o $@ $^
|
|
||||||
|
|
||||||
dotprod.bin : boot.HOF lib.HOF dotprod.HOF
|
|
||||||
$(HARPLD) -o $@ $^
|
|
||||||
|
|
||||||
matmul.bin : boot.HOF lib.HOF matmul.HOF
|
|
||||||
$(HARPLD) -o $@ $^
|
|
||||||
|
|
||||||
matmul-mt.bin : boot.HOF lib.HOF matmul-mt.HOF
|
|
||||||
$(HARPLD) -o $@ $^
|
|
||||||
|
|
||||||
simple.4b.bin : boot.4b.HOF lib.4b.HOF simple.4b.HOF
|
|
||||||
$(HARPLD) --arch $(4BARCH) -o $@ $^
|
$(HARPLD) --arch $(4BARCH) -o $@ $^
|
||||||
|
|
||||||
sieve.4b.bin : boot.4b.HOF lib.4b.HOF sieve.4b.HOF
|
%.bin : boot.HOF lib.HOF %.HOF
|
||||||
$(HARPLD) --arch $(4BARCH) -o $@ $^
|
$(HARPLD) -o $@ $^
|
||||||
|
|
||||||
dotprod.4b.bin : boot.4b.HOF lib.4b.HOF dotprod.4b.HOF
|
|
||||||
$(HARPLD) --arch $(4BARCH) -o $@ $^
|
|
||||||
|
|
||||||
matmul.4b.bin : boot.4b.HOF lib.4b.HOF matmul.4b.HOF
|
|
||||||
$(HARPLD) --arch $(4BARCH) -o $@ $^
|
|
||||||
|
|
||||||
%.4b.bin : %.4b.HOF
|
|
||||||
$(HARPLD) --arch $(4BARCH) -o $@ $<
|
|
||||||
|
|
||||||
%.bin : %.HOF
|
|
||||||
$(HARPLD) -o $@ $<
|
|
||||||
|
|
||||||
%.4b.HOF : %.s
|
%.4b.HOF : %.s
|
||||||
$(HARPAS) --arch $(4BARCH) -o $@ $<
|
$(HARPAS) --arch $(4BARCH) -o $@ $<
|
||||||
|
|||||||
64
src/test/diverge.s
Normal file
64
src/test/diverge.s
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
/*******************************************************************************
|
||||||
|
Harptools by Chad D. Kersey, Summer 2011
|
||||||
|
********************************************************************************
|
||||||
|
|
||||||
|
Sample HARP assembly program.
|
||||||
|
|
||||||
|
*******************************************************************************/
|
||||||
|
/* Divergent branch: test immediate postdominator branch divergence support. */
|
||||||
|
.def THREADS 8
|
||||||
|
|
||||||
|
.align 4096
|
||||||
|
.perm x
|
||||||
|
.entry
|
||||||
|
.global
|
||||||
|
entry:
|
||||||
|
ldi %r0, #1
|
||||||
|
ldi %r1, THREADS
|
||||||
|
sloop: clone %r0
|
||||||
|
|
||||||
|
addi %r0, %r0, #1
|
||||||
|
sub %r2, %r1, %r0
|
||||||
|
rtop @p0, %r2
|
||||||
|
@p0 ? jmpi sloop
|
||||||
|
|
||||||
|
ldi %r0, #0
|
||||||
|
jalis %r5, %r1, dthread;
|
||||||
|
|
||||||
|
ldi %r0, #0
|
||||||
|
ldi %r1, (__WORD * THREADS)
|
||||||
|
|
||||||
|
ploop: ld %r7, %r0, array
|
||||||
|
jali %r5, printdec
|
||||||
|
|
||||||
|
addi %r0, %r0, __WORD
|
||||||
|
sub %r7, %r1, %r0
|
||||||
|
rtop @p0, %r7
|
||||||
|
@p0 ? jmpi ploop
|
||||||
|
|
||||||
|
trap;
|
||||||
|
|
||||||
|
|
||||||
|
dthread: ldi %r1, #10
|
||||||
|
ldi %r2, #0
|
||||||
|
|
||||||
|
loop: andi %r3, %r0, #1
|
||||||
|
rtop @p1, %r3
|
||||||
|
@p1 ? split
|
||||||
|
@p1 ? jmpi else
|
||||||
|
add %r2, %r2, %r0
|
||||||
|
jmpi after
|
||||||
|
else: sub %r2, %r2, %r0
|
||||||
|
after: join
|
||||||
|
|
||||||
|
subi %r1, %r1, #1
|
||||||
|
rtop @p0, %r1
|
||||||
|
@p0 ? jmpi loop
|
||||||
|
|
||||||
|
shli %r4, %r0, (`__WORD)
|
||||||
|
st %r2, %r4, array
|
||||||
|
|
||||||
|
jmprt %r5;
|
||||||
|
|
||||||
|
.align 4096
|
||||||
|
array: .space 4096
|
||||||
Reference in New Issue
Block a user