Added warp independence
This commit is contained in:
@@ -5,7 +5,7 @@ CXXFLAGS ?= -std=c++11 -fPIC -O3 # -g -DUSE_DEBUG=3 -DPRINT_ACTIVE_THREADS
|
|||||||
LDLIBS ?= -pthread
|
LDLIBS ?= -pthread
|
||||||
PREFIX ?= /usr/local
|
PREFIX ?= /usr/local
|
||||||
|
|
||||||
LIB_OBJS=args.o obj.o mem.o core.o instruction.o enc.o util.o lex.yy.o
|
LIB_OBJS=args.o obj.o mem.o core.o instruction.o enc.o util.o
|
||||||
|
|
||||||
all: harptool libharplib.so libharplib.a #libqsim-harp.so
|
all: harptool libharplib.so libharplib.a #libqsim-harp.so
|
||||||
|
|
||||||
@@ -45,8 +45,6 @@ core.o : core.cpp include/types.h include/util.h include/mem.h \
|
|||||||
# include/mem.h
|
# include/mem.h
|
||||||
# $(CXX) $(CXXFLAGS) $(QSIM_CXXFLAGS) -shared -o $@ $^
|
# $(CXX) $(CXXFLAGS) $(QSIM_CXXFLAGS) -shared -o $@ $^
|
||||||
|
|
||||||
lex.yy.cc: scanner.lex
|
|
||||||
flex scanner.lex
|
|
||||||
|
|
||||||
install:
|
install:
|
||||||
cp libharplib.so $(PREFIX)/lib
|
cp libharplib.so $(PREFIX)/lib
|
||||||
|
|||||||
117
src/enc.cpp
117
src/enc.cpp
@@ -403,120 +403,3 @@ Instruction *WordDecoder::decode(const std::vector<Byte> &v, Size &idx) {
|
|||||||
return &inst;
|
return &inst;
|
||||||
}
|
}
|
||||||
|
|
||||||
// WordEncoder::WordEncoder(const ArchDef &arch) {
|
|
||||||
// getSizes(arch, n, o, r, p, i1, i2, i3);
|
|
||||||
// if (p > r) r = p;
|
|
||||||
// oMask = mask(o); rMask = mask(r); pMask = mask(p);
|
|
||||||
// i1Mask = mask(i1); i2Mask = mask(i2); i3Mask = mask(i3);
|
|
||||||
// }
|
|
||||||
|
|
||||||
// Size WordEncoder::encode(Ref *&ref, std::vector<Byte> &v,
|
|
||||||
// Size idx, Instruction &i)
|
|
||||||
// {
|
|
||||||
// Word code = 0;
|
|
||||||
// Size bitsWritten = 0;
|
|
||||||
|
|
||||||
// /* Predicate/predicated bit */
|
|
||||||
// if (i.hasPred()) {
|
|
||||||
// code = 1 << p;
|
|
||||||
// code |= (i.getPred()&pMask);
|
|
||||||
// if (i.getPred() > pMask) {
|
|
||||||
// cout << "Predicate in " << i << " does not fit in encoding.\n";
|
|
||||||
// exit(1);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// bitsWritten += (1 + p);
|
|
||||||
|
|
||||||
// /* Opcode */
|
|
||||||
// code <<= o;
|
|
||||||
// code |= (i.getOpcode()&oMask);
|
|
||||||
// if (i.getOpcode() > oMask) {
|
|
||||||
// cout << "Opcode in " << i << " does not fit in encoding.\n";
|
|
||||||
// exit(1);
|
|
||||||
// }
|
|
||||||
// bitsWritten += o;
|
|
||||||
|
|
||||||
// if (i.hasRDest()) {
|
|
||||||
// code <<= r;
|
|
||||||
// code |= i.getRDest();
|
|
||||||
// bitsWritten += r;
|
|
||||||
// if (i.getRDest() > rMask) {
|
|
||||||
// cout << "Destination register in " << i << " does not fit in encoding.\n";
|
|
||||||
// exit(1);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
// if (i.hasPDest()) {
|
|
||||||
// code <<= r;
|
|
||||||
// code |= i.getPDest();
|
|
||||||
// bitsWritten += r;
|
|
||||||
// if (i.getPDest() > rMask) {
|
|
||||||
// cout << "Destination predicate in " <<i<< " does not fit in encoding.\n";
|
|
||||||
// exit(1);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
// for (Size j = 0; j < i.getNRSrc(); j++) {
|
|
||||||
// code <<= r;
|
|
||||||
// code |= i.getRSrc(j);
|
|
||||||
// bitsWritten += r;
|
|
||||||
// if (i.getRSrc(j) > rMask) {
|
|
||||||
// cout << "Source register " << j << " in " << i
|
|
||||||
// << " does not fit in encoding.\n";
|
|
||||||
// exit(1);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
// for (Size j = 0; j < i.getNPSrc(); j++) {
|
|
||||||
// code <<= r;
|
|
||||||
// code |= i.getPSrc(j);
|
|
||||||
// bitsWritten += r;
|
|
||||||
// if (i.getPSrc(j) > rMask) {
|
|
||||||
// cout << "Source predicate " << j << " in " << i
|
|
||||||
// << " does not fit in encoding.\n";
|
|
||||||
// exit(1);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
// if (i.hasRefLiteral()) {
|
|
||||||
// Ref *r = i.getRefLiteral();
|
|
||||||
// ref = new OffsetRef(r->name, v, idx, n - bitsWritten, n, i.hasRelImm());
|
|
||||||
// } else {
|
|
||||||
// ref = NULL;
|
|
||||||
// }
|
|
||||||
|
|
||||||
// if (i.hasImm()) {
|
|
||||||
// if (bitsWritten == n - i1) {
|
|
||||||
// code <<= i1;
|
|
||||||
// code |= (i.getImm()&i1Mask);
|
|
||||||
// bitsWritten += i1;
|
|
||||||
// Word_s ws(i.getImm());
|
|
||||||
// if ((ws >> i1) != 0 && (ws >> i1) != -1) goto tooBigImm;
|
|
||||||
// } else if (bitsWritten == n - i2) {
|
|
||||||
// code <<= i2;
|
|
||||||
// code |= (i.getImm()&i2Mask);
|
|
||||||
// bitsWritten += i2;
|
|
||||||
// Word_s ws(i.getImm());
|
|
||||||
// if ((ws >> i2) != 0 && (ws >> i2) != -1) goto tooBigImm;
|
|
||||||
// } else if (bitsWritten == n - i3) {
|
|
||||||
// code <<= i3;
|
|
||||||
// code |= (i.getImm()&i3Mask);
|
|
||||||
// bitsWritten += i3;
|
|
||||||
// Word_s ws(i.getImm());
|
|
||||||
// if ((ws >> i3) != 0 && (ws >> i3) != -1) goto tooBigImm;
|
|
||||||
// } else {
|
|
||||||
// cout << "WordEncoder::encode() could not encode: " << i << '\n';
|
|
||||||
// exit(1);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
// if (bitsWritten < n) code <<= (n - bitsWritten);
|
|
||||||
|
|
||||||
// writeWord(v, idx, n/8, code);
|
|
||||||
|
|
||||||
// return n/8;
|
|
||||||
|
|
||||||
// tooBigImm:
|
|
||||||
// cout << "Immediate in " << i << " too large to encode.\n";
|
|
||||||
// exit(1);
|
|
||||||
// }
|
|
||||||
|
|||||||
@@ -262,12 +262,6 @@ int emu_main(int argc, char **argv) {
|
|||||||
|
|
||||||
if (showStats) core.printStats();
|
if (showStats) core.printStats();
|
||||||
|
|
||||||
Addr base_addr = 0x81000000;
|
|
||||||
for (Addr i = 0; i < 16; i++)
|
|
||||||
{
|
|
||||||
Addr new_addr = base_addr + (i *4);
|
|
||||||
std::cout << std::hex << new_addr << " = " << std::hex << old_ram.read(new_addr) << "\n";
|
|
||||||
}
|
|
||||||
|
|
||||||
std::cout << "\n";
|
std::cout << "\n";
|
||||||
return 0;
|
return 0;
|
||||||
|
|||||||
@@ -4,7 +4,7 @@
|
|||||||
#ifndef __DEBUG_H
|
#ifndef __DEBUG_H
|
||||||
#define __DEBUG_H
|
#define __DEBUG_H
|
||||||
|
|
||||||
#define USE_DEBUG 9
|
//#define USE_DEBUG 9
|
||||||
|
|
||||||
#ifdef USE_DEBUG
|
#ifdef USE_DEBUG
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|||||||
@@ -59,17 +59,17 @@ bool checkUnanimous(unsigned p, const std::vector<std::vector<Reg<Word> > >& m,
|
|||||||
}
|
}
|
||||||
if (i == m.size()) throw DivergentBranchException();
|
if (i == m.size()) throw DivergentBranchException();
|
||||||
|
|
||||||
std::cout << "same: " << same << " with -> ";
|
//std::cout << "same: " << same << " with -> ";
|
||||||
for (; i < m.size(); ++i) {
|
for (; i < m.size(); ++i) {
|
||||||
if (tm[i]) {
|
if (tm[i]) {
|
||||||
std::cout << " " << (bool(m[i][p]));
|
//std::cout << " " << (bool(m[i][p]));
|
||||||
if (same != (bool(m[i][p]))) {
|
if (same != (bool(m[i][p]))) {
|
||||||
std::cout << " FALSE\n";
|
//std::cout << " FALSE\n";
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::cout << " TRUE\n";
|
//std::cout << " TRUE\n";
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -84,7 +84,7 @@ void Instruction::executeOn(Warp &c) {
|
|||||||
/* If I try to execute a privileged instruction in user mode, throw an
|
/* If I try to execute a privileged instruction in user mode, throw an
|
||||||
exception 3. */
|
exception 3. */
|
||||||
if (instTable[op].privileged && !c.supervisorMode) {
|
if (instTable[op].privileged && !c.supervisorMode) {
|
||||||
std::cout << "INTERRUPT SUPERVISOR\n";
|
//std::cout << "INTERRUPT SUPERVISOR\n";
|
||||||
c.interrupt(3);
|
c.interrupt(3);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -126,8 +126,8 @@ void Instruction::executeOn(Warp &c) {
|
|||||||
vector<Reg<bool> > &pReg(c.pred[t]);
|
vector<Reg<bool> > &pReg(c.pred[t]);
|
||||||
stack<DomStackEntry> &domStack(c.domStack);
|
stack<DomStackEntry> &domStack(c.domStack);
|
||||||
|
|
||||||
std::cout << std::hex << "opcode: " << op << " func3: " << func3 << "\n";
|
//std::cout << std::hex << "opcode: " << op << " func3: " << func3 << "\n";
|
||||||
if (op == GPGPU) std::cout << "OPCODE MATCHED GPGPU\n";
|
if (op == GPGPU) //std::cout << "OPCODE MATCHED GPGPU\n";
|
||||||
|
|
||||||
// If this thread is masked out, don't execute the instruction, unless it's
|
// If this thread is masked out, don't execute the instruction, unless it's
|
||||||
// a split or join.
|
// a split or join.
|
||||||
@@ -151,10 +151,10 @@ void Instruction::executeOn(Warp &c) {
|
|||||||
switch (op) {
|
switch (op) {
|
||||||
|
|
||||||
case NOP:
|
case NOP:
|
||||||
std::cout << "NOP_INST\n";
|
//std::cout << "NOP_INST\n";
|
||||||
break;
|
break;
|
||||||
case R_INST:
|
case R_INST:
|
||||||
std::cout << "R_INST\n";
|
//std::cout << "R_INST\n";
|
||||||
switch (func3)
|
switch (func3)
|
||||||
{
|
{
|
||||||
case 0:
|
case 0:
|
||||||
@@ -221,11 +221,11 @@ void Instruction::executeOn(Warp &c) {
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case L_INST:
|
case L_INST:
|
||||||
std::cout << "L_INST\n";
|
//std::cout << "L_INST\n";
|
||||||
memAddr = ((reg[rsrc[0]] + immsrc) & 0xFFFFFFFC);
|
memAddr = ((reg[rsrc[0]] + immsrc) & 0xFFFFFFFC);
|
||||||
shift_by = ((reg[rsrc[0]] + immsrc) & 0x00000003) * 8;
|
shift_by = ((reg[rsrc[0]] + immsrc) & 0x00000003) * 8;
|
||||||
data_read = c.core->mem.read(memAddr, c.supervisorMode);
|
data_read = c.core->mem.read(memAddr, c.supervisorMode);
|
||||||
// std::cout <<std::hex<< "EXECUTE: " << reg[rsrc[0]] << " + " << immsrc << " = " << memAddr << " -> data_read: " << data_read << "\n";
|
// //std::cout <<std::hex<< "EXECUTE: " << reg[rsrc[0]] << " + " << immsrc << " = " << memAddr << " -> data_read: " << data_read << "\n";
|
||||||
#ifdef EMU_INSTRUMENTATION
|
#ifdef EMU_INSTRUMENTATION
|
||||||
Harp::OSDomain::osDomain->
|
Harp::OSDomain::osDomain->
|
||||||
do_mem(0, memAddr, c.core->mem.virtToPhys(memAddr), 8, true);
|
do_mem(0, memAddr, c.core->mem.virtToPhys(memAddr), 8, true);
|
||||||
@@ -239,7 +239,7 @@ void Instruction::executeOn(Warp &c) {
|
|||||||
break;
|
break;
|
||||||
case 1:
|
case 1:
|
||||||
// LH
|
// LH
|
||||||
// std::cout << "shifting by: " << shift_by << " final data: " << ((data_read >> shift_by) & 0xFFFF, 16, 0xFFFF) << "\n";
|
// //std::cout << "shifting by: " << shift_by << " final data: " << ((data_read >> shift_by) & 0xFFFF, 16, 0xFFFF) << "\n";
|
||||||
reg[rdest] = signExt((data_read >> shift_by) & 0xFFFF, 16, 0xFFFF);
|
reg[rdest] = signExt((data_read >> shift_by) & 0xFFFF, 16, 0xFFFF);
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
@@ -259,7 +259,7 @@ void Instruction::executeOn(Warp &c) {
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case I_INST:
|
case I_INST:
|
||||||
std::cout << "I_INST\n";
|
//std::cout << "I_INST\n";
|
||||||
switch (func3)
|
switch (func3)
|
||||||
{
|
{
|
||||||
|
|
||||||
@@ -312,7 +312,7 @@ void Instruction::executeOn(Warp &c) {
|
|||||||
if ((func7 == 0))
|
if ((func7 == 0))
|
||||||
{
|
{
|
||||||
// SRLI
|
// SRLI
|
||||||
// std::cout << "WTF\n";
|
// //std::cout << "WTF\n";
|
||||||
bool isNeg = ((0x80000000 & reg[rsrc[0]])) > 0;
|
bool isNeg = ((0x80000000 & reg[rsrc[0]])) > 0;
|
||||||
Word result = Word_u(reg[rsrc[0]]) >> Word_u(immsrc);
|
Word result = Word_u(reg[rsrc[0]]) >> Word_u(immsrc);
|
||||||
// if (isNeg)
|
// if (isNeg)
|
||||||
@@ -332,7 +332,7 @@ void Instruction::executeOn(Warp &c) {
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
// SRAI
|
// SRAI
|
||||||
// std::cout << "WOHOOOOO\n";
|
// //std::cout << "WOHOOOOO\n";
|
||||||
op1 = reg[rsrc[0]];
|
op1 = reg[rsrc[0]];
|
||||||
op2 = immsrc;
|
op2 = immsrc;
|
||||||
reg[rdest] = op1 >> op2;
|
reg[rdest] = op1 >> op2;
|
||||||
@@ -345,23 +345,28 @@ void Instruction::executeOn(Warp &c) {
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case S_INST:
|
case S_INST:
|
||||||
std::cout << "S_INST\n";
|
//std::cout << "S_INST\n";
|
||||||
++c.stores;
|
++c.stores;
|
||||||
memAddr = reg[rsrc[0]] + immsrc;
|
memAddr = reg[rsrc[0]] + immsrc;
|
||||||
// std::cout << "STORE MEM ADDRESS: " << std::hex << reg[rsrc[0]] << " + " << immsrc << "\n";
|
// //std::cout << "STORE MEM ADDRESS: " << std::hex << reg[rsrc[0]] << " + " << immsrc << "\n";
|
||||||
// std::cout << "FUNC3: " << func3 << "\n";
|
// //std::cout << "FUNC3: " << func3 << "\n";
|
||||||
|
if (memAddr == 0x00010000)
|
||||||
|
{
|
||||||
|
std::cout << (char) reg[rsrc[1]];
|
||||||
|
break;
|
||||||
|
}
|
||||||
switch (func3)
|
switch (func3)
|
||||||
{
|
{
|
||||||
case 0:
|
case 0:
|
||||||
// std::cout << "SB\n";
|
// //std::cout << "SB\n";
|
||||||
c.core->mem.write(memAddr, reg[rsrc[1]] & 0x000000FF, c.supervisorMode, 1);
|
c.core->mem.write(memAddr, reg[rsrc[1]] & 0x000000FF, c.supervisorMode, 1);
|
||||||
break;
|
break;
|
||||||
case 1:
|
case 1:
|
||||||
// std::cout << "SH\n";
|
// //std::cout << "SH\n";
|
||||||
c.core->mem.write(memAddr, reg[rsrc[1]], c.supervisorMode, 2);
|
c.core->mem.write(memAddr, reg[rsrc[1]], c.supervisorMode, 2);
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
// std::cout << std::hex << "SW: about to write: " << reg[rsrc[1]] << " to " << memAddr << "\n";
|
// //std::cout << std::hex << "SW: about to write: " << reg[rsrc[1]] << " to " << memAddr << "\n";
|
||||||
c.core->mem.write(memAddr, reg[rsrc[1]], c.supervisorMode, 4);
|
c.core->mem.write(memAddr, reg[rsrc[1]], c.supervisorMode, 4);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
@@ -375,7 +380,7 @@ void Instruction::executeOn(Warp &c) {
|
|||||||
#endif
|
#endif
|
||||||
break;
|
break;
|
||||||
case B_INST:
|
case B_INST:
|
||||||
std::cout << "B_INST\n";
|
//std::cout << "B_INST\n";
|
||||||
switch (func3)
|
switch (func3)
|
||||||
{
|
{
|
||||||
case 0:
|
case 0:
|
||||||
@@ -429,17 +434,17 @@ void Instruction::executeOn(Warp &c) {
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case LUI_INST:
|
case LUI_INST:
|
||||||
std::cout << "LUI_INST\n";
|
//std::cout << "LUI_INST\n";
|
||||||
reg[rdest] = (immsrc << 12) & 0xfffff000;
|
reg[rdest] = (immsrc << 12) & 0xfffff000;
|
||||||
break;
|
break;
|
||||||
case AUIPC_INST:
|
case AUIPC_INST:
|
||||||
std::cout << "AUIPC_INST\n";
|
//std::cout << "AUIPC_INST\n";
|
||||||
reg[rdest] = ((immsrc << 12) & 0xfffff000) + (c.pc - 4);
|
reg[rdest] = ((immsrc << 12) & 0xfffff000) + (c.pc - 4);
|
||||||
break;
|
break;
|
||||||
case JAL_INST:
|
case JAL_INST:
|
||||||
std::cout << "JAL_INST\n";
|
//std::cout << "JAL_INST\n";
|
||||||
if (!pcSet) nextPc = (c.pc - 4) + immsrc;
|
if (!pcSet) nextPc = (c.pc - 4) + immsrc;
|
||||||
if (!pcSet) std::cout << "JAL... SETTING PC: " << nextPc << "\n";
|
if (!pcSet) //std::cout << "JAL... SETTING PC: " << nextPc << "\n";
|
||||||
if (rdest != 0)
|
if (rdest != 0)
|
||||||
{
|
{
|
||||||
reg[rdest] = c.pc;
|
reg[rdest] = c.pc;
|
||||||
@@ -447,9 +452,9 @@ void Instruction::executeOn(Warp &c) {
|
|||||||
pcSet = true;
|
pcSet = true;
|
||||||
break;
|
break;
|
||||||
case JALR_INST:
|
case JALR_INST:
|
||||||
std::cout << "JALR_INST\n";
|
//std::cout << "JALR_INST\n";
|
||||||
if (!pcSet) nextPc = reg[rsrc[0]] + immsrc;
|
if (!pcSet) nextPc = reg[rsrc[0]] + immsrc;
|
||||||
if (!pcSet) std::cout << "JALR... SETTING PC: " << nextPc << "\n";
|
if (!pcSet) //std::cout << "JALR... SETTING PC: " << nextPc << "\n";
|
||||||
if (rdest != 0)
|
if (rdest != 0)
|
||||||
{
|
{
|
||||||
reg[rdest] = c.pc;
|
reg[rdest] = c.pc;
|
||||||
@@ -457,7 +462,7 @@ void Instruction::executeOn(Warp &c) {
|
|||||||
pcSet = true;
|
pcSet = true;
|
||||||
break;
|
break;
|
||||||
case SYS_INST:
|
case SYS_INST:
|
||||||
std::cout << "SYS_INST\n";
|
//std::cout << "SYS_INST\n";
|
||||||
temp = reg[rsrc[0]];
|
temp = reg[rsrc[0]];
|
||||||
switch (func3)
|
switch (func3)
|
||||||
{
|
{
|
||||||
@@ -513,7 +518,7 @@ void Instruction::executeOn(Warp &c) {
|
|||||||
case 0:
|
case 0:
|
||||||
if (immsrc < 2)
|
if (immsrc < 2)
|
||||||
{
|
{
|
||||||
std::cout << "INTERRUPT ECALL/EBREAK\n";
|
//std::cout << "INTERRUPT ECALL/EBREAK\n";
|
||||||
nextActiveThreads = 0;
|
nextActiveThreads = 0;
|
||||||
c.interrupt(0);
|
c.interrupt(0);
|
||||||
}
|
}
|
||||||
@@ -523,16 +528,16 @@ void Instruction::executeOn(Warp &c) {
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case TRAP:
|
case TRAP:
|
||||||
std::cout << "INTERRUPT TRAP\n";
|
//std::cout << "INTERRUPT TRAP\n";
|
||||||
nextActiveThreads = 0;
|
nextActiveThreads = 0;
|
||||||
c.interrupt(0);
|
c.interrupt(0);
|
||||||
break;
|
break;
|
||||||
case FENCE:
|
case FENCE:
|
||||||
std::cout << "FENCE_INST\n";
|
//std::cout << "FENCE_INST\n";
|
||||||
break;
|
break;
|
||||||
case PJ_INST:
|
case PJ_INST:
|
||||||
// pred jump reg
|
// pred jump reg
|
||||||
std::cout << "pred jump... src: " << rsrc[0] << std::hex << " val: " << reg[rsrc[0]] << " dest: " << reg[rsrc[1]] << "\n";
|
//std::cout << "pred jump... src: " << rsrc[0] << std::hex << " val: " << reg[rsrc[0]] << " dest: " << reg[rsrc[1]] << "\n";
|
||||||
if (reg[rsrc[0]])
|
if (reg[rsrc[0]])
|
||||||
{
|
{
|
||||||
if (!pcSet) nextPc = reg[rsrc[1]];
|
if (!pcSet) nextPc = reg[rsrc[1]];
|
||||||
@@ -540,24 +545,24 @@ void Instruction::executeOn(Warp &c) {
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case GPGPU:
|
case GPGPU:
|
||||||
std::cout << "GPGPU\n";
|
//std::cout << "GPGPU\n";
|
||||||
switch(func3)
|
switch(func3)
|
||||||
{
|
{
|
||||||
case 0:
|
case 0:
|
||||||
// WSPAWN
|
// WSPAWN
|
||||||
std::cout << "WSPAWN\n";
|
//std::cout << "WSPAWN\n";
|
||||||
if (sjOnce)
|
if (sjOnce)
|
||||||
{
|
{
|
||||||
sjOnce = false;
|
sjOnce = false;
|
||||||
D(0, "Spawning a new warp.");
|
D(0, "Spawning a new warp.");
|
||||||
// std::cout << "SIZE: " << c.core->w.size() << "\n";
|
// //std::cout << "SIZE: " << c.core->w.size() << "\n";
|
||||||
for (unsigned i = 0; i < c.core->w.size(); ++i)
|
for (unsigned i = 0; i < c.core->w.size(); ++i)
|
||||||
{
|
{
|
||||||
// std::cout << "WHATTT\n";
|
// //std::cout << "WHATTT\n";
|
||||||
Warp &newWarp(c.core->w[i]);
|
Warp &newWarp(c.core->w[i]);
|
||||||
// std::cout << "STARTING\n";
|
// //std::cout << "STARTING\n";
|
||||||
if (newWarp.spawned == false) {
|
if (newWarp.spawned == false) {
|
||||||
// std::cout << "ABOUT TO START\n";
|
// //std::cout << "ABOUT TO START\n";
|
||||||
newWarp.pc = reg[rsrc[0]];
|
newWarp.pc = reg[rsrc[0]];
|
||||||
newWarp.reg[0] = reg;
|
newWarp.reg[0] = reg;
|
||||||
newWarp.csr = c.csr;
|
newWarp.csr = c.csr;
|
||||||
@@ -572,12 +577,12 @@ void Instruction::executeOn(Warp &c) {
|
|||||||
case 2:
|
case 2:
|
||||||
{
|
{
|
||||||
// SPLIT
|
// SPLIT
|
||||||
std::cout << "SPLIT\n";
|
//std::cout << "SPLIT\n";
|
||||||
if (sjOnce)
|
if (sjOnce)
|
||||||
{
|
{
|
||||||
sjOnce = false;
|
sjOnce = false;
|
||||||
if (checkUnanimous(pred, c.reg, c.tmask)) {
|
if (checkUnanimous(pred, c.reg, c.tmask)) {
|
||||||
std::cout << "Unanimous pred: " << pred << " val: " << reg[pred] << "\n";
|
//std::cout << "Unanimous pred: " << pred << " val: " << reg[pred] << "\n";
|
||||||
DomStackEntry e(c.tmask);
|
DomStackEntry e(c.tmask);
|
||||||
e.uni = true;
|
e.uni = true;
|
||||||
c.domStack.push(e);
|
c.domStack.push(e);
|
||||||
@@ -595,7 +600,7 @@ void Instruction::executeOn(Warp &c) {
|
|||||||
break;
|
break;
|
||||||
case 3:
|
case 3:
|
||||||
// JOIN
|
// JOIN
|
||||||
std::cout << "JOIN\n";
|
//std::cout << "JOIN\n";
|
||||||
if (sjOnce)
|
if (sjOnce)
|
||||||
{
|
{
|
||||||
sjOnce = false;
|
sjOnce = false;
|
||||||
@@ -615,33 +620,34 @@ void Instruction::executeOn(Warp &c) {
|
|||||||
break;
|
break;
|
||||||
case 4:
|
case 4:
|
||||||
// JMPRT
|
// JMPRT
|
||||||
std::cout << "JMPRT\n";
|
//std::cout << "JMPRT\n";
|
||||||
nextActiveThreads = 1;
|
nextActiveThreads = 1;
|
||||||
if (!pcSet) nextPc = reg[rsrc[0]];
|
if (!pcSet) nextPc = reg[rsrc[0]];
|
||||||
pcSet = true;
|
pcSet = true;
|
||||||
break;
|
break;
|
||||||
case 5:
|
case 5:
|
||||||
// CLONE
|
// CLONE
|
||||||
std::cout << "CLONE\n";
|
//std::cout << "CLONE\n";
|
||||||
// std::cout << "CLONING REG: " << rsrc[0] << " lane: " << reg[rsrc[0]] << "\n";
|
// //std::cout << "CLONING REG: " << rsrc[0] << " lane: " << reg[rsrc[0]] << "\n";
|
||||||
c.reg[reg[rsrc[0]]] = reg;
|
c.reg[reg[rsrc[0]]] = reg;
|
||||||
break;
|
break;
|
||||||
case 6:
|
case 6:
|
||||||
// JALRS
|
// JALRS
|
||||||
std::cout << "JALRS\n";
|
//std::cout << "JALRS\n";
|
||||||
nextActiveThreads = reg[rsrc[1]];
|
nextActiveThreads = reg[rsrc[1]];
|
||||||
reg[rdest] = c.pc;
|
reg[rdest] = c.pc;
|
||||||
if (!pcSet) nextPc = reg[rsrc[0]];
|
if (!pcSet) nextPc = reg[rsrc[0]];
|
||||||
pcSet = true;
|
pcSet = true;
|
||||||
// std::cout << "ACTIVE_THREDS: " << rsrc[1] << " val: " << reg[rsrc[1]] << "\n";
|
// //std::cout << "ACTIVE_THREDS: " << rsrc[1] << " val: " << reg[rsrc[1]] << "\n";
|
||||||
// std::cout << "nextPC: " << rsrc[0] << " val: " << std::hex << reg[rsrc[0]] << "\n";
|
// //std::cout << "nextPC: " << rsrc[0] << " val: " << std::hex << reg[rsrc[0]] << "\n";
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
cout << "ERROR: UNSUPPORTED GPGPU INSTRUCTION " << *this << "\n";
|
cout << "ERROR: UNSUPPORTED GPGPU INSTRUCTION " << *this << "\n";
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
cout << "aERROR: Unsupported instruction: " << *this << "\n";
|
cout << "pc: " << hex << (c.pc) << "\n";
|
||||||
|
cout << "aERROR: Unsupported instruction: " << *this << "\n" << flush;
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -660,9 +666,9 @@ void Instruction::executeOn(Warp &c) {
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
// std::cout << "new thread mask: ";
|
// //std::cout << "new thread mask: ";
|
||||||
// for (int i = 0; i < c.tmask.size(); ++i) std::cout << " " << c.tmask[i];
|
// for (int i = 0; i < c.tmask.size(); ++i) //std::cout << " " << c.tmask[i];
|
||||||
// std::cout << "\n";
|
// //std::cout << "\n";
|
||||||
|
|
||||||
// This way, if pc was set by a side effect (such as interrupt), it will
|
// This way, if pc was set by a side effect (such as interrupt), it will
|
||||||
// retain its new value.
|
// retain its new value.
|
||||||
|
|||||||
24
src/obj.cpp
24
src/obj.cpp
@@ -1,21 +1,21 @@
|
|||||||
/*******************************************************************************
|
/*******************************************************************************
|
||||||
HARPtools by Chad D. Kersey, Summer 2011
|
HARPtools by Chad D. Kersey, Summer 2011
|
||||||
*******************************************************************************/
|
*******************************************************************************/
|
||||||
#include "include/types.h"
|
// #include "include/types.h"
|
||||||
#include "include/obj.h"
|
// #include "include/obj.h"
|
||||||
#include "include/util.h"
|
// #include "include/util.h"
|
||||||
#include "include/asm-tokens.h"
|
// #include "include/asm-tokens.h"
|
||||||
#include "include/debug.h"
|
// #include "include/debug.h"
|
||||||
|
|
||||||
#include <iostream>
|
// #include <iostream>
|
||||||
#include <stdlib.h>
|
// #include <stdlib.h>
|
||||||
#include <FlexLexer.h>
|
// #include <FlexLexer.h>
|
||||||
|
|
||||||
#include <cctype>
|
// #include <cctype>
|
||||||
#include <cstdio>
|
// #include <cstdio>
|
||||||
#include <cmath>
|
// #include <cmath>
|
||||||
|
|
||||||
#include <map>
|
// #include <map>
|
||||||
|
|
||||||
// using namespace std;
|
// using namespace std;
|
||||||
// using namespace Harp;
|
// using namespace Harp;
|
||||||
|
|||||||
297672
src/results.txt
297672
src/results.txt
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,7 @@
|
|||||||
// #include <stdint.h>
|
// #include <stdint.h>
|
||||||
#include <stdbool.h>
|
|
||||||
// #include <cstdint>
|
// #include <cstdint>
|
||||||
|
extern void print_consol(char *);
|
||||||
|
extern void printc(char);
|
||||||
|
|
||||||
|
|
||||||
int main(void);
|
int main(void);
|
||||||
@@ -13,24 +14,39 @@ void matAddition (unsigned, unsigned);
|
|||||||
// unsigned y[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
|
// unsigned y[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
|
||||||
// unsigned z[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
// unsigned z[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
||||||
|
|
||||||
unsigned x[] = {1, 1, 6, 0, 3, 1, 1, 2, 0, 3, 6, 7, 5, 7, 7, 9};
|
unsigned x[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 , 1 , 1 , 1 , 1 , 1 };
|
||||||
unsigned y[] = {0, 2, 2, 0, 5, 0, 1, 1, 4, 2, 0, 0, 3, 2, 3, 2};
|
unsigned y[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
|
||||||
unsigned z[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
unsigned z[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
||||||
|
|
||||||
// unsigned x[] = {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2};
|
// unsigned x[] = {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2};
|
||||||
// unsigned y[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
|
// unsigned y[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
|
||||||
// unsigned z[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
// unsigned z[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
||||||
|
|
||||||
#define NUM_WARPS 3
|
#define NUM_WARPS 2
|
||||||
#define NUM_THREADS 7
|
#define NUM_THREADS 8
|
||||||
|
|
||||||
int main()
|
int main()
|
||||||
{
|
{
|
||||||
|
|
||||||
|
for (int i = 0; i < 8; i++)
|
||||||
|
{
|
||||||
|
queue_initialize(q + i);
|
||||||
|
}
|
||||||
|
|
||||||
createWarps(NUM_WARPS, NUM_THREADS, matAddition, x, y, z);
|
createWarps(NUM_WARPS, NUM_THREADS, matAddition, x, y, z);
|
||||||
|
|
||||||
while(!queue_isEmpty()) {}
|
wait_for_done(NUM_WARPS);
|
||||||
|
|
||||||
|
print_consol("-------------------------\n");
|
||||||
|
print_consol("FINAL Z\n");
|
||||||
|
for (int i = 0; i < 16; i++)
|
||||||
|
{
|
||||||
|
int_print(i);
|
||||||
|
print_consol(": ");
|
||||||
|
int_print(z[i]);
|
||||||
|
print_consol("\n");
|
||||||
|
}
|
||||||
|
print_consol("-------------------------------\n");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -44,14 +60,11 @@ void matAddition(unsigned tid, unsigned wid)
|
|||||||
|
|
||||||
unsigned i = (wid * NUM_THREADS) + tid;
|
unsigned i = (wid * NUM_THREADS) + tid;
|
||||||
|
|
||||||
__if((i < 10))
|
__if((i < 11))
|
||||||
z_ptr[i] = x_ptr[i] + y_ptr[i];
|
z_ptr[i] = x_ptr[i] + y_ptr[i];
|
||||||
__else
|
__else
|
||||||
__end_if
|
__end_if
|
||||||
|
|
||||||
|
|
||||||
sleep((50 * (wid + wid))+100);
|
|
||||||
|
|
||||||
return;
|
return;
|
||||||
|
|
||||||
}
|
}
|
||||||
File diff suppressed because it is too large
Load Diff
Binary file not shown.
@@ -1,104 +1,189 @@
|
|||||||
:0200000480007A
|
:0200000480007A
|
||||||
:1000000037F1FF7FEF00401DEF0080067300000016
|
:1000000037F1FF7FEF00C00C73000000938B0600F8
|
||||||
:10001000938B0600130C0700938C0700130F01004D
|
:10001000130C0700938C0700130D0800130F010049
|
||||||
:100020009303050013051000635C75001301018044
|
:100020009303050013051000635C75001301018044
|
||||||
:10003000130305006B500300130515006FF0DFFE7E
|
:10003000130305006B500300130515006FF0DFFE7E
|
||||||
:1000400013010F0013050000930F0600938D0300AA
|
:1000400013010F0013050000930F0600938D0300AA
|
||||||
:10005000EBE0BF01170500001305052F6B400500FD
|
:10005000EBE0BF01170500001305854B6B40050061
|
||||||
:1000600017030000130303FB6B000300678000000D
|
:10006000B708010023A0B800678000001703000054
|
||||||
:10007000130101FF23261100232481001304010131
|
:10007000130303FA6B00030067800000130141FFC4
|
||||||
:10008000B707008193870700370700811307C76010
|
:10008000232011002322B100834505006388050069
|
||||||
:10009000B70600819386C65C370600801306460DBE
|
:10009000EFF01FFD130515006FF01FFF8320010017
|
||||||
:1000A0009305700013053000EF00803D1300000041
|
:1000A000832541001301C10067800000130141FF57
|
||||||
:1000B000EF00C02593070500E38C07FE93070000BF
|
:1000B000232011002322B10093050503EFF05FFA1E
|
||||||
:1000C000138507008320C10003248100130101016F
|
:1000C00083200100832541001301C10067800000E7
|
||||||
:1000D00067800000130101FC232E1102232C8102F2
|
:1000D000130101FE232E1100232C810013040102C1
|
||||||
:1000E000130401042326A4FC2324B4FCEF000045E0
|
:1000E000232604FE6F00C0040327C4FE9307070005
|
||||||
:1000F0002326A4FEEF0000472324A4FEEF000049BE
|
:1000F00093973700B387E74093972700B387E7408C
|
||||||
:100100002322A4FE032784FC93070700939737005C
|
:10010000139747003307F74093172700138707001B
|
||||||
:10011000B387E7400327C4FCB307F7002320F4FEAE
|
:10011000B707008193874724B307F70013850700CB
|
||||||
:10012000832704FE93B7A70093C71700A30FF4FC1F
|
:10012000EF00401C8327C4FE938717002326F4FEAC
|
||||||
:100130008347F4FD138F0700B7070080938F47199B
|
:100130000327C4FE93077000E3D8E7FAB7070081EE
|
||||||
:100140006B200F007B70FF01832704FE939727002D
|
:100140009387871F3707008113078717B70600813A
|
||||||
:100150000327C4FEB307F70083A60700832704FE26
|
:1001500093868613370600801306062193058000D8
|
||||||
:1001600093972700032784FEB307F70003A7070030
|
:1001600013052000EF00406013052000EF00007130
|
||||||
:10017000832704FE93972700032644FEB307F60067
|
:10017000B707008113850704EFF05FF0B707008130
|
||||||
:100180003387E60023A0E700B7070080138E8719A6
|
:100180001385C705EFF09FEF232404FE6F004005A1
|
||||||
:1001900067000E00130000006B300000832784FC12
|
:10019000832784FE13850700EF00002BB70700813B
|
||||||
:1001A000138717009307070093971700B387E7009B
|
:1001A00013858706EFF09FEDB7070081032784FED4
|
||||||
:1001B00093973700B387E7009397270013850700CD
|
:1001B000131727009387871FB307F70083A707004C
|
||||||
:1001C000EF000028130000008320C10303248103F3
|
:1001C00013850700EF004028B70700811385C70695
|
||||||
:1001D000130101046780000097020001938282E608
|
:1001D000EFF0DFEA832784FE938717002324F4FEE1
|
||||||
:1001E000130300009303700023A0620023A26200A7
|
:1001E000032784FE9307F000E3D4E7FAB707008102
|
||||||
:1001F00023A4620023A6720023A862006780000087
|
:1001F00013850707EFF09FE89307000013850700BA
|
||||||
:1002000097020001938202E403A382001303130008
|
:100200008320C101032481011301010267800000E2
|
||||||
:1002100023A462001383420183AE420093935E00E5
|
:10021000130101FC232E1102232C8102130401047B
|
||||||
:1002200033037300032E05002320C301032E450072
|
:100220002326A4FC2324B4FCEF00C06E2326A4FEE6
|
||||||
:100230002322C301032E85002324C301032EC500FE
|
:10023000EF00C0702324A4FEEF00C0722322A4FEAE
|
||||||
:100240002326C301032E05012328C301032E4501E4
|
:10024000832784FC939737000327C4FCB307F70088
|
||||||
:10025000232AC301032E8501232CC301938E1E0084
|
:100250002320F4FE832704FE93B7B70093C717004B
|
||||||
:10026000130F20036394EE01930E000023A2D2012A
|
:10026000A30FF4FC8347F4FD138F0700B70700804A
|
||||||
:1002700067800000970200019382C2DC03A3820022
|
:10027000938F872C6B200F007B70FF01832704FE78
|
||||||
:100280001303F3FF23A462001383420183AE020031
|
:10028000939727000327C4FEB307F70083A6070050
|
||||||
:10029000930F2003138F0E00130F1F006314FF0131
|
:10029000832704FE93972700032784FEB307F70004
|
||||||
:1002A000130F000023A0E20193935E003303730059
|
:1002A00003A70700832704FE93972700032644FE35
|
||||||
:1002B000032E03002320C501032E43002322C50182
|
:1002B000B307F6003387E60023A0E700B707008006
|
||||||
:1002C000032E83002324C501032EC3002326C5016A
|
:1002C000138EC72C67000E00130000006B30000077
|
||||||
:1002D000032E03012328C501032E4301232AC50150
|
:1002D000130000008320C1030324810313010104E0
|
||||||
:1002E000032E8301232CC5016780000097020001C3
|
:1002E0006780000093020500130300009303700071
|
||||||
:1002F000938242D503A3820013050000130E20034E
|
:1002F00023A0620023A2620023A4620023A672004E
|
||||||
:1003000063146E001305150067800000970200015A
|
:1003000023A86200678000009302050003A3820017
|
||||||
:10031000938242D303A3820013050000130E000052
|
:100310001303130023A462001383420183AE42003F
|
||||||
:1003200063146E001305150067800000970200013A
|
:1003200093935E003303730003AE05002320C301E3
|
||||||
:10033000938242D103A3C20083A3020133B56300B9
|
:1003300003AE45002322C30103AE85002324C3017D
|
||||||
:1003400067800000130101FD23261102232481028E
|
:1003400003AEC5002326C30103AE05012328C30164
|
||||||
:1003500013040103EFF09FFB93070500638407007C
|
:1003500003AE4501232AC30103AE8501232CC3014B
|
||||||
:1003600073000000930744FD13850700EFF09FF032
|
:1003600003AEC501232EC301938E1E00130F20037D
|
||||||
:100370008327C4FD13810700032584FD832544FDE5
|
:100370006394EE01930E000023A2D2016780000077
|
||||||
:10038000032604FE832644FE032784FE8327C4FE3F
|
:100380009302050003A382001303F3FF23A462007A
|
||||||
:10039000EFF01FC873000000130000008320C102AB
|
:100390001383420183AE0200930F2003138F0E00DC
|
||||||
:1003A000032481021301010367800000130101FD92
|
:1003A000130F1F006314FF01130F000023A0E201CD
|
||||||
:1003B000232611022324810213040103930901005F
|
:1003B00093935E0033037300032E030023A0C50153
|
||||||
:1003C0006F000005B70700819387070483A7070123
|
:1003C000032E430023A2C501032E830023A4C501ED
|
||||||
:1003D00013871700B70700819387070423A8E70056
|
:1003D000032EC30023A6C501032E030123A8C501D4
|
||||||
:1003E000930744FD13850700EFF0DFE88327C4FD82
|
:1003E000032E430123AAC501032E830123ACC501BB
|
||||||
:1003F00013810700032584FD832544FD032604FEA5
|
:1003F000032EC30123AEC5016780000093020500F0
|
||||||
:10040000832644FE032784FE8327C4FEEFF05FC5E6
|
:1004000003A3820013050000130E200363146E0083
|
||||||
:10041000EFF0DFEF9307050063980700EFF01FF19F
|
:1004100013051500678000009302050003A3820006
|
||||||
:1004200093070500E39007FA138109001300000009
|
:1004200013050000130E000063146E001305150081
|
||||||
:100430008320C102032481021301010367800000AD
|
:10043000678000009302050003A3C20083A30201AA
|
||||||
:10044000130101FD2326810213040103232EA4FCC2
|
:1004400033B5630067800000130101FD232611020C
|
||||||
:10045000232604FE6F0000018327C4FE9387170044
|
:100450002324810213040103232EA4FC0327C4FDDB
|
||||||
:100460002326F4FE0327C4FE8327C4FDE346F7FEDC
|
:100460009307F00063E4E702B70700810327C4FDA8
|
||||||
:10047000130000000324C102130101036780000080
|
:10047000131727009387871BB307F70083A707008D
|
||||||
:10048000130101FB23261104232481041304010515
|
:1004800013850700EFF09FBF6F004007930700023E
|
||||||
:100490002326A4FC2324B4FC2322C4FC2320D4FC64
|
:100490002326F4FEA30504FE8327C4FE9387C7FF2B
|
||||||
:1004A000232EE4FA232CF4FA13090100232604FE78
|
:1004A0000327C4FDB357F70093F7F7002322F4FEA8
|
||||||
:1004B0006F00C005B709FFFF330131018327C4FE78
|
:1004B000832744FE6386070093071000A305F4FE1C
|
||||||
:1004C0002328F4FC832784FC232AF4FC93070100EF
|
:1004C0008347B4FE63820702B7070081032744FE17
|
||||||
:1004D000232CF4FC832744FC232EF4FC832704FC08
|
:1004D000131727009387871BB307F70083A707002D
|
||||||
:1004E0002320F4FE8327C4FB2322F4FE832784FB0E
|
:1004E00013850700EFF09FB98327C4FE9387C7FFEA
|
||||||
:1004F0002324F4FE930704FD13850700EFF05FD07B
|
:1004F0002326F4FE8327C4FEE340F0FA8320C102E2
|
||||||
:100500008327C4FE938717002326F4FE0327C4FE27
|
:10050000032481021301010367800000130101FD30
|
||||||
:100510008327C4FCE360F7FA13010900EFF01FE939
|
:1005100023261102232481022322A10313040103B1
|
||||||
:1005200013054006EFF0DFF1130000008320C10443
|
:1005200013070D009307070093973700B387E74041
|
||||||
:10053000032481041301010567800000130101FFFA
|
:1005300093972700B387E740139747003307F740A7
|
||||||
:1005400023268100232471011304010193870B00EA
|
:100540009317270013870700B70700819387472475
|
||||||
:10055000138507000324C100832B810013010101CF
|
:10055000B307F70013850700EFF01FEC93070500C2
|
||||||
:1005600067800000130101FF2326810023248101FD
|
:1005600063820704B70700811385470DEFF01FB1C1
|
||||||
:100570001304010193070C00138507000324C10035
|
:1005700093070D0013850700EFF01FEDB70700810B
|
||||||
:10058000032C81001301010167800000130101FFAA
|
:100580001385C70DEFF09FAF13070D00B73700813C
|
||||||
:1005900023268100232491011304010193870C0079
|
:100590009387474EB307F700130710002380E70047
|
||||||
:1005A000138507000324C100832C8100130101017E
|
:1005A0007300000013070D009307070093973700AF
|
||||||
:0405B0006780000060
|
:1005B000B387E74093972700B387E7401397470037
|
||||||
|
:1005C0003307F7409317270013870700B707008109
|
||||||
|
:1005D00093874724B307F700130704FD930507002B
|
||||||
|
:1005E00013850700EFF0DFD9832784FD138107000F
|
||||||
|
:1005F000032544FD832504FD0326C4FD832604FE54
|
||||||
|
:10060000032744FE832784FE0328C4FEEFF01FA0C7
|
||||||
|
:1006100073000000130000008320C1020324810244
|
||||||
|
:10062000032D41021301010367800000130101FC47
|
||||||
|
:10063000232E1102232C81021304010493090100CB
|
||||||
|
:10064000232604FE6F00000C0327C4FE9307070057
|
||||||
|
:1006500093973700B387E74093972700B387E74026
|
||||||
|
:10066000139747003307F7409317270013870700B6
|
||||||
|
:10067000B707008193874724B307F7001385070066
|
||||||
|
:10068000EFF09FD993070500639807060327C4FE80
|
||||||
|
:100690009307070093973700B387E74093972700A6
|
||||||
|
:1006A000B387E740139747003307F74093172700B6
|
||||||
|
:1006B00013870700B707008193874724B307F70024
|
||||||
|
:1006C0001307C4FC9305070013850700EFF05FCB09
|
||||||
|
:1006D000832744FD13810700032504FD8325C4FC03
|
||||||
|
:1006E000032684FD8326C4FD032704FE832744FEDE
|
||||||
|
:1006F000032884FEEFF09F978327C4FE938717009B
|
||||||
|
:100700002326F4FE0327C4FE93076000E3DEE7F22E
|
||||||
|
:1007100013810900130000008320C1030324810317
|
||||||
|
:100720001301010467800000130101FD23268102EB
|
||||||
|
:1007300013040103232EA4FC232604FE6F000001F2
|
||||||
|
:100740008327C4FE938717002326F4FE0327C4FEE5
|
||||||
|
:100750008327C4FDE346F7FE130000000324C10213
|
||||||
|
:100760001301010367800000130101FA232E110415
|
||||||
|
:10077000232C810413040106232EA4FA232CB4FA9B
|
||||||
|
:10078000232AC4FA2328D4FA2326E4FA2324F4FAE9
|
||||||
|
:1007900013090100232604FE232404FE6F00800BAE
|
||||||
|
:1007A000B709FFFF33013101832784FE2324F4FCC2
|
||||||
|
:1007B000832784FB2326F4FC930701002328F4FC01
|
||||||
|
:1007C000832744FB232AF4FC832704FB232CF4FC1B
|
||||||
|
:1007D0008327C4FA232EF4FC832784FA2320F4FE13
|
||||||
|
:1007E0008327C4FE2322F4FE0327C4FE93070700D9
|
||||||
|
:1007F00093973700B387E74093972700B387E74085
|
||||||
|
:10080000139747003307F740931727001387070014
|
||||||
|
:10081000B707008193874724B307F700130784FCC9
|
||||||
|
:100820009305070013850700EFF01FAE8327C4FE72
|
||||||
|
:10083000938717002326F4FE0327C4FE9307600066
|
||||||
|
:1008400063D4E700232604FE832784FE93871700E2
|
||||||
|
:100850002324F4FE032784FE8327C4FBE362F7F41A
|
||||||
|
:1008600013010900EFF09FDC130000008320C10595
|
||||||
|
:10087000032481051301010667800000130101FDB7
|
||||||
|
:10088000232611022324810213040103232EA4FC36
|
||||||
|
:10089000B70700811385070EEFF04FFEA30704FE94
|
||||||
|
:1008A0006F00000593071000A307F4FE232404FE45
|
||||||
|
:1008B0006F0040038347F4FE373700819306474EAD
|
||||||
|
:1008C000032784FE3387E60003470700B3F7E700FA
|
||||||
|
:1008D000B337F000A307F4FE832784FE9387170045
|
||||||
|
:1008E0002324F4FE832784FE0327C4FDE3E4E7FC0E
|
||||||
|
:1008F0008347F4FE93C7170093F7F70FE39407FAC3
|
||||||
|
:10090000130000008320C1020324810213010103AC
|
||||||
|
:1009100067800000130101FF232681002324710159
|
||||||
|
:100920001304010193870B00138507000324C10002
|
||||||
|
:10093000832B81001301010167800000130101FF77
|
||||||
|
:1009400023268100232481011304010193070C0055
|
||||||
|
:10095000138507000324C100032C8100130101014A
|
||||||
|
:1009600067800000130101FF2326810023249101E9
|
||||||
|
:100970001304010193870C00138507000324C100B1
|
||||||
|
:0C098000832C810013010101678000003E
|
||||||
:02000004810079
|
:02000004810079
|
||||||
:1005CC000100000001000000060000000000000017
|
:10000000300000003100000032000000330000002A
|
||||||
:1005DC000300000001000000010000000200000008
|
:10001000340000003500000036000000370000000A
|
||||||
:1005EC0000000000030000000600000007000000EF
|
:10002000380000003900000061000000620000009C
|
||||||
:1005FC0005000000070000000700000009000000D3
|
:10003000630000006400000065000000660000002E
|
||||||
:10060C0000000000020000000200000000000000DA
|
:100040002D2D2D2D2D2D2D2D2D2D2D2D2D2D2D2DE0
|
||||||
:10061C0005000000000000000100000001000000C7
|
:100050002D2D2D2D2D2D2D2D2D0A000046494E41E3
|
||||||
:10062C0004000000020000000000000000000000B8
|
:100060004C205A0A000000003A2000000A0000005C
|
||||||
:10063C0003000000020000000300000002000000A4
|
:100070002D2D2D2D2D2D2D2D2D2D2D2D2D2D2D2DB0
|
||||||
|
:100080002D2D2D2D2D2D2D2D2D2D2D2D2D2D2D0AC3
|
||||||
|
:1000900000000000300000003100000032000000CD
|
||||||
|
:1000A000330000003400000035000000360000007E
|
||||||
|
:1000B0003700000038000000390000006100000037
|
||||||
|
:1000C00062000000630000006400000065000000A2
|
||||||
|
:1000D00066000000646F6E653A2000000A000000B0
|
||||||
|
:1000E00061626F757420746F207761697420666F28
|
||||||
|
:0800F0007220646F6E650A00C6
|
||||||
|
:1000F8000000008104000081080000810C000081DC
|
||||||
|
:100108001000008114000081180000811C0000818B
|
||||||
|
:100118002000008124000081280000812C0000813B
|
||||||
|
:100128003000008134000081380000813C000081EB
|
||||||
|
:1001380001000000010000000100000001000000B3
|
||||||
|
:1001480001000000010000000100000001000000A3
|
||||||
|
:100158000100000001000000010000000100000093
|
||||||
|
:100168000100000001000000010000000100000083
|
||||||
|
:100178000000000001000000020000000300000071
|
||||||
|
:100188000400000005000000060000000700000051
|
||||||
|
:1001980008000000090000000A0000000B00000031
|
||||||
|
:1001A8000C0000000D0000000E0000000F00000011
|
||||||
|
:1001B80094000081980000819C000081A0000081CB
|
||||||
|
:1001C800A4000081A8000081AC000081B00000817B
|
||||||
|
:1001D800B4000081B8000081BC000081C00000812B
|
||||||
|
:1001E800C4000081C8000081CC000081D0000081DB
|
||||||
:040000058000000077
|
:040000058000000077
|
||||||
:00000001FF
|
:00000001FF
|
||||||
|
|||||||
@@ -1,22 +1,49 @@
|
|||||||
#include "lib.h"
|
#include "lib.h"
|
||||||
|
|
||||||
|
|
||||||
extern void createThreads(unsigned, unsigned, unsigned, unsigned *, unsigned *, unsigned *);
|
extern void createThreads(unsigned, unsigned, unsigned, unsigned *, unsigned *, unsigned *, unsigned);
|
||||||
extern void wspawn(unsigned, unsigned, unsigned, unsigned *, unsigned *, unsigned *);
|
extern void wspawn(unsigned, unsigned, unsigned, unsigned *, unsigned *, unsigned *, unsigned);
|
||||||
|
extern void print_consol(char *);
|
||||||
|
extern void printc(char);
|
||||||
|
|
||||||
|
|
||||||
|
// Prints f to the console as lowercase hexadecimal with no "0x" prefix and
// no leading zeros, by emitting one hextoa[] digit string per nibble through
// print_consol().
//   f - value to print; only its low 32 bits are examined (sf starts at 32).
void int_print(unsigned f)
|
||||||
|
{
|
||||||
|
if (f < 16) // single-digit fast path; also the only path that prints "0"
|
||||||
|
{
|
||||||
|
print_consol(hextoa[f]);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
int temp; // nibble currently being examined
|
||||||
|
int sf = 32; // bit position just above the nibble being examined
|
||||||
|
bool start = false; // becomes true once the first non-zero nibble is seen
|
||||||
|
do
|
||||||
|
{
|
||||||
|
temp = (f >> (sf - 4)) & 0xf; // extract nibble, most significant first
|
||||||
|
if (temp != 0) start = true;
|
||||||
|
if (start) print_consol(hextoa[temp]); // leading zero nibbles are skipped
|
||||||
|
sf -= 4;
|
||||||
|
} while(sf > 0);
|
||||||
|
}
|
||||||
|
|
||||||
void reschedule_warps()
|
void reschedule_warps()
|
||||||
{
|
{
|
||||||
|
|
||||||
if (queue_isEmpty())
|
register unsigned curr_warp asm("s10");
|
||||||
|
|
||||||
|
if (queue_isEmpty(q+curr_warp))
|
||||||
{
|
{
|
||||||
|
print_consol("done: ");
|
||||||
|
int_print(curr_warp);
|
||||||
|
print_consol("\n");
|
||||||
|
done[curr_warp] = true;
|
||||||
ECALL;
|
ECALL;
|
||||||
}
|
}
|
||||||
|
|
||||||
Job j;
|
Job j;
|
||||||
queue_dequeue(&j);
|
queue_dequeue(q+curr_warp,&j);
|
||||||
asm __volatile__("mv sp,%0"::"r" (j.base_sp):);
|
asm __volatile__("mv sp,%0"::"r" (j.base_sp):);
|
||||||
createThreads(j.n_threads, j.wid, j.func_ptr, j.x, j.y, j.z);
|
createThreads(j.n_threads, j.wid, j.func_ptr, j.x, j.y, j.z, j.assigned_warp);
|
||||||
|
|
||||||
ECALL;
|
ECALL;
|
||||||
|
|
||||||
@@ -25,16 +52,20 @@ void reschedule_warps()
|
|||||||
void schedule_warps()
|
void schedule_warps()
|
||||||
{
|
{
|
||||||
asm __volatile__("mv s3, sp");
|
asm __volatile__("mv s3, sp");
|
||||||
while (!queue_isEmpty() && queue_availableWarps())
|
|
||||||
{
|
|
||||||
++q.active_warps;
|
|
||||||
Job j;
|
|
||||||
queue_dequeue(&j);
|
|
||||||
|
|
||||||
asm __volatile__("mv sp,%0"::"r" (j.base_sp):);
|
for (int curr_warp = 0; curr_warp < 7; ++curr_warp)
|
||||||
wspawn(j.n_threads, j.wid, j.func_ptr, j.x, j.y, j.z);
|
{
|
||||||
|
if (!queue_isEmpty(q+curr_warp))
|
||||||
|
{
|
||||||
|
Job j;
|
||||||
|
queue_dequeue(q+curr_warp,&j);
|
||||||
|
asm __volatile__("mv sp,%0"::"r" (j.base_sp):);
|
||||||
|
wspawn(j.n_threads, j.wid, j.func_ptr, j.x, j.y, j.z, j.assigned_warp);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
asm __volatile__("mv sp, s3");
|
asm __volatile__("mv sp, s3");
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void sleep(int t)
|
void sleep(int t)
|
||||||
@@ -46,8 +77,8 @@ void sleep(int t)
|
|||||||
|
|
||||||
void createWarps(unsigned num_Warps, unsigned num_threads, FUNC, unsigned * x_ptr, unsigned * y_ptr, unsigned * z_ptr)
|
void createWarps(unsigned num_Warps, unsigned num_threads, FUNC, unsigned * x_ptr, unsigned * y_ptr, unsigned * z_ptr)
|
||||||
{
|
{
|
||||||
|
|
||||||
asm __volatile__("addi s2, sp, 0");
|
asm __volatile__("addi s2, sp, 0");
|
||||||
|
int warp = 0;
|
||||||
for (unsigned i = 0; i < num_Warps; i++)
|
for (unsigned i = 0; i < num_Warps; i++)
|
||||||
{
|
{
|
||||||
asm __volatile__("lui s3, 0xFFFF0");
|
asm __volatile__("lui s3, 0xFFFF0");
|
||||||
@@ -62,23 +93,33 @@ void createWarps(unsigned num_Warps, unsigned num_threads, FUNC, unsigned * x_pt
|
|||||||
j.x = x_ptr;
|
j.x = x_ptr;
|
||||||
j.y = y_ptr;
|
j.y = y_ptr;
|
||||||
j.z = z_ptr;
|
j.z = z_ptr;
|
||||||
|
j.assigned_warp = warp;
|
||||||
|
|
||||||
queue_enqueue(&j);
|
queue_enqueue(q + warp,&j);
|
||||||
|
++warp;
|
||||||
|
if (warp >= 7) warp = 0;
|
||||||
}
|
}
|
||||||
asm __volatile__("addi sp, s2, 0");
|
asm __volatile__("addi sp, s2, 0");
|
||||||
|
|
||||||
|
|
||||||
schedule_warps();
|
schedule_warps();
|
||||||
|
|
||||||
sleep(100);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Prints a notice, then busy-waits until done[0] .. done[num_wait - 1] are
// all true. done[i] is set by reschedule_warps() when warp i finds its job
// queue empty.
//   num_wait - number of leading done[] entries to wait on.
// NOTE(review): done[] is declared with 7 entries and num_wait is not
// bounds-checked here, so callers must pass at most 7.
// NOTE(review): done[] is not volatile; an optimizing build may hoist the
// loads out of the spin loop - confirm the build flags or qualify the array.
void wait_for_done(unsigned num_wait)
|
||||||
|
{
|
||||||
|
print_consol("about to wait for done\n");
|
||||||
|
bool temp = false; // true once every polled flag was set in one full pass
|
||||||
|
while (!temp)
|
||||||
|
{
|
||||||
|
temp = true; // assume finished; any clear flag below resets it
|
||||||
|
for (int i = 0; i < num_wait; i++) // note: signed i compared against unsigned num_wait
|
||||||
|
{
|
||||||
|
temp &= done[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// unsigned get_wid()
|
|
||||||
// {
|
|
||||||
// register unsigned ret asm("s7");
|
|
||||||
// return ret;
|
|
||||||
// }
|
|
||||||
|
|
||||||
unsigned * get_1st_arg(void)
|
unsigned * get_1st_arg(void)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
|
|
||||||
#ifndef __RISCV_GP_
|
#ifndef __RISCV_GP_
|
||||||
#define __RISCV_GP_
|
#define __RISCV_GP_
|
||||||
|
#include <stdbool.h>
|
||||||
#include "queue.h"
|
#include "queue.h"
|
||||||
|
|
||||||
#define WSPAWN asm __volatile__(".word 0x3006b"::);
|
#define WSPAWN asm __volatile__(".word 0x3006b"::);
|
||||||
@@ -29,9 +29,16 @@
|
|||||||
JOIN;
|
JOIN;
|
||||||
|
|
||||||
|
|
||||||
|
static char * hextoa[] = {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f"};
|
||||||
|
static bool done[] = {false, false, false, false, false, false, false};
|
||||||
|
|
||||||
|
static int main_sp[1];
|
||||||
|
|
||||||
#define FUNC void (func)(unsigned, unsigned)
|
#define FUNC void (func)(unsigned, unsigned)
|
||||||
void createWarps(unsigned num_Warps, unsigned num_threads, FUNC, unsigned *, unsigned *, unsigned *);
|
void createWarps(unsigned num_Warps, unsigned num_threads, FUNC, unsigned *, unsigned *, unsigned *);
|
||||||
void reschedule_warps(void);
|
void reschedule_warps(void);
|
||||||
|
void int_print(unsigned);
|
||||||
|
void wait_for_done(unsigned);
|
||||||
|
|
||||||
unsigned * get_1st_arg(void);
|
unsigned * get_1st_arg(void);
|
||||||
unsigned * get_2nd_arg(void);
|
unsigned * get_2nd_arg(void);
|
||||||
|
|||||||
@@ -7,18 +7,18 @@
|
|||||||
.global _start
|
.global _start
|
||||||
_start:
|
_start:
|
||||||
lui sp, 0x7ffff
|
lui sp, 0x7ffff
|
||||||
jal queue_initialize
|
|
||||||
jal main
|
jal main
|
||||||
ecall
|
ecall
|
||||||
|
|
||||||
.type createThreads, @function
|
.type createThreads, @function
|
||||||
.global createThreads
|
.global createThreads
|
||||||
createThreads:
|
createThreads:
|
||||||
mv s7,a3 # Moving x_ptr to s7
|
mv s7 ,a3 # Moving x_ptr to s7
|
||||||
mv s8,a4 # Moving y_ptr to s8
|
mv s8 ,a4 # Moving y_ptr to s8
|
||||||
mv s9,a5 # Moving z_ptr to s9
|
mv s9 ,a5 # Moving z_ptr to s9
|
||||||
mv t5,sp # Saving the current stack pointer to t5
|
mv s10,a6 # Moving assigned_warp to s10
|
||||||
mv t2, a0 # t2 = num_threads
|
mv t5 ,sp # Saving the current stack pointer to t5
|
||||||
|
mv t2 , a0 # t2 = num_threads
|
||||||
loop_init:
|
loop_init:
|
||||||
li a0,1 # i = 0
|
li a0,1 # i = 0
|
||||||
loop_cond:
|
loop_cond:
|
||||||
@@ -39,6 +39,12 @@ loop_done:
|
|||||||
la a0, reschedule_warps
|
la a0, reschedule_warps
|
||||||
.word 0x5406b
|
.word 0x5406b
|
||||||
|
|
||||||
|
# printc: stores the word held in a1 to address 0x00010000 and returns.
# Presumably that address is the simulator's console output port - TODO confirm.
# NOTE(review): the value is taken from a1, not a0; the C prototype
# `extern void printc(char)` would pass its argument in a0, so this routine
# only behaves as intended when called from assembly with a1 preloaded.
# Clobbers a7.
.type printc, @function
|
||||||
|
.global printc
|
||||||
|
printc:
|
||||||
|
la a7, 0x00010000     # a7 = output address
|
||||||
|
sw a1, 0(a7)          # write the value in a1 to that address
|
||||||
|
ret
|
||||||
|
|
||||||
|
|
||||||
.type wspawn, @function
|
.type wspawn, @function
|
||||||
@@ -47,3 +53,34 @@ wspawn:
|
|||||||
la t1, createThreads
|
la t1, createThreads
|
||||||
.word 0x3006b # WSPAWN instruction
|
.word 0x3006b # WSPAWN instruction
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
# print_consol: emits the NUL-terminated byte string at a0, one byte at a
# time, by loading each byte into a1 and calling printc.
#   a0 - in: address of the string; out: address of its terminating NUL.
# ra and a1 are saved on the stack and restored before returning; a7 is
# clobbered by printc and is not preserved.
# NOTE(review): the frame is 12 bytes, which leaves sp not 16-byte aligned -
# confirm this is acceptable for the target ABI.
.type print_consol, @function
|
||||||
|
.global print_consol
|
||||||
|
print_consol:
|
||||||
|
addi sp, sp, -12      # reserve stack space
|
||||||
|
sw ra, 0(sp)          # save return address (overwritten by jal below)
|
||||||
|
sw a1, 4(sp)          # save caller's a1 (used as the byte register)
|
||||||
|
bl:
|
||||||
|
lbu a1,0(a0)          # a1 = next byte of the string
|
||||||
|
beqz a1,be            # stop at the NUL terminator
|
||||||
|
jal printc            # output the byte in a1
|
||||||
|
addi a0, a0, 1        # advance to the next byte
|
||||||
|
j bl
|
||||||
|
be:
|
||||||
|
lw ra, 0(sp)          # restore return address
|
||||||
|
lw a1, 4(sp)          # restore caller's a1
|
||||||
|
addi sp, sp, 12       # release stack space
|
||||||
|
ret
|
||||||
|
|
||||||
|
# print_int: outputs a0 + 48 through printc, i.e. the ASCII digit for a0
# when a0 is in the range 0-9; other values are emitted as whatever
# character code a0 + 48 happens to be.
#   a0 - value to print (left unchanged).
# ra and a1 are saved and restored; a7 is clobbered by printc.
# NOTE(review): the frame is 12 bytes, which leaves sp not 16-byte aligned -
# confirm this is acceptable for the target ABI.
.type print_int, @function
|
||||||
|
.global print_int
|
||||||
|
print_int:
|
||||||
|
addi sp, sp, -12      # reserve stack space
|
||||||
|
sw ra, 0(sp)          # save return address (overwritten by jal below)
|
||||||
|
sw a1, 4(sp)          # save caller's a1
|
||||||
|
addi a1, a0, 48       # a1 = a0 + '0'
|
||||||
|
jal printc            # output the character in a1
|
||||||
|
lw ra, 0(sp)          # restore return address
|
||||||
|
lw a1, 4(sp)          # restore caller's a1
|
||||||
|
addi sp, sp, 12       # release stack space
|
||||||
|
ret
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ typedef struct Job_t
|
|||||||
unsigned * x;
|
unsigned * x;
|
||||||
unsigned * y;
|
unsigned * y;
|
||||||
unsigned * z;
|
unsigned * z;
|
||||||
|
unsigned assigned_warp;
|
||||||
|
|
||||||
} Job;
|
} Job;
|
||||||
|
|
||||||
@@ -32,17 +33,17 @@ typedef struct Queue_t
|
|||||||
|
|
||||||
} Queue;
|
} Queue;
|
||||||
|
|
||||||
Queue q;
|
Queue q[8];
|
||||||
|
|
||||||
void queue_initialize(void);
|
void queue_initialize(Queue *);
|
||||||
|
|
||||||
void queue_enqueue(Job *);
|
void queue_enqueue(Queue *, Job *);
|
||||||
|
|
||||||
void queue_dequeue(Job *);
|
void queue_dequeue(Queue *, Job *);
|
||||||
|
|
||||||
int queue_isFull(void);
|
int queue_isFull(Queue *);
|
||||||
int queue_isEmpty(void);
|
int queue_isEmpty(Queue *);
|
||||||
int queue_availableWarps();
|
int queue_availableWarps(Queue *);
|
||||||
|
|
||||||
|
|
||||||
void func();
|
void func();
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
.type queue_initialize, @function
|
.type queue_initialize, @function
|
||||||
.global queue_initialize
|
.global queue_initialize
|
||||||
queue_initialize:
|
queue_initialize:
|
||||||
la t0, q # loading base address of q
|
mv t0, a0 # loading base address of q
|
||||||
li t1, 0 # to initialize variables
|
li t1, 0 # to initialize variables
|
||||||
li t2, A_WARPS # Num of available warps
|
li t2, A_WARPS # Num of available warps
|
||||||
sw t1, 0 (t0) # start_i
|
sw t1, 0 (t0) # start_i
|
||||||
@@ -23,7 +23,7 @@ queue_initialize:
|
|||||||
.type queue_enqueue, @function
|
.type queue_enqueue, @function
|
||||||
.global queue_enqueue
|
.global queue_enqueue
|
||||||
queue_enqueue:
|
queue_enqueue:
|
||||||
la t0, q # loading base address of q
|
mv t0, a0 # loading base address of q
|
||||||
lw t1, 8 (t0) # t1 = num_j
|
lw t1, 8 (t0) # t1 = num_j
|
||||||
addi t1, t1, 1 # ++t1
|
addi t1, t1, 1 # ++t1
|
||||||
sw t1, 8 (t0) # num_j = t1
|
sw t1, 8 (t0) # num_j = t1
|
||||||
@@ -31,20 +31,22 @@ queue_enqueue:
|
|||||||
lw t4, 4 (t0) # t4 = end_i
|
lw t4, 4 (t0) # t4 = end_i
|
||||||
slli t2, t4, 5 # index * 32 [log(sizeof(job))]
|
slli t2, t4, 5 # index * 32 [log(sizeof(job))]
|
||||||
add t1, t1, t2 # jobs + index
|
add t1, t1, t2 # jobs + index
|
||||||
lw t3, 0 (a0) # wid
|
lw t3, 0 (a1) # wid
|
||||||
sw t3, 0 (t1) #
|
sw t3, 0 (t1) #
|
||||||
lw t3, 4 (a0) # n_threads
|
lw t3, 4 (a1) # n_threads
|
||||||
sw t3, 4 (t1) #
|
sw t3, 4 (t1) #
|
||||||
lw t3, 8 (a0) # base_sp
|
lw t3, 8 (a1) # base_sp
|
||||||
sw t3, 8 (t1) #
|
sw t3, 8 (t1) #
|
||||||
lw t3, 12(a0) # func_ptr
|
lw t3, 12(a1) # func_ptr
|
||||||
sw t3, 12(t1) #
|
sw t3, 12(t1) #
|
||||||
lw t3, 16(a0) # x
|
lw t3, 16(a1) # x
|
||||||
sw t3, 16(t1) #
|
sw t3, 16(t1) #
|
||||||
lw t3, 20(a0) # y
|
lw t3, 20(a1) # y
|
||||||
sw t3, 20(t1) #
|
sw t3, 20(t1) #
|
||||||
lw t3, 24(a0) # z
|
lw t3, 24(a1) # z
|
||||||
sw t3, 24(t1) #
|
sw t3, 24(t1) #
|
||||||
|
lw t3, 28(a1) # assigned_warp
|
||||||
|
sw t3, 28(t1) #
|
||||||
addi t4, t4, 1 # end_i++
|
addi t4, t4, 1 # end_i++
|
||||||
li t5, SIZE # size
|
li t5, SIZE # size
|
||||||
bne t4, t5, ec # if ((q.end_i + 1) == SIZE)
|
bne t4, t5, ec # if ((q.end_i + 1) == SIZE)
|
||||||
@@ -58,7 +60,7 @@ ec:
|
|||||||
.global queue_dequeue
|
.global queue_dequeue
|
||||||
|
|
||||||
queue_dequeue:
|
queue_dequeue:
|
||||||
la t0, q # loading base address of q
|
mv t0, a0 # loading base address of q
|
||||||
lw t1, 8 (t0) # t1 = num_j
|
lw t1, 8 (t0) # t1 = num_j
|
||||||
addi t1, t1, -1 # --t1
|
addi t1, t1, -1 # --t1
|
||||||
sw t1, 8 (t0) # num_j = t1
|
sw t1, 8 (t0) # num_j = t1
|
||||||
@@ -74,26 +76,28 @@ dc:
|
|||||||
slli t2, t4, 5 # index * 32 [log(sizeof(job))]
|
slli t2, t4, 5 # index * 32 [log(sizeof(job))]
|
||||||
add t1, t1, t2 # jobs + index
|
add t1, t1, t2 # jobs + index
|
||||||
lw t3, 0 (t1) # wid
|
lw t3, 0 (t1) # wid
|
||||||
sw t3, 0 (a0) #
|
sw t3, 0 (a1) #
|
||||||
lw t3, 4 (t1) # n_threads
|
lw t3, 4 (t1) # n_threads
|
||||||
sw t3, 4 (a0) #
|
sw t3, 4 (a1) #
|
||||||
lw t3, 8 (t1) # base_sp
|
lw t3, 8 (t1) # base_sp
|
||||||
sw t3, 8 (a0) #
|
sw t3, 8 (a1) #
|
||||||
lw t3, 12(t1) # func_ptr
|
lw t3, 12(t1) # func_ptr
|
||||||
sw t3, 12(a0) #
|
sw t3, 12(a1) #
|
||||||
lw t3, 16(t1) # x
|
lw t3, 16(t1) # x
|
||||||
sw t3, 16(a0) #
|
sw t3, 16(a1) #
|
||||||
lw t3, 20(t1) # y
|
lw t3, 20(t1) # y
|
||||||
sw t3, 20(a0) #
|
sw t3, 20(a1) #
|
||||||
lw t3, 24(t1) # z
|
lw t3, 24(t1) # z
|
||||||
sw t3, 24(a0) #
|
sw t3, 24(a1) #
|
||||||
|
lw t3, 28(t1) # assigned_warp
|
||||||
|
sw t3, 28(a1) #
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
|
||||||
.type queue_isFull, @function
|
.type queue_isFull, @function
|
||||||
.global queue_isFull
|
.global queue_isFull
|
||||||
queue_isFull:
|
queue_isFull:
|
||||||
la t0, q # loading base address of q
|
mv t0, a0 # loading base address of q
|
||||||
lw t1, 8 (t0) # t1 = num_j
|
lw t1, 8 (t0) # t1 = num_j
|
||||||
mv a0, zero # ret_val = 0
|
mv a0, zero # ret_val = 0
|
||||||
li t3, SIZE # t3 = SIZE
|
li t3, SIZE # t3 = SIZE
|
||||||
@@ -107,7 +111,7 @@ qf:
|
|||||||
.type queue_isEmpty, @function
|
.type queue_isEmpty, @function
|
||||||
.global queue_isEmpty
|
.global queue_isEmpty
|
||||||
queue_isEmpty:
|
queue_isEmpty:
|
||||||
la t0, q # loading base address of q
|
mv t0, a0 # loading base address of q
|
||||||
lw t1, 8 (t0) # t1 = num_j
|
lw t1, 8 (t0) # t1 = num_j
|
||||||
mv a0, zero # ret_val = 0
|
mv a0, zero # ret_val = 0
|
||||||
mv t3, zero # t3 = 0
|
mv t3, zero # t3 = 0
|
||||||
@@ -120,7 +124,7 @@ qe:
|
|||||||
.type queue_availableWarps, @function
|
.type queue_availableWarps, @function
|
||||||
.global queue_availableWarps
|
.global queue_availableWarps
|
||||||
queue_availableWarps:
|
queue_availableWarps:
|
||||||
la t0, q # loading base address of q
|
mv t0, a0 # loading base address of q
|
||||||
lw t1, 12(t0) # t1 = total_warps
|
lw t1, 12(t0) # t1 = total_warps
|
||||||
lw t2, 16(t0) # t2 = active_warps
|
lw t2, 16(t0) # t2 = active_warps
|
||||||
sltu a0, t2, t1
|
sltu a0, t2, t1
|
||||||
|
|||||||
@@ -35,6 +35,10 @@ SECTIONS
|
|||||||
PROVIDE (__etext = .);
|
PROVIDE (__etext = .);
|
||||||
PROVIDE (_etext = .);
|
PROVIDE (_etext = .);
|
||||||
PROVIDE (etext = .);
|
PROVIDE (etext = .);
|
||||||
|
PROVIDE (_edata = .);
|
||||||
|
PROVIDE (_end = .);
|
||||||
|
PROVIDE (__global_pointer$ = .);
|
||||||
|
|
||||||
. = 0x81000000;
|
. = 0x81000000;
|
||||||
.rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
|
.rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
|
||||||
.rodata1 : { *(.rodata1) }
|
.rodata1 : { *(.rodata1) }
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
echo start > results.txt
|
echo start > results.txt
|
||||||
|
|
||||||
echo ./riscv_gpgpu/gpgpu_test.hex >> results.txt
|
echo ./riscv_gpgpu/gpgpu_test.hex
|
||||||
./harptool -E -a rv32i --core ./riscv_gpgpu/gpgpu_test.hex -s -b
|
./harptool -E -a rv32i --core ./riscv_gpgpu/gpgpu_test.hex -s -b
|
||||||
|
|||||||
Reference in New Issue
Block a user