Merge branch 'main' of https://github.com/ucb-bar/radiance into main
This commit is contained in:
31
radiance.mk
31
radiance.mk
@@ -2,6 +2,7 @@
|
|||||||
# extra variables/targets ingested by the chipyard make system
|
# extra variables/targets ingested by the chipyard make system
|
||||||
##############################################################
|
##############################################################
|
||||||
|
|
||||||
|
VORTEX_SRC_DIR = $(base_dir)/generators/radiance/src/main/resources/vsrc/vortex
|
||||||
RADPIE_SRC_DIR = $(base_dir)/generators/radiance/radpie
|
RADPIE_SRC_DIR = $(base_dir)/generators/radiance/radpie
|
||||||
RADPIE_BUILD_DIR = $(RADPIE_SRC_DIR)/target/release
|
RADPIE_BUILD_DIR = $(RADPIE_SRC_DIR)/target/release
|
||||||
|
|
||||||
@@ -10,13 +11,14 @@ RADPIE_BUILD_DIR = $(RADPIE_SRC_DIR)/target/release
|
|||||||
##################################################################
|
##################################################################
|
||||||
|
|
||||||
# EXTRA_SIM_REQS += radpie
|
# EXTRA_SIM_REQS += radpie
|
||||||
EXTRA_SIM_LDFLAGS += -L$(RADPIE_BUILD_DIR) -Wl,-rpath,$(RADPIE_BUILD_DIR) -lradpie
|
# EXTRA_SIM_LDFLAGS += -L$(RADPIE_BUILD_DIR) -Wl,-rpath,$(RADPIE_BUILD_DIR) -lradpie
|
||||||
ifeq ($(shell echo $(CONFIG) | grep -E "SynConfig$$"),$(CONFIG))
|
ifeq ($(shell echo $(CONFIG) | grep -E "SynConfig$$"),$(CONFIG))
|
||||||
EXTRA_SIM_PREPROC_DEFINES += +define+SYNTHESIS +define+NDEBUG +define+DPI_DISABLE
|
EXTRA_SIM_PREPROC_DEFINES += +define+SYNTHESIS +define+NDEBUG +define+DPI_DISABLE
|
||||||
endif
|
endif
|
||||||
EXTRA_SIM_PREPROC_DEFINES += \
|
EXTRA_SIM_PREPROC_DEFINES += \
|
||||||
+define+SIMULATION \
|
+define+SIMULATION \
|
||||||
+define+GPR_RESET \
|
+define+GPR_RESET \
|
||||||
|
+define+GPR_DUPLICATED \
|
||||||
+define+LSU_DUP_DISABLE \
|
+define+LSU_DUP_DISABLE \
|
||||||
+define+DBG_TRACE_CORE_PIPELINE_VCS \
|
+define+DBG_TRACE_CORE_PIPELINE_VCS \
|
||||||
+define+PERF_ENABLE \
|
+define+PERF_ENABLE \
|
||||||
@@ -35,3 +37,30 @@ VCS_NONCC_OPTS += +vcs+initreg+random
|
|||||||
.PHONY: radpie
|
.PHONY: radpie
|
||||||
radpie:
|
radpie:
|
||||||
cd $(RADPIE_SRC_DIR) && cargo build --release
|
cd $(RADPIE_SRC_DIR) && cargo build --release
|
||||||
|
|
||||||
|
# Make the simulator build depend on the vortex_vsrc.$(CONFIG) stamp target so
# that edited Vortex sources are synced into gen-collateral/ first.
EXTRA_SIM_REQS += vortex_vsrc.$(CONFIG)

# doesn't work if we use $(call lookup_srcs) from common.mk, the variable
# doesn't expand somehow
#
# Collect every Vortex Verilog/SystemVerilog source (*.sv, *.vh, *.v) under
# VORTEX_SRC_DIR, following symlinks. `fd` is used when it is installed;
# otherwise fall back to `find` so the list is not silently left empty (an
# empty list would turn the sync rule into a no-op).
# `:=` so the directory scan runs once at parse time, not on every expansion.
ifneq ($(shell which fd 2> /dev/null),)
VORTEX_VLOG_SOURCES := $(shell fd -L -t f -e "sv" -e "vh" -e "v" . $(VORTEX_SRC_DIR))
else
# Hidden files are excluded to stay close to fd's default behaviour.
VORTEX_VLOG_SOURCES := $(shell find -L $(VORTEX_SRC_DIR) -type f \( -name '*.sv' -o -name '*.vh' -o -name '*.v' \) ! -name '.*' 2> /dev/null)
endif
|
||||||
|
# VORTEX_COLLATERAL := $(patsubst $(VORTEX_SRC_DIR)%,$(GEN_COLLATERAL_DIR)%,$(VORTEX_VLOG_SOURCES))
|
||||||
|
# check if expanded
|
||||||
|
# $(info VORTEX_VLOG_SOURCES: $(VORTEX_VLOG_SOURCES))
|
||||||
|
|
||||||
|
# For every Vortex verilog source file, if there's a matching file in
# gen-collateral/, copy them over. This is a hacky way to ensure the changes
# in the verilog sources are reflected before Verilator/VCS kicks in. This is
# necessary when common.mk does not trigger chipyard jar rebuild upon verilog
# source updates, in which case we need to manually ensure the up-to-date-ness
# of gen-collateral/.
#
# Files are matched by basename only; a source with no same-named file already
# present in gen-collateral/ is left alone. The target itself is a stamp file
# that is touched after the sync.
#
# `cmp -s` replaces `diff ... &>/dev/null`: `&>` is a bash-only redirection
# that a POSIX sh parses as "run in background, then redirect nothing", which
# makes the condition always true. `cmp -s` is silent and portable, and like
# diff it exits non-zero when the files differ.
vortex_vsrc.$(CONFIG): $(VORTEX_VLOG_SOURCES)
	@for file in $(VORTEX_VLOG_SOURCES); do \
		dest="$(GEN_COLLATERAL_DIR)/$$(basename "$$file")"; \
		if [ -f "$$dest" ] && ! cmp -s "$$file" "$$dest"; then \
			cp -v "$$file" "$$dest"; \
		fi; \
	done
	touch $@
|
||||||
|
|||||||
1392
src/main/resources/vsrc/TensorDotProductUnit.sv
Normal file
1392
src/main/resources/vsrc/TensorDotProductUnit.sv
Normal file
File diff suppressed because it is too large
Load Diff
Submodule src/main/resources/vsrc/vortex updated: d624b3e50a...a47389fc0e
226
src/main/scala/radiance/core/TensorDPU.scala
Normal file
226
src/main/scala/radiance/core/TensorDPU.scala
Normal file
@@ -0,0 +1,226 @@
|
|||||||
|
// See LICENSE.SiFive for license details.
|
||||||
|
// See LICENSE.Berkeley for license details.
|
||||||
|
|
||||||
|
package radiance.core
|
||||||
|
|
||||||
|
import chisel3._
|
||||||
|
import chisel3.util._
|
||||||
|
import freechips.rocketchip.tile
|
||||||
|
|
||||||
|
// Implements the four-element dot product (FEDP) unit in Volta Tensor Cores.
|
||||||
|
// Wrapper around DotProductPipe with fLen-bit (32-bit) operand and result
// ports. Inputs pass through recode/unbox and the result through box/ieee,
// all inherited from tile.HasFPUParameters.
// NOTE(review): presumably these convert between IEEE-754 single precision and
// hardfloat's recoded format -- confirm against rocket-chip's HasFPUParameters.
class TensorDotProductUnit extends Module with tile.HasFPUParameters {
|
||||||
|
// Members required by HasFPUParameters, all fixed to 32.
val fLen = 32
|
||||||
|
val minFLen = 32
|
||||||
|
def xLen = 32
|
||||||
|
// Number of (a, b) element pairs per dot product.
val dotProductDim = 4
|
||||||
|
|
||||||
|
// in:    valid-qualified operands (vectors a and b, scalar addend c)
// stall: forwarded unchanged to the inner pipeline
// out:   valid-qualified result
val io = IO(new Bundle {
|
||||||
|
val in = Flipped(Valid(new Bundle {
|
||||||
|
val a = Vec(dotProductDim, Bits((fLen).W))
|
||||||
|
val b = Vec(dotProductDim, Bits((fLen).W))
|
||||||
|
val c = Bits((fLen).W)
|
||||||
|
}))
|
||||||
|
val stall = Input(Bool())
|
||||||
|
val out = Valid(new Bundle {
|
||||||
|
val data = Bits((fLen).W)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
// Float type whose exp/sig widths parameterize the inner pipeline.
val t = tile.FType.S
|
||||||
|
|
||||||
|
// Convert each operand with recode followed by unbox before it enters the
// pipeline.
val in1 = io.in.bits.a.map(x => unbox(recode(x, S), S, Some(tile.FType.S)))
|
||||||
|
val in2 = io.in.bits.b.map(x => unbox(recode(x, S), S, Some(tile.FType.S)))
|
||||||
|
val in3 = unbox(recode(io.in.bits.c, S), S, Some(tile.FType.S))
|
||||||
|
|
||||||
|
// The pipeline that does the actual arithmetic.
val dpu = Module(new DotProductPipe(dotProductDim, t.exp, t.sig))
|
||||||
|
dpu.io.in.valid := io.in.valid
|
||||||
|
dpu.io.in.bits.a := in1
|
||||||
|
dpu.io.in.bits.b := in2
|
||||||
|
dpu.io.in.bits.c := in3
|
||||||
|
dpu.io.stall := io.stall
|
||||||
|
|
||||||
|
io.out.valid := dpu.io.out.valid
|
||||||
|
// Convert the pipeline result back to the fLen-bit output with box then ieee.
io.out.bits.data := ieee(box(dpu.io.out.bits.data, S))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copied from chisel3.util.Pipe.
|
||||||
|
// Module form of the StallingPipe factory: a valid-qualified pipeline whose
// registers are gated by a stall input.
//   gen     - Chisel type of the payload carried through the pipe
//   latency - number of cycles of delay, passed straight to the factory
class StallingPipe[T <: Data](val gen: T, val latency: Int = 1) extends Module {
|
||||||
|
/** A non-ambiguous name of this `StallingPipe` for use in generated Verilog
  * names. Built from the simple class name, the latency cycle count, and the
  * parameterized generator's `typeName`, presumably giving something like
  * `StallingPipe4_UInt4`.
  */
|
||||||
|
override def desiredName = s"${simpleClassName(this.getClass)}${latency}_${gen.typeName}"
|
||||||
|
|
||||||
|
// stall: forwarded to the factory, where it gates the stage registers
// enq:   incoming valid/bits pair
// deq:   outgoing valid/bits pair
class StallingPipeIO extends Bundle {
|
||||||
|
val stall = Input(Bool())
|
||||||
|
val enq = Input(Valid(gen))
|
||||||
|
val deq = Output(Valid(gen))
|
||||||
|
}
|
||||||
|
|
||||||
|
val io = IO(new StallingPipeIO)
|
||||||
|
|
||||||
|
// All logic lives in the companion object's apply.
io.deq <> StallingPipe(io.stall, io.enq, latency)
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Factory for stallable, valid-qualified pipeline stages, modelled on
  * `chisel3.util.Pipe`.
  *
  * While `stall` is high every stage register holds its value, so the whole
  * pipe freezes. While `stall` is low, the valid bit advances every cycle and
  * the payload register is updated only when the incoming valid bit is set.
  */
object StallingPipe {
  import chisel3.experimental.prefix

  /** Delays `enqValid`/`enqBits` by `latency` stallable register stages.
    *
    * @param stall    when high, no stage register is updated
    * @param enqValid valid bit entering the pipe
    * @param enqBits  payload entering the pipe
    * @param latency  number of register stages; must be >= 0. Zero yields a
    *                 combinational pass-through that ignores `stall`.
    * @return the valid/bits pair leaving the last stage
    */
  def apply[T <: Data](stall: Bool, enqValid: Bool, enqBits: T, latency: Int): Valid[T] = {
    require(latency >= 0, "StallingPipe latency must be greater than or equal to zero!")
    if (latency == 0) {
      // Base case: just wrap the inputs in a Valid wire.
      val out = Wire(Valid(chiselTypeOf(enqBits)))
      out.valid := enqValid
      out.bits := enqBits
      out
    } else {
      prefix("stalling_pipe") {
        // Valid resets to false so nothing spurious leaves the pipe after reset.
        val v = RegEnable(enqValid, false.B, !stall)
        // Payload is only captured for valid entries while not stalled.
        val b = RegEnable(enqBits, !stall && enqValid)
        // Chain the remaining stages behind this one, sharing the same stall.
        apply(stall, v, b, latency - 1)
      }
    }
  }

  /** Single-stage convenience overload (latency fixed to 1). */
  def apply[T <: Data](stall: Bool, enqValid: Bool, enqBits: T): Valid[T] = {
    apply(stall, enqValid, enqBits, 1)
  }

  /** Overload taking the input as a `Valid` bundle; `latency` defaults to 1. */
  def apply[T <: Data](stall: Bool, enq: Valid[T], latency: Int = 1): Valid[T] = {
    apply(stall, enq.valid, enq.bits, latency)
  }
}
|
||||||
|
|
||||||
|
// Computes d = a(0)*b(0) + ... + a(3)*b(3) + c.
// Fully pipelined with a fixed latency of 4 cycles: one multiply stage, two
// pairwise-add stages and a final accumulate-with-c stage. Every stage
// register is a StallingPipe sharing io.stall, so the whole pipeline freezes
// together while stall is high.
//
// Operands and result are (expWidth + sigWidth + 1) bits wide and are fed
// directly to hardfloat's MulRecFN/AddRecFN units.
// Rounding mode and tininess detection are currently hard-wired, and the
// exception flags of the hardfloat units are not propagated.
class DotProductPipe(dim: Int, expWidth: Int, sigWidth: Int) extends Module {
  require(dim == 4, "DPU currently only supports dimension 4")

  val recFLen = expWidth + sigWidth + 1
  val io = IO(new Bundle {
    val in = Flipped(Valid(new Bundle {
      // Sized by `dim` rather than a literal 4 so the ports cannot drift from
      // the constructor parameter.
      val a = Vec(dim, Bits((recFLen).W))
      val b = Vec(dim, Bits((recFLen).W))
      val c = Bits((recFLen).W)
      // val roundingMode = UInt(3.W)
      // val detectTininess = UInt(1.W)
    }))
    val stall = Input(Bool())
    val out = Valid(new Bundle {
      val data = Bits((recFLen).W)
    })
  })

  // One multiplier per (a(i), b(i)) pair.
  val mul = Seq.fill(dim)(Module(new hardfloat.MulRecFN(expWidth, sigWidth)))
  mul.zipWithIndex.foreach { case (m, i) =>
    // FIXME: these settings are arbitrary
    m.io.roundingMode := hardfloat.consts.round_near_even
    m.io.detectTininess := hardfloat.consts.tininess_afterRounding
    m.io.a := io.in.bits.a(i)
    m.io.b := io.in.bits.b(i)
  }

  val mulStageOut = StallingPipe(io.stall, io.in.valid, VecInit(mul.map(_.io.out)))
  // c is not needed until the last stage; carry it alongside the products.
  val mulStageC = StallingPipe(io.stall, io.in.valid, io.in.bits.c)

  // mul stage end -------------------------------------------------------------

  // First adder level: sums neighbouring products (0+1, 2+3).
  val add1 = Seq.fill(dim / 2)(Module(new hardfloat.AddRecFN(expWidth, sigWidth)))
  add1.zipWithIndex.foreach { case (a, i) =>
    a.io.subOp := false.B // addition, not subtraction
    a.io.a := mulStageOut.bits(2 * i + 0)
    a.io.b := mulStageOut.bits(2 * i + 1)
    a.io.roundingMode := hardfloat.consts.round_near_even
    a.io.detectTininess := hardfloat.consts.tininess_afterRounding
  }

  val add1StageOut = StallingPipe(io.stall, mulStageOut.valid, VecInit(add1.map(_.io.out)))
  val add1StageC = StallingPipe(io.stall, mulStageOut.valid, mulStageC.bits)

  // add1 stage end ------------------------------------------------------------

  // Second adder level: sums the two partial sums.
  val add2 = Module(new hardfloat.AddRecFN(expWidth, sigWidth))
  add2.io.subOp := false.B // addition, not subtraction
  // Elaboration-time sanity check on the adder-tree shape.
  require(add1StageOut.bits.length == 2, "add2 stage expects exactly two partial sums")
  add2.io.a := add1StageOut.bits(0)
  add2.io.b := add1StageOut.bits(1)
  add2.io.roundingMode := hardfloat.consts.round_near_even
  add2.io.detectTininess := hardfloat.consts.tininess_afterRounding

  val add2StageOut = StallingPipe(io.stall, add1StageOut.valid, add2.io.out)
  val add2StageC = StallingPipe(io.stall, add1StageOut.valid, add1StageC.bits)

  // add2 stage end ------------------------------------------------------------

  // Final stage: add the carried-along accumulator input c.
  val acc = Module(new hardfloat.AddRecFN(expWidth, sigWidth))
  acc.io.subOp := false.B // addition, not subtraction
  acc.io.a := add2StageOut.bits
  acc.io.b := add2StageC.bits
  acc.io.roundingMode := hardfloat.consts.round_near_even
  acc.io.detectTininess := hardfloat.consts.tininess_afterRounding

  val accStageOut = StallingPipe(io.stall, add2StageOut.valid, acc.io.out)
  // FIXME: exception output ignored

  // acc stage end -------------------------------------------------------------

  io.out.valid := accStageOut.valid
  io.out.bits.data := accStageOut.bits
}
|
||||||
|
|
||||||
|
// Multiply-add built from hardfloat's MulAddRecFNToRaw_preMul,
// MulAddRecFNToRaw_postMul and RoundRawFNToRecFN, with optional pipeline
// registers between them.
//   latency  - at most 2. Any value > 0 places one register stage between the
//              multiply-add and postMul; exactly 2 places another between
//              postMul and rounding.
//   expWidth, sigWidth - format parameters handed to the hardfloat modules;
//              data ports are (expWidth + sigWidth + 1) bits wide.
class MulAddRecFNPipe(latency: Int, expWidth: Int, sigWidth: Int) extends Module {
|
||||||
|
require(latency <= 2)
|
||||||
|
|
||||||
|
// validin/validout: valid bit entering / leaving the pipeline
// op:               forwarded to preMul's op input
// a, b, c:          operands
// out, exceptionFlags: taken from the rounding module
val io = IO(new Bundle {
|
||||||
|
val validin = Input(Bool())
|
||||||
|
val op = Input(Bits(2.W))
|
||||||
|
val a = Input(Bits((expWidth + sigWidth + 1).W))
|
||||||
|
val b = Input(Bits((expWidth + sigWidth + 1).W))
|
||||||
|
val c = Input(Bits((expWidth + sigWidth + 1).W))
|
||||||
|
val roundingMode = Input(UInt(3.W))
|
||||||
|
val detectTininess = Input(UInt(1.W))
|
||||||
|
val out = Output(Bits((expWidth + sigWidth + 1).W))
|
||||||
|
val exceptionFlags = Output(Bits(5.W))
|
||||||
|
val validout = Output(Bool())
|
||||||
|
})
|
||||||
|
|
||||||
|
//------------------------------------------------------------------------
|
||||||
|
//------------------------------------------------------------------------
|
||||||
|
|
||||||
|
val mulAddRecFNToRaw_preMul = Module(new hardfloat.MulAddRecFNToRaw_preMul(expWidth, sigWidth))
|
||||||
|
val mulAddRecFNToRaw_postMul = Module(new hardfloat.MulAddRecFNToRaw_postMul(expWidth, sigWidth))
|
||||||
|
mulAddRecFNToRaw_preMul.io.op := io.op
|
||||||
|
mulAddRecFNToRaw_preMul.io.a := io.a
|
||||||
|
mulAddRecFNToRaw_preMul.io.b := io.b
|
||||||
|
mulAddRecFNToRaw_preMul.io.c := io.c
|
||||||
|
|
||||||
|
// Integer multiply-add on the values produced by preMul; +& keeps the
// carry-out bit of the addition.
val mulAddResult =
|
||||||
|
(mulAddRecFNToRaw_preMul.io.mulAddA *
|
||||||
|
mulAddRecFNToRaw_preMul.io.mulAddB) +&
|
||||||
|
mulAddRecFNToRaw_preMul.io.mulAddC
|
||||||
|
|
||||||
|
// Copies of valid and the rounding controls as seen after the first
// (optional) register stage.
val valid_stage0 = Wire(Bool())
|
||||||
|
val roundingMode_stage0 = Wire(UInt(3.W))
|
||||||
|
val detectTininess_stage0 = Wire(UInt(1.W))
|
||||||
|
|
||||||
|
// Number of register stages in front of postMul: 1 when latency > 0, else 0.
val postmul_regs = if(latency>0) 1 else 0
|
||||||
|
mulAddRecFNToRaw_postMul.io.fromPreMul := Pipe(io.validin, mulAddRecFNToRaw_preMul.io.toPostMul, postmul_regs).bits
|
||||||
|
mulAddRecFNToRaw_postMul.io.mulAddResult := Pipe(io.validin, mulAddResult, postmul_regs).bits
|
||||||
|
mulAddRecFNToRaw_postMul.io.roundingMode := Pipe(io.validin, io.roundingMode, postmul_regs).bits
|
||||||
|
roundingMode_stage0 := Pipe(io.validin, io.roundingMode, postmul_regs).bits
|
||||||
|
detectTininess_stage0 := Pipe(io.validin, io.detectTininess, postmul_regs).bits
|
||||||
|
// Only the valid bit of this Pipe is used; the false.B payload is a dummy.
valid_stage0 := Pipe(io.validin, false.B, postmul_regs).valid
|
||||||
|
|
||||||
|
//------------------------------------------------------------------------
|
||||||
|
//------------------------------------------------------------------------
|
||||||
|
|
||||||
|
val roundRawFNToRecFN = Module(new hardfloat.RoundRawFNToRecFN(expWidth, sigWidth, 0))
|
||||||
|
|
||||||
|
// Number of register stages in front of rounding: 1 only when latency == 2.
val round_regs = if(latency==2) 1 else 0
|
||||||
|
roundRawFNToRecFN.io.invalidExc := Pipe(valid_stage0, mulAddRecFNToRaw_postMul.io.invalidExc, round_regs).bits
|
||||||
|
roundRawFNToRecFN.io.in := Pipe(valid_stage0, mulAddRecFNToRaw_postMul.io.rawOut, round_regs).bits
|
||||||
|
roundRawFNToRecFN.io.roundingMode := Pipe(valid_stage0, roundingMode_stage0, round_regs).bits
|
||||||
|
roundRawFNToRecFN.io.detectTininess := Pipe(valid_stage0, detectTininess_stage0, round_regs).bits
|
||||||
|
// As above, only the valid bit is used.
io.validout := Pipe(valid_stage0, false.B, round_regs).valid
|
||||||
|
|
||||||
|
// The infinite-exception input of the rounder is tied off.
roundRawFNToRecFN.io.infiniteExc := false.B
|
||||||
|
|
||||||
|
io.out := roundRawFNToRecFN.io.out
|
||||||
|
io.exceptionFlags := roundRawFNToRecFN.io.exceptionFlags
|
||||||
|
}
|
||||||
@@ -5,7 +5,8 @@ package radiance.memory
|
|||||||
import chisel3._
|
import chisel3._
|
||||||
import chisel3.util._
|
import chisel3.util._
|
||||||
import org.chipsalliance.cde.config.{Field, Parameters}
|
import org.chipsalliance.cde.config.{Field, Parameters}
|
||||||
import freechips.rocketchip.diplomacy._
|
import freechips.rocketchip.diplomacy.{IdRange, AddressSet, BufferParams}
|
||||||
|
import org.chipsalliance.diplomacy.lazymodule.{LazyModule, LazyModuleImp}
|
||||||
import freechips.rocketchip.util.{Code, MultiPortQueue, OnePortLanePositionedQueue}
|
import freechips.rocketchip.util.{Code, MultiPortQueue, OnePortLanePositionedQueue}
|
||||||
import freechips.rocketchip.unittest._
|
import freechips.rocketchip.unittest._
|
||||||
import freechips.rocketchip.tilelink._
|
import freechips.rocketchip.tilelink._
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import chisel3._
|
|||||||
import chisel3.util._
|
import chisel3.util._
|
||||||
import chisel3.experimental._
|
import chisel3.experimental._
|
||||||
import freechips.rocketchip.diplomacy._
|
import freechips.rocketchip.diplomacy._
|
||||||
|
import org.chipsalliance.diplomacy.lazymodule.{LazyModule, LazyModuleImp}
|
||||||
import freechips.rocketchip.tilelink._
|
import freechips.rocketchip.tilelink._
|
||||||
import org.chipsalliance.cde.config.{Parameters, Field}
|
import org.chipsalliance.cde.config.{Parameters, Field}
|
||||||
|
|
||||||
@@ -14,15 +15,11 @@ case class VortexL1Config(
|
|||||||
numBanks: Int,
|
numBanks: Int,
|
||||||
inputSize: Int, // This is the read/write granularity of the L1 cache
|
inputSize: Int, // This is the read/write granularity of the L1 cache
|
||||||
cacheLineSize: Int,
|
cacheLineSize: Int,
|
||||||
coreTagWidth: Int,
|
|
||||||
writeInfoReqQSize: Int,
|
writeInfoReqQSize: Int,
|
||||||
mshrSize: Int,
|
mshrSize: Int,
|
||||||
memSideSourceIds: Int,
|
memSideSourceIds: Int,
|
||||||
uncachedAddrSets: Seq[AddressSet]
|
uncachedAddrSets: Seq[AddressSet]
|
||||||
) {
|
) {
|
||||||
def coreTagPlusSizeWidth: Int = {
|
|
||||||
log2Ceil(inputSize) + coreTagWidth
|
|
||||||
}
|
|
||||||
// NOTE: This assertion depends on the fact that the Vortex cache is
|
// NOTE: This assertion depends on the fact that the Vortex cache is
|
||||||
// configured to have 1 bank, and that it uses MSHR id as the tag of
|
// configured to have 1 bank, and that it uses MSHR id as the tag of
|
||||||
// memory-side requests. Otherwise, it will append bank id to the tag as
|
// memory-side requests. Otherwise, it will append bank id to the tag as
|
||||||
@@ -39,7 +36,6 @@ object defaultVortexL1Config
|
|||||||
numBanks = 4,
|
numBanks = 4,
|
||||||
inputSize = 16,
|
inputSize = 16,
|
||||||
cacheLineSize = 16,
|
cacheLineSize = 16,
|
||||||
coreTagWidth = 8,
|
|
||||||
writeInfoReqQSize = 16,
|
writeInfoReqQSize = 16,
|
||||||
mshrSize = 8,
|
mshrSize = 8,
|
||||||
memSideSourceIds = 8,
|
memSideSourceIds = 8,
|
||||||
@@ -95,13 +91,18 @@ class VortexBankPassThrough(config: VortexL1Config)(implicit p: Parameters)
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// HACK: Set arbitrarily since we cannot query the coresideNode's sourceId
|
||||||
|
// here. See comment on the require below.
|
||||||
|
// @perf: This is quite high
|
||||||
|
val sourceWidth = 9
|
||||||
|
|
||||||
// Master node to downstream
|
// Master node to downstream
|
||||||
val clientParam = Seq(
|
val clientParam = Seq(
|
||||||
TLMasterPortParameters.v1(
|
TLMasterPortParameters.v1(
|
||||||
clients = Seq(
|
clients = Seq(
|
||||||
TLMasterParameters.v1(
|
TLMasterParameters.v1(
|
||||||
name = "VortexBankPassthrough",
|
name = "VortexBankPassthrough",
|
||||||
sourceId = IdRange(0, 1 << config.coreTagWidth),
|
sourceId = IdRange(0, 1 << sourceWidth),
|
||||||
supportsProbe = TransferSizes(1, config.cacheLineSize),
|
supportsProbe = TransferSizes(1, config.cacheLineSize),
|
||||||
supportsGet = TransferSizes(1, config.cacheLineSize),
|
supportsGet = TransferSizes(1, config.cacheLineSize),
|
||||||
supportsPutFull = TransferSizes(1, config.cacheLineSize),
|
supportsPutFull = TransferSizes(1, config.cacheLineSize),
|
||||||
@@ -121,6 +122,16 @@ class VortexBankPassThrough(config: VortexL1Config)(implicit p: Parameters)
|
|||||||
val (upstream, _) = coresideNode.in(0)
|
val (upstream, _) = coresideNode.in(0)
|
||||||
val (downstream, _) = vxCacheFetchNode.out(0)
|
val (downstream, _) = vxCacheFetchNode.out(0)
|
||||||
|
|
||||||
|
// Make sure the outgoing edge of this passthrough has enough sourceIds
|
||||||
|
// that encompasses the core-side incoming edge's. This is an unfortunate
|
||||||
|
// hack due to not doing proper param negotiations across disconnected
|
||||||
|
// Diplomacy graphs.
|
||||||
|
// println(s"${upstream.params.sourceBits} <= ${downstream.params.sourceBits}")
|
||||||
|
require(upstream.params.sourceBits <= downstream.params.sourceBits,
|
||||||
|
"mem-side source of L1 cache truncates core-side source! " +
|
||||||
|
"Try lowering core/coalescer srcIds, or increasing sourceWidth " +
|
||||||
|
"for VortexBankPassThrough")
|
||||||
|
|
||||||
downstream.a <> upstream.a
|
downstream.a <> upstream.a
|
||||||
upstream.d <> downstream.d
|
upstream.d <> downstream.d
|
||||||
}
|
}
|
||||||
@@ -197,13 +208,17 @@ class VortexBankImp(
|
|||||||
outer: VortexBank,
|
outer: VortexBank,
|
||||||
config: VortexL1Config
|
config: VortexL1Config
|
||||||
) extends LazyModuleImp(outer) {
|
) extends LazyModuleImp(outer) {
|
||||||
|
val (tlInFromCoal, _) = outer.coresideNode.in.head
|
||||||
|
val coreTagWidth = tlInFromCoal.a.bits.source.getWidth
|
||||||
|
val coreTagWidthPlusSize = coreTagWidth + log2Ceil(config.inputSize)
|
||||||
|
|
||||||
val vxCache = Module(
|
val vxCache = Module(
|
||||||
new VX_cache_top(
|
new VX_cache_top(
|
||||||
WORD_SIZE = config.inputSize,
|
WORD_SIZE = config.inputSize,
|
||||||
// distribute total size across numBanks
|
// distribute total size across numBanks
|
||||||
CACHE_SIZE = config.cacheSize / config.numBanks,
|
CACHE_SIZE = config.cacheSize / config.numBanks,
|
||||||
CACHE_LINE_SIZE = config.cacheLineSize,
|
CACHE_LINE_SIZE = config.cacheLineSize,
|
||||||
CORE_TAG_WIDTH = config.coreTagPlusSizeWidth,
|
CORE_TAG_WIDTH = coreTagWidthPlusSize,
|
||||||
MSHR_SIZE = config.mshrSize
|
MSHR_SIZE = config.mshrSize
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
@@ -232,7 +247,7 @@ class VortexBankImp(
|
|||||||
|
|
||||||
class ReadReqInfo(config: VortexL1Config) extends Bundle {
|
class ReadReqInfo(config: VortexL1Config) extends Bundle {
|
||||||
val size = UInt(log2Ceil(4).W + 1)
|
val size = UInt(log2Ceil(4).W + 1)
|
||||||
val id = UInt(config.coreTagWidth.W)
|
val id = UInt(coreTagWidth.W)
|
||||||
}
|
}
|
||||||
|
|
||||||
val coreWriteReqQueue = Module(
|
val coreWriteReqQueue = Module(
|
||||||
@@ -247,8 +262,6 @@ class VortexBankImp(
|
|||||||
|
|
||||||
// Translate TL request from Coalescer to requests for VX_cache
|
// Translate TL request from Coalescer to requests for VX_cache
|
||||||
def TLReq2VXReq = {
|
def TLReq2VXReq = {
|
||||||
val (tlInFromCoal, _) = outer.coresideNode.in.head
|
|
||||||
|
|
||||||
// coal -> vxCache
|
// coal -> vxCache
|
||||||
tlInFromCoal.a.ready :=
|
tlInFromCoal.a.ready :=
|
||||||
vxCache.io.core_req_ready && coreWriteReqQueue.io.enq.ready // not optimal
|
vxCache.io.core_req_ready && coreWriteReqQueue.io.enq.ready // not optimal
|
||||||
@@ -269,13 +282,9 @@ class VortexBankImp(
|
|||||||
readReqInfo.id := tlInFromCoal.a.bits.source
|
readReqInfo.id := tlInFromCoal.a.bits.source
|
||||||
readReqInfo.size := tlInFromCoal.a.bits.size
|
readReqInfo.size := tlInFromCoal.a.bits.size
|
||||||
assert(readReqInfo.id.getWidth == tlInFromCoal.a.bits.source.getWidth,
|
assert(readReqInfo.id.getWidth == tlInFromCoal.a.bits.source.getWidth,
|
||||||
s"id width mismatch; coalescer ${tlInFromCoal.a.bits.source.getWidth}, cache ${readReqInfo.id.getWidth}")
|
s"id width mismatch; core-side ${tlInFromCoal.a.bits.source.getWidth}, cache-side ${readReqInfo.id.getWidth}")
|
||||||
assert(readReqInfo.size.getWidth == tlInFromCoal.a.bits.size.getWidth,
|
assert(readReqInfo.size.getWidth == tlInFromCoal.a.bits.size.getWidth,
|
||||||
s"size width mismatch; coalescer ${tlInFromCoal.a.bits.size.getWidth}, cache ${readReqInfo.size.getWidth}")
|
s"size width mismatch; core-side ${tlInFromCoal.a.bits.size.getWidth}, cache-side ${readReqInfo.size.getWidth}")
|
||||||
assert(readReqInfo.id.getWidth == tlInFromCoal.a.bits.source.getWidth,
|
|
||||||
s"id width mismatch; coalescer ${tlInFromCoal.a.bits.source.getWidth}, cache ${readReqInfo.id.getWidth}")
|
|
||||||
assert(readReqInfo.size.getWidth == tlInFromCoal.a.bits.size.getWidth,
|
|
||||||
s"size width mismatch; coalescer ${tlInFromCoal.a.bits.size.getWidth}, cache ${readReqInfo.size.getWidth}")
|
|
||||||
// ignore param, size, corrupt
|
// ignore param, size, corrupt
|
||||||
vxCache.io.core_req_tag := readReqInfo.asTypeOf(vxCache.io.core_req_tag)
|
vxCache.io.core_req_tag := readReqInfo.asTypeOf(vxCache.io.core_req_tag)
|
||||||
|
|
||||||
|
|||||||
@@ -42,7 +42,7 @@ class WithRadianceCores(
|
|||||||
useVxCache: Boolean
|
useVxCache: Boolean
|
||||||
) extends Config((site, _, up) => {
|
) extends Config((site, _, up) => {
|
||||||
case TilesLocated(`location`) => {
|
case TilesLocated(`location`) => {
|
||||||
val prev = up(TilesLocated(`location`), site)
|
val prev = up(TilesLocated(`location`))
|
||||||
val idOffset = prev.size
|
val idOffset = prev.size
|
||||||
val vortex = RadianceTileParams(
|
val vortex = RadianceTileParams(
|
||||||
core = VortexCoreParams(fpu = None),
|
core = VortexCoreParams(fpu = None),
|
||||||
@@ -87,7 +87,7 @@ class WithRadianceGemmini(location: HierarchicalLocation,
|
|||||||
crossing: RocketCrossingParams,
|
crossing: RocketCrossingParams,
|
||||||
dim: Int, accSizeInKB: Int, tileSize: Int) extends Config((site, _, up) => {
|
dim: Int, accSizeInKB: Int, tileSize: Int) extends Config((site, _, up) => {
|
||||||
case TilesLocated(`location`) => {
|
case TilesLocated(`location`) => {
|
||||||
val prev = up(TilesLocated(`location`), site)
|
val prev = up(TilesLocated(`location`))
|
||||||
val idOffset = prev.size
|
val idOffset = prev.size
|
||||||
if (idOffset == 0) {
|
if (idOffset == 0) {
|
||||||
println("******WARNING****** gemmini tile id is 0! radiance tiles in the same cluster needs to be before gemmini")
|
println("******WARNING****** gemmini tile id is 0! radiance tiles in the same cluster needs to be before gemmini")
|
||||||
@@ -171,7 +171,7 @@ class WithFuzzerCores(
|
|||||||
useVxCache: Boolean
|
useVxCache: Boolean
|
||||||
) extends Config((site, _, up) => {
|
) extends Config((site, _, up) => {
|
||||||
case TilesLocated(InSubsystem) => {
|
case TilesLocated(InSubsystem) => {
|
||||||
val prev = up(TilesLocated(InSubsystem), site)
|
val prev = up(TilesLocated(InSubsystem))
|
||||||
val idOffset = prev.size
|
val idOffset = prev.size
|
||||||
val fuzzer = FuzzerTileParams(
|
val fuzzer = FuzzerTileParams(
|
||||||
core = VortexCoreParams(fpu = None),
|
core = VortexCoreParams(fpu = None),
|
||||||
@@ -202,11 +202,11 @@ class WithRadianceCluster(
|
|||||||
case PossibleTileLocations => up(PossibleTileLocations) :+ InCluster(clusterId)
|
case PossibleTileLocations => up(PossibleTileLocations) :+ InCluster(clusterId)
|
||||||
})
|
})
|
||||||
|
|
||||||
// `nSrcIds`: number of source IDs for dmem requests on each SIMT lane
|
// `nSrcIds`: number of source IDs for each mem lane. This is for all warps
|
||||||
class WithSimtConfig(nWarps: Int = 4, nCoreLanes: Int = 4, nMemLanes: Int = 4, nSrcIds: Int = 8)
|
class WithSimtConfig(nWarps: Int = 4, nCoreLanes: Int = 4, nMemLanes: Int = 4, nSrcIds: Int = 8)
|
||||||
extends Config((site, _, up) => {
|
extends Config((site, _, up) => {
|
||||||
case SIMTCoreKey => {
|
case SIMTCoreKey => {
|
||||||
Some(up(SIMTCoreKey, site).getOrElse(SIMTCoreParams()).copy(
|
Some(up(SIMTCoreKey).getOrElse(SIMTCoreParams()).copy(
|
||||||
nWarps = nWarps,
|
nWarps = nWarps,
|
||||||
nCoreLanes = nCoreLanes,
|
nCoreLanes = nCoreLanes,
|
||||||
nMemLanes = nMemLanes,
|
nMemLanes = nMemLanes,
|
||||||
@@ -228,22 +228,18 @@ extends Config((site, _, _) => {
|
|||||||
|
|
||||||
class WithPriorityCoalXbar extends Config((site, _, up) => {
|
class WithPriorityCoalXbar extends Config((site, _, up) => {
|
||||||
case CoalXbarKey => {
|
case CoalXbarKey => {
|
||||||
Some(up(CoalXbarKey, site).getOrElse(CoalXbarParam))
|
Some(up(CoalXbarKey).getOrElse(CoalXbarParam))
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
class WithVortexL1Banks(nBanks: Int = 4) extends Config ((site, _, up) => {
|
class WithVortexL1Banks(nBanks: Int = 4) extends Config ((site, here, up) => {
|
||||||
case VortexL1Key => {
|
case VortexL1Key => {
|
||||||
Some(defaultVortexL1Config.copy(
|
Some(defaultVortexL1Config.copy(
|
||||||
numBanks = nBanks,
|
numBanks = nBanks,
|
||||||
inputSize = up(SIMTCoreKey).get.nMemLanes * 4,
|
inputSize = up(SIMTCoreKey).get.nMemLanes * 4/*32b word*/,
|
||||||
cacheLineSize = up(SIMTCoreKey).get.nMemLanes * 4,
|
cacheLineSize = up(SIMTCoreKey).get.nMemLanes * 4/*32b word*/,
|
||||||
memSideSourceIds = 16,
|
memSideSourceIds = 16,
|
||||||
mshrSize = 16,
|
mshrSize = 16,
|
||||||
coreTagWidth = log2Ceil(up(SIMTCoreKey).get.nSrcIds.max(up(CoalescerKey) match {
|
|
||||||
case Some(key) => key.numNewSrcIds
|
|
||||||
case None => 0
|
|
||||||
})) + log2Ceil(up(SIMTCoreKey).get.nMemLanes) + 1
|
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
@@ -254,7 +250,7 @@ class WithVortexL1Banks(nBanks: Int = 4) extends Config ((site, _, up) => {
|
|||||||
// to e.g. compare waveforms.
|
// to e.g. compare waveforms.
|
||||||
class WithCoalescer(nNewSrcIds: Int = 8, enable : Boolean = true) extends Config((site, _, up) => {
|
class WithCoalescer(nNewSrcIds: Int = 8, enable : Boolean = true) extends Config((site, _, up) => {
|
||||||
case CoalescerKey => {
|
case CoalescerKey => {
|
||||||
val (nLanes, numOldSrcIds) = up(SIMTCoreKey, site) match {
|
val (nLanes, numOldSrcIds) = up(SIMTCoreKey) match {
|
||||||
case Some(param) => (param.nMemLanes, param.nSrcIds)
|
case Some(param) => (param.nMemLanes, param.nSrcIds)
|
||||||
case None => (1,1)
|
case None => (1,1)
|
||||||
}
|
}
|
||||||
@@ -266,7 +262,7 @@ class WithCoalescer(nNewSrcIds: Int = 8, enable : Boolean = true) extends Config
|
|||||||
|
|
||||||
// If instantiating L1 cache, the maximum coalescing size should match the
|
// If instantiating L1 cache, the maximum coalescing size should match the
|
||||||
// cache line size
|
// cache line size
|
||||||
val maxCoalSizeInBytes = up(VortexL1Key, site) match {
|
val maxCoalSizeInBytes = up(VortexL1Key) match {
|
||||||
case Some(param) => param.inputSize
|
case Some(param) => param.inputSize
|
||||||
case None => sbusWidthInBytes
|
case None => sbusWidthInBytes
|
||||||
}
|
}
|
||||||
@@ -291,7 +287,7 @@ class WithNCustomSmallRocketCores(
|
|||||||
crossing: RocketCrossingParams = RocketCrossingParams()
|
crossing: RocketCrossingParams = RocketCrossingParams()
|
||||||
) extends Config((site, here, up) => {
|
) extends Config((site, here, up) => {
|
||||||
case TilesLocated(InSubsystem) => {
|
case TilesLocated(InSubsystem) => {
|
||||||
val prev = up(TilesLocated(InSubsystem), site)
|
val prev = up(TilesLocated(InSubsystem))
|
||||||
val idOffset = overrideIdOffset.getOrElse(prev.size)
|
val idOffset = overrideIdOffset.getOrElse(prev.size)
|
||||||
val med = RocketTileParams(
|
val med = RocketTileParams(
|
||||||
core = RocketCoreParams(fpu = None),
|
core = RocketCoreParams(fpu = None),
|
||||||
@@ -325,7 +321,7 @@ class WithNCustomSmallRocketCores(
|
|||||||
class WithExtGPUMem(address: BigInt = BigInt("0x100000000", 16),
|
class WithExtGPUMem(address: BigInt = BigInt("0x100000000", 16),
|
||||||
size: BigInt = 0x80000000) extends Config((site, here, up) => {
|
size: BigInt = 0x80000000) extends Config((site, here, up) => {
|
||||||
case GPUMemory() => Some(GPUMemParams(address, size))
|
case GPUMemory() => Some(GPUMemParams(address, size))
|
||||||
case ExtMem => up(ExtMem, site).map(x => {
|
case ExtMem => up(ExtMem).map(x => {
|
||||||
val gap = address - x.master.base - x.master.size
|
val gap = address - x.master.base - x.master.size
|
||||||
x.copy(master = x.master.copy(size = x.master.size + gap + size))
|
x.copy(master = x.master.copy(size = x.master.size + gap + size))
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -89,7 +89,7 @@ class RadianceCluster (
|
|||||||
val stride_by_word = true
|
val stride_by_word = true
|
||||||
val filter_aligned = true
|
val filter_aligned = true
|
||||||
val disable_monitors = true // otherwise it generate 1k+ different tl monitors
|
val disable_monitors = true // otherwise it generate 1k+ different tl monitors
|
||||||
val serialize_unaligned = true
|
val serialize_unaligned = false
|
||||||
|
|
||||||
def guard_monitors[T](callback: Parameters => T)(implicit p: Parameters): Unit = {
|
def guard_monitors[T](callback: Parameters => T)(implicit p: Parameters): Unit = {
|
||||||
if (disable_monitors) {
|
if (disable_monitors) {
|
||||||
|
|||||||
@@ -7,8 +7,8 @@ import chisel3._
|
|||||||
import chisel3.experimental.AffectsChiselPrefix
|
import chisel3.experimental.AffectsChiselPrefix
|
||||||
import chisel3.util._
|
import chisel3.util._
|
||||||
import freechips.rocketchip.devices.tilelink._
|
import freechips.rocketchip.devices.tilelink._
|
||||||
import org.chipsalliance.diplomacy._
|
|
||||||
import freechips.rocketchip.diplomacy._
|
import freechips.rocketchip.diplomacy._
|
||||||
|
import org.chipsalliance.diplomacy.lazymodule.LazyModule
|
||||||
import freechips.rocketchip.prci.ClockSinkParameters
|
import freechips.rocketchip.prci.ClockSinkParameters
|
||||||
import freechips.rocketchip.regmapper.RegField
|
import freechips.rocketchip.regmapper.RegField
|
||||||
import freechips.rocketchip.rocket._
|
import freechips.rocketchip.rocket._
|
||||||
@@ -319,8 +319,7 @@ class RadianceTile private (
|
|||||||
// )
|
// )
|
||||||
|
|
||||||
val icache = LazyModule(new VortexL1Cache(vortexL1Config.copy(
|
val icache = LazyModule(new VortexL1Cache(vortexL1Config.copy(
|
||||||
numBanks = 1,
|
numBanks = 1
|
||||||
coreTagWidth = imemSourceWidth
|
|
||||||
)))
|
)))
|
||||||
val dcache = LazyModule(new VortexL1Cache(vortexL1Config))
|
val dcache = LazyModule(new VortexL1Cache(vortexL1Config))
|
||||||
// imemNodes.foreach { icache.coresideNode := TLWidthWidget(4) := _ }
|
// imemNodes.foreach { icache.coresideNode := TLWidthWidget(4) := _ }
|
||||||
|
|||||||
@@ -172,6 +172,7 @@ class Vortex(tile: RadianceTile)(implicit p: Parameters)
|
|||||||
addResource("/vsrc/vortex/hw/rtl/core/VX_decode.sv")
|
addResource("/vsrc/vortex/hw/rtl/core/VX_decode.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/core/VX_dispatch.sv")
|
addResource("/vsrc/vortex/hw/rtl/core/VX_dispatch.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/core/VX_dispatch_unit.sv")
|
addResource("/vsrc/vortex/hw/rtl/core/VX_dispatch_unit.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/core/VX_dispatch_unit_sane.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/core/VX_execute.sv")
|
addResource("/vsrc/vortex/hw/rtl/core/VX_execute.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/core/VX_fetch.sv")
|
addResource("/vsrc/vortex/hw/rtl/core/VX_fetch.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/core/VX_gather_unit.sv")
|
addResource("/vsrc/vortex/hw/rtl/core/VX_gather_unit.sv")
|
||||||
@@ -329,6 +330,8 @@ class Vortex(tile: RadianceTile)(implicit p: Parameters)
|
|||||||
addResource("/vsrc/vortex/hw/rtl/fpu/VX_fpu_fpnew.sv")
|
addResource("/vsrc/vortex/hw/rtl/fpu/VX_fpu_fpnew.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/core/VX_fpu_unit.sv")
|
addResource("/vsrc/vortex/hw/rtl/core/VX_fpu_unit.sv")
|
||||||
|
|
||||||
|
addResource("/vsrc/TensorDotProductUnit.sv")
|
||||||
|
|
||||||
// fpnew
|
// fpnew
|
||||||
// compile order matters; package definitions (ex. fpnew_pkg) should be
|
// compile order matters; package definitions (ex. fpnew_pkg) should be
|
||||||
// compiled before all the other modules that reference them. They are added
|
// compiled before all the other modules that reference them. They are added
|
||||||
|
|||||||
96
src/test/scala/radiance/TensorDPUTest.scala
Normal file
96
src/test/scala/radiance/TensorDPUTest.scala
Normal file
@@ -0,0 +1,96 @@
|
|||||||
|
package radiance.core
|
||||||
|
|
||||||
|
import chisel3._
|
||||||
|
import chisel3.util._
|
||||||
|
import chiseltest._
|
||||||
|
import chiseltest.simulator.VerilatorFlags
|
||||||
|
import org.chipsalliance.cde.config.Parameters
|
||||||
|
import freechips.rocketchip.tile
|
||||||
|
import org.scalatest.flatspec.AnyFlatSpec
|
||||||
|
|
||||||
|
class MulAddTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||||
|
behavior of "MulAddRecFNPipe"
|
||||||
|
|
||||||
|
val t = tile.FType.S
|
||||||
|
it should "do basic arithmetic" in {
|
||||||
|
test(new MulAddRecFNPipe(2, t.exp, t.sig))
|
||||||
|
// .withAnnotations(Seq(WriteVcdAnnotation))
|
||||||
|
{ c =>
|
||||||
|
c.io.validin.poke(true.B)
|
||||||
|
// 0: MADD
|
||||||
|
// 1: MSUB
|
||||||
|
// 2: NMSUB
|
||||||
|
// 3: NMADD
|
||||||
|
c.io.op.poke(0.U)
|
||||||
|
// rounding mode (p.113 of spec)
|
||||||
|
// 0: round to nearest, ties to even
|
||||||
|
c.io.roundingMode.poke(0.U)
|
||||||
|
c.io.detectTininess.poke(hardfloat.consts.tininess_beforeRounding)
|
||||||
|
c.io.a.poke(0x3f800000.U)
|
||||||
|
c.io.b.poke(0x3f800000.U)
|
||||||
|
c.io.c.poke(0x00000000.U)
|
||||||
|
c.clock.step()
|
||||||
|
c.io.validin.poke(false.B)
|
||||||
|
c.io.validout.expect(false.B)
|
||||||
|
c.clock.step()
|
||||||
|
c.io.validout.expect(true.B)
|
||||||
|
c.io.out.expect(0x40c00000.U)
|
||||||
|
c.clock.step()
|
||||||
|
c.io.validout.expect(false.B)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class TensorDotProductUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||||
|
behavior of "TensorDotProductUnit"
|
||||||
|
|
||||||
|
implicit val p: Parameters = Parameters.empty
|
||||||
|
|
||||||
|
it should "pass" in {
|
||||||
|
test(new TensorDotProductUnit)
|
||||||
|
// .withAnnotations(Seq(VerilatorBackendAnnotation))
|
||||||
|
// .withAnnotations(Seq(WriteVcdAnnotation))
|
||||||
|
{ c =>
|
||||||
|
c.io.in.valid.poke(true.B)
|
||||||
|
c.io.stall.poke(false.B)
|
||||||
|
// (1,3,5,7)*(2,4,6,8) + 9 = 109
|
||||||
|
c.io.in.bits.a(0).poke(0x3f800000L.U(64.W))
|
||||||
|
c.io.in.bits.a(1).poke(0x40400000L.U(64.W))
|
||||||
|
c.io.in.bits.a(2).poke(0x40a00000L.U(64.W))
|
||||||
|
c.io.in.bits.a(3).poke(0x40e00000L.U(64.W))
|
||||||
|
c.io.in.bits.b(0).poke(0x40000000L.U(64.W))
|
||||||
|
c.io.in.bits.b(1).poke(0x40800000L.U(64.W))
|
||||||
|
c.io.in.bits.b(2).poke(0x40c00000L.U(64.W))
|
||||||
|
c.io.in.bits.b(3).poke(0x41000000L.U(64.W))
|
||||||
|
c.io.in.bits.c .poke(0x41100000L.U(64.W))
|
||||||
|
|
||||||
|
c.io.out.valid.expect(false.B)
|
||||||
|
|
||||||
|
c.clock.step()
|
||||||
|
c.io.in.valid.poke(false.B)
|
||||||
|
c.io.out.valid.expect(false.B)
|
||||||
|
|
||||||
|
// stall the pipeline
|
||||||
|
c.io.stall.poke(true.B)
|
||||||
|
c.clock.step()
|
||||||
|
c.io.stall.poke(true.B)
|
||||||
|
c.clock.step()
|
||||||
|
c.io.stall.poke(true.B)
|
||||||
|
c.clock.step()
|
||||||
|
c.io.stall.poke(false.B)
|
||||||
|
|
||||||
|
c.clock.step()
|
||||||
|
c.clock.step()
|
||||||
|
c.clock.step()
|
||||||
|
// 4-cycle latency + stalls
|
||||||
|
|
||||||
|
c.io.out.valid.expect(true.B)
|
||||||
|
c.io.out.bits.data.expect(0x42da0000L.U)
|
||||||
|
|
||||||
|
c.clock.step()
|
||||||
|
|
||||||
|
c.io.out.valid.expect(false.B)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Reference in New Issue
Block a user