diff --git a/.github/scripts/defaults.sh b/.github/scripts/defaults.sh index 83117317..176d20d5 100755 --- a/.github/scripts/defaults.sh +++ b/.github/scripts/defaults.sh @@ -29,7 +29,7 @@ REMOTE_COURSIER_CACHE=$REMOTE_WORK_DIR/.coursier-cache # key value store to get the build groups declare -A grouping grouping["group-cores"]="chipyard-cva6 chipyard-ibex chipyard-rocket chipyard-hetero chipyard-boom chipyard-sodor chipyard-digitaltop chipyard-multiclock-rocket chipyard-nomem-scratchpad chipyard-spike chipyard-clone" -grouping["group-peripherals"]="chipyard-dmirocket chipyard-spiflashwrite chipyard-mmios chipyard-nocores chipyard-manyperipherals chipyard-chiplike" +grouping["group-peripherals"]="chipyard-dmirocket chipyard-dmiboom chipyard-spiflashwrite chipyard-mmios chipyard-nocores chipyard-manyperipherals chipyard-chiplike" grouping["group-accels"]="chipyard-mempress chipyard-sha3 chipyard-hwacha chipyard-gemmini chipyard-manymmioaccels" grouping["group-constellation"]="chipyard-constellation" grouping["group-tracegen"]="tracegen tracegen-boom" @@ -46,6 +46,7 @@ mapping["chipyard-digitaltop"]=" TOP=DigitalTop" mapping["chipyard-manymmioaccels"]=" CONFIG=ManyMMIOAcceleratorRocketConfig" mapping["chipyard-hetero"]=" CONFIG=LargeBoomAndRocketConfig" mapping["chipyard-boom"]=" CONFIG=MediumBoomCosimConfig" +mapping["chipyard-dmiboom"]=" CONFIG=dmiMediumBoomCosimConfig" mapping["chipyard-spike"]=" CONFIG=SpikeFastUARTConfig EXTRA_SIM_FLAGS='+spike-ipc=10'" mapping["chipyard-hwacha"]=" CONFIG=HwachaRocketConfig" mapping["chipyard-gemmini"]=" CONFIG=GemminiRocketConfig" diff --git a/.github/scripts/run-tests.sh b/.github/scripts/run-tests.sh index bf9f2585..d08614d7 100755 --- a/.github/scripts/run-tests.sh +++ b/.github/scripts/run-tests.sh @@ -33,11 +33,16 @@ case $1 in run_bmark ${mapping[$1]} ;; chipyard-dmirocket) - run_bmark ${mapping[$1]} + $LOCAL_CHIPYARD_DIR/scripts/generate-ckpt.sh -b $RISCV/riscv64-unknown-elf/share/riscv-tests/benchmarks/dhrystone.riscv 
-i 10000 + make -C $LOCAL_SIM_DIR $DISABLE_SIM_PREREQ ${mapping[$1]} run-binary LOADARCH=$PWD/dhrystone.riscv.0x80000000.10000.loadarch ;; chipyard-boom) run_bmark ${mapping[$1]} ;; + chipyard-dmiboom) + $LOCAL_CHIPYARD_DIR/scripts/generate-ckpt.sh -b $RISCV/riscv64-unknown-elf/share/riscv-tests/benchmarks/dhrystone.riscv -i 10000 + make -C $LOCAL_SIM_DIR $DISABLE_SIM_PREREQ ${mapping[$1]} run-binary LOADARCH=$PWD/dhrystone.riscv.0x80000000.10000.loadarch + ;; chipyard-spike) run_bmark ${mapping[$1]} ;; diff --git a/.github/workflows/chipyard-run-tests.yml b/.github/workflows/chipyard-run-tests.yml index f7917e92..d357637c 100644 --- a/.github/workflows/chipyard-run-tests.yml +++ b/.github/workflows/chipyard-run-tests.yml @@ -603,6 +603,29 @@ jobs: group-key: "group-peripherals" project-key: "chipyard-dmirocket" + chipyard-dmiboom-run-tests: + name: chipyard-dmiboom-run-tests + needs: prepare-chipyard-peripherals + runs-on: self-hosted + steps: + - name: Delete old checkout + run: | + ls -alh . + rm -rf ${{ github.workspace }}/* || true + rm -rf ${{ github.workspace }}/.* || true + ls -alh . 
+ - name: Checkout + uses: actions/checkout@v3 + - name: Git workaround + uses: ./.github/actions/git-workaround + - name: Create conda env + uses: ./.github/actions/create-conda-env + - name: Run tests + uses: ./.github/actions/run-tests + with: + group-key: "group-peripherals" + project-key: "chipyard-dmiboom" + chipyard-spiflashwrite-run-tests: name: chipyard-spiflashwrite-run-tests needs: prepare-chipyard-peripherals @@ -944,6 +967,7 @@ jobs: chipyard-cva6-run-tests, chipyard-ibex-run-tests, chipyard-sodor-run-tests, + chipyard-dmiboom-run-tests, chipyard-dmirocket-run-tests, chipyard-spiflashwrite-run-tests, chipyard-manyperipherals-run-tests, diff --git a/common.mk b/common.mk index 0868088b..cf2236ff 100644 --- a/common.mk +++ b/common.mk @@ -314,29 +314,15 @@ run-binary-debug: $(SIM_DEBUG_PREREQ) check-binary | $(output_dir) run-fast: run-asm-tests-fast run-bmark-tests-fast ######################################################################################### -# helper rules to run simulator with fast loadmem via hex files +# helper rules to run simulator with fast loadmem +# LEGACY - use LOADMEM=1 instead ######################################################################################### -$(binary_hex): $(firstword $(BINARY)) | $(output_dir) - $(base_dir)/scripts/smartelf2hex.sh $(firstword $(BINARY)) > $(binary_hex) - -run-binary-hex: check-binary -run-binary-hex: $(SIM_PREREQ) $(binary_hex) | $(output_dir) run-binary-hex: run-binary -run-binary-hex: override LOADMEM_ADDR = 80000000 -run-binary-hex: override LOADMEM = $(binary_hex) -run-binary-hex: override SIM_FLAGS += +loadmem=$(LOADMEM) +loadmem_addr=$(LOADMEM_ADDR) -run-binary-debug-hex: check-binary -run-binary-debug-hex: $(SIM_DEBUG_REREQ) $(binary_hex) | $(output_dir) +run-binary-hex: override SIM_FLAGS += +loadmem=$(BINARY) run-binary-debug-hex: run-binary-debug -run-binary-debug-hex: override LOADMEM_ADDR = 80000000 -run-binary-debug-hex: override LOADMEM = $(binary_hex) 
-run-binary-debug-hex: override SIM_FLAGS += +loadmem=$(LOADMEM) +loadmem_addr=$(LOADMEM_ADDR) -run-binary-fast-hex: check-binary -run-binary-fast-hex: $(SIM_PREREQ) $(binary_hex) | $(output_dir) +run-binary-debug-hex: override SIM_FLAGS += +loadmem=$(BINARY) run-binary-fast-hex: run-binary-fast -run-binary-fast-hex: override LOADMEM_ADDR = 80000000 -run-binary-fast-hex: override LOADMEM = $(binary_hex) -run-binary-fast-hex: override SIM_FLAGS += +loadmem=$(LOADMEM) +loadmem_addr=$(LOADMEM_ADDR) +run-binary-fast-hex: override SIM_FLAGS += +loadmem=$(BINARY) ######################################################################################### # run assembly/benchmarks rules diff --git a/docs/Advanced-Concepts/Architectural-Checkpoints.rst b/docs/Advanced-Concepts/Architectural-Checkpoints.rst new file mode 100644 index 00000000..490bddc5 --- /dev/null +++ b/docs/Advanced-Concepts/Architectural-Checkpoints.rst @@ -0,0 +1,39 @@ +.. _checkpointing: + +Architectural Checkpoints +========================= + +Chipyard supports generating architectural checkpoints using Spike. +These checkpoints contain a snapshot of the architectural state of a RISC-V SoC at some point in the execution of a program. +The checkpoints include the contents of cacheable memory, core architectural registers, and core CSRs. +RTL simulations of SoCs can resume execution from checkpoints after restoring the architectural state. + +.. note:: + Currently, only checkpoints of single-core systems are supported + +Generating Checkpoints +------------------------ + +``scripts/generate-ckpt.sh`` is a script that runs spike with the right commands to generate an architectural checkpoint +``scripts/generate-ckpt.sh -h`` lists options for checkpoint generation. + +Example: run the ``hello.riscv`` binary for 1000 instructions before generating a checkpoint. +This should produce a directory named ``hello.riscv.0x80000000.1000.loadarch`` + +.. 
code:: + + scripts/generate-ckpt.sh -b tests/hello.riscv -i 1000 + + +Loading Checkpoints in RTL Simulation +-------------------------------------- + +Checkpoints can be loaded in RTL simulations with the ``LOADARCH`` flag. +The target config **MUST** use DMI-based bringup (as opposed to the default TSI-based bringup) and support fast ``LOADMEM``. +The target config should also match the architectural configuration that Spike used when the checkpoint was generated. + +.. code:: + + cd sims/vcs + make CONFIG=dmiRocketConfig run-binary LOADARCH=../../hello.riscv.0x80000000.1000.loadarch + diff --git a/docs/Advanced-Concepts/index.rst b/docs/Advanced-Concepts/index.rst index b67bbbb3..d5455ddb 100644 --- a/docs/Advanced-Concepts/index.rst +++ b/docs/Advanced-Concepts/index.rst @@ -16,3 +16,4 @@ They expect you to know about Chisel, Parameters, configs, etc. CDEs Harness-Clocks Managing-Published-Scala-Dependencies + Architectural-Checkpoints diff --git a/docs/Simulation/Software-RTL-Simulation.rst b/docs/Simulation/Software-RTL-Simulation.rst index d62f6f1c..580a5d2e 100644 --- a/docs/Simulation/Software-RTL-Simulation.rst +++ b/docs/Simulation/Software-RTL-Simulation.rst @@ -151,25 +151,17 @@ Fast Memory Loading ------------------- The simulator loads the program binary over a simulated serial line. This can be quite slow if there is a lot of static data, so the simulator also allows data to be loaded from a file directly into the DRAM model. +Loadmem files should be ELF files. In the most common use case, this is the program binary itself. .. code-block:: shell - make run-binary BINARY=test.riscv LOADMEM=testdata.hex LOADMEM_ADDR=81000000 + make run-binary BINARY=test.riscv LOADMEM=test.riscv -The ``.hex`` file should be a text file with a hexadecimal number on each line. - -.. code-block:: text - - deadbeef - 0123 - -Each line uses little-endian order, so this file would produce the bytes "ef be ad de 01 23". 
``LOADMEM_ADDR`` specifies which address in memory (in hexadecimal) to write the first byte to. The default is 0x81000000. - -A special target that facilitates automatically generating a hex file for an entire elf RISC-V exectuable and then running the simulator with the appropriate flags is also available. +Usually the ``LOADMEM`` ELF is the same as the ``BINARY`` ELF, so ``LOADMEM=1`` can be used as a shortcut. .. code-block:: shell - make run-binary-hex BINARY=test.riscv + make run-binary BINARY=test.riscv LOADMEM=1 Generating Waveforms ----------------------- diff --git a/docs/Software/Spike.rst b/docs/Software/Spike.rst index 79e41d6e..e9abe0c0 100644 --- a/docs/Software/Spike.rst +++ b/docs/Software/Spike.rst @@ -43,7 +43,7 @@ Spike-as-a-Tile can be configured with custom IPC, commit logging, and other beh .. code-block:: shell - make CONFIG=SpikeUltraFastConfig run-binary-hex BINARY=hello.riscv EXTRA_SPIKE_FLAGS="+spike-ipc=10000 +spike-fast-clint +spike-debug" + make CONFIG=SpikeUltraFastConfig run-binary BINARY=hello.riscv EXTRA_SPIKE_FLAGS="+spike-ipc=10000 +spike-fast-clint +spike-debug" LOADMEM=1 * ``+spike-ipc=``: Sets the maximum number of instructions Spike can retire in a single "tick", or cycle of the uncore simulation. 
diff --git a/generators/boom b/generators/boom index 1b1f210b..679f3587 160000 --- a/generators/boom +++ b/generators/boom @@ -1 +1 @@ -Subproject commit 1b1f210bcf5985c1e1f588c1639d5d0ec2d04998 +Subproject commit 679f358755c57524f18cf46b72fc3fc1ac67f127 diff --git a/generators/chipyard/src/main/resources/csrc/cospike.cc b/generators/chipyard/src/main/resources/csrc/cospike.cc index fa0513d7..f531e61f 100644 --- a/generators/chipyard/src/main/resources/csrc/cospike.cc +++ b/generators/chipyard/src/main/resources/csrc/cospike.cc @@ -1,3 +1,4 @@ +#include #include #include #include @@ -5,6 +6,25 @@ #include #include #include +#include +#include +#include +#include +#include +#include + +#if __has_include ("cospike_dtm.h") +#define COSPIKE_DTM +#include "testchip_dtm.h" +extern testchip_dtm_t* dtm; +bool spike_loadarch_done = false; +#endif + +#if __has_include ("mm.h") +#define COSPIKE_SIMDRAM +#include "mm.h" +extern std::map backing_mem_data; +#endif #define CLINT_BASE (0x2000000) #define CLINT_SIZE (0x1000) @@ -20,6 +40,7 @@ typedef struct system_info_t { system_info_t* info = NULL; sim_t* sim = NULL; +bool cospike_debug; reg_t tohost_addr = 0; reg_t fromhost_addr = 0; std::set magic_addrs; @@ -64,10 +85,11 @@ extern "C" void cospike_cosim(long long int cycle, int raise_exception, int raise_interrupt, unsigned long long int cause, - unsigned long long int wdata) + unsigned long long int wdata, + int priv) { assert(info); - if (!sim) { + if (unlikely(!sim)) { printf("Configuring spike cosim\n"); std::vector mem_cfg; std::vector hartids; @@ -110,7 +132,7 @@ extern "C" void cospike_cosim(long long int cycle, abort(); std::vector htif_args; bool in_permissive = false; - bool cospike_debug = false; + cospike_debug = false; for (int i = 1; i < vinfo.argc; i++) { std::string arg(vinfo.argv[i]); if (arg == "+permissive") { @@ -136,7 +158,7 @@ extern "C" void cospike_cosim(long long int cycle, .support_impebreak = true }; - printf("%s\n", info->isa.c_str()); + printf("isa 
string is %s\n", info->isa.c_str()); for (int i = 0; i < htif_args.size(); i++) { printf("%s\n", htif_args[i].c_str()); } @@ -146,13 +168,29 @@ extern "C" void cospike_cosim(long long int cycle, plugin_devices, htif_args, dm_config, - nullptr, + "cospike.log", false, nullptr, false, nullptr ); +#ifdef COSPIKE_SIMDRAM + // match sim_t's backing memory with the SimDRAM memory + bus_t temp_mem_bus; + for (auto& pair : mems) temp_mem_bus.add_device(pair.first, pair.second); + + for (auto& pair : backing_mem_data) { + size_t base = pair.first; + size_t size = pair.second.size; + printf("Matching spike memory initial state for region %lx-%lx\n", base, base + size); + if (!temp_mem_bus.store(base, size, pair.second.data)) { + printf("Error, unable to match memory at address %lx\n", base); + abort(); + } + } +#endif + sim->configure_log(true, true); // Use our own reset vector for (int i = 0; i < info->nharts; i++) { @@ -166,18 +204,80 @@ extern "C" void cospike_cosim(long long int cycle, fromhost_addr = ((htif_t*)sim)->get_fromhost_addr(); printf("Tohost : %lx\n", tohost_addr); printf("Fromhost: %lx\n", fromhost_addr); + printf("Memory base : %lx\n", info->mem0_base); + printf("Memory Size : %lx\n", info->mem0_size); + } + + if (priv & 0x4) { // debug + return; } processor_t* p = sim->get_core(hartid); state_t* s = p->get_state(); +#ifdef COSPIKE_DTM + if (dtm && dtm->loadarch_done && !spike_loadarch_done) { + printf("Restoring spike state from testchip_dtm loadarch\n"); + // copy the loadarch state into the cosim + loadarch_state_t &ls = dtm->loadarch_state[hartid]; + s->pc = ls.pc; + s->prv = ls.prv; + s->csrmap[CSR_MSTATUS]->write(s->csrmap[CSR_MSTATUS]->read() | MSTATUS_VS | MSTATUS_XS | MSTATUS_FS); +#define RESTORE(CSRID, csr) s->csrmap[CSRID]->write(ls.csr); + RESTORE(CSR_FCSR , fcsr); + RESTORE(CSR_VSTART , vstart); + RESTORE(CSR_VXSAT , vxsat); + RESTORE(CSR_VXRM , vxrm); + RESTORE(CSR_VCSR , vcsr); + RESTORE(CSR_VTYPE , vtype); + RESTORE(CSR_STVEC , stvec); + 
RESTORE(CSR_SSCRATCH , sscratch); + RESTORE(CSR_SEPC , sepc); + RESTORE(CSR_SCAUSE , scause); + RESTORE(CSR_STVAL , stval); + RESTORE(CSR_SATP , satp); + RESTORE(CSR_MSTATUS , mstatus); + RESTORE(CSR_MEDELEG , medeleg); + RESTORE(CSR_MIDELEG , mideleg); + RESTORE(CSR_MIE , mie); + RESTORE(CSR_MTVEC , mtvec); + RESTORE(CSR_MSCRATCH , mscratch); + RESTORE(CSR_MEPC , mepc); + RESTORE(CSR_MCAUSE , mcause); + RESTORE(CSR_MTVAL , mtval); + RESTORE(CSR_MIP , mip); + RESTORE(CSR_MCYCLE , mcycle); + RESTORE(CSR_MINSTRET , minstret); + if (ls.VLEN != p->VU.VLEN) { + printf("VLEN mismatch loadarch: %lu != spike: %lu\n", (unsigned long)ls.VLEN, (unsigned long)p->VU.VLEN); + abort(); + } + if (ls.ELEN != p->VU.ELEN) { + printf("ELEN mismatch loadarch: %lu != spike: %lu\n", (unsigned long)ls.ELEN, (unsigned long)p->VU.ELEN); + abort(); + } + for (size_t i = 0; i < 32; i++) { + s->XPR.write(i, ls.XPR[i]); + s->FPR.write(i, { (uint64_t)ls.FPR[i], (uint64_t)-1 }); + memcpy(p->VU.reg_file + i * ls.VLEN / 8, ls.VPR[i], ls.VLEN / 8); + } + spike_loadarch_done = true; + p->clear_waiting_for_interrupt(); + } +#endif uint64_t s_pc = s->pc; + uint64_t interrupt_cause = cause & 0x7FFFFFFFFFFFFFFF; + bool msip_interrupt = interrupt_cause == 0x3; + bool debug_interrupt = interrupt_cause == 0xe; if (raise_interrupt) { printf("%d interrupt %lx\n", cycle, cause); - uint64_t interrupt_cause = cause & 0x7FFFFFFFFFFFFFFF; - if (interrupt_cause == 3) { + + if (msip_interrupt) { s->mip->backdoor_write_with_mask(MIP_MSIP, MIP_MSIP); + } else if (debug_interrupt) { + return; } else { printf("Unknown interrupt %lx\n", interrupt_cause); + abort(); } } if (raise_exception) @@ -185,74 +285,117 @@ extern "C" void cospike_cosim(long long int cycle, if (valid) { printf("%d Cosim: %lx", cycle, iaddr); if (has_wdata) { - printf(" %lx", wdata); + printf(" s: %lx", wdata); } printf("\n"); } - if (valid || raise_interrupt || raise_exception) + if (valid || raise_interrupt || raise_exception) { p->step(1); + if (unlikely(cospike_debug)) { + printf("spike pc is %lx\n", s->pc); 
+ printf("spike mstatus is %lx\n", s->mstatus->read()); + printf("spike mip is %lx\n", s->mip->read()); + printf("spike mie is %lx\n", s->mie->read()); + } + } if (valid) { if (s_pc != iaddr) { - printf("%d PC mismatch %lx != %lx\n", cycle, s_pc, iaddr); + printf("%lld PC mismatch spike %llx != DUT %llx\n", cycle, s_pc, iaddr); + if (unlikely(cospike_debug)) { + printf("spike mstatus is %lx\n", s->mstatus->read()); + printf("spike mcause is %lx\n", s->mcause->read()); + printf("spike mtval is %lx\n" , s->mtval->read()); + printf("spike mtinst is %lx\n", s->mtinst->read()); + } exit(1); } - // Try to remember magic_mem addrs, and ignore these in the future + auto& mem_write = s->log_mem_write; - if (!mem_write.empty() && tohost_addr && std::get<0>(mem_write[0]) == tohost_addr) { - reg_t wdata = std::get<1>(mem_write[0]); - if (wdata >= info->mem0_base && wdata < (info->mem0_base + info->mem0_size)) { - printf("Probable magic mem %x\n", wdata); - magic_addrs.insert(wdata); + auto& log = s->log_reg_write; + auto& mem_read = s->log_mem_read; + + for (auto memwrite : mem_write) { + reg_t waddr = std::get<0>(memwrite); + uint64_t w_data = std::get<1>(memwrite); + if ((waddr == CLINT_BASE + 4*hartid) && w_data == 0) { + s->mip->backdoor_write_with_mask(MIP_MSIP, 0); + } + // Try to remember magic_mem addrs, and ignore these in the future + if ( waddr == tohost_addr && w_data >= info->mem0_base && w_data < (info->mem0_base + info->mem0_size)) { + printf("Probable magic mem %lx\n", w_data); + magic_addrs.insert(w_data); } } - if (has_wdata) { - auto& log = s->log_reg_write; - auto& mem_read = s->log_mem_read; + bool scalar_wb = false; + bool vector_wb = false; + uint32_t vector_cnt = 0; + + for (auto &regwrite : log) { + 
//TODO: scaling to multi issue reads? reg_t mem_read_addr = mem_read.empty() ? 0 : std::get<0>(mem_read[0]); - for (auto regwrite : log) { - int rd = regwrite.first >> 4; - int type = regwrite.first & 0xf; - // 0 => int - // 1 => fp - // 2 => vec - // 3 => vec hint - // 4 => csr - if ((rd != 0 && type == 0) || type == 1) { - // Override reads from some CSRs - uint64_t csr_addr = (insn >> 20) & 0xfff; - bool csr_read = (insn & 0x7f) == 0x73; - if (csr_read) printf("CSR read %lx\n", csr_addr); - if (csr_read && ( - (csr_addr == 0x301) || // misa - (csr_addr == 0xf13) || // mimpid - (csr_addr == 0xf12) || // marchid - (csr_addr == 0xf11) || // mvendorid - (csr_addr == 0xb00) || // mcycle - (csr_addr == 0xb02) || // minstret - (csr_addr >= 0x3b0 && csr_addr <= 0x3ef) // pmpaddr - )) { - printf("CSR override\n"); - s->XPR.write(rd, wdata); - } else if (!mem_read.empty() && ((magic_addrs.count(mem_read_addr) || - (tohost_addr && mem_read_addr == tohost_addr) || - (fromhost_addr && mem_read_addr == fromhost_addr) || - (CLINT_BASE <= mem_read_addr && mem_read_addr < (CLINT_BASE + CLINT_SIZE)) - ))) { - // Don't check reads from tohost, reads from magic memory, or reads from clint - // Technically this could be buggy because log_mem_read only reports vaddrs, but - // no software ever should access tohost/fromhost/clint with vaddrs anyways - printf("Read override %lx\n", mem_read_addr); - s->XPR.write(rd, wdata); - } else if (wdata != regwrite.second.v[0]) { - printf("%d wdata mismatch reg %d %lx != %lx\n", cycle, rd, regwrite.second.v[0], wdata); - exit(1); - } - } + + int rd = regwrite.first >> 4; + int type = regwrite.first & 0xf; + + // 0 => int + // 1 => fp + // 2 => vec + // 3 => vec hint + // 4 => csr + + bool ignore_read = (!mem_read.empty() && + ((magic_addrs.count(mem_read_addr) || + (tohost_addr && mem_read_addr == tohost_addr) || + (fromhost_addr && mem_read_addr == fromhost_addr) || + (CLINT_BASE <= mem_read_addr && mem_read_addr < (CLINT_BASE + CLINT_SIZE))))); 
+ + // check the type is compliant with writeback first + if ((type == 0 || type == 1)) + scalar_wb = true; + if (type == 2) { + vector_wb = true; } + if (type == 3) continue; + + + if ((rd != 0 && type == 0) || type == 1) { + // Override reads from some CSRs + uint64_t csr_addr = (insn >> 20) & 0xfff; + bool csr_read = (insn & 0x7f) == 0x73; + if (csr_read) + printf("CSR read %lx\n", csr_addr); + if (csr_read && ((csr_addr == 0xf13) || // mimpid + (csr_addr == 0xf12) || // marchid + (csr_addr == 0xf11) || // mvendorid + (csr_addr == 0xb00) || // mcycle + (csr_addr == 0xb02) || // minstret + (csr_addr >= 0x3b0 && csr_addr <= 0x3ef) // pmpaddr + )) { + printf("CSR override\n"); + s->XPR.write(rd, wdata); + } else if (ignore_read) { + // Don't check reads from tohost, reads from magic memory, or reads + // from clint Technically this could be buggy because log_mem_read + // only reports vaddrs, but no software ever should access + // tohost/fromhost/clint with vaddrs anyways + printf("Read override %lx\n", mem_read_addr); + s->XPR.write(rd, wdata); + } else if (wdata != regwrite.second.v[0]) { + printf("%d wdata mismatch reg %d %lx != %lx\n", cycle, rd, + regwrite.second.v[0], wdata); + exit(1); + } + } + + // TODO FIX: Rocketchip TracedInstruction.wdata should be Valid(UInt) + // if (scalar_wb ^ has_wdata) { + // printf("Scalar wdata behavior divergence between spike and DUT\n"); + // exit(-1); + // } } } } -// } diff --git a/generators/chipyard/src/main/resources/csrc/cospike_dtm.h b/generators/chipyard/src/main/resources/csrc/cospike_dtm.h new file mode 100644 index 00000000..e69de29b diff --git a/generators/chipyard/src/main/resources/csrc/spiketile.cc b/generators/chipyard/src/main/resources/csrc/spiketile.cc index f59e825a..3b14079b 100644 --- a/generators/chipyard/src/main/resources/csrc/spiketile.cc +++ b/generators/chipyard/src/main/resources/csrc/spiketile.cc @@ -2,13 +2,22 @@ #include #include #include +#include +#include +#include #include #include 
#include #include -#include "testchip_tsi.h" -extern testchip_tsi_t* tsi; +#if __has_include("spiketile_tsi.h") +#define SPIKETILE_HTIF_TSI +extern htif_t* tsi; +#endif +#if __has_include("spiketile_dtm.h") +#define SPIKETILE_HTIF_DTM +extern htif_t* dtm; +#endif enum transfer_t { NToB, @@ -80,10 +89,11 @@ public: void tcm_a(uint64_t address, uint64_t data, uint32_t mask, uint32_t opcode, uint32_t size); bool tcm_d(uint64_t *data); - void loadmem(const char* fname); + void loadmem(size_t base, const char* fname); void drain_stq(); bool stq_empty() { return st_q.size() == 0; }; + void flush_icache(); const cfg_t &get_cfg() const { return cfg; } const std::map& get_harts() const { return harts; } @@ -109,6 +119,7 @@ public: bool fast_clint; cfg_t cfg; std::map harts; + bool accessed_tofrom_host; private: bool handle_cache_access(reg_t addr, size_t len, uint8_t* load_bytes, @@ -324,7 +335,7 @@ extern "C" void spike_tile(int hartid, char* isa, } } if (loadmem_file != "" && tcm_size > 0) - simif->loadmem(loadmem_file.c_str()); + simif->loadmem(tcm_base, loadmem_file.c_str()); p->reset(); p->get_state()->pc = reset_vector; @@ -334,14 +345,22 @@ extern "C" void spike_tile(int hartid, char* isa, tile_t* tile = tiles[hartid]; chipyard_simif_t* simif = tile->simif; processor_t* proc = tile->proc; - if (!simif->htif && tsi) { - simif->htif = (htif_t*) tsi; - } +#if defined(SPIKETILE_HTIF_TSI) + if (!simif->htif && tsi) + simif->htif = tsi; +#endif +#if defined(SPIKETILE_HTIF_DTM) + if (!simif->htif && dtm) + simif->htif = dtm; +#endif simif->cycle = cycle; if (debug) { proc->halt_request = proc->HR_REGULAR; } + if (!debug && proc->halt_request != proc->HR_NONE) { + proc->halt_request = proc->HR_NONE; + } proc->get_state()->mip->backdoor_write_with_mask(MIP_MTIP, mtip ? MIP_MTIP : 0); proc->get_state()->mip->backdoor_write_with_mask(MIP_MSIP, msip ? 
MIP_MSIP : 0); @@ -350,6 +369,7 @@ extern "C" void spike_tile(int hartid, char* isa, tile->max_insns = ipc; uint64_t pre_insns = proc->get_state()->minstret->read(); + simif->accessed_tofrom_host = false; tile->spike_context.switch_to(); *insns_retired = proc->get_state()->minstret->read() - pre_insns; if (simif->use_stq) { @@ -439,6 +459,7 @@ chipyard_simif_t::chipyard_simif_t(size_t icache_ways, std::vector(), false, 0), + accessed_tofrom_host(false), icache_ways(icache_ways), icache_sets(icache_sets), dcache_ways(dcache_ways), @@ -504,6 +525,12 @@ chipyard_simif_t::chipyard_simif_t(size_t icache_ways, tcm = (uint8_t*)malloc(tcm_size); } +void chipyard_simif_t::flush_icache() { + for (auto &w : icache) { + for (size_t i = 0; i < icache_sets; i++) w[i].state = NONE; + } +} + bool chipyard_simif_t::reservable(reg_t addr) { for (auto& r: cacheables) { if (addr >= r.base && addr < r.base + r.size) { @@ -544,6 +571,12 @@ bool chipyard_simif_t::mmio_load(reg_t addr, size_t len, uint8_t* bytes) { bool found = false; bool cacheable = false; bool readonly = false; + reg_t tohost_addr = htif ? htif->get_tohost_addr() : 0; + reg_t fromhost_addr = htif ? 
htif->get_fromhost_addr() : 0; + if (addr == tohost_addr || addr == fromhost_addr) { + accessed_tofrom_host = true; + } + if (addr >= tcm_base && addr < tcm_base + tcm_size) { memcpy(bytes, tcm + addr - tcm_base, len); return true; @@ -579,6 +612,8 @@ bool chipyard_simif_t::mmio_load(reg_t addr, size_t len, uint8_t* bytes) { while (!handle_cache_access(addr, len, bytes, nullptr, LOAD)) { host->switch_to(); } + uint64_t lddata = 0; + memcpy(&lddata, bytes, len); } else { handle_mmio_access(addr, len, bytes, nullptr, LOAD, readonly); } @@ -605,6 +640,7 @@ void chipyard_simif_t::handle_mmio_access(reg_t addr, size_t len, mmio_st = type == STORE; if (type == STORE) { assert(len <= 8); + mmio_stdata = 0; memcpy(&mmio_stdata, store_bytes, len); } mmio_len = len; @@ -911,6 +947,13 @@ bool chipyard_simif_t::dcache_c(uint64_t* address, uint64_t* source, int* param, } bool chipyard_simif_t::mmio_store(reg_t addr, size_t len, const uint8_t* bytes) { + reg_t tohost_addr = htif ? htif->get_tohost_addr() : 0; + reg_t fromhost_addr = htif ? htif->get_fromhost_addr() : 0; + + if (addr == tohost_addr || addr == fromhost_addr) { + accessed_tofrom_host = true; + } + if (addr >= tcm_base && addr < tcm_base + tcm_size) { memcpy(tcm + addr - tcm_base, bytes, len); return true; @@ -936,6 +979,8 @@ bool chipyard_simif_t::mmio_store(reg_t addr, size_t len, const uint8_t* bytes) return false; } if (cacheable) { + uint64_t temp = 0; + memcpy(&temp, bytes, len); if (use_stq) { assert(len <= 8); uint64_t stdata; @@ -1009,30 +1054,28 @@ bool chipyard_simif_t::tcm_d(uint64_t* data) { return true; } -#define parse_nibble(c) ((c) >= 'a' ? 
(c)-'a'+10 : (c)-'0') -void chipyard_simif_t::loadmem(const char* fname) { - std::ifstream in(fname); - std::string line; - if (!in.is_open()) { - printf("SpikeTile couldn't open loadmem file %s\n", fname); - abort(); - } - size_t fsize = 0; - size_t start = 0; - while (std::getline(in, line)) { - for (ssize_t i = line.length()-2, j = 0; i >= 0; i -= 2, j++) { - char byte = (parse_nibble(line[i]) << 4) | parse_nibble(line[i+1]); - ssize_t addr = (start + j) % tcm_size; - tcm[addr] = (uint8_t)byte; +void chipyard_simif_t::loadmem(size_t base, const char* fname) { + class loadmem_memif_t : public memif_t { + public: + loadmem_memif_t(chipyard_simif_t* _simif, size_t _start) : memif_t(nullptr), simif(_simif), start(_start) {} + void write(addr_t taddr, size_t len, const void* src) override + { + assert(taddr >= start && taddr - start <= simif->tcm_size && len <= simif->tcm_size - (taddr - start)); // ELF segment must lie entirely inside the TCM buffer + memcpy(simif->tcm + (taddr - start), src, len); } - start += line.length()/2; - fsize += line.length()/2; + void read(addr_t taddr, size_t len, void* bytes) override { + assert(false); + } + endianness_t get_target_endianness() const override { + return endianness_little; + } + private: + chipyard_simif_t* simif; + size_t start; + } loadmem_memif(this, base); - if (fsize > tcm_size) { - fprintf(stderr, "Loadmem file is too large\n"); - abort(); - } - } + reg_t entry; + load_elf(fname, &loadmem_memif, &entry); } bool insn_should_fence(uint64_t bits) { @@ -1060,33 +1103,31 @@ void spike_thread_main(void* arg) // if (insn_should_fence(last_bits) && !simif->stq_empty()) { // host->switch_to(); // } + uint64_t old_minstret = state->minstret->read(); proc->step(1); tile->max_insns--; if (proc->is_waiting_for_interrupt()) { if (simif->fast_clint) { - // uint64_t mip = state->mip->read(); - // uint64_t mie = state->mie->read(); - //printf("Setting MTIP %x %x %x %x %lx\n", simif->cycle, old_minstret, mip, mie, - // state->pc); state->mip->backdoor_write_with_mask(MIP_MTIP, MIP_MTIP); tile->max_insns = tile->max_insns <= 1 ? 
0 : 1; } else { - //printf("SpikeTile in WFI\n"); tile->max_insns = 0; } } - if (tile->max_insns % 100 == 0) { - uint64_t old_minstret = state->minstret->read(); - uint64_t tohost_addr = simif->htif ? simif->htif->get_tohost_addr() : 0; - uint64_t fromhost_addr = simif->htif ? simif->htif->get_fromhost_addr() : 0; - auto& mem_read = state->log_mem_read; - reg_t mem_read_addr = mem_read.empty() ? 0 : std::get<0>(mem_read[0]); - if ((old_minstret == state->minstret->read()) || - (tohost_addr && mem_read_addr == tohost_addr) || - (fromhost_addr && mem_read_addr == fromhost_addr)) { - tile->max_insns == 0; + if (state->debug_mode) { + // TODO: Fix. This needs to apply the same hack as rocket-chip... + // JALRs in debug mode should flush the ICache. + // There is no API to determine if a JALR was executed, so hack the + // pc of the JALR in the debug rom here instead. + if (state->pc == 0x838) { + simif->flush_icache(); } } + + // If we get stuck in WFI, or we start polling tohost/fromhost, switch to host thread + if ((old_minstret == state->minstret->read()) || simif->accessed_tofrom_host) { + tile->max_insns = 0; + } state->mcycle->write(simif->cycle); } } diff --git a/generators/chipyard/src/main/resources/csrc/spiketile_dtm.h b/generators/chipyard/src/main/resources/csrc/spiketile_dtm.h new file mode 100644 index 00000000..e69de29b diff --git a/generators/chipyard/src/main/resources/csrc/spiketile_tsi.h b/generators/chipyard/src/main/resources/csrc/spiketile_tsi.h new file mode 100644 index 00000000..e69de29b diff --git a/generators/chipyard/src/main/resources/vsrc/cospike.v b/generators/chipyard/src/main/resources/vsrc/cospike.v index f9d2322c..824e8d35 100644 --- a/generators/chipyard/src/main/resources/vsrc/cospike.v +++ b/generators/chipyard/src/main/resources/vsrc/cospike.v @@ -16,7 +16,8 @@ import "DPI-C" function void cospike_cosim(input longint cycle, input bit raise_exception, input bit raise_interrupt, input longint cause, - input longint wdata + input 
longint wdata, + input int priv ); @@ -42,6 +43,7 @@ module SpikeCosim #( input [63:0] trace_0_cause, input trace_0_has_wdata, input [63:0] trace_0_wdata, + input [2:0] trace_0_priv, input trace_1_valid, input [63:0] trace_1_iaddr, @@ -50,7 +52,8 @@ module SpikeCosim #( input trace_1_interrupt, input [63:0] trace_1_cause, input trace_1_has_wdata, - input [63:0] trace_1_wdata + input [63:0] trace_1_wdata, + input [2:0] trace_1_priv ); initial begin @@ -62,12 +65,12 @@ module SpikeCosim #( if (trace_0_valid || trace_0_exception || trace_0_cause) begin cospike_cosim(cycle, hartid, trace_0_has_wdata, trace_0_valid, trace_0_iaddr, trace_0_insn, trace_0_exception, trace_0_interrupt, trace_0_cause, - trace_0_wdata); + trace_0_wdata, trace_0_priv); end if (trace_1_valid || trace_1_exception || trace_1_cause) begin cospike_cosim(cycle, hartid, trace_1_has_wdata, trace_1_valid, trace_1_iaddr, trace_1_insn, trace_1_exception, trace_1_interrupt, trace_1_cause, - trace_1_wdata); + trace_1_wdata, trace_1_priv); end end end diff --git a/generators/chipyard/src/main/scala/Cospike.scala b/generators/chipyard/src/main/scala/Cospike.scala index fffb97d6..cee23413 100644 --- a/generators/chipyard/src/main/scala/Cospike.scala +++ b/generators/chipyard/src/main/scala/Cospike.scala @@ -18,7 +18,8 @@ case class SpikeCosimConfig( mem0_base: BigInt, mem0_size: BigInt, nharts: Int, - bootrom: String + bootrom: String, + has_dtm: Boolean ) class SpikeCosim(cfg: SpikeCosimConfig) extends BlackBox(Map( @@ -32,6 +33,7 @@ class SpikeCosim(cfg: SpikeCosimConfig) extends BlackBox(Map( { addResource("/csrc/cospike.cc") addResource("/vsrc/cospike.v") + if (cfg.has_dtm) addResource("/csrc/cospike_dtm.h") val io = IO(new Bundle { val clock = Input(Clock()) val reset = Input(Bool()) @@ -46,6 +48,7 @@ class SpikeCosim(cfg: SpikeCosimConfig) extends BlackBox(Map( val cause = UInt(64.W) val has_wdata = Bool() val wdata = UInt(64.W) + val priv = UInt(3.W) })) }) } @@ -64,12 +67,8 @@ object SpikeCosim 
require(trace.numInsns <= 2) cosim.io.cycle := cycle cosim.io.trace.map(t => { + t := DontCare t.valid := false.B - t.iaddr := 0.U - t.insn := 0.U - t.exception := false.B - t.interrupt := false.B - t.cause := 0.U }) cosim.io.hartid := hartid.U for (i <- 0 until trace.numInsns) { @@ -83,6 +82,7 @@ object SpikeCosim cosim.io.trace(i).cause := trace.insns(i).cause cosim.io.trace(i).has_wdata := trace.insns(i).wdata.isDefined.B cosim.io.trace(i).wdata := trace.insns(i).wdata.getOrElse(0.U) + cosim.io.trace(i).priv := trace.insns(i).priv } } } diff --git a/generators/chipyard/src/main/scala/HarnessBinders.scala b/generators/chipyard/src/main/scala/HarnessBinders.scala index c39242f9..3f167aa3 100644 --- a/generators/chipyard/src/main/scala/HarnessBinders.scala +++ b/generators/chipyard/src/main/scala/HarnessBinders.scala @@ -168,8 +168,9 @@ class WithSimAXIMemOverSerialTL extends OverrideHarnessBinder({ // connect SimDRAM from the AXI port coming from the harness multi clock axi ram (harnessMultiClockAXIRAM.mem_axi4 zip harnessMultiClockAXIRAM.memNode.edges.in).map { case (axi_port, edge) => val memSize = sVal.memParams.size + val memBase = sVal.memParams.base val lineSize = p(CacheBlockBytes) - val mem = Module(new SimDRAM(memSize, lineSize, BigInt(memFreq.toLong), edge.bundle)).suggestName("simdram") + val mem = Module(new SimDRAM(memSize, lineSize, BigInt(memFreq.toLong), memBase, edge.bundle)).suggestName("simdram") mem.io.axi <> axi_port.bits mem.io.clock := axi_port.clock mem.io.reset := axi_port.reset @@ -184,10 +185,12 @@ class WithBlackBoxSimMem(additionalLatency: Int = 0) extends OverrideHarnessBind (system: CanHaveMasterAXI4MemPort, th: HasHarnessSignalReferences, ports: Seq[ClockedAndResetIO[AXI4Bundle]]) => { val p: Parameters = chipyard.iobinders.GetSystemParameters(system) (ports zip system.memAXI4Node.edges.in).map { case (port, edge) => + // TODO FIX: This currently makes each SimDRAM contain the entire memory space val memSize = 
p(ExtMem).get.master.size + val memBase = p(ExtMem).get.master.base val lineSize = p(CacheBlockBytes) val clockFreq = p(MemoryBusKey).dtsFrequency.get - val mem = Module(new SimDRAM(memSize, lineSize, clockFreq, edge.bundle)).suggestName("simdram") + val mem = Module(new SimDRAM(memSize, lineSize, clockFreq, memBase, edge.bundle)).suggestName("simdram") mem.io.axi <> port.bits // Bug in Chisel implementation. See https://github.com/chipsalliance/chisel3/pull/1781 def Decoupled[T <: Data](irr: IrrevocableIO[T]): DecoupledIO[T] = { @@ -252,7 +255,7 @@ class WithSimDebug extends OverrideHarnessBinder({ case d: ClockedDMIIO => val dtm_success = WireInit(false.B) when (dtm_success) { th.success := true.B } - val dtm = Module(new SimDTM).connect(th.buildtopClock, th.buildtopReset.asBool, d, dtm_success) + val dtm = Module(new TestchipSimDTM).connect(th.buildtopClock, th.buildtopReset.asBool, d, dtm_success) case j: JTAGChipIO => val dtm_success = WireInit(false.B) when (dtm_success) { th.success := true.B } @@ -262,7 +265,8 @@ class WithSimDebug extends OverrideHarnessBinder({ j.TCK := jtag_wire.TCK j.TMS := jtag_wire.TMS j.TDI := jtag_wire.TDI - val jtag = Module(new SimJTAG(tickDelay=3)).connect(jtag_wire, th.buildtopClock, th.buildtopReset.asBool, ~(th.buildtopReset.asBool), dtm_success) + val jtag = Module(new SimJTAG(tickDelay=3)) + jtag.connect(jtag_wire, th.buildtopClock, th.buildtopReset.asBool, ~(th.buildtopReset.asBool), dtm_success) } } }) @@ -365,7 +369,8 @@ class WithCospike extends ComposeHarnessBinder({ mem0_size = p(ExtMem).map(_.master.size).getOrElse(BigInt(0)), pmpregions = tiles.headOption.map(_.tileParams.core.nPMPs).getOrElse(0), nharts = tiles.size, - bootrom = chipyardSystem.bootROM.map(_.module.contents.toArray.mkString(" ")).getOrElse("") + bootrom = chipyardSystem.bootROM.map(_.module.contents.toArray.mkString(" ")).getOrElse(""), + has_dtm = p(ExportDebug).protocols.contains(DMI) // assume that exposing clockeddmi means we will connect SimDTM 
) ports.map { p => p.traces.zipWithIndex.map(t => SpikeCosim(t._1, t._2, cfg)) } } diff --git a/generators/chipyard/src/main/scala/SpikeTile.scala b/generators/chipyard/src/main/scala/SpikeTile.scala index 9aac7421..c6ec13ae 100644 --- a/generators/chipyard/src/main/scala/SpikeTile.scala +++ b/generators/chipyard/src/main/scala/SpikeTile.scala @@ -7,6 +7,7 @@ import chisel3.experimental.{IntParam, StringParam, IO} import org.chipsalliance.cde.config._ import freechips.rocketchip.subsystem._ import freechips.rocketchip.devices.tilelink._ +import freechips.rocketchip.devices.debug.{ExportDebug, DMI} import freechips.rocketchip.diplomacy._ import freechips.rocketchip.rocket._ import freechips.rocketchip.tilelink._ @@ -189,7 +190,8 @@ class SpikeBlackBox( readonly_uncacheable_regions: String, executable_regions: String, tcm_base: BigInt, - tcm_size: BigInt) extends BlackBox(Map( + tcm_size: BigInt, + use_dtm: Boolean) extends BlackBox(Map( "HARTID" -> IntParam(hartId), "ISA" -> StringParam(isa), "PMPREGIONS" -> IntParam(pmpregions), @@ -302,7 +304,11 @@ class SpikeBlackBox( }) addResource("/vsrc/spiketile.v") addResource("/csrc/spiketile.cc") - + if (use_dtm) { + addResource("/csrc/spiketile_dtm.h") + } else { + addResource("/csrc/spiketile_tsi.h") + } } class SpikeTileModuleImp(outer: SpikeTile) extends BaseTileModuleImp(outer) { @@ -326,13 +332,18 @@ class SpikeTileModuleImp(outer: SpikeTile) extends BaseTileModuleImp(outer) { val (dcache_tl, dcacheEdge) = outer.dcacheNode.out(0) val (mmio_tl, mmioEdge) = outer.mmioNode.out(0) + // Note: This assumes that if the debug module exposes the ClockedDMI port, + // then the DTM-based bringup with SimDTM will be used. 
This isn't required to be + // true, but it usually is + val useDTM = p(ExportDebug).protocols.contains(DMI) val spike = Module(new SpikeBlackBox(hartId, isaDTS, tileParams.core.nPMPs, tileParams.icache.get.nSets, tileParams.icache.get.nWays, tileParams.dcache.get.nSets, tileParams.dcache.get.nWays, tileParams.dcache.get.nMSHRs, cacheable_regions, uncacheable_regions, readonly_uncacheable_regions, executable_regions, outer.spikeTileParams.tcmParams.map(_.base).getOrElse(0), - outer.spikeTileParams.tcmParams.map(_.size).getOrElse(0) + outer.spikeTileParams.tcmParams.map(_.size).getOrElse(0), + useDTM )) spike.io.clock := clock.asBool val cycle = RegInit(0.U(64.W)) @@ -421,7 +432,7 @@ class SpikeTileModuleImp(outer: SpikeTile) extends BaseTileModuleImp(outer) { spike.io.mmio.a.ready := mmio_tl.a.ready mmio_tl.a.valid := spike.io.mmio.a.valid - val log_size = MuxCase(0.U, (0 until 3).map { i => (spike.io.mmio.a.size === (1 << i).U) -> i.U }) + val log_size = (0 until 4).map { i => Mux(spike.io.mmio.a.size === (1 << i).U, i.U, 0.U) }.reduce(_|_) mmio_tl.a.bits := Mux(spike.io.mmio.a.store, mmioEdge.Put(0.U, spike.io.mmio.a.address, log_size, spike.io.mmio.a.data)._2, mmioEdge.Get(0.U, spike.io.mmio.a.address, log_size)._2) diff --git a/generators/chipyard/src/main/scala/config/AbstractConfig.scala b/generators/chipyard/src/main/scala/config/AbstractConfig.scala index e270c978..ef02f468 100644 --- a/generators/chipyard/src/main/scala/config/AbstractConfig.scala +++ b/generators/chipyard/src/main/scala/config/AbstractConfig.scala @@ -48,6 +48,7 @@ class AbstractConfig extends Config( new testchipip.WithSerialTLWidth(32) ++ // fatten the serialTL interface to improve testing performance new testchipip.WithDefaultSerialTL ++ // use serialized tilelink port to external serialadapter/harnessRAM + new chipyard.config.WithDebugModuleAbstractDataWords(8) ++ // increase debug module data capacity new chipyard.config.WithBootROM ++ // use default bootrom new 
chipyard.config.WithUART ++ // add a UART new chipyard.config.WithL2TLBs(1024) ++ // use L2 TLBs @@ -56,6 +57,7 @@ class AbstractConfig extends Config( new chipyard.config.WithPeripheryBusFrequencyAsDefault ++ // Unspecified frequencies with match the pbus frequency (which is always set) new chipyard.config.WithMemoryBusFrequency(100.0) ++ // Default 100 MHz mbus new chipyard.config.WithPeripheryBusFrequency(100.0) ++ // Default 100 MHz pbus + new freechips.rocketchip.subsystem.WithNMemoryChannels(2) ++ // Default 2 memory channels new freechips.rocketchip.subsystem.WithClockGateModel ++ // add default EICG_wrapper clock gate model new freechips.rocketchip.subsystem.WithJtagDTM ++ // set the debug module to expose a JTAG port new freechips.rocketchip.subsystem.WithNoMMIOPort ++ // no top-level MMIO master port (overrides default set in rocketchip) diff --git a/generators/chipyard/src/main/scala/config/BoomConfigs.scala b/generators/chipyard/src/main/scala/config/BoomConfigs.scala index 4c101403..5544e4c3 100644 --- a/generators/chipyard/src/main/scala/config/BoomConfigs.scala +++ b/generators/chipyard/src/main/scala/config/BoomConfigs.scala @@ -53,3 +53,18 @@ class MediumBoomCosimConfig extends Config( new chipyard.config.WithTraceIO ++ // enable the traceio new boom.common.WithNMediumBooms(1) ++ new chipyard.config.AbstractConfig) + +class dmiMediumBoomConfig extends Config( + new chipyard.harness.WithSerialAdapterTiedOff ++ // don't attach an external SimSerial + new chipyard.config.WithDMIDTM ++ // have debug module expose a clocked DMI port + new boom.common.WithNMediumBooms(1) ++ + new chipyard.config.AbstractConfig) + +class dmiMediumBoomCosimConfig extends Config( + new chipyard.harness.WithCospike ++ // attach spike-cosim + new chipyard.config.WithTraceIO ++ // enable the traceio + new chipyard.harness.WithSerialAdapterTiedOff ++ // don't attach an external SimSerial + new chipyard.config.WithDMIDTM ++ // have debug module expose a clocked DMI port + new 
boom.common.WithNMediumBooms(1) ++ + new chipyard.config.AbstractConfig) + diff --git a/generators/chipyard/src/main/scala/config/ChipConfigs.scala b/generators/chipyard/src/main/scala/config/ChipConfigs.scala index 2d6cb206..e0ccd2cc 100644 --- a/generators/chipyard/src/main/scala/config/ChipConfigs.scala +++ b/generators/chipyard/src/main/scala/config/ChipConfigs.scala @@ -24,6 +24,7 @@ class ChipLikeQuadRocketConfig extends Config( new chipyard.harness.WithSimAXIMemOverSerialTL ++ // Attach fast SimDRAM to TestHarness new chipyard.config.WithSerialTLBackingMemory ++ // Backing memory is over serial TL protocol new freechips.rocketchip.subsystem.WithExtMemSize((1 << 30) * 4L) ++ // 4GB max external memory + new freechips.rocketchip.subsystem.WithNMemoryChannels(1) ++ // 1 memory channel //================================== // Set up clock./reset diff --git a/generators/chipyard/src/main/scala/config/RocketConfigs.scala b/generators/chipyard/src/main/scala/config/RocketConfigs.scala index 8ed4acdc..965a81fc 100644 --- a/generators/chipyard/src/main/scala/config/RocketConfigs.scala +++ b/generators/chipyard/src/main/scala/config/RocketConfigs.scala @@ -125,6 +125,7 @@ class MulticlockAXIOverSerialConfig extends Config( new chipyard.config.WithSerialTLBackingMemory ++ // remove axi4 mem port in favor of SerialTL memory new freechips.rocketchip.subsystem.WithNBigCores(2) ++ + new freechips.rocketchip.subsystem.WithNMemoryChannels(1) ++ // 1 memory channel new chipyard.config.AbstractConfig) // DOC include end: MulticlockAXIOverSerialConfig diff --git a/generators/chipyard/src/main/scala/config/SpikeConfigs.scala b/generators/chipyard/src/main/scala/config/SpikeConfigs.scala index cb82360e..34413b36 100644 --- a/generators/chipyard/src/main/scala/config/SpikeConfigs.scala +++ b/generators/chipyard/src/main/scala/config/SpikeConfigs.scala @@ -10,6 +10,11 @@ class SpikeConfig extends Config( new chipyard.WithNSpikeCores(1) ++ new chipyard.config.AbstractConfig) +class 
dmiSpikeConfig extends Config( + new chipyard.harness.WithSerialAdapterTiedOff ++ // don't attach an external SimSerial + new chipyard.config.WithDMIDTM ++ // have debug module expose a clocked DMI port + new SpikeConfig) + // Avoids polling on the UART registers class SpikeFastUARTConfig extends Config( new chipyard.WithNSpikeCores(1) ++ @@ -29,6 +34,11 @@ class SpikeUltraFastConfig extends Config( new chipyard.config.WithBroadcastManager ++ new chipyard.config.AbstractConfig) +class dmiSpikeUltraFastConfig extends Config( + new chipyard.harness.WithSerialAdapterTiedOff ++ // don't attach an external SimSerial + new chipyard.config.WithDMIDTM ++ // have debug module expose a clocked DMI port + new SpikeUltraFastConfig) + // Add the default firechip devices class SpikeUltraFastDevicesConfig extends Config( new chipyard.harness.WithSimBlockDevice ++ diff --git a/generators/chipyard/src/main/scala/config/fragments/PeripheralFragments.scala b/generators/chipyard/src/main/scala/config/fragments/PeripheralFragments.scala index 7ac7f569..56cd3bb6 100644 --- a/generators/chipyard/src/main/scala/config/fragments/PeripheralFragments.scala +++ b/generators/chipyard/src/main/scala/config/fragments/PeripheralFragments.scala @@ -87,3 +87,7 @@ class WithExtMemIdBits(n: Int) extends Config((site, here, up) => { class WithNoPLIC extends Config((site, here, up) => { case PLICKey => None }) + +class WithDebugModuleAbstractDataWords(words: Int = 16) extends Config((site, here, up) => { + case DebugModuleKey => up(DebugModuleKey).map(_.copy(nAbstractDataWords=words)) +}) diff --git a/generators/chipyard/src/main/scala/example/FlatTestHarness.scala b/generators/chipyard/src/main/scala/example/FlatTestHarness.scala index 3954931e..c428a5e9 100644 --- a/generators/chipyard/src/main/scala/example/FlatTestHarness.scala +++ b/generators/chipyard/src/main/scala/example/FlatTestHarness.scala @@ -60,8 +60,9 @@ class FlatTestHarness(implicit val p: Parameters) extends Module { // connect 
SimDRAM from the AXI port coming from the harness multi clock axi ram (harnessMultiClockAXIRAM.mem_axi4 zip harnessMultiClockAXIRAM.memNode.edges.in).map { case (axi_port, edge) => val memSize = sVal.memParams.size + val memBase = sVal.memParams.base val lineSize = p(CacheBlockBytes) - val mem = Module(new SimDRAM(memSize, lineSize, BigInt(memFreq.toLong), edge.bundle)).suggestName("simdram") + val mem = Module(new SimDRAM(memSize, lineSize, BigInt(memFreq.toLong), memBase, edge.bundle)).suggestName("simdram") mem.io.axi <> axi_port.bits mem.io.clock := axi_port.clock mem.io.reset := axi_port.reset diff --git a/generators/testchipip b/generators/testchipip index efdef967..b6676e51 160000 --- a/generators/testchipip +++ b/generators/testchipip @@ -1 +1 @@ -Subproject commit efdef967720ac836cf007f361371f095fbdce4ea +Subproject commit b6676e5122e9ffab10928cc00417e163dc69c952 diff --git a/scripts/generate-ckpt.sh b/scripts/generate-ckpt.sh new file mode 100755 index 00000000..0f8b1e8b --- /dev/null +++ b/scripts/generate-ckpt.sh @@ -0,0 +1,141 @@ +#!/bin/bash + +set -e + +usage() { + echo "Usage: $0 [OPTIONS]" + echo "" + echo "Options" + echo " --help -h : Display this message" + echo " -n : Number of harts" + echo " -b : Binary to run in spike" + echo " -p : PC to take checkpoint at [default 0x80000000]" + echo " -i : Instructions after PC to take checkpoint at [default 0]" + echo " -m : ISA to pass to spike for checkpoint generation [default rv64gc]" + echo " -o : Output directory to store the checkpoint in. 
[default ...loadarch]" + exit "$1" +} + +NHARTS=1 +BINARY="" +PC="0x80000000" +INSNS=0 +ISA="rv64gc" +OUTPATH="" +while [ "$1" != "" ]; +do + case $1 in + -h | --help ) + usage 3 ;; + -n ) + shift + NHARTS=$1 ;; + -b ) + shift + BINARY=$1 ;; + -p ) + shift + PC=$1 ;; + -i ) + shift + INSNS=$1 ;; + -m ) + shift + ISA=$1 ;; + -o ) + shift + OUTPATH=$1 ;; + * ) + error "Invalid option $1" + usage 1 ;; + esac + shift +done + +BASEMEM="$((0x80000000)):$((0x10000000))" +SPIKEFLAGS="-p$NHARTS --pmpregions=0 --isa=$ISA -m$BASEMEM" +BASENAME=$(basename -- $BINARY) + +if [ -z "$OUTPATH" ] ; then + OUTPATH=$BASENAME.$PC.$INSNS.loadarch +fi + +echo "Generating loadarch directory $OUTPATH" +rm -rf $OUTPATH +mkdir -p $OUTPATH + +LOADARCH_FILE=$OUTPATH/loadarch +RAWMEM_ELF=$OUTPATH/raw.elf +LOADMEM_ELF=$OUTPATH/mem.elf +CMDS_FILE=$OUTPATH/cmds_tmp.txt +SPIKECMD_FILE=$OUTPATH/spikecmd.sh + +echo "Generating state capture spike interactive commands in $CMDS_FILE" +echo "until pc 0 $PC" >> $CMDS_FILE +echo "rs $INSNS" >> $CMDS_FILE +echo "dump" >> $CMDS_FILE +for (( h=0; h<$NHARTS; h++ )) +do + echo "pc $h" >> $CMDS_FILE + echo "priv $h" >> $CMDS_FILE + echo "reg $h fcsr" >> $CMDS_FILE + + echo "reg $h vstart" >> $CMDS_FILE + echo "reg $h vxsat" >> $CMDS_FILE + echo "reg $h vxrm" >> $CMDS_FILE + echo "reg $h vcsr" >> $CMDS_FILE + echo "reg $h vtype" >> $CMDS_FILE + + echo "reg $h stvec" >> $CMDS_FILE + echo "reg $h sscratch" >> $CMDS_FILE + echo "reg $h sepc" >> $CMDS_FILE + echo "reg $h scause" >> $CMDS_FILE + echo "reg $h stval" >> $CMDS_FILE + echo "reg $h satp" >> $CMDS_FILE + + echo "reg $h mstatus" >> $CMDS_FILE + echo "reg $h medeleg" >> $CMDS_FILE + echo "reg $h mideleg" >> $CMDS_FILE + echo "reg $h mie" >> $CMDS_FILE + echo "reg $h mtvec" >> $CMDS_FILE + echo "reg $h mscratch" >> $CMDS_FILE + echo "reg $h mepc" >> $CMDS_FILE + echo "reg $h mcause" >> $CMDS_FILE + echo "reg $h mtval" >> $CMDS_FILE + echo "reg $h mip" >> $CMDS_FILE + + echo "reg $h mcycle" >> $CMDS_FILE + 
echo "reg $h minstret" >> $CMDS_FILE + + echo "mtime" >> $CMDS_FILE + echo "mtimecmp $h" >> $CMDS_FILE + + for (( fr=0; fr<32; fr++ )) + do + echo "freg $h $fr" >> $CMDS_FILE + done + for (( xr=0; xr<32; xr++ )) + do + echo "reg $h $xr" >> $CMDS_FILE + done + echo "vreg $h" >> $CMDS_FILE +done +echo "quit" >> $CMDS_FILE + +echo "spike -d --debug-cmd=$CMDS_FILE $SPIKEFLAGS $BINARY" > $SPIKECMD_FILE + +echo "Capturing state at checkpoint to spikeout" +spike -d --debug-cmd=$CMDS_FILE $SPIKEFLAGS $BINARY 2> $LOADARCH_FILE + + +echo "Finding tohost/fromhost in elf file" +TOHOST=$(riscv64-unknown-elf-nm $BINARY | grep tohost | head -c 16) +FROMHOST=$(riscv64-unknown-elf-nm $BINARY | grep fromhost | head -c 16) + +echo "Compiling memory to elf" +riscv64-unknown-elf-objcopy -I binary -O elf64-littleriscv mem.0x80000000.bin $RAWMEM_ELF +rm -rf mem.0x80000000.bin + +riscv64-unknown-elf-ld -Tdata=0x80000000 -nmagic --defsym tohost=0x$TOHOST --defsym fromhost=0x$FROMHOST -o $LOADMEM_ELF $RAWMEM_ELF +rm -rf $RAWMEM_ELF + diff --git a/scripts/smartelf2hex.sh b/scripts/smartelf2hex.sh deleted file mode 100755 index cc2ea2f8..00000000 --- a/scripts/smartelf2hex.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env bash - -# This script find the appropriate arguments to pass to elf2hex by inspecting the given RISC-V elf binary -# First and only argument is the binary to be converted. -# The output of this script should be redirected to a file (as with normal elf2hex). 
- -binary=$1 -segments=`readelf --segments --wide $binary` -entry_hex=`echo -e "$segments" | grep "Entry point" | cut -f3 -d' ' | sed 's/0x//' | tr [:lower:] [:upper:]` -entry_dec=`bc <<< "ibase=16;$entry_hex"` -length_hex=`echo "$segments" | grep "LOAD\|TLS" | tail -n 1 | tr -s [:space:] | cut -f4,7 -d' '` -length_dec=`echo $length_hex | tr -d x | tr [:lower:] [:upper:] | tr ' ' + | sed 's/^/ibase=16;/' | sed "s/$/-$entry_hex/" | bc` -power_2_length=`echo "x=l($length_dec)/l(2); scale=0; 2^((x+1)/1)" | bc -l` -width=64 -depth=$((power_2_length / width)) -elf2hex $width $depth $binary $entry_dec diff --git a/sims/verilator/Makefile b/sims/verilator/Makefile index d48da28e..96f37237 100644 --- a/sims/verilator/Makefile +++ b/sims/verilator/Makefile @@ -60,7 +60,8 @@ SIM_FILE_REQS += \ $(TESTCHIP_RSRCS_DIR)/testchipip/csrc/mm.cc \ $(TESTCHIP_RSRCS_DIR)/testchipip/csrc/mm_dramsim2.h \ $(TESTCHIP_RSRCS_DIR)/testchipip/csrc/mm_dramsim2.cc \ - $(ROCKETCHIP_RSRCS_DIR)/csrc/SimDTM.cc \ + $(TESTCHIP_RSRCS_DIR)/testchipip/csrc/testchip_dtm.cc \ + $(TESTCHIP_RSRCS_DIR)/testchipip/csrc/testchip_dtm.h \ $(ROCKETCHIP_RSRCS_DIR)/csrc/SimJTAG.cc \ $(ROCKETCHIP_RSRCS_DIR)/csrc/remote_bitbang.h \ $(ROCKETCHIP_RSRCS_DIR)/csrc/remote_bitbang.cc diff --git a/toolchains/riscv-tools/riscv-isa-sim b/toolchains/riscv-tools/riscv-isa-sim index d70ea67d..fcbdbe79 160000 --- a/toolchains/riscv-tools/riscv-isa-sim +++ b/toolchains/riscv-tools/riscv-isa-sim @@ -1 +1 @@ -Subproject commit d70ea67df7e85a8d92a8baa254afde67c33c43a9 +Subproject commit fcbdbe7946079650d0e656fa3d353e3f652d471f diff --git a/variables.mk b/variables.mk index 443caa94..9caaf676 100644 --- a/variables.mk +++ b/variables.mk @@ -25,6 +25,8 @@ HELP_PROJECT_VARIABLES = \ HELP_SIMULATION_VARIABLES = \ " BINARY = riscv elf binary that the simulator will run when using the run-binary* targets" \ +" LOADMEM = riscv elf binary that should be loaded directly into simulated DRAM. 
LOADMEM=1 will load the BINARY elf" \ +" LOADARCH = path to a architectural checkpoint directory that should end in .loadarch/, for restoring from a checkpoint" \ " VERBOSE_FLAGS = flags used when doing verbose simulation [$(VERBOSE_FLAGS)]" \ " timeout_cycles = number of clock cycles before simulator times out, defaults to 10000000" \ " bmark_timeout_cycles = number of clock cycles before benchmark simulator times out, defaults to 100000000" @@ -242,15 +244,28 @@ output_dir=$(sim_dir)/output/$(long_name) PERMISSIVE_ON=+permissive PERMISSIVE_OFF=+permissive-off BINARY ?= -LOADMEM ?= -LOADMEM_ADDR ?= 81000000 override SIM_FLAGS += +dramsim +dramsim_ini_dir=$(TESTCHIP_DIR)/src/main/resources/dramsim2_ini +max-cycles=$(timeout_cycles) -ifneq ($(LOADMEM),) -override SIM_FLAGS += +loadmem=$(LOADMEM) +loadmem_addr=$(LOADMEM_ADDR) -endif VERBOSE_FLAGS ?= +verbose -sim_out_name = $(output_dir)/$(subst $() $(),_,$(notdir $(basename $(BINARY)))) -binary_hex= $(sim_out_name).loadmem_hex +OUT_NAME ?= $(subst $() $(),_,$(notdir $(basename $(BINARY)))) +LOADMEM ?= +LOADARCH ?= + +ifneq ($(LOADARCH),) +override BINARY = $(LOADARCH)/mem.elf +override OUT_NAME = $(shell basename $(LOADARCH)) +override LOADMEM = 1 +override SIM_FLAGS += +loadarch=$(LOADARCH)/loadarch +endif + +ifeq ($(LOADMEM),1) +# If LOADMEM=1, assume BINARY is the loadmem elf +override SIM_FLAGS += +loadmem=$(BINARY) +else ifneq ($(LOADMEM),) +# Otherwise, assume the variable points to an elf file +override SIM_FLAGS += +loadmem=$(LOADMEM) +endif + +sim_out_name = $(output_dir)/$(OUT_NAME) ######################################################################################### # build output directory for compilation