diff --git a/benchmarks/opencl/Makefile b/benchmarks/opencl/Makefile index c5255b79..09ad414d 100644 --- a/benchmarks/opencl/Makefile +++ b/benchmarks/opencl/Makefile @@ -20,4 +20,12 @@ clean: $(MAKE) -C saxpy clean $(MAKE) -C sfilter clean $(MAKE) -C nearn clean - $(MAKE) -C guassian clean \ No newline at end of file + $(MAKE) -C guassian clean + +clean-all: + $(MAKE) -C vecadd clean-all + $(MAKE) -C sgemm clean-all + $(MAKE) -C saxpy clean-all + $(MAKE) -C sfilter clean-all + $(MAKE) -C nearn clean-all + $(MAKE) -C guassian clean-all \ No newline at end of file diff --git a/benchmarks/opencl/bfs/Makefile b/benchmarks/opencl/bfs/Makefile index 10f2e231..d46eb9bb 100644 --- a/benchmarks/opencl/bfs/Makefile +++ b/benchmarks/opencl/bfs/Makefile @@ -51,6 +51,9 @@ run-rtlsim: $(PROJECT) kernel.pocl clean: rm -rf $(PROJECT) *.o .depend +clean-all: clean + rm *.pocl *.dump + ifneq ($(MAKECMDGOALS),clean) -include .depend endif diff --git a/benchmarks/opencl/convolution/Makefile b/benchmarks/opencl/convolution/Makefile index 931bdefe..c73544aa 100644 --- a/benchmarks/opencl/convolution/Makefile +++ b/benchmarks/opencl/convolution/Makefile @@ -49,7 +49,10 @@ run-rtlsim: $(PROJECT) kernel.pocl $(CXX) $(CXXFLAGS) -MM $^ > .depend; clean: - rm -rf $(PROJECT) *.o .depend + rm -rf $(PROJECT) *.o .depend + +clean-all: clean + rm *.pocl *.dump ifneq ($(MAKECMDGOALS),clean) -include .depend diff --git a/benchmarks/opencl/guassian/Makefile b/benchmarks/opencl/guassian/Makefile index 58f112d9..e16e5ca7 100644 --- a/benchmarks/opencl/guassian/Makefile +++ b/benchmarks/opencl/guassian/Makefile @@ -52,7 +52,10 @@ run-rtlsim: $(PROJECT) kernel.pocl $(CXX) $(CXXFLAGS) -MM $^ > .depend; clean: - rm -rf $(PROJECT) *.o .depend + rm -rf $(PROJECT) *.o .depend + +clean-all: clean + rm *.pocl *.dump ifneq ($(MAKECMDGOALS),clean) -include .depend diff --git a/benchmarks/opencl/kmeans/Makefile b/benchmarks/opencl/kmeans/Makefile index d5af8a02..8ade050f 100644 --- a/benchmarks/opencl/kmeans/Makefile +++ b/benchmarks/opencl/kmeans/Makefile @@ -51,6 +51,9 @@ run-rtlsim: $(PROJECT) kernel.pocl clean: rm -rf $(PROJECT) *.o .depend +clean-all: clean + rm *.pocl *.dump + ifneq ($(MAKECMDGOALS),clean) -include .depend endif diff --git a/benchmarks/opencl/nearn/Makefile b/benchmarks/opencl/nearn/Makefile index 2d28041b..f1f3754d 100644 --- a/benchmarks/opencl/nearn/Makefile +++ b/benchmarks/opencl/nearn/Makefile @@ -54,6 +54,9 @@ run-rtlsim: $(PROJECT) kernel.pocl clean: rm -rf $(PROJECT) *.o .depend +clean-all: clean + rm *.pocl *.dump + ifneq ($(MAKECMDGOALS),clean) -include .depend endif diff --git a/benchmarks/opencl/saxpy/Makefile b/benchmarks/opencl/saxpy/Makefile index 92f290e0..8c9de60b 100644 --- a/benchmarks/opencl/saxpy/Makefile +++ b/benchmarks/opencl/saxpy/Makefile @@ -51,7 +51,10 @@ run-rtlsim: $(PROJECT) kernel.pocl $(CXX) $(CXXFLAGS) -MM $^ > .depend; clean: - rm -rf $(PROJECT) *.o .depend + rm -rf $(PROJECT) *.o .depend + +clean-all: clean + rm *.pocl *.dump ifneq ($(MAKECMDGOALS),clean) -include .depend diff --git a/benchmarks/opencl/sfilter/Makefile b/benchmarks/opencl/sfilter/Makefile index acd8633b..714190b8 100644 --- a/benchmarks/opencl/sfilter/Makefile +++ b/benchmarks/opencl/sfilter/Makefile @@ -53,6 +53,9 @@ run-rtlsim: $(PROJECT) kernel.pocl clean: rm -rf $(PROJECT) *.o .depend +clean-all: clean + rm *.pocl *.dump + ifneq ($(MAKECMDGOALS),clean) -include .depend endif diff --git a/benchmarks/opencl/sgemm/Makefile b/benchmarks/opencl/sgemm/Makefile index d4cd077f..01aa86e0 100644 --- a/benchmarks/opencl/sgemm/Makefile +++ b/benchmarks/opencl/sgemm/Makefile @@ -51,7 +51,10 @@ run-rtlsim: $(PROJECT) kernel.pocl $(CXX) $(CXXFLAGS) -MM $^ > .depend; clean: - rm -rf $(PROJECT) *.o .depend + rm -rf $(PROJECT) *.o .depend + +clean-all: clean + rm *.pocl *.dump ifneq ($(MAKECMDGOALS),clean) -include .depend diff --git a/benchmarks/opencl/transpose/Makefile b/benchmarks/opencl/transpose/Makefile index e4837cdc..dce57c43 100644 --- a/benchmarks/opencl/transpose/Makefile +++ b/benchmarks/opencl/transpose/Makefile @@ -49,7 +49,10 @@ run-rtlsim: $(PROJECT) kernel.pocl $(CXX) $(CXXFLAGS) -MM $^ > .depend; clean: - rm -rf $(PROJECT) *.o .depend + rm -rf $(PROJECT) *.o .depend + +clean-all: clean + rm *.pocl *.dump ifneq ($(MAKECMDGOALS),clean) -include .depend diff --git a/benchmarks/opencl/vecadd/Makefile b/benchmarks/opencl/vecadd/Makefile index c701d950..6ddf59dd 100644 --- a/benchmarks/opencl/vecadd/Makefile +++ b/benchmarks/opencl/vecadd/Makefile @@ -53,6 +53,9 @@ run-rtlsim: $(PROJECT) kernel.pocl clean: rm -rf $(PROJECT) *.o .depend +clean-all: clean + rm *.pocl *.dump + ifneq ($(MAKECMDGOALS),clean) -include .depend endif diff --git a/ci/blackbox.sh b/ci/blackbox.sh index 4a8ca943..f21b04d8 100755 --- a/ci/blackbox.sh +++ b/ci/blackbox.sh @@ -9,7 +9,7 @@ show_usage() echo "Usage: [[--clusters=#n] [--cores=#n] [--warps=#n] [--threads=#n] [--l2cache] [--l3cache] [[--driver=rtlsim|vlsim] [--debug] [--scope] [--perf] [--app=vecadd|sgemm|basic|demo|dogfood] [--args=] [--help]]" } -SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +SCRIPT_DIR=$(dirname "$0") VORTEX_HOME=$SCRIPT_DIR/.. DRIVER=vlsim diff --git a/driver/include/vortex.h b/driver/include/vortex.h index e53e8431..25a2e3a2 100644 --- a/driver/include/vortex.h +++ b/driver/include/vortex.h @@ -43,9 +43,6 @@ int vx_buf_release(vx_buffer_h hbuffer); // allocate device memory and return address int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr); -// Copy bytes from device local memory to buffer -int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size); - // Copy bytes from buffer to device local memory int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t src_offset); diff --git a/driver/opae/vortex.cpp b/driver/opae/vortex.cpp index 1a5c0ce6..79131233 100755 --- a/driver/opae/vortex.cpp +++ b/driver/opae/vortex.cpp @@ -41,7 +41,6 @@ #define CMD_MEM_READ AFU_IMAGE_CMD_MEM_READ #define CMD_MEM_WRITE AFU_IMAGE_CMD_MEM_WRITE #define CMD_RUN AFU_IMAGE_CMD_RUN -#define CMD_CLFLUSH AFU_IMAGE_CMD_CLFLUSH #define CMD_CSR_READ AFU_IMAGE_CMD_CSR_READ #define CMD_CSR_WRITE AFU_IMAGE_CMD_CSR_WRITE @@ -462,36 +461,6 @@ extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, return 0; } -extern int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size) { - if (nullptr == hdevice - || 0 >= size) - return -1; - - vx_device_t* device = ((vx_device_t*)hdevice); - - size_t asize = align_size(size, CACHE_BLOCK_SIZE); - - // check alignment - if (!is_aligned(dev_maddr, CACHE_BLOCK_SIZE)) - return -1; - - // Ensure ready for new command - if (vx_ready_wait(hdevice, -1) != 0) - return -1; - - auto ls_shift = (int)std::log2(CACHE_BLOCK_SIZE); - - CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_MEM_ADDR, dev_maddr >> ls_shift)); - CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_DATA_SIZE, asize >> ls_shift)); - CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_CLFLUSH)); - - // Wait for the write operation to finish - if (vx_ready_wait(hdevice, -1) != 0) - return -1; - - return 0; -} - extern int vx_start(vx_device_h hdevice) { if (nullptr == hdevice) return -1; diff --git a/driver/opae/vortex_afu.h b/driver/opae/vortex_afu.h index c31e1a9e..8a4b7826 100644 --- a/driver/opae/vortex_afu.h +++ b/driver/opae/vortex_afu.h @@ -7,9 +7,8 @@ #define AFU_ACCEL_NAME "vortex_afu" #define AFU_ACCEL_UUID "35F9452B-25C2-434C-93D5-6F8C60DB361C" -#define AFU_IMAGE_CMD_CLFLUSH 4 -#define AFU_IMAGE_CMD_CSR_READ 5 -#define AFU_IMAGE_CMD_CSR_WRITE 6 +#define AFU_IMAGE_CMD_CSR_READ 4 +#define AFU_IMAGE_CMD_CSR_WRITE 5 #define AFU_IMAGE_CMD_MEM_READ 1 #define AFU_IMAGE_CMD_MEM_WRITE 2 #define AFU_IMAGE_CMD_RUN 3 diff --git a/driver/rtlsim/vortex.cpp b/driver/rtlsim/vortex.cpp index e6667e29..4736bb6b 100644 --- a/driver/rtlsim/vortex.cpp +++ b/driver/rtlsim/vortex.cpp @@ -140,19 +140,6 @@ public: return 0; } - int flush_caches(size_t dev_maddr, size_t size) { - if (future_.valid()) { - future_.wait(); // ensure prior run completed - } - simulator_.attach_ram(&ram_); - simulator_.flush_caches(dev_maddr, size); - while (simulator_.snp_req_active()) { - simulator_.step(); - }; - simulator_.attach_ram(NULL); - return 0; - } - int set_csr(int core_id, int addr, unsigned value) { if (future_.valid()) { future_.wait(); // ensure prior run completed @@ -257,16 +244,6 @@ extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr) return device->alloc_local_mem(size, dev_maddr); } -extern int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size) { - if (nullptr == hdevice - || 0 >= size) - return -1; - - vx_device *device = ((vx_device*)hdevice); - - return device->flush_caches(dev_maddr, size); -} - extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer) { if (nullptr == hdevice diff --git a/driver/simx/vortex.cpp b/driver/simx/vortex.cpp index 9d96ea63..8a9e8f3d 100644 --- a/driver/simx/vortex.cpp +++ b/driver/simx/vortex.cpp @@ -267,14 +267,6 @@ extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr) return device->alloc_local_mem(size, dev_maddr); } -extern int vx_flush_caches(vx_device_h hdevice, size_t /*dev_maddr*/, size_t size) { - if (nullptr == hdevice - || 0 >= size) - return -1; - // this functionality is not need by simX - return 0; -} - extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer) { if (nullptr == hdevice || 0 >= size diff --git a/driver/stub/vortex.cpp b/driver/stub/vortex.cpp index eae722e9..7585a656 100644 --- a/driver/stub/vortex.cpp +++ b/driver/stub/vortex.cpp @@ -16,10 +16,6 @@ extern int vx_alloc_dev_mem(vx_device_h /*hdevice*/, size_t /*size*/, size_t* /* return -1; } -extern int vx_flush_caches(vx_device_h /*hdevice*/, size_t /*dev_maddr*/, size_t /*size*/) { - return -1; -} - extern int vx_alloc_shared_mem(vx_device_h /*hdevice*/, size_t /*size*/, vx_buffer_h* /*hbuffer*/) { return -1; } diff --git a/driver/tests/Makefile b/driver/tests/Makefile index b5e798cb..5e4b3382 100644 --- a/driver/tests/Makefile +++ b/driver/tests/Makefile @@ -13,3 +13,8 @@ clean: $(MAKE) -C demo clean $(MAKE) -C dogfood clean +clean-all: + $(MAKE) -C basic clean-all + $(MAKE) -C demo clean-all + $(MAKE) -C dogfood clean-all + diff --git a/driver/tests/basic/basic.cpp b/driver/tests/basic/basic.cpp index 88557ae1..f33db409 100755 --- a/driver/tests/basic/basic.cpp +++ b/driver/tests/basic/basic.cpp @@ -171,17 +171,11 @@ int run_kernel_test(const kernel_arg_t& kernel_arg, RT_CHECK(vx_ready_wait(device, -1)); auto t3 = std::chrono::high_resolution_clock::now(); - // flush the caches - std::cout << "flush the caches" << std::endl; - auto t4 = std::chrono::high_resolution_clock::now(); - RT_CHECK(vx_flush_caches(device, kernel_arg.dst_ptr, buf_size)); - auto t5 = std::chrono::high_resolution_clock::now(); - // read buffer from local memory std::cout << "read buffer from local memory" << std::endl; - auto t6 = std::chrono::high_resolution_clock::now(); + auto t4 = std::chrono::high_resolution_clock::now(); RT_CHECK(vx_copy_from_dev(buffer, kernel_arg.dst_ptr, buf_size, 0)); - auto t7 = std::chrono::high_resolution_clock::now(); + auto t5 = std::chrono::high_resolution_clock::now(); // verify result @@ -210,8 +204,6 @@ int run_kernel_test(const kernel_arg_t& kernel_arg, elapsed = std::chrono::duration_cast(t3 - t2).count(); printf("execute time: %lg ms\n", elapsed); elapsed = std::chrono::duration_cast(t5 - t4).count(); - printf("flush time: %lg ms\n", elapsed); - elapsed = std::chrono::duration_cast(t7 - t6).count(); printf("download time: %lg ms\n", elapsed); elapsed = std::chrono::duration_cast(time_end - time_start).count(); printf("Total elapsed time: %lg ms\n", elapsed); diff --git a/driver/tests/basic/kernel.bin b/driver/tests/basic/kernel.bin index 21381a65..af234296 100755 Binary files a/driver/tests/basic/kernel.bin and b/driver/tests/basic/kernel.bin differ diff --git a/driver/tests/basic/kernel.dump b/driver/tests/basic/kernel.dump index 76eccc8c..6294fb65 100644 --- a/driver/tests/basic/kernel.dump +++ b/driver/tests/basic/kernel.dump @@ -142,11 +142,11 @@ Disassembly of section .text: 80000180: 00008067 ret 80000184 : -80000184: c0002573 rdcycle a0 +80000184: b0002573 csrr a0,mcycle 80000188: 00008067 ret 8000018c : -8000018c: c0202573 rdinstret a0 +8000018c: b0202573 csrr a0,minstret 80000190: 00008067 ret 80000194 : @@ -390,7 +390,7 @@ Disassembly of section .text: 80000518: 00492703 lw a4,4(s2) 8000051c: 148a2783 lw a5,328(s4) 80000520: 01871463 bne a4,s8,80000528 <__call_exitprocs+0xe4> -80000524: f8f904e3 beq s2,a5,800004ac <__call_exitprocs+0x68> +80000524: f92784e3 beq a5,s2,800004ac <__call_exitprocs+0x68> 80000528: f80788e3 beqz a5,800004b8 <__call_exitprocs+0x74> 8000052c: 00078913 mv s2,a5 80000530: f5dff06f j 8000048c <__call_exitprocs+0x48> @@ -450,21 +450,20 @@ Disassembly of section .comment: 0: 3a434347 fmsub.d ft6,ft6,ft4,ft7,rmm 4: 2820 fld fs0,80(s0) 6: 29554e47 fmsub.s ft8,fa0,fs5,ft5,rmm - a: 3920 fld fs0,112(a0) - c: 322e fld ft4,232(sp) - e: 302e fld ft0,232(sp) - ... + a: 3120 fld fs0,96(a0) + c: 2e30 fld fa2,88(a2) + e: 2e32 fld ft8,264(sp) + 10: 0030 addi a2,sp,8 Disassembly of section .riscv.attributes: 00000000 <.riscv.attributes>: - 0: 2541 jal 680 <_start-0x7ffff980> + 0: 2941 jal 490 <_start-0x7ffffb70> 2: 0000 unimp 4: 7200 flw fs0,32(a2) 6: 7369 lui t1,0xffffa 8: 01007663 bgeu zero,a6,14 <_start-0x7fffffec> - c: 0000001b 0x1b - 10: 1004 addi s1,sp,32 + c: 001f 0000 1004 0x10040000001f 12: 7205 lui tp,0xfffe1 14: 3376 fld ft6,376(sp) 16: 6932 flw fs2,12(sp) @@ -473,3 +472,5 @@ Disassembly of section .riscv.attributes: 1c: 326d jal fffff9c6 <__global_pointer$+0x7fffdc5e> 1e: 3070 fld fa2,224(s0) 20: 665f 7032 0030 0x307032665f + 26: 0108 addi a0,sp,128 + 28: 0b0a slli s6,s6,0x2 diff --git a/driver/tests/basic/kernel.elf b/driver/tests/basic/kernel.elf index 31205987..0cad586a 100755 Binary files a/driver/tests/basic/kernel.elf and b/driver/tests/basic/kernel.elf differ diff --git a/driver/tests/demo/demo.cpp b/driver/tests/demo/demo.cpp index 962c51ee..d93afda7 100644 --- a/driver/tests/demo/demo.cpp +++ b/driver/tests/demo/demo.cpp @@ -69,10 +69,6 @@ int run_test(const kernel_arg_t& kernel_arg, std::cout << "wait for completion" << std::endl; RT_CHECK(vx_ready_wait(device, -1)); - // flush the destination buffer caches - std::cout << "flush the destination buffer caches" << std::endl; - RT_CHECK(vx_flush_caches(device, kernel_arg.dst_ptr, buf_size)); - // download destination buffer std::cout << "download destination buffer" << std::endl; RT_CHECK(vx_copy_from_dev(buffer, kernel_arg.dst_ptr, buf_size, 0)); diff --git a/driver/tests/demo/kernel.bin b/driver/tests/demo/kernel.bin index 280e5cbc..aeeeab99 100755 Binary files a/driver/tests/demo/kernel.bin and b/driver/tests/demo/kernel.bin differ diff --git a/driver/tests/demo/kernel.dump b/driver/tests/demo/kernel.dump index dcedb988..98cb45a2 100644 --- a/driver/tests/demo/kernel.dump +++ b/driver/tests/demo/kernel.dump @@ -165,11 +165,11 @@ Disassembly of section .text: 800001d4: 00008067 ret 800001d8 : -800001d8: c0002573 rdcycle a0 +800001d8: b0002573 csrr a0,mcycle 800001dc: 00008067 ret 800001e0 : -800001e0: c0202573 rdinstret a0 +800001e0: b0202573 csrr a0,minstret 800001e4: 00008067 ret 800001e8 : @@ -461,7 +461,7 @@ Disassembly of section .text: 8000061c: 00492703 lw a4,4(s2) 80000620: 148a2783 lw a5,328(s4) 80000624: 01871463 bne a4,s8,8000062c <__call_exitprocs+0xe4> -80000628: f8f904e3 beq s2,a5,800005b0 <__call_exitprocs+0x68> +80000628: f92784e3 beq a5,s2,800005b0 <__call_exitprocs+0x68> 8000062c: f80788e3 beqz a5,800005bc <__call_exitprocs+0x74> 80000630: 00078913 mv s2,a5 80000634: f5dff06f j 80000590 <__call_exitprocs+0x48> @@ -527,22 +527,20 @@ Disassembly of section .comment: 0: 3a434347 fmsub.d ft6,ft6,ft4,ft7,rmm 4: 2820 fld fs0,80(s0) 6: 29554e47 fmsub.s ft8,fa0,fs5,ft5,rmm - a: 3920 fld fs0,112(a0) - c: 322e fld ft4,232(sp) - e: 302e fld ft0,232(sp) - ... + a: 3120 fld fs0,96(a0) + c: 2e30 fld fa2,88(a2) + e: 2e32 fld ft8,264(sp) + 10: 0030 addi a2,sp,8 Disassembly of section .riscv.attributes: 00000000 <.riscv.attributes>: - 0: 2041 jal 80 <_start-0x7fffff80> + 0: 2941 jal 490 <_start-0x7ffffb70> 2: 0000 unimp 4: 7200 flw fs0,32(a2) 6: 7369 lui t1,0xffffa 8: 01007663 bgeu zero,a6,14 <_start-0x7fffffec> - c: 0016 c.slli zero,0x5 - e: 0000 unimp - 10: 1004 addi s1,sp,32 + c: 001f 0000 1004 0x10040000001f 12: 7205 lui tp,0xfffe1 14: 3376 fld ft6,376(sp) 16: 6932 flw fs2,12(sp) @@ -550,4 +548,6 @@ Disassembly of section .riscv.attributes: 1a: 5f30 lw a2,120(a4) 1c: 326d jal fffff9c6 <__global_pointer$+0x7fffdb56> 1e: 3070 fld fa2,224(s0) - ... + 20: 665f 7032 0030 0x307032665f + 26: 0108 addi a0,sp,128 + 28: 0b0a slli s6,s6,0x2 diff --git a/driver/tests/demo/kernel.elf b/driver/tests/demo/kernel.elf index f7681ea2..dd166d7e 100755 Binary files a/driver/tests/demo/kernel.elf and b/driver/tests/demo/kernel.elf differ diff --git a/driver/tests/dogfood/dogfood.cpp b/driver/tests/dogfood/dogfood.cpp index d2aee1c8..b996658c 100644 --- a/driver/tests/dogfood/dogfood.cpp +++ b/driver/tests/dogfood/dogfood.cpp @@ -245,10 +245,6 @@ int main(int argc, char *argv[]) { std::cout << "wait for completion" << std::endl; RT_CHECK(vx_ready_wait(device, -1)); - // flush the destination buffer caches - std::cout << "flush the destination buffer caches" << std::endl; - RT_CHECK(vx_flush_caches(device, kernel_arg.dst_ptr, buf_size)); - // download destination buffer std::cout << "download destination buffer" << std::endl; RT_CHECK(vx_copy_from_dev(dst_buf, kernel_arg.dst_ptr, buf_size, 0)); diff --git a/driver/tests/dogfood/kernel.bin b/driver/tests/dogfood/kernel.bin index edb116a5..599ab919 100755 Binary files a/driver/tests/dogfood/kernel.bin and b/driver/tests/dogfood/kernel.bin differ diff --git a/driver/tests/dogfood/kernel.dump b/driver/tests/dogfood/kernel.dump index c07fd8f0..8fcd6374 100644 --- a/driver/tests/dogfood/kernel.dump +++ b/driver/tests/dogfood/kernel.dump @@ -6,23 +6,23 @@ Disassembly of section .init: 80000000 <_start>: 80000000: 00001597 auipc a1,0x1 -80000004: b8c58593 addi a1,a1,-1140 # 80000b8c +80000004: b8458593 addi a1,a1,-1148 # 80000b84 80000008: 02602573 csrr a0,0x26 8000000c: 00b5106b 0xb5106b -80000010: 37d000ef jal ra,80000b8c +80000010: 375000ef jal ra,80000b84 80000014: 00100513 li a0,1 80000018: 0005006b 0x5006b 8000001c: c3018513 addi a0,gp,-976 # 800026c8 80000020: c3818613 addi a2,gp,-968 # 800026d0 <__BSS_END__> 80000024: 40a60633 sub a2,a2,a0 80000028: 00000593 li a1,0 -8000002c: 789000ef jal ra,80000fb4 +8000002c: 785000ef jal ra,80000fb0 80000030: 00001517 auipc a0,0x1 -80000034: e8c50513 addi a0,a0,-372 # 80000ebc <__libc_fini_array> -80000038: 639000ef jal ra,80000e70 -8000003c: 6dd000ef jal ra,80000f18 <__libc_init_array> +80000034: e8850513 addi a0,a0,-376 # 80000eb8 <__libc_fini_array> +80000038: 635000ef jal ra,80000e6c +8000003c: 6d9000ef jal ra,80000f14 <__libc_init_array> 80000040: 008000ef jal ra,80000048
-80000044: 6490006f j 80000e8c +80000044: 6450006f j 80000e88 Disassembly of section .text: @@ -30,30 +30,30 @@ Disassembly of section .text: 80000048: ff010113 addi sp,sp,-16 8000004c: 00112623 sw ra,12(sp) 80000050: 00812423 sw s0,8(sp) -80000054: 3d1000ef jal ra,80000c24 +80000054: 3c9000ef jal ra,80000c1c 80000058: 00050413 mv s0,a0 -8000005c: 3c1000ef jal ra,80000c1c +8000005c: 3b9000ef jal ra,80000c14 80000060: 7ffff7b7 lui a5,0x7ffff 80000064: 0007a783 lw a5,0(a5) # 7ffff000 <_start-0x1000> 80000068: 00050593 mv a1,a0 8000006c: 00040513 mv a0,s0 80000070: 00279713 slli a4,a5,0x2 80000074: 800017b7 lui a5,0x80001 -80000078: 24878793 addi a5,a5,584 # 80001248 <__global_pointer$+0xffffe7b0> +80000078: 24478793 addi a5,a5,580 # 80001244 <__global_pointer$+0xffffe7ac> 8000007c: 00812403 lw s0,8(sp) 80000080: 00e787b3 add a5,a5,a4 80000084: 00c12083 lw ra,12(sp) 80000088: 0007a603 lw a2,0(a5) 8000008c: 7ffff6b7 lui a3,0x7ffff 80000090: 01010113 addi sp,sp,16 -80000094: 3f10006f j 80000c84 +80000094: 3e90006f j 80000c7c 80000098 : 80000098: 00000793 li a5,0 8000009c: 00078863 beqz a5,800000ac 800000a0: 80001537 lui a0,0x80001 -800000a4: ebc50513 addi a0,a0,-324 # 80000ebc <__global_pointer$+0xffffe424> -800000a8: 5c90006f j 80000e70 +800000a4: eb850513 addi a0,a0,-328 # 80000eb8 <__global_pointer$+0xffffe420> +800000a8: 5c50006f j 80000e6c 800000ac: 00008067 ret 800000b0 : @@ -67,7 +67,7 @@ Disassembly of section .text: 800000cc: 00852983 lw s3,8(a0) 800000d0: 01052903 lw s2,16(a0) 800000d4: 00112e23 sw ra,28(sp) -800000d8: 335000ef jal ra,80000c0c +800000d8: 32d000ef jal ra,80000c04 800000dc: 02850533 mul a0,a0,s0 800000e0: 04040063 beqz s0,80000120 800000e4: 00a406b3 add a3,s0,a0 @@ -104,7 +104,7 @@ Disassembly of section .text: 80000158: 00852983 lw s3,8(a0) 8000015c: 01052903 lw s2,16(a0) 80000160: 00112e23 sw ra,28(sp) -80000164: 2a9000ef jal ra,80000c0c +80000164: 2a1000ef jal ra,80000c04 80000168: 02850533 mul a0,a0,s0 8000016c: 04040063 beqz s0,800001ac 80000170: 00a406b3 add a3,s0,a0 @@ -141,7 +141,7 @@ Disassembly of section .text: 800001e4: 00852983 lw s3,8(a0) 800001e8: 01052903 lw s2,16(a0) 800001ec: 00112e23 sw ra,28(sp) -800001f0: 21d000ef jal ra,80000c0c +800001f0: 215000ef jal ra,80000c04 800001f4: 02850533 mul a0,a0,s0 800001f8: 04040063 beqz s0,80000238 800001fc: 00a406b3 add a3,s0,a0 @@ -178,7 +178,7 @@ Disassembly of section .text: 80000270: 00852983 lw s3,8(a0) 80000274: 01052483 lw s1,16(a0) 80000278: 00112e23 sw ra,28(sp) -8000027c: 191000ef jal ra,80000c0c +8000027c: 189000ef jal ra,80000c04 80000280: 02850533 mul a0,a0,s0 80000284: 04040463 beqz s0,800002cc 80000288: 00a405b3 add a1,s0,a0 @@ -217,7 +217,7 @@ Disassembly of section .text: 80000304: 00852983 lw s3,8(a0) 80000308: 01052903 lw s2,16(a0) 8000030c: 00112e23 sw ra,28(sp) -80000310: 0fd000ef jal ra,80000c0c +80000310: 0f5000ef jal ra,80000c04 80000314: 02850533 mul a0,a0,s0 80000318: 04040063 beqz s0,80000358 8000031c: 00a40733 add a4,s0,a0 @@ -254,7 +254,7 @@ Disassembly of section .text: 80000390: 00852983 lw s3,8(a0) 80000394: 01052903 lw s2,16(a0) 80000398: 00112e23 sw ra,28(sp) -8000039c: 071000ef jal ra,80000c0c +8000039c: 069000ef jal ra,80000c04 800003a0: 02850533 mul a0,a0,s0 800003a4: 04040063 beqz s0,800003e4 800003a8: 00a40733 add a4,s0,a0 @@ -291,7 +291,7 @@ Disassembly of section .text: 8000041c: 00852983 lw s3,8(a0) 80000420: 01052903 lw s2,16(a0) 80000424: 00112e23 sw ra,28(sp) -80000428: 7e4000ef jal ra,80000c0c +80000428: 7dc000ef jal ra,80000c04 8000042c: 02850533 mul a0,a0,s0 80000430: 04040063 beqz s0,80000470 80000434: 00a40733 add a4,s0,a0 @@ -328,7 +328,7 @@ Disassembly of section .text: 800004a8: 00852983 lw s3,8(a0) 800004ac: 01052483 lw s1,16(a0) 800004b0: 00112e23 sw ra,28(sp) -800004b4: 758000ef jal ra,80000c0c +800004b4: 750000ef jal ra,80000c04 800004b8: 02850533 mul a0,a0,s0 800004bc: 04040063 beqz s0,800004fc 800004c0: 00a40733 add a4,s0,a0 @@ -365,7 +365,7 @@ Disassembly of section .text: 80000534: 00852983 lw s3,8(a0) 80000538: 01052483 lw s1,16(a0) 8000053c: 00112e23 sw ra,28(sp) -80000540: 6cc000ef jal ra,80000c0c +80000540: 6c4000ef jal ra,80000c04 80000544: 02850533 mul a0,a0,s0 80000548: 04040063 beqz s0,80000588 8000054c: 00a40733 add a4,s0,a0 @@ -402,7 +402,7 @@ Disassembly of section .text: 800005c0: 00852983 lw s3,8(a0) 800005c4: 01052483 lw s1,16(a0) 800005c8: 00112e23 sw ra,28(sp) -800005cc: 640000ef jal ra,80000c0c +800005cc: 638000ef jal ra,80000c04 800005d0: 02850533 mul a0,a0,s0 800005d4: 04040063 beqz s0,80000614 800005d8: 00a40733 add a4,s0,a0 @@ -439,7 +439,7 @@ Disassembly of section .text: 8000064c: 00852983 lw s3,8(a0) 80000650: 01052483 lw s1,16(a0) 80000654: 00112e23 sw ra,28(sp) -80000658: 5b4000ef jal ra,80000c0c +80000658: 5ac000ef jal ra,80000c04 8000065c: 02850533 mul a0,a0,s0 80000660: 04040063 beqz s0,800006a0 80000664: 00a40733 add a4,s0,a0 @@ -476,7 +476,7 @@ Disassembly of section .text: 800006d8: 00852983 lw s3,8(a0) 800006dc: 01052483 lw s1,16(a0) 800006e0: 00112e23 sw ra,28(sp) -800006e4: 528000ef jal ra,80000c0c +800006e4: 520000ef jal ra,80000c04 800006e8: 02850533 mul a0,a0,s0 800006ec: 04040463 beqz s0,80000734 800006f0: 00a40733 add a4,s0,a0 @@ -515,7 +515,7 @@ Disassembly of section .text: 8000076c: 00852983 lw s3,8(a0) 80000770: 01052903 lw s2,16(a0) 80000774: 00112e23 sw ra,28(sp) -80000778: 494000ef jal ra,80000c0c +80000778: 48c000ef jal ra,80000c04 8000077c: 02850533 mul a0,a0,s0 80000780: 04040063 beqz s0,800007c0 80000784: 00a40733 add a4,s0,a0 @@ -552,7 +552,7 @@ Disassembly of section .text: 800007f8: 00852983 lw s3,8(a0) 800007fc: 01052483 lw s1,16(a0) 80000800: 00112e23 sw ra,28(sp) -80000804: 408000ef jal ra,80000c0c +80000804: 400000ef jal ra,80000c04 80000808: 02850533 mul a0,a0,s0 8000080c: 04040463 beqz s0,80000854 80000810: 00a40733 add a4,s0,a0 @@ -591,25 +591,25 @@ Disassembly of section .text: 8000088c: 00852983 lw s3,8(a0) 80000890: 01052903 lw s2,16(a0) 80000894: 00112e23 sw ra,28(sp) -80000898: 374000ef jal ra,80000c0c +80000898: 36c000ef jal ra,80000c04 8000089c: 02850533 mul a0,a0,s0 800008a0: 04040263 beqz s0,800008e4 -800008a4: 00a406b3 add a3,s0,a0 -800008a8: 00269693 slli a3,a3,0x2 +800008a4: 00a40733 add a4,s0,a0 +800008a8: 00271713 slli a4,a4,0x2 800008ac: 00251513 slli a0,a0,0x2 800008b0: 013507b3 add a5,a0,s3 -800008b4: 013686b3 add a3,a3,s3 +800008b4: 01370733 add a4,a4,s3 800008b8: 41390533 sub a0,s2,s3 800008bc: 413485b3 sub a1,s1,s3 -800008c0: 00f58733 add a4,a1,a5 +800008c0: 00f586b3 add a3,a1,a5 800008c4: 0007a707 flw fa4,0(a5) -800008c8: 00072787 flw fa5,0(a4) -800008cc: 00f50633 add a2,a0,a5 +800008c8: 0006a787 flw fa5,0(a3) +800008cc: 00f506b3 add a3,a0,a5 800008d0: 00478793 addi a5,a5,4 800008d4: 00e7f7d3 fadd.s fa5,fa5,fa4 -800008d8: c0079753 fcvt.w.s a4,fa5,rtz -800008dc: 00e62023 sw a4,0(a2) -800008e0: fef690e3 bne a3,a5,800008c0 +800008d8: c0079653 fcvt.w.s a2,fa5,rtz +800008dc: 00c6a023 sw a2,0(a3) +800008e0: fef710e3 bne a4,a5,800008c0 800008e4: 01c12083 lw ra,28(sp) 800008e8: 01812403 lw s0,24(sp) 800008ec: 01412483 lw s1,20(sp) @@ -629,25 +629,25 @@ Disassembly of section .text: 8000091c: 00852983 lw s3,8(a0) 80000920: 01052903 lw s2,16(a0) 80000924: 00112e23 sw ra,28(sp) -80000928: 2e4000ef jal ra,80000c0c +80000928: 2dc000ef jal ra,80000c04 8000092c: 02850533 mul a0,a0,s0 80000930: 04040263 beqz s0,80000974 -80000934: 00a406b3 add a3,s0,a0 -80000938: 00269693 slli a3,a3,0x2 +80000934: 00a40733 add a4,s0,a0 +80000938: 00271713 slli a4,a4,0x2 8000093c: 00251513 slli a0,a0,0x2 80000940: 013507b3 add a5,a0,s3 -80000944: 013686b3 add a3,a3,s3 +80000944: 01370733 add a4,a4,s3 80000948: 41390533 sub a0,s2,s3 8000094c: 413485b3 sub a1,s1,s3 -80000950: 00f58733 add a4,a1,a5 +80000950: 00f586b3 add a3,a1,a5 80000954: 0007a707 flw fa4,0(a5) -80000958: 00072787 flw fa5,0(a4) -8000095c: 00f50633 add a2,a0,a5 +80000958: 0006a787 flw fa5,0(a3) +8000095c: 00f506b3 add a3,a0,a5 80000960: 00478793 addi a5,a5,4 80000964: 00e7f7d3 fadd.s fa5,fa5,fa4 -80000968: c0179753 fcvt.wu.s a4,fa5,rtz -8000096c: 00e62023 sw a4,0(a2) -80000970: fef690e3 bne a3,a5,80000950 +80000968: c0179653 fcvt.wu.s a2,fa5,rtz +8000096c: 00c6a023 sw a2,0(a3) +80000970: fef710e3 bne a4,a5,80000950 80000974: 01c12083 lw ra,28(sp) 80000978: 01812403 lw s0,24(sp) 8000097c: 01412483 lw s1,20(sp) @@ -667,7 +667,7 @@ Disassembly of section .text: 800009ac: 00852983 lw s3,8(a0) 800009b0: 01052903 lw s2,16(a0) 800009b4: 00112e23 sw ra,28(sp) -800009b8: 254000ef jal ra,80000c0c +800009b8: 24c000ef jal ra,80000c04 800009bc: 02850533 mul a0,a0,s0 800009c0: 04040263 beqz s0,80000a04 800009c4: 00a406b3 add a3,s0,a0 @@ -705,7 +705,7 @@ Disassembly of section .text: 80000a3c: 00852983 lw s3,8(a0) 80000a40: 01052903 lw s2,16(a0) 80000a44: 00112e23 sw ra,28(sp) -80000a48: 1c4000ef jal ra,80000c0c +80000a48: 1bc000ef jal ra,80000c04 80000a4c: 02850533 mul a0,a0,s0 80000a50: 04040263 beqz s0,80000a94 80000a54: 00a406b3 add a3,s0,a0 @@ -744,589 +744,588 @@ Disassembly of section .text: 80000ad0: 01052903 lw s2,16(a0) 80000ad4: 02112e23 sw ra,60(sp) 80000ad8: 00812e27 fsw fs0,28(sp) -80000adc: 00912c27 fsw fs1,24(sp) -80000ae0: 12c000ef jal ra,80000c0c -80000ae4: 02950533 mul a0,a0,s1 -80000ae8: 04048c63 beqz s1,80000b40 -80000aec: 00a484b3 add s1,s1,a0 -80000af0: f00004d3 fmv.w.x fs1,zero -80000af4: 00251513 slli a0,a0,0x2 -80000af8: 00249493 slli s1,s1,0x2 -80000afc: 01450533 add a0,a0,s4 -80000b00: 014484b3 add s1,s1,s4 -80000b04: 414989b3 sub s3,s3,s4 -80000b08: 41490933 sub s2,s2,s4 -80000b0c: 00a987b3 add a5,s3,a0 -80000b10: 0007a507 flw fa0,0(a5) -80000b14: 00052787 flw fa5,0(a0) -80000b18: 00a90733 add a4,s2,a0 -80000b1c: 10f57553 fmul.s fa0,fa0,fa5 -80000b20: 001026f3 frflags a3 -80000b24: a09517d3 flt.s a5,fa0,fs1 -80000b28: 00169073 fsflags a3 -80000b2c: 58057453 fsqrt.s fs0,fa0 -80000b30: 02079a63 bnez a5,80000b64 -80000b34: 00450513 addi a0,a0,4 -80000b38: 00872027 fsw fs0,0(a4) -80000b3c: fc9518e3 bne a0,s1,80000b0c -80000b40: 03c12083 lw ra,60(sp) -80000b44: 03812483 lw s1,56(sp) -80000b48: 03412903 lw s2,52(sp) -80000b4c: 03012983 lw s3,48(sp) -80000b50: 02c12a03 lw s4,44(sp) -80000b54: 01c12407 flw fs0,28(sp) -80000b58: 01812487 flw fs1,24(sp) -80000b5c: 04010113 addi sp,sp,64 -80000b60: 00008067 ret -80000b64: 00a12623 sw a0,12(sp) -80000b68: 18c000ef jal ra,80000cf4 -80000b6c: 00c12503 lw a0,12(sp) -80000b70: 00a907b3 add a5,s2,a0 -80000b74: 0087a027 fsw fs0,0(a5) -80000b78: 00450513 addi a0,a0,4 -80000b7c: f8a498e3 bne s1,a0,80000b0c -80000b80: fc1ff06f j 80000b40 +80000adc: 128000ef jal ra,80000c04 +80000ae0: 02950533 mul a0,a0,s1 +80000ae4: 04048c63 beqz s1,80000b3c +80000ae8: 00a484b3 add s1,s1,a0 +80000aec: f0000453 fmv.w.x fs0,zero +80000af0: 00251513 slli a0,a0,0x2 +80000af4: 00249493 slli s1,s1,0x2 +80000af8: 01450533 add a0,a0,s4 +80000afc: 014484b3 add s1,s1,s4 +80000b00: 414989b3 sub s3,s3,s4 +80000b04: 41490933 sub s2,s2,s4 +80000b08: 00a987b3 add a5,s3,a0 +80000b0c: 0007a507 flw fa0,0(a5) +80000b10: 00052787 flw fa5,0(a0) +80000b14: 00a90733 add a4,s2,a0 +80000b18: 10f57553 fmul.s fa0,fa0,fa5 +80000b1c: 001026f3 frflags a3 +80000b20: a08517d3 flt.s a5,fa0,fs0 +80000b24: 00169073 fsflags a3 +80000b28: 02079a63 bnez a5,80000b5c +80000b2c: 58057553 fsqrt.s fa0,fa0 +80000b30: 00450513 addi a0,a0,4 +80000b34: 00a72027 fsw fa0,0(a4) +80000b38: fc9518e3 bne a0,s1,80000b08 +80000b3c: 03c12083 lw ra,60(sp) +80000b40: 03812483 lw s1,56(sp) +80000b44: 03412903 lw s2,52(sp) +80000b48: 03012983 lw s3,48(sp) +80000b4c: 02c12a03 lw s4,44(sp) +80000b50: 01c12407 flw fs0,28(sp) +80000b54: 04010113 addi sp,sp,64 +80000b58: 00008067 ret +80000b5c: 00a12623 sw a0,12(sp) +80000b60: 18c000ef jal ra,80000cec +80000b64: 00c12503 lw a0,12(sp) +80000b68: 00a907b3 add a5,s2,a0 +80000b6c: 00a7a027 fsw fa0,0(a5) +80000b70: 00450513 addi a0,a0,4 +80000b74: f8a49ae3 bne s1,a0,80000b08 +80000b78: fc5ff06f j 80000b3c -80000b84 <_exit>: -80000b84: 00000513 li a0,0 +80000b7c <_exit>: +80000b7c: 00000513 li a0,0 +80000b80: 0005006b 0x5006b + +80000b84 : +80000b84: 02502573 csrr a0,0x25 80000b88: 0005006b 0x5006b +80000b8c: 00002197 auipc gp,0x2 +80000b90: f0c18193 addi gp,gp,-244 # 80002a98 <__global_pointer$> +80000b94: 022025f3 csrr a1,0x22 +80000b98: 00a59593 slli a1,a1,0xa +80000b9c: 02002673 csrr a2,0x20 +80000ba0: 00261613 slli a2,a2,0x2 +80000ba4: 6ffff137 lui sp,0x6ffff +80000ba8: 40b10133 sub sp,sp,a1 +80000bac: 00c10133 add sp,sp,a2 +80000bb0: 021026f3 csrr a3,0x21 +80000bb4: 00068663 beqz a3,80000bc0 +80000bb8: 00000513 li a0,0 +80000bbc: 0005006b 0x5006b -80000b8c : -80000b8c: 02502573 csrr a0,0x25 -80000b90: 0005006b 0x5006b -80000b94: 00002197 auipc gp,0x2 -80000b98: f0418193 addi gp,gp,-252 # 80002a98 <__global_pointer$> -80000b9c: 022025f3 csrr a1,0x22 -80000ba0: 00a59593 slli a1,a1,0xa -80000ba4: 02002673 csrr a2,0x20 -80000ba8: 00261613 slli a2,a2,0x2 -80000bac: 6ffff137 lui sp,0x6ffff -80000bb0: 40b10133 sub sp,sp,a1 -80000bb4: 00c10133 add sp,sp,a2 -80000bb8: 021026f3 csrr a3,0x21 -80000bbc: 00068663 beqz a3,80000bc8 -80000bc0: 00000513 li a0,0 -80000bc4: 0005006b 0x5006b +80000bc0 : +80000bc0: 00008067 ret -80000bc8 : +80000bc4 : +80000bc4: 00b5106b 0xb5106b 80000bc8: 00008067 ret -80000bcc : -80000bcc: 00b5106b 0xb5106b +80000bcc : +80000bcc: 0005006b 0x5006b 80000bd0: 00008067 ret -80000bd4 : -80000bd4: 0005006b 0x5006b +80000bd4 : +80000bd4: 00b5406b 0xb5406b 80000bd8: 00008067 ret -80000bdc : -80000bdc: 00b5406b 0xb5406b +80000bdc : +80000bdc: 0005206b 0x5206b 80000be0: 00008067 ret -80000be4 : -80000be4: 0005206b 0x5206b +80000be4 : +80000be4: 0000306b 0x306b 80000be8: 00008067 ret -80000bec : -80000bec: 0000306b 0x306b +80000bec : +80000bec: 02102573 csrr a0,0x21 80000bf0: 00008067 ret -80000bf4 : -80000bf4: 02102573 csrr a0,0x21 +80000bf4 : +80000bf4: 02302573 csrr a0,0x23 80000bf8: 00008067 ret -80000bfc : -80000bfc: 02302573 csrr a0,0x23 +80000bfc : +80000bfc: 02002573 csrr a0,0x20 80000c00: 00008067 ret -80000c04 : -80000c04: 02002573 csrr a0,0x20 +80000c04 : +80000c04: 02202573 csrr a0,0x22 80000c08: 00008067 ret -80000c0c : -80000c0c: 02202573 csrr a0,0x22 +80000c0c : +80000c0c: 02402573 csrr a0,0x24 80000c10: 00008067 ret -80000c14 : -80000c14: 02402573 csrr a0,0x24 +80000c14 : +80000c14: 02502573 csrr a0,0x25 80000c18: 00008067 ret -80000c1c : -80000c1c: 02502573 csrr a0,0x25 +80000c1c : +80000c1c: 02602573 csrr a0,0x26 80000c20: 00008067 ret -80000c24 : -80000c24: 02602573 csrr a0,0x26 +80000c24 : +80000c24: 02702573 csrr a0,0x27 80000c28: 00008067 ret -80000c2c : -80000c2c: 02702573 csrr a0,0x27 +80000c2c : +80000c2c: b0002573 csrr a0,mcycle 80000c30: 00008067 ret -80000c34 : -80000c34: c0002573 rdcycle a0 +80000c34 : +80000c34: b0202573 csrr a0,minstret 80000c38: 00008067 ret -80000c3c : -80000c3c: c0202573 rdinstret a0 -80000c40: 00008067 ret +80000c3c : +80000c3c: ff010113 addi sp,sp,-16 # 6fffeff0 <_start-0x10001010> +80000c40: 00812423 sw s0,8(sp) +80000c44: c301a783 lw a5,-976(gp) # 800026c8 +80000c48: 00112623 sw ra,12(sp) +80000c4c: 0087a503 lw a0,8(a5) +80000c50: f7dff0ef jal ra,80000bcc +80000c54: c301a783 lw a5,-976(gp) # 800026c8 +80000c58: 0047a503 lw a0,4(a5) +80000c5c: 0007a783 lw a5,0(a5) +80000c60: 000780e7 jalr a5 +80000c64: f89ff0ef jal ra,80000bec +80000c68: 00812403 lw s0,8(sp) +80000c6c: 00c12083 lw ra,12(sp) +80000c70: 00153513 seqz a0,a0 +80000c74: 01010113 addi sp,sp,16 +80000c78: f55ff06f j 80000bcc -80000c44 : -80000c44: ff010113 addi sp,sp,-16 # 6fffeff0 <_start-0x10001010> -80000c48: 00812423 sw s0,8(sp) -80000c4c: c301a783 lw a5,-976(gp) # 800026c8 -80000c50: 00112623 sw ra,12(sp) -80000c54: 0087a503 lw a0,8(a5) -80000c58: f7dff0ef jal ra,80000bd4 -80000c5c: c301a783 lw a5,-976(gp) # 800026c8 -80000c60: 0047a503 lw a0,4(a5) -80000c64: 0007a783 lw a5,0(a5) -80000c68: 000780e7 jalr a5 -80000c6c: f89ff0ef jal ra,80000bf4 -80000c70: 00812403 lw s0,8(sp) -80000c74: 00c12083 lw ra,12(sp) -80000c78: 00153513 seqz a0,a0 -80000c7c: 01010113 addi sp,sp,16 -80000c80: f55ff06f j 80000bd4 +80000c7c : +80000c7c: fe010113 addi sp,sp,-32 +80000c80: 00410793 addi a5,sp,4 +80000c84: 00812c23 sw s0,24(sp) +80000c88: c2f1a823 sw a5,-976(gp) # 800026c8 +80000c8c: 00112e23 sw ra,28(sp) +80000c90: 00c12223 sw a2,4(sp) +80000c94: 00d12423 sw a3,8(sp) +80000c98: 00b12623 sw a1,12(sp) +80000c9c: 00100793 li a5,1 +80000ca0: 00a7dc63 bge a5,a0,80000cb8 +80000ca4: 800015b7 lui a1,0x80001 +80000ca8: c3c58593 addi a1,a1,-964 # 80000c3c <__global_pointer$+0xffffe1a4> +80000cac: f19ff0ef jal ra,80000bc4 +80000cb0: c301a783 lw a5,-976(gp) # 800026c8 +80000cb4: 0087a583 lw a1,8(a5) +80000cb8: 00058513 mv a0,a1 +80000cbc: f11ff0ef jal ra,80000bcc +80000cc0: c301a783 lw a5,-976(gp) # 800026c8 +80000cc4: 0047a503 lw a0,4(a5) +80000cc8: 0007a783 lw a5,0(a5) +80000ccc: 000780e7 jalr a5 +80000cd0: f1dff0ef jal ra,80000bec +80000cd4: 00153513 seqz a0,a0 +80000cd8: ef5ff0ef jal ra,80000bcc +80000cdc: 01c12083 lw ra,28(sp) +80000ce0: 01812403 lw s0,24(sp) +80000ce4: 02010113 addi sp,sp,32 +80000ce8: 00008067 ret -80000c84 : -80000c84: fe010113 addi sp,sp,-32 -80000c88: 00410793 addi a5,sp,4 -80000c8c: 00812c23 sw s0,24(sp) -80000c90: c2f1a823 sw a5,-976(gp) # 800026c8 -80000c94: 00112e23 sw ra,28(sp) -80000c98: 00c12223 sw a2,4(sp) -80000c9c: 00d12423 sw a3,8(sp) -80000ca0: 00b12623 sw a1,12(sp) -80000ca4: 00100793 li a5,1 -80000ca8: 00a7dc63 bge a5,a0,80000cc0 -80000cac: 800015b7 lui a1,0x80001 -80000cb0: c4458593 addi a1,a1,-956 # 80000c44 <__global_pointer$+0xffffe1ac> -80000cb4: f19ff0ef jal ra,80000bcc -80000cb8: c301a783 lw a5,-976(gp) # 800026c8 -80000cbc: 0087a583 lw a1,8(a5) -80000cc0: 00058513 mv a0,a1 -80000cc4: f11ff0ef jal ra,80000bd4 -80000cc8: c301a783 lw a5,-976(gp) # 800026c8 -80000ccc: 0047a503 lw a0,4(a5) -80000cd0: 0007a783 lw a5,0(a5) -80000cd4: 000780e7 jalr a5 -80000cd8: f1dff0ef jal ra,80000bf4 -80000cdc: 00153513 seqz a0,a0 -80000ce0: ef5ff0ef jal ra,80000bd4 -80000ce4: 01c12083 lw ra,28(sp) -80000ce8: 01812403 lw s0,24(sp) -80000cec: 02010113 addi sp,sp,32 -80000cf0: 00008067 ret +80000cec : +80000cec: fe010113 addi sp,sp,-32 +80000cf0: 00812627 fsw fs0,12(sp) +80000cf4: 00112e23 sw ra,28(sp) +80000cf8: 20a50453 fmv.s fs0,fa0 +80000cfc: 00912427 fsw fs1,8(sp) +80000d00: 05c000ef jal ra,80000d5c <__ieee754_sqrtf> +80000d04: c341a703 lw a4,-972(gp) # 800026cc <__fdlib_version> +80000d08: fff00793 li a5,-1 +80000d0c: 00f70c63 beq a4,a5,80000d24 +80000d10: a08427d3 feq.s a5,fs0,fs0 +80000d14: 00078863 beqz a5,80000d24 +80000d18: f00004d3 fmv.w.x fs1,zero +80000d1c: a09417d3 flt.s a5,fs0,fs1 +80000d20: 00079c63 bnez a5,80000d38 +80000d24: 01c12083 lw ra,28(sp) +80000d28: 00c12407 flw fs0,12(sp) +80000d2c: 00812487 flw fs1,8(sp) +80000d30: 02010113 addi sp,sp,32 +80000d34: 00008067 ret +80000d38: 148000ef jal ra,80000e80 <__errno> +80000d3c: 01c12083 lw ra,28(sp) +80000d40: 02100793 li a5,33 +80000d44: 00f52023 sw a5,0(a0) +80000d48: 1894f553 fdiv.s fa0,fs1,fs1 +80000d4c: 00c12407 flw fs0,12(sp) +80000d50: 00812487 flw fs1,8(sp) +80000d54: 02010113 addi sp,sp,32 +80000d58: 00008067 ret -80000cf4 : -80000cf4: fe010113 addi sp,sp,-32 -80000cf8: 00812627 fsw fs0,12(sp) -80000cfc: 00112e23 sw ra,28(sp) -80000d00: 20a50453 fmv.s fs0,fa0 -80000d04: 00912427 fsw fs1,8(sp) -80000d08: 05c000ef jal ra,80000d64 <__ieee754_sqrtf> -80000d0c: c341a703 lw a4,-972(gp) # 800026cc <__fdlib_version> -80000d10: fff00793 li a5,-1 -80000d14: 00f70c63 beq a4,a5,80000d2c -80000d18: a08427d3 feq.s a5,fs0,fs0 -80000d1c: 00078863 beqz a5,80000d2c -80000d20: f00004d3 fmv.w.x fs1,zero -80000d24: a09417d3 flt.s a5,fs0,fs1 -80000d28: 00079c63 bnez a5,80000d40 -80000d2c: 01c12083 lw ra,28(sp) -80000d30: 00c12407 flw fs0,12(sp) -80000d34: 00812487 flw fs1,8(sp) -80000d38: 02010113 addi sp,sp,32 -80000d3c: 00008067 ret -80000d40: 144000ef jal ra,80000e84 <__errno> -80000d44: 01c12083 lw ra,28(sp) -80000d48: 02100793 li a5,33 -80000d4c: 00f52023 sw a5,0(a0) -80000d50: 1894f553 fdiv.s fa0,fs1,fs1 -80000d54: 00c12407 flw fs0,12(sp) -80000d58: 00812487 flw fs1,8(sp) -80000d5c: 02010113 addi sp,sp,32 -80000d60: 00008067 ret +80000d5c <__ieee754_sqrtf>: +80000d5c: e00506d3 fmv.x.w a3,fa0 +80000d60: 7f800737 lui a4,0x7f800 +80000d64: 00169793 slli a5,a3,0x1 +80000d68: 0017d793 srli a5,a5,0x1 +80000d6c: 0ce7f463 bgeu a5,a4,80000e34 <__ieee754_sqrtf+0xd8> +80000d70: e0050553 fmv.x.w a0,fa0 +80000d74: 0a078c63 beqz a5,80000e2c <__ieee754_sqrtf+0xd0> +80000d78: 00068793 mv a5,a3 +80000d7c: 0c06c863 bltz a3,80000e4c <__ieee754_sqrtf+0xf0> +80000d80: 00d77633 and a2,a4,a3 +80000d84: 4176d713 srai a4,a3,0x17 +80000d88: 02061663 bnez a2,80000db4 <__ieee754_sqrtf+0x58> +80000d8c: 00800637 lui a2,0x800 +80000d90: 00d676b3 and a3,a2,a3 +80000d94: 0c069663 bnez a3,80000e60 <__ieee754_sqrtf+0x104> +80000d98: 00800537 lui a0,0x800 +80000d9c: 00179793 slli a5,a5,0x1 +80000da0: 00a7f633 and a2,a5,a0 +80000da4: 00068593 mv a1,a3 +80000da8: 00168693 addi a3,a3,1 +80000dac: fe0608e3 beqz a2,80000d9c <__ieee754_sqrtf+0x40> +80000db0: 40b70733 sub a4,a4,a1 +80000db4: 008006b7 lui a3,0x800 +80000db8: fff68613 addi a2,a3,-1 # 7fffff <_start-0x7f800001> +80000dbc: 00c7f7b3 and a5,a5,a2 +80000dc0: f8170713 addi a4,a4,-127 # 7f7fff81 <_start-0x80007f> +80000dc4: 00d7e6b3 or a3,a5,a3 +80000dc8: 00177613 andi a2,a4,1 +80000dcc: 00169793 slli a5,a3,0x1 +80000dd0: 06061a63 bnez a2,80000e44 <__ieee754_sqrtf+0xe8> +80000dd4: 40175813 srai a6,a4,0x1 +80000dd8: 01900693 li a3,25 +80000ddc: 00000513 li a0,0 +80000de0: 00000593 li a1,0 +80000de4: 01000737 lui a4,0x1000 +80000de8: 00e58633 add a2,a1,a4 +80000dec: fff68693 addi a3,a3,-1 +80000df0: 00c7c863 blt a5,a2,80000e00 <__ieee754_sqrtf+0xa4> +80000df4: 00e605b3 add a1,a2,a4 +80000df8: 40c787b3 sub a5,a5,a2 +80000dfc: 00e50533 add a0,a0,a4 +80000e00: 00179793 slli a5,a5,0x1 +80000e04: 00175713 srli a4,a4,0x1 +80000e08: fe0690e3 bnez a3,80000de8 <__ieee754_sqrtf+0x8c> +80000e0c: 00078663 beqz a5,80000e18 <__ieee754_sqrtf+0xbc> +80000e10: 00150513 addi a0,a0,1 # 800001 <_start-0x7f7fffff> +80000e14: ffe57513 andi a0,a0,-2 +80000e18: 40155513 srai a0,a0,0x1 +80000e1c: 3f0007b7 lui a5,0x3f000 +80000e20: 00f50533 add a0,a0,a5 +80000e24: 01781713 slli a4,a6,0x17 +80000e28: 00a70533 add a0,a4,a0 +80000e2c: f0050553 fmv.w.x fa0,a0 +80000e30: 00008067 ret +80000e34: 50a577c3 fmadd.s fa5,fa0,fa0,fa0 +80000e38: e0078553 fmv.x.w a0,fa5 +80000e3c: f0050553 fmv.w.x fa0,a0 +80000e40: 00008067 ret +80000e44: 00269793 slli a5,a3,0x2 +80000e48: f8dff06f j 80000dd4 <__ieee754_sqrtf+0x78> +80000e4c: 08a577d3 fsub.s fa5,fa0,fa0 +80000e50: 18f7f7d3 fdiv.s fa5,fa5,fa5 +80000e54: e0078553 fmv.x.w a0,fa5 +80000e58: f0050553 fmv.w.x fa0,a0 +80000e5c: 00008067 ret +80000e60: fff00593 li a1,-1 +80000e64: 40b70733 sub a4,a4,a1 +80000e68: f4dff06f j 80000db4 <__ieee754_sqrtf+0x58> -80000d64 <__ieee754_sqrtf>: -80000d64: e00506d3 fmv.x.w a3,fa0 -80000d68: 7f800737 lui a4,0x7f800 -80000d6c: 00169793 slli a5,a3,0x1 -80000d70: 0017d793 srli a5,a5,0x1 -80000d74: 0ce7f263 bgeu a5,a4,80000e38 <__ieee754_sqrtf+0xd4> -80000d78: e0050553 fmv.x.w a0,fa0 -80000d7c: 0a078a63 beqz a5,80000e30 <__ieee754_sqrtf+0xcc> -80000d80: 00068793 mv a5,a3 -80000d84: 0c06c663 bltz a3,80000e50 <__ieee754_sqrtf+0xec> -80000d88: 00d77633 and a2,a4,a3 -80000d8c: 4176d713 srai a4,a3,0x17 -80000d90: 02061463 bnez a2,80000db8 <__ieee754_sqrtf+0x54> -80000d94: 00800637 lui a2,0x800 -80000d98: 00d676b3 and a3,a2,a3 -80000d9c: 0c069463 bnez a3,80000e64 <__ieee754_sqrtf+0x100> -80000da0: 00179793 slli a5,a5,0x1 -80000da4: 00879593 slli a1,a5,0x8 -80000da8: 00068613 mv a2,a3 -80000dac: 00168693 addi a3,a3,1 -80000db0: fe05d8e3 bgez a1,80000da0 <__ieee754_sqrtf+0x3c> -80000db4: 40c70733 sub a4,a4,a2 -80000db8: 008006b7 lui a3,0x800 -80000dbc: fff68613 addi a2,a3,-1 # 7fffff <_start-0x7f800001> -80000dc0: 00c7f7b3 and a5,a5,a2 -80000dc4: f8170713 addi a4,a4,-127 # 7f7fff81 <_start-0x80007f> -80000dc8: 00d7e6b3 or a3,a5,a3 -80000dcc: 00177613 andi a2,a4,1 -80000dd0: 00169793 slli a5,a3,0x1 -80000dd4: 06061a63 bnez a2,80000e48 <__ieee754_sqrtf+0xe4> -80000dd8: 40175813 srai a6,a4,0x1 -80000ddc: 01900693 li a3,25 -80000de0: 00000513 li a0,0 -80000de4: 00000593 li a1,0 -80000de8: 01000737 lui a4,0x1000 -80000dec: 00e58633 add a2,a1,a4 -80000df0: fff68693 addi a3,a3,-1 -80000df4: 00c7c863 blt a5,a2,80000e04 <__ieee754_sqrtf+0xa0> -80000df8: 00e605b3 add a1,a2,a4 -80000dfc: 40c787b3 sub a5,a5,a2 -80000e00: 00e50533 add a0,a0,a4 -80000e04: 00179793 slli a5,a5,0x1 -80000e08: 00175713 srli a4,a4,0x1 -80000e0c: fe0690e3 bnez a3,80000dec <__ieee754_sqrtf+0x88> -80000e10: 00078663 beqz a5,80000e1c <__ieee754_sqrtf+0xb8> -80000e14: 00150513 addi a0,a0,1 -80000e18: ffe57513 andi a0,a0,-2 -80000e1c: 40155513 srai a0,a0,0x1 -80000e20: 3f0007b7 lui a5,0x3f000 -80000e24: 00f50533 add a0,a0,a5 -80000e28: 01781713 slli a4,a6,0x17 -80000e2c: 00a70533 add a0,a4,a0 -80000e30: f0050553 fmv.w.x fa0,a0 -80000e34: 00008067 ret -80000e38: 50a577c3 fmadd.s fa5,fa0,fa0,fa0 -80000e3c: e0078553 fmv.x.w a0,fa5 -80000e40: f0050553 fmv.w.x fa0,a0 -80000e44: 00008067 ret -80000e48: 00269793 slli a5,a3,0x2 -80000e4c: f8dff06f j 80000dd8 <__ieee754_sqrtf+0x74> -80000e50: 08a577d3 fsub.s fa5,fa0,fa0 -80000e54: 18f7f7d3 fdiv.s fa5,fa5,fa5 -80000e58: e0078553 fmv.x.w a0,fa5 -80000e5c: f0050553 fmv.w.x fa0,a0 -80000e60: 00008067 ret -80000e64: fff00613 li a2,-1 -80000e68: 40c70733 sub a4,a4,a2 -80000e6c: f4dff06f j 80000db8 <__ieee754_sqrtf+0x54> +80000e6c : +80000e6c: 00050593 mv a1,a0 +80000e70: 00000693 li a3,0 +80000e74: 00000613 li a2,0 +80000e78: 00000513 li a0,0 +80000e7c: 2100006f j 8000108c <__register_exitproc> -80000e70 : -80000e70: 00050593 mv a1,a0 -80000e74: 00000693 li a3,0 -80000e78: 00000613 li a2,0 -80000e7c: 00000513 li a0,0 -80000e80: 2100006f j 80001090 <__register_exitproc> +80000e80 <__errno>: +80000e80: c2c1a503 lw a0,-980(gp) # 800026c4 <_impure_ptr> +80000e84: 00008067 ret -80000e84 <__errno>: -80000e84: c2c1a503 lw a0,-980(gp) # 800026c4 <_impure_ptr> -80000e88: 00008067 ret +80000e88 : +80000e88: ff010113 addi sp,sp,-16 +80000e8c: 00000593 li a1,0 +80000e90: 00812423 sw s0,8(sp) +80000e94: 00112623 sw ra,12(sp) +80000e98: 00050413 mv s0,a0 +80000e9c: 288000ef jal ra,80001124 <__call_exitprocs> +80000ea0: c281a503 lw a0,-984(gp) # 800026c0 <_global_impure_ptr> +80000ea4: 03c52783 lw a5,60(a0) +80000ea8: 00078463 beqz a5,80000eb0 +80000eac: 000780e7 jalr a5 # 3f000000 <_start-0x41000000> +80000eb0: 00040513 mv a0,s0 +80000eb4: cc9ff0ef jal ra,80000b7c <_exit> -80000e8c : -80000e8c: ff010113 addi sp,sp,-16 -80000e90: 00000593 li a1,0 -80000e94: 00812423 sw s0,8(sp) -80000e98: 00112623 sw ra,12(sp) -80000e9c: 00050413 mv s0,a0 -80000ea0: 288000ef jal ra,80001128 <__call_exitprocs> -80000ea4: c281a503 lw a0,-984(gp) # 800026c0 <_global_impure_ptr> -80000ea8: 03c52783 lw a5,60(a0) -80000eac: 00078463 beqz a5,80000eb4 -80000eb0: 000780e7 jalr a5 # 3f000000 <_start-0x41000000> -80000eb4: 00040513 mv a0,s0 -80000eb8: ccdff0ef jal ra,80000b84 <_exit> +80000eb8 <__libc_fini_array>: +80000eb8: ff010113 addi sp,sp,-16 +80000ebc: 00812423 sw s0,8(sp) +80000ec0: 800027b7 lui a5,0x80002 +80000ec4: 80002437 lui s0,0x80002 +80000ec8: 29440413 addi s0,s0,660 # 80002294 <__global_pointer$+0xfffff7fc> +80000ecc: 29478793 addi a5,a5,660 # 80002294 <__global_pointer$+0xfffff7fc> +80000ed0: 408787b3 sub a5,a5,s0 +80000ed4: 00912223 sw s1,4(sp) +80000ed8: 00112623 sw ra,12(sp) +80000edc: 4027d493 srai s1,a5,0x2 +80000ee0: 02048063 beqz s1,80000f00 <__libc_fini_array+0x48> +80000ee4: ffc78793 addi a5,a5,-4 +80000ee8: 00878433 add s0,a5,s0 +80000eec: 00042783 lw a5,0(s0) +80000ef0: fff48493 addi s1,s1,-1 +80000ef4: ffc40413 addi s0,s0,-4 +80000ef8: 000780e7 jalr a5 +80000efc: fe0498e3 bnez s1,80000eec <__libc_fini_array+0x34> +80000f00: 00c12083 lw ra,12(sp) +80000f04: 00812403 lw s0,8(sp) +80000f08: 00412483 lw s1,4(sp) +80000f0c: 01010113 addi sp,sp,16 +80000f10: 00008067 ret -80000ebc <__libc_fini_array>: -80000ebc: ff010113 addi sp,sp,-16 -80000ec0: 00812423 sw s0,8(sp) -80000ec4: 800027b7 lui a5,0x80002 -80000ec8: 80002437 lui s0,0x80002 -80000ecc: 29840413 addi s0,s0,664 # 80002298 <__global_pointer$+0xfffff800> -80000ed0: 29878793 addi a5,a5,664 # 80002298 <__global_pointer$+0xfffff800> -80000ed4: 408787b3 sub a5,a5,s0 -80000ed8: 00912223 sw s1,4(sp) -80000edc: 00112623 sw ra,12(sp) -80000ee0: 4027d493 srai s1,a5,0x2 -80000ee4: 02048063 beqz s1,80000f04 <__libc_fini_array+0x48> -80000ee8: ffc78793 addi a5,a5,-4 -80000eec: 00878433 add s0,a5,s0 -80000ef0: 00042783 lw a5,0(s0) -80000ef4: fff48493 addi s1,s1,-1 -80000ef8: ffc40413 addi s0,s0,-4 -80000efc: 000780e7 jalr a5 -80000f00: fe0498e3 bnez s1,80000ef0 <__libc_fini_array+0x34> -80000f04: 00c12083 lw ra,12(sp) -80000f08: 00812403 lw s0,8(sp) -80000f0c: 00412483 lw s1,4(sp) -80000f10: 01010113 addi sp,sp,16 -80000f14: 00008067 ret +80000f14 <__libc_init_array>: +80000f14: ff010113 addi sp,sp,-16 +80000f18: 00812423 sw s0,8(sp) +80000f1c: 01212023 sw s2,0(sp) +80000f20: 80002437 lui s0,0x80002 +80000f24: 80002937 lui s2,0x80002 +80000f28: 29040793 addi a5,s0,656 # 80002290 <__global_pointer$+0xfffff7f8> +80000f2c: 29090913 addi s2,s2,656 # 80002290 <__global_pointer$+0xfffff7f8> +80000f30: 40f90933 sub s2,s2,a5 +80000f34: 00112623 sw ra,12(sp) +80000f38: 00912223 sw s1,4(sp) +80000f3c: 40295913 srai s2,s2,0x2 +80000f40: 02090063 beqz s2,80000f60 <__libc_init_array+0x4c> +80000f44: 29040413 addi s0,s0,656 +80000f48: 00000493 li s1,0 +80000f4c: 00042783 lw a5,0(s0) +80000f50: 00148493 addi s1,s1,1 +80000f54: 00440413 addi s0,s0,4 +80000f58: 000780e7 jalr a5 +80000f5c: fe9918e3 bne s2,s1,80000f4c <__libc_init_array+0x38> +80000f60: 80002437 lui s0,0x80002 +80000f64: 80002937 lui s2,0x80002 +80000f68: 29040793 addi a5,s0,656 # 80002290 <__global_pointer$+0xfffff7f8> +80000f6c: 29490913 addi s2,s2,660 # 80002294 <__global_pointer$+0xfffff7fc> +80000f70: 40f90933 sub s2,s2,a5 +80000f74: 40295913 srai s2,s2,0x2 +80000f78: 02090063 beqz s2,80000f98 <__libc_init_array+0x84> +80000f7c: 29040413 addi s0,s0,656 +80000f80: 00000493 li s1,0 +80000f84: 00042783 lw a5,0(s0) +80000f88: 00148493 addi s1,s1,1 +80000f8c: 00440413 addi s0,s0,4 +80000f90: 000780e7 jalr a5 +80000f94: fe9918e3 bne s2,s1,80000f84 <__libc_init_array+0x70> +80000f98: 00c12083 lw ra,12(sp) +80000f9c: 00812403 lw s0,8(sp) +80000fa0: 00412483 lw s1,4(sp) +80000fa4: 00012903 lw s2,0(sp) +80000fa8: 01010113 addi sp,sp,16 +80000fac: 00008067 ret -80000f18 <__libc_init_array>: -80000f18: ff010113 addi sp,sp,-16 -80000f1c: 00812423 sw s0,8(sp) -80000f20: 01212023 sw s2,0(sp) -80000f24: 80002437 lui s0,0x80002 -80000f28: 80002937 lui s2,0x80002 -80000f2c: 29440793 addi a5,s0,660 # 80002294 <__global_pointer$+0xfffff7fc> -80000f30: 29490913 addi s2,s2,660 # 80002294 <__global_pointer$+0xfffff7fc> -80000f34: 40f90933 sub s2,s2,a5 -80000f38: 00112623 sw ra,12(sp) -80000f3c: 00912223 sw s1,4(sp) -80000f40: 40295913 srai s2,s2,0x2 -80000f44: 02090063 beqz s2,80000f64 <__libc_init_array+0x4c> -80000f48: 29440413 addi s0,s0,660 -80000f4c: 00000493 li s1,0 -80000f50: 00042783 lw a5,0(s0) -80000f54: 00148493 addi s1,s1,1 -80000f58: 00440413 addi s0,s0,4 -80000f5c: 000780e7 jalr a5 -80000f60: fe9918e3 bne s2,s1,80000f50 <__libc_init_array+0x38> -80000f64: 80002437 lui s0,0x80002 -80000f68: 80002937 lui s2,0x80002 -80000f6c: 29440793 addi a5,s0,660 # 80002294 <__global_pointer$+0xfffff7fc> -80000f70: 29890913 addi s2,s2,664 # 80002298 <__global_pointer$+0xfffff800> -80000f74: 40f90933 sub s2,s2,a5 -80000f78: 40295913 srai s2,s2,0x2 -80000f7c: 02090063 beqz s2,80000f9c <__libc_init_array+0x84> -80000f80: 29440413 addi s0,s0,660 -80000f84: 00000493 li s1,0 -80000f88: 00042783 lw a5,0(s0) -80000f8c: 00148493 addi s1,s1,1 -80000f90: 00440413 addi s0,s0,4 -80000f94: 000780e7 jalr a5 -80000f98: fe9918e3 bne s2,s1,80000f88 <__libc_init_array+0x70> -80000f9c: 00c12083 lw ra,12(sp) -80000fa0: 00812403 lw s0,8(sp) -80000fa4: 00412483 lw s1,4(sp) -80000fa8: 00012903 lw s2,0(sp) -80000fac: 01010113 addi sp,sp,16 -80000fb0: 00008067 ret +80000fb0 : +80000fb0: 00f00313 li t1,15 +80000fb4: 00050713 mv a4,a0 +80000fb8: 02c37e63 bgeu t1,a2,80000ff4 +80000fbc: 00f77793 andi a5,a4,15 +80000fc0: 0a079063 bnez a5,80001060 +80000fc4: 08059263 bnez a1,80001048 +80000fc8: ff067693 andi a3,a2,-16 +80000fcc: 00f67613 andi a2,a2,15 +80000fd0: 00e686b3 add a3,a3,a4 +80000fd4: 00b72023 sw a1,0(a4) # 1000000 <_start-0x7f000000> +80000fd8: 00b72223 sw a1,4(a4) +80000fdc: 00b72423 sw a1,8(a4) +80000fe0: 00b72623 sw a1,12(a4) +80000fe4: 01070713 addi a4,a4,16 +80000fe8: fed766e3 bltu a4,a3,80000fd4 +80000fec: 00061463 bnez a2,80000ff4 +80000ff0: 00008067 ret +80000ff4: 40c306b3 sub a3,t1,a2 +80000ff8: 00269693 slli a3,a3,0x2 +80000ffc: 00000297 auipc t0,0x0 +80001000: 005686b3 add a3,a3,t0 +80001004: 00c68067 jr 12(a3) +80001008: 00b70723 sb a1,14(a4) +8000100c: 00b706a3 sb a1,13(a4) +80001010: 00b70623 sb a1,12(a4) +80001014: 00b705a3 sb a1,11(a4) +80001018: 00b70523 sb a1,10(a4) +8000101c: 00b704a3 sb a1,9(a4) +80001020: 00b70423 sb a1,8(a4) +80001024: 00b703a3 sb a1,7(a4) +80001028: 00b70323 sb a1,6(a4) +8000102c: 00b702a3 sb a1,5(a4) +80001030: 00b70223 sb a1,4(a4) +80001034: 00b701a3 sb a1,3(a4) +80001038: 00b70123 sb a1,2(a4) +8000103c: 00b700a3 sb a1,1(a4) +80001040: 00b70023 sb a1,0(a4) +80001044: 00008067 ret +80001048: 0ff5f593 andi a1,a1,255 +8000104c: 00859693 slli a3,a1,0x8 +80001050: 00d5e5b3 or a1,a1,a3 +80001054: 01059693 slli a3,a1,0x10 +80001058: 00d5e5b3 or a1,a1,a3 +8000105c: f6dff06f j 80000fc8 +80001060: 00279693 slli a3,a5,0x2 +80001064: 00000297 auipc t0,0x0 +80001068: 005686b3 add a3,a3,t0 +8000106c: 00008293 mv t0,ra +80001070: fa0680e7 jalr -96(a3) +80001074: 00028093 mv ra,t0 +80001078: ff078793 addi a5,a5,-16 +8000107c: 40f70733 sub a4,a4,a5 +80001080: 00f60633 add a2,a2,a5 +80001084: f6c378e3 bgeu t1,a2,80000ff4 +80001088: f3dff06f j 80000fc4 -80000fb4 : -80000fb4: 00f00313 li t1,15 -80000fb8: 00050713 mv a4,a0 -80000fbc: 02c37e63 bgeu t1,a2,80000ff8 -80000fc0: 00f77793 andi a5,a4,15 -80000fc4: 0a079063 bnez a5,80001064 -80000fc8: 08059263 bnez a1,8000104c -80000fcc: ff067693 andi a3,a2,-16 -80000fd0: 00f67613 andi a2,a2,15 -80000fd4: 00e686b3 add a3,a3,a4 -80000fd8: 00b72023 sw a1,0(a4) # 1000000 <_start-0x7f000000> -80000fdc: 00b72223 sw a1,4(a4) -80000fe0: 00b72423 sw a1,8(a4) -80000fe4: 00b72623 sw a1,12(a4) -80000fe8: 01070713 addi a4,a4,16 -80000fec: fed766e3 bltu a4,a3,80000fd8 -80000ff0: 00061463 bnez a2,80000ff8 -80000ff4: 00008067 ret -80000ff8: 40c306b3 sub a3,t1,a2 -80000ffc: 00269693 slli a3,a3,0x2 -80001000: 00000297 auipc t0,0x0 -80001004: 005686b3 add a3,a3,t0 -80001008: 00c68067 jr 12(a3) -8000100c: 00b70723 sb a1,14(a4) -80001010: 00b706a3 sb a1,13(a4) -80001014: 00b70623 sb a1,12(a4) -80001018: 00b705a3 sb a1,11(a4) -8000101c: 00b70523 sb a1,10(a4) -80001020: 00b704a3 sb a1,9(a4) -80001024: 00b70423 sb a1,8(a4) -80001028: 00b703a3 sb a1,7(a4) -8000102c: 00b70323 sb a1,6(a4) -80001030: 00b702a3 sb a1,5(a4) -80001034: 00b70223 sb a1,4(a4) -80001038: 00b701a3 sb a1,3(a4) -8000103c: 00b70123 sb a1,2(a4) -80001040: 00b700a3 sb a1,1(a4) -80001044: 00b70023 sb a1,0(a4) -80001048: 00008067 ret -8000104c: 0ff5f593 andi a1,a1,255 -80001050: 00859693 slli a3,a1,0x8 -80001054: 00d5e5b3 or a1,a1,a3 -80001058: 01059693 slli a3,a1,0x10 -8000105c: 00d5e5b3 or a1,a1,a3 -80001060: f6dff06f j 80000fcc -80001064: 00279693 slli a3,a5,0x2 -80001068: 00000297 auipc t0,0x0 -8000106c: 005686b3 add a3,a3,t0 -80001070: 00008293 mv t0,ra -80001074: fa0680e7 jalr -96(a3) -80001078: 00028093 mv ra,t0 -8000107c: ff078793 addi a5,a5,-16 -80001080: 40f70733 sub a4,a4,a5 -80001084: 00f60633 add a2,a2,a5 -80001088: f6c378e3 bgeu t1,a2,80000ff8 -8000108c: f3dff06f j 80000fc8 +8000108c <__register_exitproc>: +8000108c: c281a703 lw a4,-984(gp) # 800026c0 <_global_impure_ptr> +80001090: 14872783 lw a5,328(a4) +80001094: 04078c63 beqz a5,800010ec <__register_exitproc+0x60> +80001098: 0047a703 lw a4,4(a5) +8000109c: 01f00813 li a6,31 +800010a0: 06e84e63 blt a6,a4,8000111c <__register_exitproc+0x90> +800010a4: 00271813 slli a6,a4,0x2 +800010a8: 02050663 beqz a0,800010d4 <__register_exitproc+0x48> +800010ac: 01078333 add t1,a5,a6 +800010b0: 08c32423 sw a2,136(t1) +800010b4: 1887a883 lw a7,392(a5) +800010b8: 00100613 li a2,1 +800010bc: 00e61633 sll a2,a2,a4 +800010c0: 00c8e8b3 or a7,a7,a2 +800010c4: 1917a423 sw a7,392(a5) +800010c8: 10d32423 sw a3,264(t1) +800010cc: 00200693 li a3,2 +800010d0: 02d50463 beq a0,a3,800010f8 <__register_exitproc+0x6c> +800010d4: 00170713 addi a4,a4,1 +800010d8: 00e7a223 sw a4,4(a5) +800010dc: 010787b3 add a5,a5,a6 +800010e0: 00b7a423 sw a1,8(a5) +800010e4: 00000513 li a0,0 +800010e8: 00008067 ret +800010ec: 14c70793 addi a5,a4,332 +800010f0: 14f72423 sw a5,328(a4) +800010f4: fa5ff06f j 80001098 <__register_exitproc+0xc> +800010f8: 18c7a683 lw a3,396(a5) +800010fc: 00170713 addi a4,a4,1 +80001100: 00e7a223 sw a4,4(a5) +80001104: 00c6e633 or a2,a3,a2 +80001108: 18c7a623 sw a2,396(a5) +8000110c: 010787b3 add a5,a5,a6 +80001110: 00b7a423 sw a1,8(a5) +80001114: 00000513 li a0,0 +80001118: 00008067 ret +8000111c: fff00513 li a0,-1 +80001120: 00008067 ret -80001090 <__register_exitproc>: -80001090: c281a703 lw a4,-984(gp) # 800026c0 <_global_impure_ptr> -80001094: 14872783 lw a5,328(a4) -80001098: 04078c63 beqz a5,800010f0 <__register_exitproc+0x60> -8000109c: 0047a703 lw a4,4(a5) -800010a0: 01f00813 li a6,31 -800010a4: 06e84e63 blt a6,a4,80001120 <__register_exitproc+0x90> -800010a8: 00271813 slli a6,a4,0x2 -800010ac: 02050663 beqz a0,800010d8 <__register_exitproc+0x48> -800010b0: 01078333 add t1,a5,a6 -800010b4: 08c32423 sw a2,136(t1) -800010b8: 1887a883 lw a7,392(a5) -800010bc: 00100613 li a2,1 -800010c0: 00e61633 sll a2,a2,a4 -800010c4: 00c8e8b3 or a7,a7,a2 -800010c8: 1917a423 sw a7,392(a5) -800010cc: 10d32423 sw a3,264(t1) -800010d0: 00200693 li a3,2 -800010d4: 02d50463 beq a0,a3,800010fc <__register_exitproc+0x6c> -800010d8: 00170713 addi a4,a4,1 -800010dc: 00e7a223 sw a4,4(a5) -800010e0: 010787b3 add a5,a5,a6 -800010e4: 00b7a423 sw a1,8(a5) -800010e8: 00000513 li a0,0 -800010ec: 00008067 ret -800010f0: 14c70793 addi a5,a4,332 -800010f4: 14f72423 sw a5,328(a4) -800010f8: fa5ff06f j 8000109c <__register_exitproc+0xc> -800010fc: 18c7a683 lw a3,396(a5) -80001100: 00170713 addi a4,a4,1 -80001104: 00e7a223 sw a4,4(a5) -80001108: 00c6e633 or a2,a3,a2 -8000110c: 18c7a623 sw a2,396(a5) -80001110: 010787b3 add a5,a5,a6 -80001114: 00b7a423 sw a1,8(a5) -80001118: 00000513 li a0,0 -8000111c: 00008067 ret -80001120: fff00513 li a0,-1 -80001124: 00008067 ret - -80001128 <__call_exitprocs>: -80001128: fd010113 addi sp,sp,-48 -8000112c: 01412c23 sw s4,24(sp) -80001130: c281aa03 lw s4,-984(gp) # 800026c0 <_global_impure_ptr> -80001134: 03212023 sw s2,32(sp) -80001138: 02112623 sw ra,44(sp) -8000113c: 148a2903 lw s2,328(s4) -80001140: 02812423 sw s0,40(sp) -80001144: 02912223 sw s1,36(sp) -80001148: 01312e23 sw s3,28(sp) -8000114c: 01512a23 sw s5,20(sp) -80001150: 01612823 sw s6,16(sp) -80001154: 01712623 sw s7,12(sp) -80001158: 01812423 sw s8,8(sp) -8000115c: 04090063 beqz s2,8000119c <__call_exitprocs+0x74> -80001160: 00050b13 mv s6,a0 -80001164: 00058b93 mv s7,a1 -80001168: 00100a93 li s5,1 -8000116c: fff00993 li s3,-1 -80001170: 00492483 lw s1,4(s2) -80001174: fff48413 addi s0,s1,-1 -80001178: 02044263 bltz s0,8000119c <__call_exitprocs+0x74> -8000117c: 00249493 slli s1,s1,0x2 -80001180: 009904b3 add s1,s2,s1 -80001184: 040b8463 beqz s7,800011cc <__call_exitprocs+0xa4> -80001188: 1044a783 lw a5,260(s1) -8000118c: 05778063 beq a5,s7,800011cc <__call_exitprocs+0xa4> -80001190: fff40413 addi s0,s0,-1 -80001194: ffc48493 addi s1,s1,-4 -80001198: ff3416e3 bne s0,s3,80001184 <__call_exitprocs+0x5c> -8000119c: 02c12083 lw ra,44(sp) -800011a0: 02812403 lw s0,40(sp) -800011a4: 02412483 lw s1,36(sp) -800011a8: 02012903 lw s2,32(sp) -800011ac: 01c12983 lw s3,28(sp) -800011b0: 01812a03 lw s4,24(sp) -800011b4: 01412a83 lw s5,20(sp) -800011b8: 01012b03 lw s6,16(sp) -800011bc: 00c12b83 lw s7,12(sp) -800011c0: 00812c03 lw s8,8(sp) -800011c4: 03010113 addi sp,sp,48 -800011c8: 00008067 ret -800011cc: 00492783 lw a5,4(s2) -800011d0: 0044a683 lw a3,4(s1) -800011d4: fff78793 addi a5,a5,-1 -800011d8: 04878e63 beq a5,s0,80001234 <__call_exitprocs+0x10c> -800011dc: 0004a223 sw zero,4(s1) -800011e0: fa0688e3 beqz a3,80001190 <__call_exitprocs+0x68> -800011e4: 18892783 lw a5,392(s2) -800011e8: 008a9733 sll a4,s5,s0 -800011ec: 00492c03 lw s8,4(s2) -800011f0: 00f777b3 and a5,a4,a5 -800011f4: 02079263 bnez a5,80001218 <__call_exitprocs+0xf0> -800011f8: 000680e7 jalr a3 -800011fc: 00492703 lw a4,4(s2) -80001200: 148a2783 lw a5,328(s4) -80001204: 01871463 bne a4,s8,8000120c <__call_exitprocs+0xe4> -80001208: f8f904e3 beq s2,a5,80001190 <__call_exitprocs+0x68> -8000120c: f80788e3 beqz a5,8000119c <__call_exitprocs+0x74> -80001210: 00078913 mv s2,a5 -80001214: f5dff06f j 80001170 <__call_exitprocs+0x48> -80001218: 18c92783 lw a5,396(s2) -8000121c: 0844a583 lw a1,132(s1) -80001220: 00f77733 and a4,a4,a5 -80001224: 00071c63 bnez a4,8000123c <__call_exitprocs+0x114> -80001228: 000b0513 mv a0,s6 -8000122c: 000680e7 jalr a3 -80001230: fcdff06f j 800011fc <__call_exitprocs+0xd4> -80001234: 00892223 sw s0,4(s2) -80001238: fa9ff06f j 800011e0 <__call_exitprocs+0xb8> -8000123c: 00058513 mv a0,a1 -80001240: 000680e7 jalr a3 -80001244: fb9ff06f j 800011fc <__call_exitprocs+0xd4> +80001124 <__call_exitprocs>: +80001124: fd010113 addi sp,sp,-48 +80001128: 01412c23 sw s4,24(sp) +8000112c: c281aa03 lw s4,-984(gp) # 800026c0 <_global_impure_ptr> +80001130: 03212023 sw s2,32(sp) +80001134: 02112623 sw ra,44(sp) +80001138: 148a2903 lw s2,328(s4) +8000113c: 02812423 sw s0,40(sp) +80001140: 02912223 sw s1,36(sp) +80001144: 01312e23 sw s3,28(sp) +80001148: 01512a23 sw s5,20(sp) +8000114c: 01612823 sw s6,16(sp) +80001150: 01712623 sw s7,12(sp) +80001154: 01812423 sw s8,8(sp) +80001158: 04090063 beqz s2,80001198 <__call_exitprocs+0x74> +8000115c: 00050b13 mv s6,a0 +80001160: 00058b93 mv s7,a1 +80001164: 00100a93 li s5,1 +80001168: fff00993 li s3,-1 +8000116c: 00492483 lw s1,4(s2) +80001170: fff48413 addi s0,s1,-1 +80001174: 02044263 bltz s0,80001198 <__call_exitprocs+0x74> +80001178: 00249493 slli s1,s1,0x2 +8000117c: 009904b3 add s1,s2,s1 +80001180: 040b8463 beqz s7,800011c8 <__call_exitprocs+0xa4> +80001184: 1044a783 lw a5,260(s1) +80001188: 05778063 beq a5,s7,800011c8 <__call_exitprocs+0xa4> +8000118c: fff40413 addi s0,s0,-1 +80001190: ffc48493 addi s1,s1,-4 +80001194: ff3416e3 bne s0,s3,80001180 <__call_exitprocs+0x5c> +80001198: 02c12083 lw ra,44(sp) +8000119c: 02812403 lw s0,40(sp) +800011a0: 02412483 lw s1,36(sp) +800011a4: 02012903 lw s2,32(sp) +800011a8: 01c12983 lw s3,28(sp) +800011ac: 01812a03 lw s4,24(sp) +800011b0: 01412a83 lw s5,20(sp) +800011b4: 01012b03 lw s6,16(sp) +800011b8: 00c12b83 lw s7,12(sp) +800011bc: 00812c03 lw s8,8(sp) +800011c0: 03010113 addi sp,sp,48 +800011c4: 00008067 ret +800011c8: 00492783 lw a5,4(s2) +800011cc: 0044a683 lw a3,4(s1) +800011d0: fff78793 addi a5,a5,-1 +800011d4: 04878e63 beq a5,s0,80001230 <__call_exitprocs+0x10c> +800011d8: 0004a223 sw zero,4(s1) +800011dc: fa0688e3 beqz a3,8000118c <__call_exitprocs+0x68> +800011e0: 18892783 lw a5,392(s2) +800011e4: 008a9733 sll a4,s5,s0 +800011e8: 00492c03 lw s8,4(s2) +800011ec: 00f777b3 and a5,a4,a5 +800011f0: 02079263 bnez a5,80001214 <__call_exitprocs+0xf0> +800011f4: 000680e7 jalr a3 +800011f8: 00492703 lw a4,4(s2) +800011fc: 148a2783 lw a5,328(s4) +80001200: 01871463 bne a4,s8,80001208 <__call_exitprocs+0xe4> +80001204: f92784e3 beq a5,s2,8000118c <__call_exitprocs+0x68> +80001208: f80788e3 beqz a5,80001198 <__call_exitprocs+0x74> +8000120c: 00078913 mv s2,a5 +80001210: f5dff06f j 8000116c <__call_exitprocs+0x48> +80001214: 18c92783 lw a5,396(s2) +80001218: 0844a583 lw a1,132(s1) +8000121c: 00f77733 and a4,a4,a5 +80001220: 00071c63 bnez a4,80001238 <__call_exitprocs+0x114> +80001224: 000b0513 mv a0,s6 +80001228: 000680e7 jalr a3 +8000122c: fcdff06f j 800011f8 <__call_exitprocs+0xd4> +80001230: 00892223 sw s0,4(s2) +80001234: fa9ff06f j 800011dc <__call_exitprocs+0xb8> +80001238: 00058513 mv a0,a1 +8000123c: 000680e7 jalr a3 +80001240: fb9ff06f j 800011f8 <__call_exitprocs+0xd4> Disassembly of section .rodata: -80001248 : -80001248: 00b0 addi a2,sp,72 +80001244 : +80001244: 00b0 addi a2,sp,72 +80001246: 8000 0x8000 +80001248: 013c addi a5,sp,136 8000124a: 8000 0x8000 -8000124c: 013c addi a5,sp,136 +8000124c: 01c8 addi a0,sp,196 8000124e: 8000 0x8000 -80001250: 01c8 addi a0,sp,196 +80001250: 0254 addi a3,sp,260 80001252: 8000 0x8000 -80001254: 0254 addi a3,sp,260 +80001254: 02e8 addi a0,sp,332 80001256: 8000 0x8000 -80001258: 02e8 addi a0,sp,332 +80001258: 0374 addi a3,sp,396 8000125a: 8000 0x8000 -8000125c: 0374 addi a3,sp,396 +8000125c: 0400 addi s0,sp,512 8000125e: 8000 0x8000 -80001260: 0400 addi s0,sp,512 +80001260: 048c addi a1,sp,576 80001262: 8000 0x8000 -80001264: 048c addi a1,sp,576 +80001264: 0518 addi a4,sp,640 80001266: 8000 0x8000 -80001268: 0518 addi a4,sp,640 +80001268: 05a4 addi s1,sp,712 8000126a: 8000 0x8000 -8000126c: 05a4 addi s1,sp,712 +8000126c: 0630 addi a2,sp,776 8000126e: 8000 0x8000 -80001270: 0630 addi a2,sp,776 +80001270: 06bc addi a5,sp,840 80001272: 8000 0x8000 -80001274: 06bc addi a5,sp,840 +80001274: 0750 addi a2,sp,900 80001276: 8000 0x8000 -80001278: 0750 addi a2,sp,900 +80001278: 07dc addi a5,sp,964 8000127a: 8000 0x8000 -8000127c: 07dc addi a5,sp,964 +8000127c: 0ab0 addi a2,sp,344 8000127e: 8000 0x8000 -80001280: 0ab0 addi a2,sp,344 +80001280: 0870 addi a2,sp,28 80001282: 8000 0x8000 -80001284: 0870 addi a2,sp,28 +80001284: 0900 addi s0,sp,144 80001286: 8000 0x8000 -80001288: 0900 addi s0,sp,144 +80001288: 0990 addi a2,sp,208 8000128a: 8000 0x8000 -8000128c: 0990 addi a2,sp,208 +8000128c: 0a20 addi s0,sp,280 8000128e: 8000 0x8000 -80001290: 0a20 addi s0,sp,280 -80001292: 8000 0x8000 Disassembly of section .init_array: -80002294 <__init_array_start>: -80002294: 0098 addi a4,sp,64 -80002296: 8000 0x8000 +80002290 <__init_array_start>: +80002290: 0098 addi a4,sp,64 +80002292: 8000 0x8000 Disassembly of section .data: @@ -1379,21 +1378,20 @@ Disassembly of section .comment: 0: 3a434347 fmsub.d ft6,ft6,ft4,ft7,rmm 4: 2820 fld fs0,80(s0) 6: 29554e47 fmsub.s ft8,fa0,fs5,ft5,rmm - a: 3920 fld fs0,112(a0) - c: 322e fld ft4,232(sp) - e: 302e fld ft0,232(sp) - ... + a: 3120 fld fs0,96(a0) + c: 2e30 fld fa2,88(a2) + e: 2e32 fld ft8,264(sp) + 10: 0030 addi a2,sp,8 Disassembly of section .riscv.attributes: 00000000 <.riscv.attributes>: - 0: 2541 jal 680 <_start-0x7ffff980> + 0: 2941 jal 490 <_start-0x7ffffb70> 2: 0000 unimp 4: 7200 flw fs0,32(a2) 6: 7369 lui t1,0xffffa 8: 01007663 bgeu zero,a6,14 <_start-0x7fffffec> - c: 0000001b 0x1b - 10: 1004 addi s1,sp,32 + c: 001f 0000 1004 0x10040000001f 12: 7205 lui tp,0xfffe1 14: 3376 fld ft6,376(sp) 16: 6932 flw fs2,12(sp) @@ -1402,3 +1400,5 @@ Disassembly of section .riscv.attributes: 1c: 326d jal fffff9c6 <__global_pointer$+0x7fffcf2e> 1e: 3070 fld fa2,224(s0) 20: 665f 7032 0030 0x307032665f + 26: 0108 addi a0,sp,128 + 28: 0b0a slli s6,s6,0x2 diff --git a/driver/tests/dogfood/kernel.elf b/driver/tests/dogfood/kernel.elf index 2f858cee..81caa976 100755 Binary files a/driver/tests/dogfood/kernel.elf and b/driver/tests/dogfood/kernel.elf differ diff --git a/hw/opae/vortex_afu.json b/hw/opae/vortex_afu.json index fb1e908c..cab3c388 100644 --- a/hw/opae/vortex_afu.json +++ b/hw/opae/vortex_afu.json @@ -8,9 +8,8 @@ "cmd-mem-read": 1, "cmd-mem-write": 2, "cmd-run": 3, - "cmd-clflush": 4, - "cmd-csr-read": 5, - "cmd-csr-write": 6, + "cmd-csr-read": 4, + "cmd-csr-write": 5, "mmio-cmd-type": 10, "mmio-io-addr": 12, diff --git a/hw/rtl/VX_cluster.v b/hw/rtl/VX_cluster.v index 18cb2d7c..4e55060b 100644 --- a/hw/rtl/VX_cluster.v +++ b/hw/rtl/VX_cluster.v @@ -24,30 +24,18 @@ module VX_cluster #( input wire [`L2DRAM_TAG_WIDTH-1:0] dram_rsp_tag, output wire dram_rsp_ready, - // Snoop request - input wire snp_req_valid, - input wire [`L2DRAM_ADDR_WIDTH-1:0] snp_req_addr, - input wire snp_req_inv, - input wire [`L2SNP_TAG_WIDTH-1:0] snp_req_tag, - output wire snp_req_ready, - - // Snoop response - output wire snp_rsp_valid, - output wire [`L2SNP_TAG_WIDTH-1:0] snp_rsp_tag, - input wire snp_rsp_ready, - // CSR Request - input wire csr_io_req_valid, - input wire [`NC_BITS-1:0] csr_io_req_coreid, - input wire [11:0] csr_io_req_addr, - input wire csr_io_req_rw, - input wire [31:0] csr_io_req_data, - output wire csr_io_req_ready, + input wire csr_req_valid, + input wire [`NC_BITS-1:0] csr_req_coreid, + input wire [11:0] csr_req_addr, + input wire csr_req_rw, + input wire [31:0] csr_req_data, + output wire csr_req_ready, // CSR Response - output wire csr_io_rsp_valid, - output wire [31:0] csr_io_rsp_data, - input wire csr_io_rsp_ready, + output wire csr_rsp_valid, + output wire [31:0] csr_rsp_data, + input wire csr_rsp_ready, // Status output wire busy, @@ -66,25 +54,15 @@ module VX_cluster #( wire [`NUM_CORES-1:0][`XDRAM_TAG_WIDTH-1:0] per_core_dram_rsp_tag; wire [`NUM_CORES-1:0] per_core_dram_rsp_ready; - wire [`NUM_CORES-1:0] per_core_snp_req_valid; - wire [`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] per_core_snp_req_addr; - wire [`NUM_CORES-1:0] per_core_snp_req_inv; - wire [`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] per_core_snp_req_tag; - wire [`NUM_CORES-1:0] per_core_snp_req_ready; - - wire [`NUM_CORES-1:0] per_core_snp_rsp_valid; - wire [`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] per_core_snp_rsp_tag; - wire [`NUM_CORES-1:0] per_core_snp_rsp_ready; + wire [`NUM_CORES-1:0] per_core_csr_req_valid; + wire [`NUM_CORES-1:0][11:0] per_core_csr_req_addr; + wire [`NUM_CORES-1:0] per_core_csr_req_rw; + wire [`NUM_CORES-1:0][31:0] per_core_csr_req_data; + wire [`NUM_CORES-1:0] per_core_csr_req_ready; - wire [`NUM_CORES-1:0] per_core_csr_io_req_valid; - wire [`NUM_CORES-1:0][11:0] per_core_csr_io_req_addr; - wire [`NUM_CORES-1:0] per_core_csr_io_req_rw; - wire [`NUM_CORES-1:0][31:0] per_core_csr_io_req_data; - wire [`NUM_CORES-1:0] per_core_csr_io_req_ready; - - wire [`NUM_CORES-1:0] per_core_csr_io_rsp_valid; - wire [`NUM_CORES-1:0][31:0] per_core_csr_io_rsp_data; - wire [`NUM_CORES-1:0] per_core_csr_io_rsp_ready; + wire [`NUM_CORES-1:0] per_core_csr_rsp_valid; + wire [`NUM_CORES-1:0][31:0] per_core_csr_rsp_data; + wire [`NUM_CORES-1:0] per_core_csr_rsp_ready; wire [`NUM_CORES-1:0] per_core_busy; wire [`NUM_CORES-1:0] per_core_ebreak; @@ -95,129 +73,77 @@ module VX_cluster #( ) core ( `SCOPE_BIND_VX_cluster_core(i) - .clk (clk), - .reset (reset), + .clk (clk), + .reset (reset), - .dram_req_valid (per_core_dram_req_valid [i]), - .dram_req_rw (per_core_dram_req_rw [i]), - .dram_req_byteen (per_core_dram_req_byteen [i]), - .dram_req_addr (per_core_dram_req_addr [i]), - .dram_req_data (per_core_dram_req_data [i]), - .dram_req_tag (per_core_dram_req_tag [i]), - .dram_req_ready (per_core_dram_req_ready [i]), + .dram_req_valid (per_core_dram_req_valid[i]), + .dram_req_rw (per_core_dram_req_rw [i]), + .dram_req_byteen(per_core_dram_req_byteen[i]), + .dram_req_addr (per_core_dram_req_addr [i]), + .dram_req_data (per_core_dram_req_data [i]), + .dram_req_tag (per_core_dram_req_tag [i]), + .dram_req_ready (per_core_dram_req_ready[i]), - .dram_rsp_valid (per_core_dram_rsp_valid [i]), - .dram_rsp_data (per_core_dram_rsp_data [i]), - .dram_rsp_tag (per_core_dram_rsp_tag [i]), - .dram_rsp_ready (per_core_dram_rsp_ready [i]), + .dram_rsp_valid (per_core_dram_rsp_valid[i]), + .dram_rsp_data (per_core_dram_rsp_data [i]), + .dram_rsp_tag (per_core_dram_rsp_tag [i]), + .dram_rsp_ready (per_core_dram_rsp_ready[i]), - .snp_req_valid (per_core_snp_req_valid [i]), - .snp_req_addr (per_core_snp_req_addr [i]), - .snp_req_inv (per_core_snp_req_inv [i]), - .snp_req_tag (per_core_snp_req_tag [i]), - .snp_req_ready (per_core_snp_req_ready [i]), + .csr_req_valid (per_core_csr_req_valid [i]), + .csr_req_rw (per_core_csr_req_rw [i]), + .csr_req_addr (per_core_csr_req_addr [i]), + .csr_req_data (per_core_csr_req_data [i]), + .csr_req_ready (per_core_csr_req_ready [i]), - .snp_rsp_valid (per_core_snp_rsp_valid [i]), - .snp_rsp_tag (per_core_snp_rsp_tag [i]), - .snp_rsp_ready (per_core_snp_rsp_ready [i]), + .csr_rsp_valid (per_core_csr_rsp_valid [i]), + .csr_rsp_data (per_core_csr_rsp_data [i]), + .csr_rsp_ready (per_core_csr_rsp_ready [i]), - .csr_io_req_valid (per_core_csr_io_req_valid[i]), - .csr_io_req_rw (per_core_csr_io_req_rw [i]), - .csr_io_req_addr (per_core_csr_io_req_addr [i]), - .csr_io_req_data (per_core_csr_io_req_data [i]), - .csr_io_req_ready (per_core_csr_io_req_ready[i]), - - .csr_io_rsp_valid (per_core_csr_io_rsp_valid[i]), - .csr_io_rsp_data (per_core_csr_io_rsp_data [i]), - .csr_io_rsp_ready (per_core_csr_io_rsp_ready[i]), - - .busy (per_core_busy [i]), - .ebreak (per_core_ebreak [i]) + .busy (per_core_busy [i]), + .ebreak (per_core_ebreak [i]) ); end - VX_csr_io_arb #( + VX_csr_arb #( .NUM_REQS (`NUM_CORES), .DATA_WIDTH (32), .ADDR_WIDTH (12), .BUFFERED_REQ (1), .BUFFERED_RSP (`NUM_CORES >= 4) - ) csr_io_arb ( + ) csr_arb ( .clk (clk), .reset (reset), - .request_id (csr_io_req_coreid), + .request_id (csr_req_coreid), // input requests - .req_valid_in (csr_io_req_valid), - .req_addr_in (csr_io_req_addr), - .req_rw_in (csr_io_req_rw), - .req_data_in (csr_io_req_data), - .req_ready_in (csr_io_req_ready), + .req_valid_in (csr_req_valid), + .req_addr_in (csr_req_addr), + .req_rw_in (csr_req_rw), + .req_data_in (csr_req_data), + .req_ready_in (csr_req_ready), // output request - .req_valid_out (per_core_csr_io_req_valid), - .req_addr_out (per_core_csr_io_req_addr), - .req_rw_out (per_core_csr_io_req_rw), - .req_data_out (per_core_csr_io_req_data), - .req_ready_out (per_core_csr_io_req_ready), + .req_valid_out (per_core_csr_req_valid), + .req_addr_out (per_core_csr_req_addr), + .req_rw_out (per_core_csr_req_rw), + .req_data_out (per_core_csr_req_data), + .req_ready_out (per_core_csr_req_ready), // input responses - .rsp_valid_in (per_core_csr_io_rsp_valid), - .rsp_data_in (per_core_csr_io_rsp_data), - .rsp_ready_in (per_core_csr_io_rsp_ready), + .rsp_valid_in (per_core_csr_rsp_valid), + .rsp_data_in (per_core_csr_rsp_data), + .rsp_ready_in (per_core_csr_rsp_ready), // output response - .rsp_valid_out (csr_io_rsp_valid), - .rsp_data_out (csr_io_rsp_data), - .rsp_ready_out (csr_io_rsp_ready) + .rsp_valid_out (csr_rsp_valid), + .rsp_data_out (csr_rsp_data), + .rsp_ready_out (csr_rsp_ready) ); assign busy = (| per_core_busy); assign ebreak = (| per_core_ebreak); - wire snp_fwd_rsp_valid; - wire [`L2DRAM_ADDR_WIDTH-1:0] snp_fwd_rsp_addr; - wire snp_fwd_rsp_inv; - wire [`L2SNP_TAG_WIDTH-1:0] snp_fwd_rsp_tag; - wire snp_fwd_rsp_ready; - - VX_snp_forwarder #( - .CACHE_ID (`L2CACHE_ID), - .NUM_REQS (`NUM_CORES), - .SRC_ADDR_WIDTH (`L2DRAM_ADDR_WIDTH), - .DST_ADDR_WIDTH (`DDRAM_ADDR_WIDTH), - .SREQ_SIZE (`L2SREQ_SIZE), - .TAG_IN_WIDTH (`L2SNP_TAG_WIDTH), - .TAG_OUT_WIDTH (`DSNP_TAG_WIDTH), - .BUFFERED (`NUM_CORES >= 4) - ) snp_forwarder ( - .clk (clk), - .reset (reset), - - .snp_req_valid (snp_req_valid), - .snp_req_addr (snp_req_addr), - .snp_req_inv (snp_req_inv), - .snp_req_tag (snp_req_tag), - .snp_req_ready (snp_req_ready), - - .snp_rsp_valid (snp_fwd_rsp_valid), - .snp_rsp_addr (snp_fwd_rsp_addr), - .snp_rsp_inv (snp_fwd_rsp_inv), - .snp_rsp_tag (snp_fwd_rsp_tag), - .snp_rsp_ready (snp_fwd_rsp_ready), - - .snp_fwdout_valid (per_core_snp_req_valid), - .snp_fwdout_addr (per_core_snp_req_addr), - .snp_fwdout_inv (per_core_snp_req_inv), - .snp_fwdout_tag (per_core_snp_req_tag), - .snp_fwdout_ready (per_core_snp_req_ready), - - .snp_fwdin_valid (per_core_snp_rsp_valid), - .snp_fwdin_tag (per_core_snp_rsp_tag), - .snp_fwdin_ready (per_core_snp_rsp_ready) - ); - if (`L2_ENABLE) begin `ifdef PERF_ENABLE VX_perf_cache_if perf_l2cache_if(); @@ -233,17 +159,13 @@ module VX_cluster #( .CREQ_SIZE (`L2CREQ_SIZE), .MSHR_SIZE (`L2MSHR_SIZE), .DRSQ_SIZE (`L2DRSQ_SIZE), - .SREQ_SIZE (`L2SREQ_SIZE), .CRSQ_SIZE (`L2CRSQ_SIZE), .DREQ_SIZE (`L2DREQ_SIZE), - .SRSQ_SIZE (`L2SRSQ_SIZE), .DRAM_ENABLE (1), - .FLUSH_ENABLE (1), .WRITE_ENABLE (1), .CORE_TAG_WIDTH (`XDRAM_TAG_WIDTH), .CORE_TAG_ID_BITS (0), - .DRAM_TAG_WIDTH (`L2DRAM_TAG_WIDTH), - .SNP_TAG_WIDTH (`L2SNP_TAG_WIDTH) + .DRAM_TAG_WIDTH (`L2DRAM_TAG_WIDTH) ) l2cache ( `SCOPE_BIND_VX_cluster_l2cache @@ -284,18 +206,6 @@ module VX_cluster #( .dram_rsp_data (dram_rsp_data), .dram_rsp_ready (dram_rsp_ready), - // Snoop request - .snp_req_valid (snp_fwd_rsp_valid), - .snp_req_addr (snp_fwd_rsp_addr), - .snp_req_inv (snp_fwd_rsp_inv), - .snp_req_tag (snp_fwd_rsp_tag), - .snp_req_ready (snp_fwd_rsp_ready), - - // Snoop response - .snp_rsp_valid (snp_rsp_valid), - .snp_rsp_tag (snp_rsp_tag), - .snp_rsp_ready (snp_rsp_ready), - // Miss status `UNUSED_PIN (miss_vec) ); @@ -344,13 +254,6 @@ module VX_cluster #( .rsp_ready_in (dram_rsp_ready) ); - `UNUSED_VAR (snp_fwd_rsp_addr) - `UNUSED_VAR (snp_fwd_rsp_inv) - - assign snp_rsp_valid = snp_fwd_rsp_valid; - assign snp_rsp_tag = snp_fwd_rsp_tag; - assign snp_fwd_rsp_ready = snp_rsp_ready; - end endmodule diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 6f12fc06..7fc6da0e 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -323,16 +323,6 @@ `define DDRSQ_SIZE 4 `endif -// Snoop Request Queue Size -`ifndef DSREQ_SIZE -`define DSREQ_SIZE 4 -`endif - -// Snoop Response Queue Size -`ifndef DSRSQ_SIZE -`define DSRSQ_SIZE 4 -`endif - // SM Configurable Knobs ////////////////////////////////////////////////////// // Size of cache in bytes @@ -392,16 +382,6 @@ `define L2DRSQ_SIZE 4 `endif -// Snoop Request Queue Size -`ifndef L2SREQ_SIZE -`define L2SREQ_SIZE 4 -`endif - -// Snoop Response Queue Size -`ifndef L2SRSQ_SIZE -`define L2SRSQ_SIZE 4 -`endif - // L3cache Configurable Knobs ///////////////////////////////////////////////// // Size of cache in bytes @@ -439,14 +419,4 @@ `define L3DRSQ_SIZE 4 `endif -// Snoop Request Queue Size -`ifndef L3SREQ_SIZE -`define L3SREQ_SIZE 4 -`endif - -// Snoop Response Queue Size -`ifndef L3SRSQ_SIZE -`define L3SRSQ_SIZE 4 -`endif - `endif diff --git a/hw/rtl/VX_core.v b/hw/rtl/VX_core.v index 585c3fc9..19f53d86 100644 --- a/hw/rtl/VX_core.v +++ b/hw/rtl/VX_core.v @@ -24,28 +24,17 @@ module VX_core #( input wire [`XDRAM_TAG_WIDTH-1:0] dram_rsp_tag, output wire dram_rsp_ready, - // Snoop request - input wire snp_req_valid, - input wire [`DDRAM_ADDR_WIDTH-1:0] snp_req_addr, - input wire snp_req_inv, - input wire [`DSNP_TAG_WIDTH-1:0] snp_req_tag, - output wire snp_req_ready, + // CSR request + input wire csr_req_valid, + input wire [11:0] csr_req_addr, + input wire csr_req_rw, + input wire [31:0] csr_req_data, + output wire csr_req_ready, - output wire snp_rsp_valid, - output wire [`DSNP_TAG_WIDTH-1:0] snp_rsp_tag, - input wire snp_rsp_ready, - - // CSR I/O request - input wire csr_io_req_valid, - input wire [11:0] csr_io_req_addr, - input wire csr_io_req_rw, - input wire [31:0] csr_io_req_data, - output wire csr_io_req_ready, - - // CSR I/O response - output wire csr_io_rsp_valid, - output wire [31:0] csr_io_rsp_data, - input wire csr_io_rsp_ready, + // CSR response + output wire csr_rsp_valid, + output wire [31:0] csr_rsp_data, + input wire csr_rsp_ready, // Status output wire busy, @@ -81,27 +70,6 @@ module VX_core #( //-- - VX_cache_snp_req_if #( - .DRAM_ADDR_WIDTH(`DDRAM_ADDR_WIDTH), - .SNP_TAG_WIDTH(`DSNP_TAG_WIDTH) - ) dcache_snp_req_if(); - - VX_cache_snp_rsp_if #( - .SNP_TAG_WIDTH(`DSNP_TAG_WIDTH) - ) dcache_snp_rsp_if(); - - assign dcache_snp_req_if.valid = snp_req_valid; - assign dcache_snp_req_if.addr = snp_req_addr; - assign dcache_snp_req_if.invalidate = snp_req_inv; - assign dcache_snp_req_if.tag = snp_req_tag; - assign snp_req_ready = dcache_snp_req_if.ready; - - assign snp_rsp_valid = dcache_snp_rsp_if.valid; - assign snp_rsp_tag = dcache_snp_rsp_if.tag; - assign dcache_snp_rsp_if.ready = snp_rsp_ready; - - //-- - VX_cache_core_req_if #( .NUM_REQS(`DNUM_REQUESTS), .WORD_SIZE(`DWORD_SIZE), @@ -135,7 +103,7 @@ module VX_core #( ) pipeline ( `SCOPE_BIND_VX_core_pipeline `ifdef PERF_ENABLE - .perf_memsys_if (perf_memsys_if), + .perf_memsys_if (perf_memsys_if), `endif .clk(clk), @@ -171,17 +139,17 @@ module VX_core #( .icache_rsp_tag (core_icache_rsp_if.tag), .icache_rsp_ready (core_icache_rsp_if.ready), - // CSR I/O request - .csr_io_req_valid (csr_io_req_valid), - .csr_io_req_rw (csr_io_req_rw), - .csr_io_req_addr (csr_io_req_addr), - .csr_io_req_data (csr_io_req_data), - .csr_io_req_ready (csr_io_req_ready), + // CSR request + .csr_req_valid (csr_req_valid), + .csr_req_rw (csr_req_rw), + .csr_req_addr (csr_req_addr), + .csr_req_data (csr_req_data), + .csr_req_ready (csr_req_ready), - // CSR I/O response - .csr_io_rsp_valid (csr_io_rsp_valid), - .csr_io_rsp_data (csr_io_rsp_data), - .csr_io_rsp_ready (csr_io_rsp_ready), + // CSR response + .csr_rsp_valid (csr_rsp_valid), + .csr_rsp_data (csr_rsp_data), + .csr_rsp_ready (csr_rsp_ready), // Status .busy(busy), @@ -195,7 +163,7 @@ module VX_core #( ) mem_unit ( `SCOPE_BIND_VX_core_mem_unit `ifdef PERF_ENABLE - .perf_memsys_if (perf_memsys_if), + .perf_memsys_if (perf_memsys_if), `endif .clk (clk), @@ -209,10 +177,6 @@ module VX_core #( .core_icache_req_if (core_icache_req_if), .core_icache_rsp_if (core_icache_rsp_if), - // Dcache Snoop - .dcache_snp_req_if (dcache_snp_req_if), - .dcache_snp_rsp_if (dcache_snp_rsp_if), - // DRAM .dram_req_if (dram_req_if), .dram_rsp_if (dram_rsp_if) diff --git a/hw/rtl/VX_csr_arb.v b/hw/rtl/VX_csr_arb.v index f90270cb..cd5e0079 100644 --- a/hw/rtl/VX_csr_arb.v +++ b/hw/rtl/VX_csr_arb.v @@ -1,70 +1,81 @@ `include "VX_define.vh" -module VX_csr_arb ( - input wire clk, - input wire reset, +module VX_csr_arb #( + parameter NUM_REQS = 1, + parameter DATA_WIDTH = 1, + parameter BUFFERED_REQ = 0, + parameter BUFFERED_RSP = 0, + + parameter DATA_SIZE = (DATA_WIDTH / 8), + parameter ADDR_WIDTH = 32 - `CLOG2(DATA_SIZE), + parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS) +) ( + input wire clk, + input wire reset, + + input wire [LOG_NUM_REQS-1:0] request_id, - // bus select - input wire select_io_rsp, - - // input requets - VX_csr_req_if csr_core_req_if, - VX_csr_io_req_if csr_io_req_if, + // input requests + input wire req_valid_in, + input wire [ADDR_WIDTH-1:0] req_addr_in, + input wire req_rw_in, + input wire [DATA_WIDTH-1:0] req_data_in, + output wire req_ready_in, // output request - VX_csr_pipe_req_if csr_pipe_req_if, + output wire [NUM_REQS-1:0] req_valid_out, + output wire [NUM_REQS-1:0][ADDR_WIDTH-1:0] req_addr_out, + output wire [NUM_REQS-1:0] req_rw_out, + output wire [NUM_REQS-1:0][DATA_WIDTH-1:0] req_data_out, + input wire [NUM_REQS-1:0] req_ready_out, // input response - VX_commit_if csr_pipe_rsp_if, + input wire [NUM_REQS-1:0] rsp_valid_in, + input wire [NUM_REQS-1:0][DATA_WIDTH-1:0] rsp_data_in, + output wire [NUM_REQS-1:0] rsp_ready_in, - // outputs responses - VX_commit_if csr_commit_if, - VX_csr_io_rsp_if csr_io_rsp_if + // output response + output wire rsp_valid_out, + output wire [DATA_WIDTH-1:0] rsp_data_out, + input wire rsp_ready_out ); - `UNUSED_VAR (clk) - `UNUSED_VAR (reset) + localparam REQ_DATAW = ADDR_WIDTH + 1 + DATA_WIDTH; + localparam RSP_DATAW = DATA_WIDTH; - wire [31:0] csr_core_req_mask = csr_core_req_if.rs2_is_imm ? 32'(csr_core_req_if.rs1) : csr_core_req_if.rs1_data; + wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_merged_data_out; + for (genvar i = 0; i < NUM_REQS; i++) begin + assign {req_addr_out[i], req_rw_out[i], req_data_out[i]} = req_merged_data_out[i]; + end - // requests - assign csr_pipe_req_if.valid = csr_core_req_if.valid || csr_io_req_if.valid; - assign csr_pipe_req_if.wid = csr_core_req_if.wid; - assign csr_pipe_req_if.tmask = csr_core_req_if.tmask; - assign csr_pipe_req_if.PC = csr_core_req_if.PC; - assign csr_pipe_req_if.op_type = csr_core_req_if.valid ? csr_core_req_if.op_type : (csr_io_req_if.rw ? `CSR_RW : `CSR_RS); - assign csr_pipe_req_if.csr_addr = csr_core_req_if.valid ? csr_core_req_if.csr_addr : csr_io_req_if.addr; - assign csr_pipe_req_if.csr_mask = csr_core_req_if.valid ? csr_core_req_mask : (csr_io_req_if.rw ? csr_io_req_if.data : 32'b0); - assign csr_pipe_req_if.rd = csr_core_req_if.rd; - assign csr_pipe_req_if.wb = csr_core_req_if.wb; - assign csr_pipe_req_if.is_io = !csr_core_req_if.valid; - - // core always takes priority over IO bus - assign csr_core_req_if.ready = csr_pipe_req_if.ready; - assign csr_io_req_if.ready = csr_pipe_req_if.ready && !csr_core_req_if.valid; - - // responses - wire csr_io_rsp_ready; - VX_skid_buffer #( - .DATAW (32) - ) csr_io_out_buffer ( + VX_stream_demux #( + .NUM_REQS (NUM_REQS), + .DATAW (REQ_DATAW), + .BUFFERED (BUFFERED_REQ) + ) req_demux ( .clk (clk), .reset (reset), - .valid_in (csr_pipe_rsp_if.valid & select_io_rsp), - .data_in (csr_pipe_rsp_if.data[0]), - .ready_in (csr_io_rsp_ready), - .valid_out (csr_io_rsp_if.valid), - .data_out (csr_io_rsp_if.data), - .ready_out (csr_io_rsp_if.ready) + .sel (request_id), + .valid_in (req_valid_in), + .data_in ({req_addr_in, req_rw_in, req_data_in}), + .ready_in (req_ready_in), + .valid_out (req_valid_out), + .data_out (req_merged_data_out), + .ready_out (req_ready_out) ); - assign csr_commit_if.valid = csr_pipe_rsp_if.valid & ~select_io_rsp; - assign csr_commit_if.wid = csr_pipe_rsp_if.wid; - assign csr_commit_if.tmask = csr_pipe_rsp_if.tmask; - assign csr_commit_if.PC = csr_pipe_rsp_if.PC; - assign csr_commit_if.rd = csr_pipe_rsp_if.rd; - assign csr_commit_if.wb = csr_pipe_rsp_if.wb; - assign csr_commit_if.data = csr_pipe_rsp_if.data; - - assign csr_pipe_rsp_if.ready = select_io_rsp ? csr_io_rsp_ready : csr_commit_if.ready; + VX_stream_arbiter #( + .NUM_REQS (NUM_REQS), + .DATAW (RSP_DATAW), + .BUFFERED (BUFFERED_RSP) + ) rsp_arb ( + .clk (clk), + .reset (reset), + .valid_in (rsp_valid_in), + .data_in (rsp_data_in), + .ready_in (rsp_ready_in), + .valid_out (rsp_valid_out), + .data_out (rsp_data_out), + .ready_out (rsp_ready_out) + ); endmodule diff --git a/hw/rtl/VX_csr_io_arb.v b/hw/rtl/VX_csr_io_arb.v index b70ff3ad..b43afc44 100644 --- a/hw/rtl/VX_csr_io_arb.v +++ b/hw/rtl/VX_csr_io_arb.v @@ -1,81 +1,70 @@ `include "VX_define.vh" -module VX_csr_io_arb #( - parameter NUM_REQS = 1, - parameter DATA_WIDTH = 1, - parameter BUFFERED_REQ = 0, - parameter BUFFERED_RSP = 0, - - parameter DATA_SIZE = (DATA_WIDTH / 8), - parameter ADDR_WIDTH = 32 - `CLOG2(DATA_SIZE), - parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS) -) ( - input wire clk, - input wire reset, - - input wire [LOG_NUM_REQS-1:0] request_id, +module VX_csr_io_arb ( + input wire clk, + input wire reset, - // input requests - input wire req_valid_in, - input wire [ADDR_WIDTH-1:0] req_addr_in, - input wire req_rw_in, - input wire [DATA_WIDTH-1:0] req_data_in, - output wire req_ready_in, + // bus select + input wire select_io_rsp, + + // input requets + VX_csr_req_if csr_core_req_if, + VX_csr_io_req_if csr_io_req_if, // output request - output wire [NUM_REQS-1:0] req_valid_out, - output wire [NUM_REQS-1:0][ADDR_WIDTH-1:0] req_addr_out, - output wire [NUM_REQS-1:0] req_rw_out, - output wire [NUM_REQS-1:0][DATA_WIDTH-1:0] req_data_out, - input wire [NUM_REQS-1:0] req_ready_out, + VX_csr_pipe_req_if csr_pipe_req_if, // input response - input wire [NUM_REQS-1:0] rsp_valid_in, - input wire [NUM_REQS-1:0][DATA_WIDTH-1:0] rsp_data_in, - output wire [NUM_REQS-1:0] rsp_ready_in, + VX_commit_if csr_pipe_rsp_if, - // output response - output wire rsp_valid_out, - output wire [DATA_WIDTH-1:0] rsp_data_out, - input wire rsp_ready_out + // outputs responses + VX_commit_if csr_commit_if, + VX_csr_io_rsp_if csr_io_rsp_if ); - localparam REQ_DATAW = ADDR_WIDTH + 1 + DATA_WIDTH; - localparam RSP_DATAW = DATA_WIDTH; + `UNUSED_VAR (clk) + `UNUSED_VAR (reset) - wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_merged_data_out; - for (genvar i = 0; i < NUM_REQS; i++) begin - assign {req_addr_out[i], req_rw_out[i], req_data_out[i]} = req_merged_data_out[i]; - end + wire [31:0] csr_core_req_mask = csr_core_req_if.rs2_is_imm ? 32'(csr_core_req_if.rs1) : csr_core_req_if.rs1_data; - VX_stream_demux #( - .NUM_REQS (NUM_REQS), - .DATAW (REQ_DATAW), - .BUFFERED (BUFFERED_REQ) - ) req_demux ( + // requests + assign csr_pipe_req_if.valid = csr_core_req_if.valid || csr_io_req_if.valid; + assign csr_pipe_req_if.wid = csr_core_req_if.wid; + assign csr_pipe_req_if.tmask = csr_core_req_if.tmask; + assign csr_pipe_req_if.PC = csr_core_req_if.PC; + assign csr_pipe_req_if.op_type = csr_core_req_if.valid ? csr_core_req_if.op_type : (csr_io_req_if.rw ? `CSR_RW : `CSR_RS); + assign csr_pipe_req_if.csr_addr = csr_core_req_if.valid ? csr_core_req_if.csr_addr : csr_io_req_if.addr; + assign csr_pipe_req_if.csr_mask = csr_core_req_if.valid ? csr_core_req_mask : (csr_io_req_if.rw ? csr_io_req_if.data : 32'b0); + assign csr_pipe_req_if.rd = csr_core_req_if.rd; + assign csr_pipe_req_if.wb = csr_core_req_if.wb; + assign csr_pipe_req_if.is_io = !csr_core_req_if.valid; + + // core always takes priority over IO bus + assign csr_core_req_if.ready = csr_pipe_req_if.ready; + assign csr_io_req_if.ready = csr_pipe_req_if.ready && !csr_core_req_if.valid; + + // responses + wire csr_io_rsp_ready; + VX_skid_buffer #( + .DATAW (32) + ) csr_io_out_buffer ( .clk (clk), .reset (reset), - .sel (request_id), - .valid_in (req_valid_in), - .data_in ({req_addr_in, req_rw_in, req_data_in}), - .ready_in (req_ready_in), - .valid_out (req_valid_out), - .data_out (req_merged_data_out), - .ready_out (req_ready_out) + .valid_in (csr_pipe_rsp_if.valid & select_io_rsp), + .data_in (csr_pipe_rsp_if.data[0]), + .ready_in (csr_io_rsp_ready), + .valid_out (csr_io_rsp_if.valid), + .data_out (csr_io_rsp_if.data), + .ready_out (csr_io_rsp_if.ready) ); - VX_stream_arbiter #( - .NUM_REQS (NUM_REQS), - .DATAW (RSP_DATAW), - .BUFFERED (BUFFERED_RSP) - ) rsp_arb ( - .clk (clk), - .reset (reset), - .valid_in (rsp_valid_in), - .data_in (rsp_data_in), - .ready_in (rsp_ready_in), - .valid_out (rsp_valid_out), - .data_out (rsp_data_out), - .ready_out (rsp_ready_out) - ); + assign csr_commit_if.valid = csr_pipe_rsp_if.valid & ~select_io_rsp; + assign csr_commit_if.wid = csr_pipe_rsp_if.wid; + assign csr_commit_if.tmask = csr_pipe_rsp_if.tmask; + assign csr_commit_if.PC = csr_pipe_rsp_if.PC; + assign csr_commit_if.rd = csr_pipe_rsp_if.rd; + assign csr_commit_if.wb = csr_pipe_rsp_if.wb; + assign csr_commit_if.data = csr_pipe_rsp_if.data; + + assign csr_pipe_rsp_if.ready = select_io_rsp ? csr_io_rsp_ready : csr_commit_if.ready; endmodule \ No newline at end of file diff --git a/hw/rtl/VX_csr_unit.v b/hw/rtl/VX_csr_unit.v index 3b007046..311034ab 100644 --- a/hw/rtl/VX_csr_unit.v +++ b/hw/rtl/VX_csr_unit.v @@ -29,7 +29,7 @@ module VX_csr_unit #( wire select_io_rsp; - VX_csr_arb csr_arb ( + VX_csr_io_arb csr_io_arb ( .clk (clk), .reset (reset), diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index d00c8e2c..d611de69 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -313,9 +313,6 @@ // Core request size `define DNUM_REQUESTS `NUM_THREADS -// Snoop request tag bits -`define DSNP_TAG_WIDTH ((`NUM_CORES > 1) ? `LOG2UP(`L2SREQ_SIZE) : `L2SNP_TAG_WIDTH) - ////////////////////////// SM Configurable Knobs ////////////////////////////// // Cache ID @@ -365,9 +362,6 @@ // DRAM request tag bits `define L2DRAM_TAG_WIDTH (`L2_ENABLE ? `L2DRAM_ADDR_WIDTH : (`XDRAM_TAG_WIDTH+`CLOG2(`NUM_CORES))) -// Snoop request tag bits -`define L2SNP_TAG_WIDTH ((`NUM_CLUSTERS > 1) ? `LOG2UP(`L3SREQ_SIZE) : `L3SNP_TAG_WIDTH) - ////////////////////////// L3cache Configurable Knobs ///////////////////////// // Cache ID @@ -394,16 +388,12 @@ // DRAM request tag bits `define L3DRAM_TAG_WIDTH (`L3_ENABLE ? `L3DRAM_ADDR_WIDTH : (`L2DRAM_TAG_WIDTH+`CLOG2(`NUM_CLUSTERS))) -// Snoop request tag bits -`define L3SNP_TAG_WIDTH `VX_SNP_TAG_WIDTH - /////////////////////////////////////////////////////////////////////////////// `define VX_DRAM_BYTEEN_WIDTH `L3DRAM_BYTEEN_WIDTH `define VX_DRAM_ADDR_WIDTH `L3DRAM_ADDR_WIDTH `define VX_DRAM_LINE_WIDTH `L3DRAM_LINE_WIDTH `define VX_DRAM_TAG_WIDTH `L3DRAM_TAG_WIDTH -`define VX_SNP_TAG_WIDTH 16 `define VX_CORE_TAG_WIDTH `L3CORE_TAG_WIDTH `define VX_CSR_ID_WIDTH `LOG2UP(`NUM_CLUSTERS * `NUM_CORES) diff --git a/hw/rtl/VX_mem_unit.v b/hw/rtl/VX_mem_unit.v index cc8e4864..a2273158 100644 --- a/hw/rtl/VX_mem_unit.v +++ b/hw/rtl/VX_mem_unit.v @@ -20,10 +20,6 @@ module VX_mem_unit # ( VX_cache_core_req_if core_icache_req_if, VX_cache_core_rsp_if core_icache_rsp_if, - // Dcache Snoop - VX_cache_snp_req_if dcache_snp_req_if, - VX_cache_snp_rsp_if dcache_snp_rsp_if, - // DRAM VX_cache_dram_req_if dram_req_if, VX_cache_dram_rsp_if dram_rsp_if @@ -95,12 +91,9 @@ module VX_mem_unit # ( .CREQ_SIZE (`ICREQ_SIZE), .MSHR_SIZE (`IMSHR_SIZE), .DRSQ_SIZE (`IDRSQ_SIZE), - .SREQ_SIZE (1), .CRSQ_SIZE (`ICRSQ_SIZE), .DREQ_SIZE (`IDREQ_SIZE), - .SRSQ_SIZE (1), .DRAM_ENABLE (1), - .FLUSH_ENABLE (0), .WRITE_ENABLE (0), .CORE_TAG_WIDTH (`ICORE_TAG_WIDTH), .CORE_TAG_ID_BITS (`ICORE_TAG_ID_BITS), @@ -145,18 +138,6 @@ module VX_mem_unit # ( .dram_rsp_tag (icache_dram_rsp_if.tag), .dram_rsp_ready (icache_dram_rsp_if.ready), - // Snoop request - .snp_req_valid (1'b0), - .snp_req_addr (0), - .snp_req_inv (1'b0), - .snp_req_tag (0), - `UNUSED_PIN (snp_req_ready), - - // Snoop response - `UNUSED_PIN (snp_rsp_valid), - `UNUSED_PIN (snp_rsp_tag), - .snp_rsp_ready (1'b0), - // Miss status `UNUSED_PIN (miss_vec) ); @@ -171,17 +152,13 @@ module VX_mem_unit # ( .CREQ_SIZE (`DCREQ_SIZE), .MSHR_SIZE (`DMSHR_SIZE), .DRSQ_SIZE (`DDRSQ_SIZE), - .SREQ_SIZE (`DSREQ_SIZE), .CRSQ_SIZE (`DCRSQ_SIZE), - .DREQ_SIZE (`DDREQ_SIZE), - .SRSQ_SIZE (`DSRSQ_SIZE), + .DREQ_SIZE (`DDREQ_SIZE), .DRAM_ENABLE (1), - .FLUSH_ENABLE (1), .WRITE_ENABLE (1), .CORE_TAG_WIDTH (`DCORE_TAG_WIDTH), .CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS), - .DRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH), - .SNP_TAG_WIDTH (`DSNP_TAG_WIDTH) + .DRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH) ) dcache ( `SCOPE_BIND_VX_mem_unit_dcache @@ -222,18 +199,6 @@ module VX_mem_unit # ( .dram_rsp_tag (dcache_dram_rsp_if.tag), .dram_rsp_ready (dcache_dram_rsp_if.ready), - // Snoop request - .snp_req_valid (dcache_snp_req_if.valid), - .snp_req_addr (dcache_snp_req_if.addr), - .snp_req_inv (dcache_snp_req_if.invalidate), - .snp_req_tag (dcache_snp_req_if.tag), - .snp_req_ready (dcache_snp_req_if.ready), - - // Snoop response - .snp_rsp_valid (dcache_snp_rsp_if.valid), - .snp_rsp_tag (dcache_snp_rsp_if.tag), - .snp_rsp_ready (dcache_snp_rsp_if.ready), - // Miss status `UNUSED_PIN (miss_vec) ); @@ -250,12 +215,9 @@ module VX_mem_unit # ( .CREQ_SIZE (`SCREQ_SIZE), .MSHR_SIZE (8), .DRSQ_SIZE (1), - .SREQ_SIZE (1), .CRSQ_SIZE (`SCRSQ_SIZE), .DREQ_SIZE (1), - .SRSQ_SIZE (1), .DRAM_ENABLE (0), - .FLUSH_ENABLE (0), .WRITE_ENABLE (1), .CORE_TAG_WIDTH (`DCORE_TAG_WIDTH), .CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS), @@ -300,18 +262,6 @@ module VX_mem_unit # ( .dram_rsp_tag (0), `UNUSED_PIN (dram_rsp_ready), - // Snoop request - .snp_req_valid (1'b0), - .snp_req_addr (0), - .snp_req_inv (0), - .snp_req_tag (0), - `UNUSED_PIN (snp_req_ready), - - // Snoop response - `UNUSED_PIN (snp_rsp_valid), - `UNUSED_PIN (snp_rsp_tag), - .snp_rsp_ready (1'b0), - // Miss status `UNUSED_PIN (miss_vec) ); diff --git a/hw/rtl/VX_pipeline.v b/hw/rtl/VX_pipeline.v index 47ea5876..777e5bbe 100644 --- a/hw/rtl/VX_pipeline.v +++ b/hw/rtl/VX_pipeline.v @@ -40,16 +40,16 @@ module VX_pipeline #( output wire icache_rsp_ready, // CSR I/O Request - input wire csr_io_req_valid, - input wire[11:0] csr_io_req_addr, - input wire csr_io_req_rw, - input wire[31:0] csr_io_req_data, - output wire csr_io_req_ready, + input wire csr_req_valid, + input wire[11:0] csr_req_addr, + input wire csr_req_rw, + input wire[31:0] csr_req_data, + output wire csr_req_ready, // CSR I/O Response - output wire csr_io_rsp_valid, - output wire[31:0] csr_io_rsp_data, - input wire csr_io_rsp_ready, + output wire csr_rsp_valid, + output wire[31:0] csr_rsp_data, + input wire csr_rsp_ready, `ifdef PERF_ENABLE VX_perf_memsys_if perf_memsys_if, @@ -134,22 +134,20 @@ module VX_pipeline #( // VX_csr_io_req_if csr_io_req_if(); - - assign csr_io_req_if.valid = csr_io_req_valid; - assign csr_io_req_if.rw = csr_io_req_rw; - assign csr_io_req_if.addr = csr_io_req_addr; - assign csr_io_req_if.data = csr_io_req_data; - assign csr_io_req_ready = csr_io_req_if.ready; + assign csr_io_req_if.valid = csr_req_valid; + assign csr_io_req_if.rw = csr_req_rw; + assign csr_io_req_if.addr = csr_req_addr; + assign csr_io_req_if.data = csr_req_data; + assign csr_req_ready = csr_io_req_if.ready; // // CSR IO response // VX_csr_io_rsp_if csr_io_rsp_if(); - - assign csr_io_rsp_valid = csr_io_rsp_if.valid; - assign csr_io_rsp_data = csr_io_rsp_if.data; - assign csr_io_rsp_if.ready = csr_io_rsp_ready; + assign csr_rsp_valid = csr_io_rsp_if.valid; + assign csr_rsp_data = csr_io_rsp_if.data; + assign csr_io_rsp_if.ready = csr_rsp_ready; /////////////////////////////////////////////////////////////////////////// diff --git a/hw/rtl/Vortex.v b/hw/rtl/Vortex.v index 92a33ef9..41f030d0 100644 --- a/hw/rtl/Vortex.v +++ b/hw/rtl/Vortex.v @@ -22,30 +22,18 @@ module Vortex ( input wire [`VX_DRAM_TAG_WIDTH-1:0] dram_rsp_tag, output wire dram_rsp_ready, - // Snoop request - input wire snp_req_valid, - input wire [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_addr, - input wire snp_req_inv, - input wire [`VX_SNP_TAG_WIDTH-1:0] snp_req_tag, - output wire snp_req_ready, - - // Snoop response - output wire snp_rsp_valid, - output wire [`VX_SNP_TAG_WIDTH-1:0] snp_rsp_tag, - input wire snp_rsp_ready, - // CSR Request - input wire csr_io_req_valid, - input wire [`VX_CSR_ID_WIDTH-1:0] csr_io_req_coreid, - input wire [11:0] csr_io_req_addr, - input wire csr_io_req_rw, - input wire [31:0] csr_io_req_data, - output wire csr_io_req_ready, + input wire csr_req_valid, + input wire [`VX_CSR_ID_WIDTH-1:0] csr_req_coreid, + input wire [11:0] csr_req_addr, + input wire csr_req_rw, + input wire [31:0] csr_req_data, + output wire csr_req_ready, // CSR Response - output wire csr_io_rsp_valid, - output wire [31:0] csr_io_rsp_data, - input wire csr_io_rsp_ready, + output wire csr_rsp_valid, + output wire [31:0] csr_rsp_data, + input wire csr_rsp_ready, // Status output wire busy, @@ -64,31 +52,21 @@ module Vortex ( wire [`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_rsp_tag; wire [`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_ready; - wire [`NUM_CLUSTERS-1:0] per_cluster_snp_req_valid; - wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] per_cluster_snp_req_addr; - wire [`NUM_CLUSTERS-1:0] per_cluster_snp_req_inv; - wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_req_tag; - wire [`NUM_CLUSTERS-1:0] per_cluster_snp_req_ready; + wire [`NUM_CLUSTERS-1:0] per_cluster_csr_req_valid; + wire [`NUM_CLUSTERS-1:0][11:0] per_cluster_csr_req_addr; + wire [`NUM_CLUSTERS-1:0] per_cluster_csr_req_rw; + wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_csr_req_data; + wire [`NUM_CLUSTERS-1:0] per_cluster_csr_req_ready; - wire [`NUM_CLUSTERS-1:0] per_cluster_snp_rsp_valid; - wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_rsp_tag; - wire [`NUM_CLUSTERS-1:0] per_cluster_snp_rsp_ready; - - wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_req_valid; - wire [`NUM_CLUSTERS-1:0][11:0] per_cluster_csr_io_req_addr; - wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_req_rw; - wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_csr_io_req_data; - wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_req_ready; - - wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_rsp_valid; - wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_csr_io_rsp_data; - wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_rsp_ready; + wire [`NUM_CLUSTERS-1:0] per_cluster_csr_rsp_valid; + wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_csr_rsp_data; + wire [`NUM_CLUSTERS-1:0] per_cluster_csr_rsp_ready; wire [`NUM_CLUSTERS-1:0] per_cluster_busy; wire [`NUM_CLUSTERS-1:0] per_cluster_ebreak; - wire [`LOG2UP(`NUM_CLUSTERS)-1:0] csr_io_cluster_id = `LOG2UP(`NUM_CLUSTERS)'(csr_io_req_coreid >> `CLOG2(`NUM_CORES)); - wire [`NC_BITS-1:0] csr_io_core_id = `NC_BITS'(csr_io_req_coreid); + wire [`LOG2UP(`NUM_CLUSTERS)-1:0] csr_cluster_id = `LOG2UP(`NUM_CLUSTERS)'(csr_req_coreid >> `CLOG2(`NUM_CORES)); + wire [`NC_BITS-1:0] csr_core_id = `NC_BITS'(csr_req_coreid); for (genvar i = 0; i < `NUM_CLUSTERS; i++) begin VX_cluster #( @@ -96,130 +74,78 @@ module Vortex ( ) cluster ( `SCOPE_BIND_Vortex_cluster(i) - .clk (clk), - .reset (reset), + .clk (clk), + .reset (reset), - .dram_req_valid (per_cluster_dram_req_valid [i]), - .dram_req_rw (per_cluster_dram_req_rw [i]), - .dram_req_byteen (per_cluster_dram_req_byteen[i]), - .dram_req_addr (per_cluster_dram_req_addr [i]), - .dram_req_data (per_cluster_dram_req_data [i]), - .dram_req_tag (per_cluster_dram_req_tag [i]), - .dram_req_ready (per_cluster_dram_req_ready [i]), + .dram_req_valid (per_cluster_dram_req_valid [i]), + .dram_req_rw (per_cluster_dram_req_rw [i]), + .dram_req_byteen(per_cluster_dram_req_byteen[i]), + .dram_req_addr (per_cluster_dram_req_addr [i]), + .dram_req_data (per_cluster_dram_req_data [i]), + .dram_req_tag (per_cluster_dram_req_tag [i]), + .dram_req_ready (per_cluster_dram_req_ready [i]), - .dram_rsp_valid (per_cluster_dram_rsp_valid [i]), - .dram_rsp_data (per_cluster_dram_rsp_data [i]), - .dram_rsp_tag (per_cluster_dram_rsp_tag [i]), - .dram_rsp_ready (per_cluster_dram_rsp_ready [i]), + .dram_rsp_valid (per_cluster_dram_rsp_valid [i]), + .dram_rsp_data (per_cluster_dram_rsp_data [i]), + .dram_rsp_tag (per_cluster_dram_rsp_tag [i]), + .dram_rsp_ready (per_cluster_dram_rsp_ready [i]), - .snp_req_valid (per_cluster_snp_req_valid [i]), - .snp_req_addr (per_cluster_snp_req_addr [i]), - .snp_req_inv (per_cluster_snp_req_inv [i]), - .snp_req_tag (per_cluster_snp_req_tag [i]), - .snp_req_ready (per_cluster_snp_req_ready [i]), + .csr_req_valid (per_cluster_csr_req_valid [i]), + .csr_req_coreid (csr_core_id), + .csr_req_rw (per_cluster_csr_req_rw [i]), + .csr_req_addr (per_cluster_csr_req_addr [i]), + .csr_req_data (per_cluster_csr_req_data [i]), + .csr_req_ready (per_cluster_csr_req_ready [i]), - .snp_rsp_valid (per_cluster_snp_rsp_valid [i]), - .snp_rsp_tag (per_cluster_snp_rsp_tag [i]), - .snp_rsp_ready (per_cluster_snp_rsp_ready [i]), + .csr_rsp_valid (per_cluster_csr_rsp_valid [i]), + .csr_rsp_data (per_cluster_csr_rsp_data [i]), + .csr_rsp_ready (per_cluster_csr_rsp_ready [i]), - .csr_io_req_valid (per_cluster_csr_io_req_valid[i]), - .csr_io_req_coreid (csr_io_core_id), - .csr_io_req_rw (per_cluster_csr_io_req_rw [i]), - .csr_io_req_addr (per_cluster_csr_io_req_addr[i]), - .csr_io_req_data (per_cluster_csr_io_req_data[i]), - .csr_io_req_ready (per_cluster_csr_io_req_ready[i]), - - .csr_io_rsp_valid (per_cluster_csr_io_rsp_valid[i]), - .csr_io_rsp_data (per_cluster_csr_io_rsp_data[i]), - .csr_io_rsp_ready (per_cluster_csr_io_rsp_ready[i]), - - .busy (per_cluster_busy [i]), - .ebreak (per_cluster_ebreak [i]) + .busy (per_cluster_busy [i]), + .ebreak (per_cluster_ebreak [i]) ); end - VX_csr_io_arb #( + VX_csr_arb #( .NUM_REQS (`NUM_CLUSTERS), .DATA_WIDTH (32), .ADDR_WIDTH (12), .BUFFERED_REQ (`NUM_CLUSTERS >= 4), .BUFFERED_RSP (1) - ) csr_io_arb ( + ) csr_arb ( .clk (clk), .reset (reset), - .request_id (csr_io_cluster_id), + .request_id (csr_cluster_id), // input requests - .req_valid_in (csr_io_req_valid), - .req_addr_in (csr_io_req_addr), - .req_rw_in (csr_io_req_rw), - .req_data_in (csr_io_req_data), - .req_ready_in (csr_io_req_ready), + .req_valid_in (csr_req_valid), + .req_addr_in (csr_req_addr), + .req_rw_in (csr_req_rw), + .req_data_in (csr_req_data), + .req_ready_in (csr_req_ready), // output request - .req_valid_out (per_cluster_csr_io_req_valid), - .req_addr_out (per_cluster_csr_io_req_addr), - .req_rw_out (per_cluster_csr_io_req_rw), - .req_data_out (per_cluster_csr_io_req_data), - .req_ready_out (per_cluster_csr_io_req_ready), + .req_valid_out (per_cluster_csr_req_valid), + .req_addr_out (per_cluster_csr_req_addr), + .req_rw_out (per_cluster_csr_req_rw), + .req_data_out (per_cluster_csr_req_data), + .req_ready_out (per_cluster_csr_req_ready), // input responses - .rsp_valid_in (per_cluster_csr_io_rsp_valid), - .rsp_data_in (per_cluster_csr_io_rsp_data), - .rsp_ready_in (per_cluster_csr_io_rsp_ready), + .rsp_valid_in (per_cluster_csr_rsp_valid), + .rsp_data_in (per_cluster_csr_rsp_data), + .rsp_ready_in (per_cluster_csr_rsp_ready), // output response - .rsp_valid_out (csr_io_rsp_valid), - .rsp_data_out (csr_io_rsp_data), - .rsp_ready_out (csr_io_rsp_ready) + .rsp_valid_out (csr_rsp_valid), + .rsp_data_out (csr_rsp_data), + .rsp_ready_out (csr_rsp_ready) ); assign busy = (| per_cluster_busy); assign ebreak = (| per_cluster_ebreak); - wire snp_fwd_rsp_valid; - wire [`L3DRAM_ADDR_WIDTH-1:0] snp_fwd_rsp_addr; - wire snp_fwd_rsp_inv; - wire [`L3SNP_TAG_WIDTH-1:0] snp_fwd_rsp_tag; - wire snp_fwd_rsp_ready; - - VX_snp_forwarder #( - .CACHE_ID (`L3CACHE_ID), - .NUM_REQS (`NUM_CLUSTERS), - .SRC_ADDR_WIDTH (`L3DRAM_ADDR_WIDTH), - .DST_ADDR_WIDTH (`L2DRAM_ADDR_WIDTH), - .TAG_IN_WIDTH (`L3SNP_TAG_WIDTH), - .TAG_OUT_WIDTH (`L2SNP_TAG_WIDTH), - .SREQ_SIZE (`L3SREQ_SIZE), - .BUFFERED (`NUM_CLUSTERS >= 4) - ) snp_forwarder ( - .clk (clk), - .reset (reset), - - .snp_req_valid (snp_req_valid), - .snp_req_addr (snp_req_addr), - .snp_req_inv (snp_req_inv), - .snp_req_tag (snp_req_tag), - .snp_req_ready (snp_req_ready), - - .snp_rsp_valid (snp_fwd_rsp_valid), - .snp_rsp_addr (snp_fwd_rsp_addr), - .snp_rsp_inv (snp_fwd_rsp_inv), - .snp_rsp_tag (snp_fwd_rsp_tag), - .snp_rsp_ready (snp_fwd_rsp_ready), - - .snp_fwdout_valid (per_cluster_snp_req_valid), - .snp_fwdout_addr (per_cluster_snp_req_addr), - .snp_fwdout_inv (per_cluster_snp_req_inv), - .snp_fwdout_tag (per_cluster_snp_req_tag), - .snp_fwdout_ready (per_cluster_snp_req_ready), - - .snp_fwdin_valid (per_cluster_snp_rsp_valid), - .snp_fwdin_tag (per_cluster_snp_rsp_tag), - .snp_fwdin_ready (per_cluster_snp_rsp_ready) - ); - if (`L3_ENABLE) begin `ifdef PERF_ENABLE VX_perf_cache_if perf_l3cache_if(); @@ -235,17 +161,13 @@ module Vortex ( .CREQ_SIZE (`L3CREQ_SIZE), .MSHR_SIZE (`L3MSHR_SIZE), .DRSQ_SIZE (`L3DRSQ_SIZE), - .SREQ_SIZE (`L3SREQ_SIZE), .CRSQ_SIZE (`L3CRSQ_SIZE), .DREQ_SIZE (`L3DREQ_SIZE), - .SRSQ_SIZE (`L3SRSQ_SIZE), .DRAM_ENABLE (1), - .FLUSH_ENABLE (1), .WRITE_ENABLE (1), .CORE_TAG_WIDTH (`L2DRAM_TAG_WIDTH), .CORE_TAG_ID_BITS (0), - .DRAM_TAG_WIDTH (`L3DRAM_TAG_WIDTH), - .SNP_TAG_WIDTH (`L3SNP_TAG_WIDTH) + .DRAM_TAG_WIDTH (`L3DRAM_TAG_WIDTH) ) l3cache ( `SCOPE_BIND_Vortex_l3cache @@ -286,18 +208,6 @@ module Vortex ( .dram_rsp_tag (dram_rsp_tag), .dram_rsp_ready (dram_rsp_ready), - // Snoop request - .snp_req_valid (snp_fwd_rsp_valid), - .snp_req_addr (snp_fwd_rsp_addr), - .snp_req_inv (snp_fwd_rsp_inv), - .snp_req_tag (snp_fwd_rsp_tag), - .snp_req_ready (snp_fwd_rsp_ready), - - // Snoop response - .snp_rsp_valid (snp_rsp_valid), - .snp_rsp_tag (snp_rsp_tag), - .snp_rsp_ready (snp_rsp_ready), - // Miss status `UNUSED_PIN (miss_vec) ); @@ -346,13 +256,6 @@ module Vortex ( .rsp_ready_in (dram_rsp_ready) ); - `UNUSED_VAR (snp_fwd_rsp_addr) - `UNUSED_VAR (snp_fwd_rsp_inv) - - assign snp_rsp_valid = snp_fwd_rsp_valid; - assign snp_rsp_tag = snp_fwd_rsp_tag; - assign snp_fwd_rsp_ready = snp_rsp_ready; - end `SCOPE_ASSIGN (reset, reset); @@ -366,14 +269,6 @@ module Vortex ( `SCOPE_ASSIGN (dram_rsp_fire, dram_rsp_valid && dram_rsp_ready); `SCOPE_ASSIGN (dram_rsp_data, dram_rsp_data); `SCOPE_ASSIGN (dram_rsp_tag, dram_rsp_tag); - `SCOPE_ASSIGN (snp_req_fire, snp_req_valid && snp_req_ready); - `SCOPE_ASSIGN (snp_req_addr, `TO_FULL_ADDR(snp_req_addr)); - `SCOPE_ASSIGN (snp_req_inv, snp_req_inv); - `SCOPE_ASSIGN (snp_req_tag, snp_req_tag); - `SCOPE_ASSIGN (snp_rsp_fire, snp_rsp_valid && snp_rsp_ready); - `SCOPE_ASSIGN (snp_rsp_tag, snp_rsp_tag); - `SCOPE_ASSIGN (snp_rsp_fire, snp_rsp_valid && snp_rsp_ready); - `SCOPE_ASSIGN (snp_rsp_tag, snp_rsp_tag); `SCOPE_ASSIGN (busy, busy); `ifdef DBG_PRINT_DRAM diff --git a/hw/rtl/afu/vortex_afu.sv b/hw/rtl/afu/vortex_afu.sv index 46e7be0e..f1e63414 100644 --- a/hw/rtl/afu/vortex_afu.sv +++ b/hw/rtl/afu/vortex_afu.sv @@ -57,7 +57,6 @@ localparam AFU_ID_H = 16'h0004; // AFU ID Higher localparam CMD_MEM_READ = `AFU_IMAGE_CMD_MEM_READ; localparam CMD_MEM_WRITE = `AFU_IMAGE_CMD_MEM_WRITE; localparam CMD_RUN = `AFU_IMAGE_CMD_RUN; -localparam CMD_CLFLUSH = `AFU_IMAGE_CMD_CLFLUSH; localparam CMD_CSR_READ = `AFU_IMAGE_CMD_CSR_READ; localparam CMD_CSR_WRITE = `AFU_IMAGE_CMD_CSR_WRITE; @@ -83,10 +82,9 @@ localparam STATE_READ = 1; localparam STATE_WRITE = 2; localparam STATE_START = 3; localparam STATE_RUN = 4; -localparam STATE_CLFLUSH = 5; -localparam STATE_CSR_READ = 6; -localparam STATE_CSR_WRITE = 7; -localparam STATE_MAX_VALUE = 8; +localparam STATE_CSR_READ = 5; +localparam STATE_CSR_WRITE = 6; +localparam STATE_MAX_VALUE = 7; localparam STATE_WIDTH = $clog2(STATE_MAX_VALUE); `ifdef SCOPE @@ -112,18 +110,6 @@ wire [`VX_DRAM_LINE_WIDTH-1:0] vx_dram_rsp_data; wire [`VX_DRAM_TAG_WIDTH-1:0] vx_dram_rsp_tag; wire vx_dram_rsp_ready; -reg vx_snp_req_valid; -reg [`VX_DRAM_ADDR_WIDTH-1:0] vx_snp_req_addr; -wire vx_snp_req_inv = 0; -wire [`VX_SNP_TAG_WIDTH-1:0] vx_snp_req_tag; -wire vx_snp_req_ready; - -wire vx_snp_rsp_valid; -`DEBUG_BEGIN -wire [`VX_SNP_TAG_WIDTH-1:0] vx_snp_rsp_tag; -`DEBUG_END -reg vx_snp_rsp_ready; - wire vx_csr_io_req_valid; wire [`VX_CSR_ID_WIDTH-1:0] vx_csr_io_req_coreid; wire [11:0] vx_csr_io_req_addr; @@ -335,7 +321,6 @@ end wire cmd_read_done; wire cmd_write_done; -wire cmd_clflush_done; wire cmd_csr_done; wire cmd_run_done; @@ -371,12 +356,6 @@ always @(posedge clk) begin vx_enabled <= 1; state <= STATE_START; end - CMD_CLFLUSH: begin - `ifdef DBG_PRINT_OPAE - $display("%t: STATE CFLUSH: addr=%0h size=%0d", $time, cmd_mem_addr, cmd_data_size); - `endif - state <= STATE_CLFLUSH; - end CMD_CSR_READ: begin `ifdef DBG_PRINT_OPAE $display("%t: STATE CSR_READ: addr=%0h", $time, cmd_csr_addr); @@ -426,15 +405,6 @@ always @(posedge clk) begin end end - STATE_CLFLUSH: begin - if (cmd_clflush_done) begin - state <= STATE_IDLE; - `ifdef DBG_PRINT_OPAE - $display("%t: STATE IDLE", $time); - `endif - end - end - STATE_CSR_READ: begin if (cmd_csr_done) begin state <= STATE_IDLE; @@ -854,80 +824,6 @@ begin end end -// Vortex cache snooping ////////////////////////////////////////////////////// - -wire [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_size; -wire [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_baseaddr; -reg [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_ctr, snp_rsp_ctr; -wire [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_ctr_next, snp_rsp_ctr_next; - -wire vx_snp_req_fire, vx_snp_rsp_fire; - -if (`VX_DRAM_LINE_WIDTH != DRAM_LINE_WIDTH) begin - assign snp_req_baseaddr = {cmd_mem_addr, (`VX_DRAM_ADDR_WIDTH - DRAM_ADDR_WIDTH)'(0)}; - assign snp_req_size = {cmd_data_size, (`VX_DRAM_ADDR_WIDTH - DRAM_ADDR_WIDTH)'(0)}; -end else begin - assign snp_req_baseaddr = cmd_mem_addr; - assign snp_req_size = cmd_data_size; -end - -assign vx_snp_req_tag = (`VX_SNP_TAG_WIDTH)'(snp_req_ctr); - -assign vx_snp_req_fire = vx_snp_req_valid && vx_snp_req_ready; -assign vx_snp_rsp_fire = vx_snp_rsp_valid && vx_snp_rsp_ready; - -assign snp_req_ctr_next = vx_snp_req_fire ? (snp_req_ctr + `VX_DRAM_ADDR_WIDTH'(1)) : snp_req_ctr; -assign snp_rsp_ctr_next = vx_snp_rsp_fire ? (snp_rsp_ctr - `VX_DRAM_ADDR_WIDTH'(1)) : snp_rsp_ctr; - -assign cmd_clflush_done = (0 == snp_rsp_ctr); - -always @(posedge clk) begin - if (reset) begin - vx_snp_req_valid <= 0; - vx_snp_req_addr <= 0; - vx_snp_rsp_ready <= 0; - snp_req_ctr <= 0; - snp_rsp_ctr <= 0; - end else begin - if ((STATE_IDLE == state) - && (CMD_CLFLUSH == cmd_type)) begin - vx_snp_req_valid <= (snp_req_size != 0); - vx_snp_req_addr <= snp_req_baseaddr; - vx_snp_rsp_ready <= (snp_req_size != 0); - snp_req_ctr <= 0; - snp_rsp_ctr <= snp_req_size; - end - - if ((STATE_CLFLUSH == state) - && (snp_req_ctr_next == snp_req_size)) begin - vx_snp_req_valid <= 0; - end - - if ((STATE_CLFLUSH == state) - && (0 == snp_rsp_ctr_next)) begin - vx_snp_rsp_ready <= 0; - end - - if (vx_snp_req_fire) begin - assert(snp_req_ctr < snp_req_size); - vx_snp_req_addr <= vx_snp_req_addr + `VX_DRAM_ADDR_WIDTH'(1); - snp_req_ctr <= snp_req_ctr_next; - `ifdef DBG_PRINT_OPAE - $display("%t: AFU Snp Req: addr=%0h, tag=%0h, rem=%0d", $time, `TO_FULL_ADDR(vx_snp_req_addr), (`VX_SNP_TAG_WIDTH)'(vx_snp_req_tag), (snp_req_size - snp_req_ctr_next)); - `endif - end - - if ((STATE_CLFLUSH == state) - && vx_snp_rsp_fire) begin - assert(snp_rsp_ctr != 0); - snp_rsp_ctr <= snp_rsp_ctr_next; - `ifdef DBG_PRINT_OPAE - $display("%t: AFU Snp Rsp: tag=%0h, rem=%0d", $time, vx_snp_rsp_tag, snp_rsp_ctr_next); - `endif - end - end -end - // CSRs /////////////////////////////////////////////////////////////////////// reg csr_io_req_sent; @@ -969,52 +865,40 @@ assign cmd_run_done = !vx_busy; Vortex #() vortex ( `SCOPE_BIND_afu_vortex - .clk (clk), - .reset (reset | vx_reset), + .clk (clk), + .reset (reset | vx_reset), // DRAM request - .dram_req_valid (vx_dram_req_valid), - .dram_req_rw (vx_dram_req_rw), - .dram_req_byteen (vx_dram_req_byteen), - .dram_req_addr (vx_dram_req_addr), - .dram_req_data (vx_dram_req_data), - .dram_req_tag (vx_dram_req_tag), - .dram_req_ready (vx_dram_req_ready), + .dram_req_valid (vx_dram_req_valid), + .dram_req_rw (vx_dram_req_rw), + .dram_req_byteen(vx_dram_req_byteen), + .dram_req_addr (vx_dram_req_addr), + .dram_req_data (vx_dram_req_data), + .dram_req_tag (vx_dram_req_tag), + .dram_req_ready (vx_dram_req_ready), // DRAM response - .dram_rsp_valid (vx_dram_rsp_valid), - .dram_rsp_data (vx_dram_rsp_data), - .dram_rsp_tag (vx_dram_rsp_tag), - .dram_rsp_ready (vx_dram_rsp_ready), + .dram_rsp_valid (vx_dram_rsp_valid), + .dram_rsp_data (vx_dram_rsp_data), + .dram_rsp_tag (vx_dram_rsp_tag), + .dram_rsp_ready (vx_dram_rsp_ready), - // Snoop request - .snp_req_valid (vx_snp_req_valid), - .snp_req_addr (vx_snp_req_addr), - .snp_req_inv (vx_snp_req_inv), - .snp_req_tag (vx_snp_req_tag), - .snp_req_ready (vx_snp_req_ready), + // CSR Request + .csr_req_valid (vx_csr_io_req_valid), + .csr_req_coreid (vx_csr_io_req_coreid), + .csr_req_addr (vx_csr_io_req_addr), + .csr_req_rw (vx_csr_io_req_rw), + .csr_req_data (vx_csr_io_req_data), + .csr_req_ready (vx_csr_io_req_ready), - // Snoop response - .snp_rsp_valid (vx_snp_rsp_valid), - .snp_rsp_tag (vx_snp_rsp_tag), - .snp_rsp_ready (vx_snp_rsp_ready), - - // CSR I/O Request - .csr_io_req_valid (vx_csr_io_req_valid), - .csr_io_req_coreid(vx_csr_io_req_coreid), - .csr_io_req_addr (vx_csr_io_req_addr), - .csr_io_req_rw (vx_csr_io_req_rw), - .csr_io_req_data (vx_csr_io_req_data), - .csr_io_req_ready (vx_csr_io_req_ready), - - // CSR I/O Response - .csr_io_rsp_valid (vx_csr_io_rsp_valid), - .csr_io_rsp_data (vx_csr_io_rsp_data), - .csr_io_rsp_ready (vx_csr_io_rsp_ready), + // CSR Response + .csr_rsp_valid (vx_csr_io_rsp_valid), + .csr_rsp_data (vx_csr_io_rsp_data), + .csr_rsp_ready (vx_csr_io_rsp_ready), // status - .busy (vx_busy), - `UNUSED_PIN (ebreak) + .busy (vx_busy), + `UNUSED_PIN (ebreak) ); // SCOPE ////////////////////////////////////////////////////////////////////// @@ -1049,8 +933,6 @@ Vortex #() vortex ( `SCOPE_ASSIGN (ccip_rd_req_ctr, cci_rd_req_ctr); `SCOPE_ASSIGN (ccip_rd_rsp_ctr, cci_rd_rsp_ctr); `SCOPE_ASSIGN (ccip_wr_req_ctr, cci_wr_req_ctr); -`SCOPE_ASSIGN (snp_req_ctr, snp_req_ctr); -`SCOPE_ASSIGN (snp_rsp_ctr, snp_rsp_ctr); wire scope_changed = `SCOPE_TRIGGER; diff --git a/hw/rtl/afu/vortex_afu.vh b/hw/rtl/afu/vortex_afu.vh index 15513137..564fd96c 100644 --- a/hw/rtl/afu/vortex_afu.vh +++ b/hw/rtl/afu/vortex_afu.vh @@ -13,9 +13,8 @@ `define AFU_ACCEL_NAME "vortex_afu" `define AFU_ACCEL_UUID 128'h35f9452b_25c2_434c_93d5_6f8c60db361c -`define AFU_IMAGE_CMD_CLFLUSH 4 -`define AFU_IMAGE_CMD_CSR_READ 5 -`define AFU_IMAGE_CMD_CSR_WRITE 6 +`define AFU_IMAGE_CMD_CSR_READ 4 +`define AFU_IMAGE_CMD_CSR_WRITE 5 `define AFU_IMAGE_CMD_MEM_READ 1 `define AFU_IMAGE_CMD_MEM_WRITE 2 `define AFU_IMAGE_CMD_RUN 3 diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index 8602d7f5..2806df24 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -21,33 +21,26 @@ module VX_bank #( parameter MSHR_SIZE = 1, // DRAM Response Queue Size parameter DRSQ_SIZE = 1, - // Snoop Request Queue Size - parameter SREQ_SIZE = 1, // Core Response Queue Size parameter CRSQ_SIZE = 1, // DRAM Request Queue Size parameter DREQ_SIZE = 1, - // Snoop Response Size - parameter SRSQ_SIZE = 1, - - // Enable cache writeable - parameter WRITE_ENABLE = 0, // Enable dram update - parameter DRAM_ENABLE = 0, - - // Enable cache flush - parameter FLUSH_ENABLE = 0, + parameter DRAM_ENABLE = 1, + + // Enable cache writeable + parameter WRITE_ENABLE = 1, + + // Enable write-through + parameter WRITE_THROUGH = 1, // core request tag size parameter CORE_TAG_WIDTH = 1, // size of tag id in core request tag - parameter CORE_TAG_ID_BITS = 0, - - // Snooping request tag width - parameter SNP_TAG_WIDTH = 1 + parameter CORE_TAG_ID_BITS = 0 ) ( `SCOPE_IO_VX_bank @@ -85,18 +78,6 @@ module VX_bank #( input wire [`BANK_LINE_WIDTH-1:0] dram_rsp_data, output wire dram_rsp_ready, - // Snoop Request - input wire snp_req_valid, - input wire [`LINE_ADDR_WIDTH-1:0] snp_req_addr, - input wire snp_req_inv, - input wire [SNP_TAG_WIDTH-1:0] snp_req_tag, - output wire snp_req_ready, - - // Snoop Response - output wire snp_rsp_valid, - output wire [SNP_TAG_WIDTH-1:0] snp_rsp_tag, - input wire snp_rsp_ready, - `ifdef PERF_ENABLE output wire perf_read_misses, output wire perf_write_misses, @@ -107,8 +88,6 @@ module VX_bank #( // Misses output wire misses ); - `STATIC_ASSERT (!FLUSH_ENABLE || DRAM_ENABLE, ("invalid parameter")) - `ifdef DBG_CACHE_REQ_INFO /* verilator lint_off UNUSED */ wire [31:0] debug_pc_st0; @@ -133,48 +112,6 @@ module VX_bank #( /* verilator lint_on UNUSED */ `endif - wire sreq_pop; - wire sreq_empty; - - wire [`LINE_ADDR_WIDTH-1:0] sreq_addr_st0; - wire sreq_inv_st0; - wire [SNP_TAG_WIDTH-1:0] sreq_tag_st0; - - if (FLUSH_ENABLE) begin - - wire sreq_full; - assign snp_req_ready = !sreq_full; - wire sreq_push = snp_req_valid && snp_req_ready; - - VX_generic_queue #( - .DATAW (`LINE_ADDR_WIDTH + 1 + SNP_TAG_WIDTH), - .SIZE (SREQ_SIZE), - .BUFFERED (1), - .FASTRAM (1) - ) snp_req_queue ( - .clk (clk), - .reset (reset), - .push (sreq_push), - .pop (sreq_pop), - .data_in ({snp_req_addr, snp_req_inv, snp_req_tag}), - .data_out({sreq_addr_st0, sreq_inv_st0, sreq_tag_st0}), - .empty (sreq_empty), - .full (sreq_full), - `UNUSED_PIN (size) - ); - - end else begin - `UNUSED_VAR (snp_req_valid) - `UNUSED_VAR (snp_req_addr) - `UNUSED_VAR (snp_req_inv) - `UNUSED_VAR (snp_req_tag) - assign sreq_empty = 1; - assign sreq_addr_st0 = 0; - assign sreq_inv_st0 = 0; - assign sreq_tag_st0 = 0; - assign snp_req_ready = 0; - end - wire drsq_pop; wire drsq_empty; @@ -258,19 +195,15 @@ module VX_bank #( wire [`REQ_TAG_WIDTH-1:0] mshr_tag_st0; wire mshr_rw_st0; wire [WORD_SIZE-1:0] mshr_byteen_st0; - wire mshr_is_snp_st0; - wire mshr_snp_inv_st0; wire mshr_pending_hazard_unqual_st0; wire is_fill_st0; wire is_mshr_st0; - wire is_snp_st0; wire valid_st0; wire [`LINE_ADDR_WIDTH-1:0] addr_st0; wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st0; wire [`WORD_WIDTH-1:0] writeword_st0; wire [`BANK_LINE_WIDTH-1:0] writedata_st0; - wire snp_inv_st0; wire [`REQ_TAG_WIDTH-1:0] tag_st0; wire mem_rw_st0; wire [WORD_SIZE-1:0] byteen_st0; @@ -278,13 +211,11 @@ module VX_bank #( wire is_fill_st1; wire is_mshr_st1; - wire is_snp_st1; wire valid_st1; wire [`LINE_ADDR_WIDTH-1:0] addr_st1; wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st1; wire [`WORD_WIDTH-1:0] writeword_st1; wire [`BANK_LINE_WIDTH-1:0] writedata_st1; - wire snp_inv_st1; wire [`TAG_SELECT_BITS-1:0] readtag_st1; wire miss_st1; wire force_miss_st1; @@ -305,8 +236,6 @@ module VX_bank #( wire [BANK_LINE_SIZE-1:0] dirtyb_st2; wire [`TAG_SELECT_BITS-1:0] readtag_st2; wire is_fill_st2; - wire is_snp_st2; - wire snp_inv_st2; wire is_mshr_st2; wire miss_st2; wire force_miss_st2; @@ -331,26 +260,25 @@ module VX_bank #( wire mshr_push_stall; wire crsq_push_stall; - wire dreq_push_stall; - wire srsq_push_stall; + wire dreq_push_stall; wire pipeline_stall; wire is_mshr_miss_st2 = valid_st2 && is_mshr_st2 && (miss_st2 || force_miss_st2); wire is_mshr_miss_st3 = valid_st3 && is_mshr_st3 && (miss_st3 || force_miss_st3); - wire creq_commit = valid_st2 && core_req_hit_st2 && !pipeline_stall; + wire creq_commit = valid_st2 + && (core_req_hit_st2 || (WRITE_THROUGH && mem_rw_st2)) + && !pipeline_stall; // determine which queue to pop next in piority order wire mshr_pop_unqual = mshr_valid_st0; wire drsq_pop_unqual = !mshr_pop_unqual && !drsq_empty; wire creq_pop_unqual = !mshr_pop_unqual && !drsq_pop_unqual && !creq_empty && !mshr_going_full; - wire sreq_pop_unqual = !mshr_pop_unqual && !drsq_pop_unqual && !creq_pop_unqual && !sreq_empty && !mshr_going_full; assign mshr_pop = mshr_pop_unqual && !pipeline_stall && !(is_mshr_miss_st2 || is_mshr_miss_st3); // stop if previous request was a miss assign drsq_pop = drsq_pop_unqual && !pipeline_stall; assign creq_pop = creq_pop_unqual && !pipeline_stall; - assign sreq_pop = sreq_pop_unqual && !pipeline_stall; // MSHR pending size assign mshr_pending_size_n = mshr_pending_size + @@ -368,12 +296,11 @@ module VX_bank #( assign is_mshr_st0 = mshr_pop_unqual; assign is_fill_st0 = drsq_pop_unqual; - assign valid_st0 = drsq_pop || mshr_pop || creq_pop || sreq_pop; + assign valid_st0 = drsq_pop || mshr_pop || creq_pop; assign addr_st0 = mshr_pop_unqual ? mshr_addr_st0 : drsq_pop_unqual ? drsq_addr_st0 : creq_pop_unqual ? creq_addr_st0[`LINE_SELECT_ADDR_RNG] : - sreq_pop_unqual ? sreq_addr_st0 : 0; if (`WORD_SELECT_WIDTH != 0) begin @@ -389,32 +316,20 @@ module VX_bank #( assign tag_st0 = mshr_pop_unqual ? `REQ_TAG_WIDTH'(mshr_tag_st0) : creq_pop_unqual ? `REQ_TAG_WIDTH'(creq_tag_st0) : - sreq_pop_unqual ? `REQ_TAG_WIDTH'(sreq_tag_st0) : 0; assign mem_rw_st0 = mshr_pop_unqual ? mshr_rw_st0 : creq_pop_unqual ? creq_rw_st0 : - sreq_pop_unqual ? 1'b0 : 0; assign byteen_st0 = mshr_pop_unqual ? mshr_byteen_st0 : creq_pop_unqual ? creq_byteen_st0 : - sreq_pop_unqual ? WORD_SIZE'(0) : 0; assign req_tid_st0 = mshr_pop_unqual ? mshr_tid_st0 : creq_pop_unqual ? creq_tid_st0 : - sreq_pop_unqual ? `REQS_BITS'(0) : 0; - assign is_snp_st0 = mshr_pop_unqual ? mshr_is_snp_st0 : - sreq_pop_unqual ? 1 : - 0; - - assign snp_inv_st0 = mshr_pop_unqual ? mshr_snp_inv_st0 : - sreq_pop_unqual ? sreq_inv_st0 : - 0; - assign writeword_st0 = mshr_pop_unqual ? mshr_writeword_st0 : creq_pop_unqual ? creq_writeword_st0 : 0; @@ -436,15 +351,15 @@ if (DRAM_ENABLE) begin || (valid_st3 && (miss_st3 || force_miss_st3) && (addr_st3 == addr_st0)); VX_generic_register #( - .N(1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + 1 + `BANK_LINE_WIDTH + 1 + WORD_SIZE + `REQS_BITS + `REQ_TAG_WIDTH), + .N(1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + 1 + `BANK_LINE_WIDTH + 1 + WORD_SIZE + `REQS_BITS + `REQ_TAG_WIDTH), .R(1) ) pipe_reg0 ( .clk (clk), .reset (reset), .stall (pipeline_stall), .flush (1'b0), - .data_in ({valid_st0, is_mshr_st0, is_snp_st0, snp_inv_st0, mshr_pending_hazard_st0, addr_st0, wsel_st0, writeword_st0, is_fill_st0, writedata_st0, mem_rw_st0, byteen_st0, req_tid_st0, tag_st0}), - .data_out ({valid_st1, is_mshr_st1, is_snp_st1, snp_inv_st1, mshr_pending_hazard_st1, addr_st1, wsel_st1, writeword_st1, is_fill_st1, writedata_st1, mem_rw_st1, byteen_st1, req_tid_st1, tag_st1}) + .data_in ({valid_st0, is_mshr_st0, mshr_pending_hazard_st0, addr_st0, wsel_st0, writeword_st0, is_fill_st0, writedata_st0, mem_rw_st0, byteen_st0, req_tid_st0, tag_st0}), + .data_out ({valid_st1, is_mshr_st1, mshr_pending_hazard_st1, addr_st1, wsel_st1, writeword_st1, is_fill_st1, writedata_st1, mem_rw_st1, byteen_st1, req_tid_st1, tag_st1}) ); `ifdef DBG_CACHE_REQ_INFO @@ -471,8 +386,7 @@ if (DRAM_ENABLE) begin .BANK_LINE_SIZE (BANK_LINE_SIZE), .NUM_BANKS (NUM_BANKS), .WORD_SIZE (WORD_SIZE), - .WRITE_ENABLE (WRITE_ENABLE), - .FLUSH_ENABLE (FLUSH_ENABLE) + .WRITE_ENABLE (WRITE_ENABLE) ) tag_access ( .clk (clk), .reset (reset), @@ -491,8 +405,6 @@ if (DRAM_ENABLE) begin .addr_in (addr_st1), .is_write_in (mem_rw_st1), .is_fill_in (is_fill_st1), - .is_snp_in (is_snp_st1), - .snp_inv_in (snp_inv_st1), .force_miss_in (force_miss_st1), // Outputs @@ -504,20 +416,20 @@ if (DRAM_ENABLE) begin assign misses = miss_st1; - wire core_req_hit_st1 = !is_fill_st1 && !is_snp_st1 && !miss_st1 && !force_miss_st1; + wire core_req_hit_st1 = !is_fill_st1 && !miss_st1 && !force_miss_st1; wire incoming_fill_st1 = !drsq_empty && (addr_st1 == drsq_addr_st0); VX_generic_register #( - .N(1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `TAG_SELECT_BITS + 1 + `BANK_LINE_WIDTH + 1 + WORD_SIZE + `REQS_BITS + `REQ_TAG_WIDTH), + .N(1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `TAG_SELECT_BITS + 1 + `BANK_LINE_WIDTH + 1 + WORD_SIZE + `REQS_BITS + `REQ_TAG_WIDTH), .R(1) ) pipe_reg1 ( .clk (clk), .reset (reset), .stall (pipeline_stall), .flush (1'b0), - .data_in ({valid_st1, incoming_fill_st1, core_req_hit_st1, is_mshr_st1, writeen_st1, force_miss_st1, dirty_st1, is_snp_st1, snp_inv_st1, is_fill_st1, addr_st1, wsel_st1, writeword_st1, readtag_st1, miss_st1, writedata_st1, mem_rw_st1, byteen_st1, req_tid_st1, tag_st1}), - .data_out ({valid_st2, incoming_fill_st2, core_req_hit_st2, is_mshr_st2, writeen_st2, force_miss_st2, dirty_st2, is_snp_st2, snp_inv_st2, is_fill_st2, addr_st2, wsel_st2, writeword_st2, readtag_st2, miss_st2, writedata_st2, mem_rw_st2, byteen_st2, req_tid_st2, tag_st2}) + .data_in ({valid_st1, incoming_fill_st1, core_req_hit_st1, is_mshr_st1, writeen_st1, force_miss_st1, dirty_st1, is_fill_st1, addr_st1, wsel_st1, writeword_st1, readtag_st1, miss_st1, writedata_st1, mem_rw_st1, byteen_st1, req_tid_st1, tag_st1}), + .data_out ({valid_st2, incoming_fill_st2, core_req_hit_st2, is_mshr_st2, writeen_st2, force_miss_st2, dirty_st2, is_fill_st2, addr_st2, wsel_st2, writeword_st2, readtag_st2, miss_st2, writedata_st2, mem_rw_st2, byteen_st2, req_tid_st2, tag_st2}) ); end else begin @@ -528,12 +440,10 @@ end else begin assign is_fill_st1 = is_fill_st0; assign is_mshr_st1 = is_mshr_st0; - assign is_snp_st1 = is_snp_st0; assign valid_st1 = valid_st0; assign wsel_st1 = wsel_st0; assign writeword_st1= writeword_st0; assign writedata_st1= writedata_st0; - assign snp_inv_st1 = snp_inv_st0; assign addr_st1 = creq_addr_st0[`LINE_SELECT_ADDR_RNG]; assign dirty_st1 = 0; assign readtag_st1 = 0; @@ -547,12 +457,10 @@ end else begin assign is_fill_st2 = is_fill_st1; assign is_mshr_st2 = is_mshr_st1; - assign is_snp_st2 = is_snp_st1; assign valid_st2 = valid_st1; assign wsel_st2 = wsel_st1; assign writeword_st2= writeword_st1; assign writedata_st2= writedata_st1; - assign snp_inv_st2 = snp_inv_st1; assign addr_st2 = addr_st1; assign dirty_st2 = dirty_st1; assign readtag_st2 = readtag_st1; @@ -586,7 +494,8 @@ end .BANK_LINE_SIZE (BANK_LINE_SIZE), .NUM_BANKS (NUM_BANKS), .WORD_SIZE (WORD_SIZE), - .WRITE_ENABLE (WRITE_ENABLE) + .WRITE_ENABLE (WRITE_ENABLE), + .WRITE_THROUGH (WRITE_THROUGH) ) data_access ( .clk (clk), .reset (reset), @@ -597,7 +506,6 @@ end .debug_wid (debug_wid_st2), .debug_tagid (debug_tagid_st2), `endif - .stall (pipeline_stall), // Inputs @@ -621,45 +529,41 @@ end wire [`WORD_WIDTH-1:0] readword_st3; wire [`BANK_LINE_WIDTH-1:0] readdata_st3; wire [BANK_LINE_SIZE-1:0] dirtyb_st3; - wire [`TAG_SELECT_BITS-1:0] readtag_st3; - wire is_snp_st3; - wire snp_inv_st3; + wire [`TAG_SELECT_BITS-1:0] readtag_st3; wire do_writeback_st3; wire incoming_fill_st3; wire mshr_push_st3; wire crsq_push_st3; wire dreq_push_st3; - wire srsq_push_st3; wire incoming_fill_qual_st2 = (!drsq_empty && (addr_st2 == drsq_addr_st0)) || incoming_fill_st2; wire do_fill_req_st2 = miss_st2 + && !(WRITE_THROUGH && mem_rw_st2) && (!force_miss_st2 || (is_mshr_st2 && addr_st2 != addr_st3)) && !incoming_fill_qual_st2; - wire do_writeback_st2 = dirty_st2 - && (is_fill_st2 - || (!force_miss_st2 && is_snp_st2)); - - wire mshr_push_st2 = miss_st2 || force_miss_st2; - - wire crsq_push_st2 = core_req_hit_st2 && !mem_rw_st2; + wire do_writeback_st2 = (WRITE_THROUGH && mem_rw_st2) + || (!WRITE_THROUGH && dirty_st2 && is_fill_st2); wire dreq_push_st2 = do_fill_req_st2 || do_writeback_st2; - wire srsq_push_st2 = is_snp_st2 && !force_miss_st2; + wire mshr_push_st2 = (miss_st2 || force_miss_st2) + && !(WRITE_THROUGH && mem_rw_st2); + + wire crsq_push_st2 = core_req_hit_st2 && !mem_rw_st2; VX_generic_register #( - .N(1 + 1+ 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `TAG_SELECT_BITS + 1 + 1 + BANK_LINE_SIZE + 1 + WORD_SIZE + `WORD_WIDTH + `BANK_LINE_WIDTH + `REQS_BITS + `REQ_TAG_WIDTH), + .N(1 + 1+ 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `TAG_SELECT_BITS + BANK_LINE_SIZE + 1 + WORD_SIZE + `WORD_WIDTH + `BANK_LINE_WIDTH + `REQS_BITS + `REQ_TAG_WIDTH), .R(1) ) pipe_reg2 ( .clk (clk), .reset (reset), .stall (pipeline_stall), .flush (1'b0), - .data_in ({valid_st2, mshr_push_st2, crsq_push_st2, dreq_push_st2, srsq_push_st2, do_writeback_st2, incoming_fill_qual_st2, force_miss_st2, is_mshr_st2, is_snp_st2, snp_inv_st2, addr_st2, wsel_st2, writeword_st2, readtag_st2, miss_st2, dirtyb_st2, mem_rw_st2, byteen_st2, readword_st2, readdata_st2, req_tid_st2, tag_st2}), - .data_out ({valid_st3, mshr_push_st3, crsq_push_st3, dreq_push_st3, srsq_push_st3, do_writeback_st3, incoming_fill_st3, force_miss_st3, is_mshr_st3, is_snp_st3, snp_inv_st3, addr_st3, wsel_st3, writeword_st3, readtag_st3, miss_st3, dirtyb_st3, mem_rw_st3, byteen_st3, readword_st3, readdata_st3, req_tid_st3, tag_st3}) + .data_in ({valid_st2, mshr_push_st2, crsq_push_st2, dreq_push_st2, do_writeback_st2, incoming_fill_qual_st2, force_miss_st2, is_mshr_st2, addr_st2, wsel_st2, writeword_st2, readtag_st2, miss_st2, dirtyb_st2, mem_rw_st2, byteen_st2, readword_st2, readdata_st2, req_tid_st2, tag_st2}), + .data_out ({valid_st3, mshr_push_st3, crsq_push_st3, dreq_push_st3, do_writeback_st3, incoming_fill_st3, force_miss_st3, is_mshr_st3, addr_st3, wsel_st3, writeword_st3, readtag_st3, miss_st3, dirtyb_st3, mem_rw_st3, byteen_st3, readword_st3, readdata_st3, req_tid_st3, tag_st3}) ); `ifdef DBG_CACHE_REQ_INFO @@ -677,8 +581,7 @@ end wire mshr_push = mshr_push_unqual && !crsq_push_stall - && !dreq_push_stall - && !srsq_push_stall; + && !dreq_push_stall; wire mshr_full; always @(posedge clk) begin @@ -707,8 +610,7 @@ end .WORD_SIZE (WORD_SIZE), .NUM_REQS (NUM_REQS), .MSHR_SIZE (MSHR_SIZE), - .CORE_TAG_WIDTH (CORE_TAG_WIDTH), - .SNP_TAG_WIDTH (SNP_TAG_WIDTH) + .CORE_TAG_WIDTH (CORE_TAG_WIDTH) ) miss_resrv ( .clk (clk), .reset (reset), @@ -727,7 +629,7 @@ end // enqueue .enqueue_st3 (mshr_push), .enqueue_addr_st3 (addr_st3), - .enqueue_data_st3 ({writeword_st3, req_tid_st3, tag_st3, mem_rw_st3, byteen_st3, wsel_st3, is_snp_st3, snp_inv_st3}), + .enqueue_data_st3 ({writeword_st3, req_tid_st3, tag_st3, mem_rw_st3, byteen_st3, wsel_st3}), .enqueue_is_mshr_st3(is_mshr_st3), .enqueue_ready_st3 (mshr_init_ready_state_st3), .enqueue_full (mshr_full), @@ -741,7 +643,7 @@ end .schedule_st0 (mshr_pop), .dequeue_valid_st0 (mshr_valid_st0), .dequeue_addr_st0 (mshr_addr_st0), - .dequeue_data_st0 ({mshr_writeword_st0, mshr_tid_st0, mshr_tag_st0, mshr_rw_st0, mshr_byteen_st0, mshr_wsel_st0, mshr_is_snp_st0, mshr_snp_inv_st0}), + .dequeue_data_st0 ({mshr_writeword_st0, mshr_tid_st0, mshr_tag_st0, mshr_rw_st0, mshr_byteen_st0, mshr_wsel_st0}), .dequeue_st3 (mshr_dequeue_st3) ); end else begin @@ -749,10 +651,8 @@ end `UNUSED_VAR (mshr_push) `UNUSED_VAR (wsel_st3) `UNUSED_VAR (writeword_st3) - `UNUSED_VAR (snp_inv_st3) `UNUSED_VAR (mem_rw_st3) `UNUSED_VAR (byteen_st3) - `UNUSED_VAR (is_snp_st3) `UNUSED_VAR (incoming_fill_st3) assign mshr_pending_hazard_unqual_st0 = 0; assign mshr_full = 0; @@ -764,8 +664,6 @@ end assign mshr_tag_st0 = 0; assign mshr_rw_st0 = 0; assign mshr_byteen_st0 = 0; - assign mshr_is_snp_st0 = 0; - assign mshr_snp_inv_st0 = 0; end // Enqueue core response @@ -778,8 +676,7 @@ end wire crsq_push = crsq_push_unqual && !crsq_full && !mshr_push_stall - && !dreq_push_stall - && !srsq_push_stall; + && !dreq_push_stall; wire crsq_pop = core_rsp_valid && core_rsp_ready; @@ -817,15 +714,14 @@ end && (do_writeback_st3 || !incoming_fill_qual_st3) && !dreq_full && !mshr_push_stall - && !crsq_push_stall - && !srsq_push_stall; + && !crsq_push_stall; wire dreq_pop = dram_req_valid && dram_req_ready; wire writeback = WRITE_ENABLE && do_writeback_st3; - wire [`LINE_ADDR_WIDTH-1:0] dreq_addr = writeback ? {readtag_st3, addr_st3[`LINE_SELECT_BITS-1:0]} : - addr_st3; + wire [`LINE_ADDR_WIDTH-1:0] dreq_addr = (WRITE_THROUGH || !writeback) ? addr_st3 : + {readtag_st3, addr_st3[`LINE_SELECT_BITS-1:0]}; wire [BANK_LINE_SIZE-1:0] dreq_byteen = writeback ? dirtyb_st3 : {BANK_LINE_SIZE{1'b1}}; @@ -864,67 +760,18 @@ end assign dram_req_data = 0; end - assign dram_req_valid = !dreq_empty; - - // Enqueue snoop response - - wire srsq_empty, srsq_full; - - wire srsq_push_unqual = valid_st3 && srsq_push_st3; - assign srsq_push_stall = srsq_push_unqual && srsq_full; - - wire srsq_push = srsq_push_unqual - && !srsq_full - && !mshr_push_stall - && !crsq_push_stall - && !dreq_push_stall; - - wire srsq_pop = snp_rsp_valid && snp_rsp_ready; - - wire [SNP_TAG_WIDTH-1:0] srsq_tag_st3 = SNP_TAG_WIDTH'(tag_st3); - - if (FLUSH_ENABLE) begin - VX_generic_queue #( - .DATAW (SNP_TAG_WIDTH), - .SIZE (SRSQ_SIZE), - .BUFFERED (1), - .FASTRAM (1) - ) snp_rsp_queue ( - .clk (clk), - .reset (reset), - .push (srsq_push), - .pop (srsq_pop), - .data_in (srsq_tag_st3), - .data_out(snp_rsp_tag), - .empty (srsq_empty), - .full (srsq_full), - `UNUSED_PIN (size) - ); - end else begin - `UNUSED_VAR (srsq_push) - `UNUSED_VAR (srsq_pop) - `UNUSED_VAR (srsq_tag_st3) - `UNUSED_VAR (snp_rsp_ready) - assign srsq_empty = 1; - assign srsq_full = 0; - assign snp_rsp_tag = 0; - end - - assign snp_rsp_valid = !srsq_empty - && dreq_empty; // ensure all writebacks are sent + assign dram_req_valid = !dreq_empty; // bank pipeline stall assign pipeline_stall = mshr_push_stall || crsq_push_stall - || dreq_push_stall - || srsq_push_stall; + || dreq_push_stall; `SCOPE_ASSIGN (valid_st0, valid_st0); `SCOPE_ASSIGN (valid_st1, valid_st1); `SCOPE_ASSIGN (valid_st2, valid_st2); `SCOPE_ASSIGN (valid_st3, valid_st3); `SCOPE_ASSIGN (is_fill_st0, is_fill_st0); - `SCOPE_ASSIGN (is_snp_st0, is_snp_st0); `SCOPE_ASSIGN (is_mshr_st0, is_mshr_st0); `SCOPE_ASSIGN (miss_st1, miss_st1); `SCOPE_ASSIGN (dirty_st1, dirty_st1); @@ -951,7 +798,7 @@ end assert(!is_mshr_st3); end if (pipeline_stall) begin - $display("%t: cache%0d:%0d pipeline-stall: msrq=%b, cwbq=%b, dwbq=%b, snpq=%b", $time, CACHE_ID, BANK_ID, mshr_push_stall, crsq_push_stall, dreq_push_stall, srsq_push_stall); + $display("%t: cache%0d:%0d pipeline-stall: msrq=%b, cwbq=%b, dwbq=%b", $time, CACHE_ID, BANK_ID, mshr_push_stall, crsq_push_stall, dreq_push_stall); end if (drsq_pop) begin $display("%t: cache%0d:%0d fill-rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), drsq_filldata_st0); @@ -962,9 +809,6 @@ end else $display("%t: cache%0d:%0d core-rd-req: addr=%0h, tag=%0h, tid=%0d, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), creq_tag_st0, creq_tid_st0, creq_byteen_st0, debug_wid_st0, debug_pc_st0); end - if (sreq_pop) begin - $display("%t: cache%0d:%0d snp-req: addr=%0h, tag=%0h, invalidate=%0d", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), sreq_tag_st0, sreq_inv_st0); - end if (crsq_push) begin $display("%t: cache%0d:%0d core-rsp: addr=%0h, tag=%0h, tid=%0d, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st3, BANK_ID), crsq_tag_st3, crsq_tid_st3, crsq_data_st3, debug_wid_st3, debug_pc_st3); end @@ -974,9 +818,6 @@ end else $display("%t: cache%0d:%0d fill-req: addr=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dreq_addr, BANK_ID), debug_wid_st3, debug_pc_st3); end - if (srsq_push) begin - $display("%t: cache%0d:%0d snp-rsp: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st3, BANK_ID), srsq_tag_st3); - end end `endif diff --git a/hw/rtl/cache/VX_cache.v b/hw/rtl/cache/VX_cache.v index a9cead6a..a8371460 100644 --- a/hw/rtl/cache/VX_cache.v +++ b/hw/rtl/cache/VX_cache.v @@ -19,25 +19,21 @@ module VX_cache #( // Miss Reserv Queue Knob parameter MSHR_SIZE = 8, // DRAM Response Queue Size - parameter DRSQ_SIZE = 4, - // Snoop Request Queue Size - parameter SREQ_SIZE = 4, + parameter DRSQ_SIZE = 4, // Core Response Queue Size parameter CRSQ_SIZE = 4, // DRAM Request Queue Size parameter DREQ_SIZE = 4, - // Snoop Response Size - parameter SRSQ_SIZE = 4, - - // Enable cache writeable - parameter WRITE_ENABLE = 1, // Enable dram update parameter DRAM_ENABLE = 1, - // Enable cache flush - parameter FLUSH_ENABLE = 1, + // Enable cache writeable + parameter WRITE_ENABLE = 1, + + // Enable write-through + parameter WRITE_THROUGH = 1, // core request tag size parameter CORE_TAG_WIDTH = $clog2(MSHR_SIZE), @@ -46,10 +42,7 @@ module VX_cache #( parameter CORE_TAG_ID_BITS = 0, // dram request tag size - parameter DRAM_TAG_WIDTH = (32 - $clog2(BANK_LINE_SIZE)), - - // Snooping request tag width - parameter SNP_TAG_WIDTH = 1 + parameter DRAM_TAG_WIDTH = (32 - $clog2(BANK_LINE_SIZE)) ) ( `SCOPE_IO_VX_cache @@ -89,19 +82,7 @@ module VX_cache #( input wire dram_rsp_valid, input wire [`BANK_LINE_WIDTH-1:0] dram_rsp_data, input wire [DRAM_TAG_WIDTH-1:0] dram_rsp_tag, - output wire dram_rsp_ready, - - // Snoop request - input wire snp_req_valid, - input wire [`DRAM_ADDR_WIDTH-1:0] snp_req_addr, - input wire snp_req_inv, - input wire [SNP_TAG_WIDTH-1:0] snp_req_tag, - output wire snp_req_ready, - - // Snoop response - output wire snp_rsp_valid, - output wire [SNP_TAG_WIDTH-1:0] snp_rsp_tag, - input wire snp_rsp_ready, + output wire dram_rsp_ready, output wire [NUM_BANKS-1:0] miss_vec ); @@ -127,12 +108,6 @@ module VX_cache #( wire [NUM_BANKS-1:0] per_bank_dram_rsp_ready; - wire [NUM_BANKS-1:0] per_bank_snp_req_ready; - - wire [NUM_BANKS-1:0] per_bank_snp_rsp_valid; - wire [NUM_BANKS-1:0][SNP_TAG_WIDTH-1:0] per_bank_snp_rsp_tag; - wire [NUM_BANKS-1:0] per_bank_snp_rsp_ready; - wire [NUM_BANKS-1:0] per_bank_miss; assign miss_vec = per_bank_miss; @@ -141,13 +116,7 @@ module VX_cache #( wire [NUM_BANKS-1:0] perf_write_miss_per_bank; wire [NUM_BANKS-1:0] perf_mshr_stall_per_bank; wire [NUM_BANKS-1:0] perf_pipe_stall_per_bank; -`endif - - if (NUM_BANKS == 1) begin - assign snp_req_ready = per_bank_snp_req_ready; - end else begin - assign snp_req_ready = per_bank_snp_req_ready[`DRAM_ADDR_BANK(snp_req_addr)]; - end +`endif VX_cache_core_req_bank_sel #( .BANK_LINE_SIZE (BANK_LINE_SIZE), @@ -205,16 +174,6 @@ module VX_cache #( wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_rsp_addr; wire curr_bank_dram_rsp_ready; - wire curr_bank_snp_req_valid; - wire [`LINE_ADDR_WIDTH-1:0] curr_bank_snp_req_addr; - wire curr_bank_snp_req_inv; - wire [SNP_TAG_WIDTH-1:0] curr_bank_snp_req_tag; - wire curr_bank_snp_req_ready; - - wire curr_bank_snp_rsp_valid; - wire [SNP_TAG_WIDTH-1:0] curr_bank_snp_rsp_tag; - wire curr_bank_snp_rsp_ready; - wire curr_bank_miss; // Core Req @@ -257,23 +216,6 @@ module VX_cache #( assign curr_bank_dram_rsp_data = dram_rsp_data; assign per_bank_dram_rsp_ready[i] = curr_bank_dram_rsp_ready; - // Snoop request - if (NUM_BANKS == 1) begin - assign curr_bank_snp_req_valid = snp_req_valid; - assign curr_bank_snp_req_addr = snp_req_addr; - end else begin - assign curr_bank_snp_req_valid = snp_req_valid && (`DRAM_ADDR_BANK(snp_req_addr) == i); - assign curr_bank_snp_req_addr = `DRAM_TO_LINE_ADDR(snp_req_addr); - end - assign curr_bank_snp_req_inv = snp_req_inv; - assign curr_bank_snp_req_tag = snp_req_tag; - assign per_bank_snp_req_ready[i] = curr_bank_snp_req_ready; - - // Snoop response - assign per_bank_snp_rsp_valid[i] = curr_bank_snp_rsp_valid; - assign per_bank_snp_rsp_tag[i] = curr_bank_snp_rsp_tag; - assign curr_bank_snp_rsp_ready = per_bank_snp_rsp_ready[i]; - //Misses assign per_bank_miss[i] = curr_bank_miss; @@ -288,16 +230,13 @@ module VX_cache #( .CREQ_SIZE (CREQ_SIZE), .MSHR_SIZE (MSHR_SIZE), .DRSQ_SIZE (DRSQ_SIZE), - .SREQ_SIZE (SREQ_SIZE), .CRSQ_SIZE (CRSQ_SIZE), .DREQ_SIZE (DREQ_SIZE), - .SRSQ_SIZE (SRSQ_SIZE), .DRAM_ENABLE (DRAM_ENABLE), - .FLUSH_ENABLE (FLUSH_ENABLE), .WRITE_ENABLE (WRITE_ENABLE), + .WRITE_THROUGH (WRITE_THROUGH), .CORE_TAG_WIDTH (CORE_TAG_WIDTH), - .CORE_TAG_ID_BITS (CORE_TAG_ID_BITS), - .SNP_TAG_WIDTH (SNP_TAG_WIDTH) + .CORE_TAG_ID_BITS (CORE_TAG_ID_BITS) ) bank ( `SCOPE_BIND_VX_cache_bank(i) @@ -342,18 +281,6 @@ module VX_cache #( .perf_pipe_stalls (perf_pipe_stall_per_bank[i]), `endif - // Snoop request - .snp_req_valid (curr_bank_snp_req_valid), - .snp_req_addr (curr_bank_snp_req_addr), - .snp_req_inv (curr_bank_snp_req_inv), - .snp_req_tag (curr_bank_snp_req_tag), - .snp_req_ready (curr_bank_snp_req_ready), - - // Snoop response - .snp_rsp_valid (curr_bank_snp_rsp_valid), - .snp_rsp_tag (curr_bank_snp_rsp_tag), - .snp_rsp_ready (curr_bank_snp_rsp_ready), - //Misses .misses (curr_bank_miss) ); @@ -414,30 +341,6 @@ module VX_cache #( `UNUSED_VAR (dram_req_ready) end - if (FLUSH_ENABLE) begin - VX_stream_arbiter #( - .NUM_REQS (NUM_BANKS), - .DATAW (SNP_TAG_WIDTH), - .BUFFERED (1) - ) snp_rsp_arb ( - .clk (clk), - .reset (reset), - .valid_in (per_bank_snp_rsp_valid), - .data_in (per_bank_snp_rsp_tag), - .ready_in (per_bank_snp_rsp_ready), - .valid_out (snp_rsp_valid), - .data_out (snp_rsp_tag), - .ready_out (snp_rsp_ready) - ); - end else begin - `UNUSED_VAR (per_bank_snp_rsp_valid) - `UNUSED_VAR (per_bank_snp_rsp_tag) - assign per_bank_snp_rsp_ready = 0; - assign snp_rsp_valid = 0; - assign snp_rsp_tag = 0; - `UNUSED_VAR (snp_rsp_ready) - end - `ifdef PERF_ENABLE // per cycle: core_reads, core_writes reg [($clog2(NUM_REQS+1)-1):0] perf_core_reads_per_cycle, perf_core_writes_per_cycle; diff --git a/hw/rtl/cache/VX_cache_config.vh b/hw/rtl/cache/VX_cache_config.vh index 15b18010..469d36d7 100644 --- a/hw/rtl/cache/VX_cache_config.vh +++ b/hw/rtl/cache/VX_cache_config.vh @@ -7,15 +7,15 @@ `include "VX_define.vh" `endif -`define REQ_TAG_WIDTH `MAX(CORE_TAG_WIDTH, SNP_TAG_WIDTH) +`define REQ_TAG_WIDTH CORE_TAG_WIDTH `define REQS_BITS `LOG2UP(NUM_REQS) // tag rw byteen tid `define REQ_INST_META_WIDTH (`REQ_TAG_WIDTH + 1 + WORD_SIZE + `REQS_BITS) -// data metadata word_sel is_snp snp_inv -`define MSHR_DATA_WIDTH (`WORD_WIDTH + `REQ_INST_META_WIDTH + `UP(`WORD_SELECT_WIDTH) + 1 + 1) +// data metadata word_sel +`define MSHR_DATA_WIDTH (`WORD_WIDTH + `REQ_INST_META_WIDTH + `UP(`WORD_SELECT_WIDTH)) `define BANK_BITS `LOG2UP(NUM_BANKS) diff --git a/hw/rtl/cache/VX_data_access.v b/hw/rtl/cache/VX_data_access.v index 56788607..3128f790 100644 --- a/hw/rtl/cache/VX_data_access.v +++ b/hw/rtl/cache/VX_data_access.v @@ -14,7 +14,10 @@ module VX_data_access #( parameter WORD_SIZE = 1, // Enable cache writeable - parameter WRITE_ENABLE = 0, + parameter WRITE_ENABLE = 1, + + // Enable write-through + parameter WRITE_THROUGH = 1, // size of tag id in core request tag parameter CORE_TAG_ID_BITS = 0 @@ -54,9 +57,9 @@ module VX_data_access #( wire [BANK_LINE_SIZE-1:0] read_dirtyb_out; wire [`BANK_LINE_WIDTH-1:0] read_data; - wire [`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] byte_enable; + wire [`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] byte_enable; + wire [`BANK_LINE_WIDTH-1:0] write_data; wire write_enable; - wire [`BANK_LINE_WIDTH-1:0] write_data; wire [`LINE_SELECT_BITS-1:0] addrline = addr_in[`LINE_SELECT_BITS-1:0]; @@ -92,18 +95,32 @@ module VX_data_access #( end end - for (genvar i = 0; i < `BANK_LINE_WORDS; i++) begin - wire word_sel = (`WORD_SELECT_WIDTH == 0) || (wordsel_in == `UP(`WORD_SELECT_WIDTH)'(i)); - - assign byte_enable[i] = is_fill_in ? {WORD_SIZE{1'b1}} : - word_sel ? byteen_in : {WORD_SIZE{1'b0}}; + wire [`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] byte_enable_w; + wire [`BANK_LINE_WIDTH-1:0] write_data_w; - assign write_data[i * `WORD_WIDTH +: `WORD_WIDTH] = is_fill_in ? writedata_in[i * `WORD_WIDTH +: `WORD_WIDTH] : writeword_in; - end + if (`WORD_SELECT_WIDTH != 0) begin + for (genvar i = 0; i < `BANK_LINE_WORDS; i++) begin + assign byte_enable_w[i] = (wordsel_in == `WORD_SELECT_WIDTH'(i)) ? byteen_in : {WORD_SIZE{1'b0}}; + assign write_data_w[i * `WORD_WIDTH +: `WORD_WIDTH] = writeword_in; + end + end else begin + assign byte_enable_w = byteen_in; + assign write_data_w = writeword_in; + end + + assign byte_enable = is_fill_in ? {BANK_LINE_SIZE{1'b1}} : byte_enable_w; + assign write_data = is_fill_in ? writedata_in : write_data_w; assign write_enable = valid_in && writeen_in && !stall; - assign dirtyb_out = read_dirtyb_out; - assign readdata_out = read_data; + + if (WRITE_THROUGH) begin + `UNUSED_VAR (read_dirtyb_out) + assign dirtyb_out = byte_enable_w; + assign readdata_out = write_data_w; + end else begin + assign dirtyb_out = read_dirtyb_out; + assign readdata_out = read_data; + end `ifdef DBG_PRINT_CACHE_DATA always @(posedge clk) begin diff --git a/hw/rtl/cache/VX_miss_resrv.v b/hw/rtl/cache/VX_miss_resrv.v index 49dfca41..855a4807 100644 --- a/hw/rtl/cache/VX_miss_resrv.v +++ b/hw/rtl/cache/VX_miss_resrv.v @@ -16,8 +16,6 @@ module VX_miss_resrv #( parameter MSHR_SIZE = 1, // core request tag size parameter CORE_TAG_WIDTH = 1, - // Snooping request tag width - parameter SNP_TAG_WIDTH = 1, // size of tag id in core request tag parameter CORE_TAG_ID_BITS = 0 ) ( diff --git a/hw/rtl/cache/VX_snp_forwarder.v b/hw/rtl/cache/VX_snp_forwarder.v deleted file mode 100644 index d0842a2a..00000000 --- a/hw/rtl/cache/VX_snp_forwarder.v +++ /dev/null @@ -1,249 +0,0 @@ -`include "VX_cache_config.vh" - -module VX_snp_forwarder #( - parameter CACHE_ID = 0, - parameter SRC_ADDR_WIDTH = 1, - parameter DST_ADDR_WIDTH = 1, - parameter NUM_REQS = 1, - parameter SREQ_SIZE = 1, - parameter TAG_IN_WIDTH = 1, - parameter TAG_OUT_WIDTH = `LOG2UP(SREQ_SIZE), - parameter BUFFERED = 0 -) ( - input wire clk, - input wire reset, - - // Snoop request - input wire snp_req_valid, - input wire [SRC_ADDR_WIDTH-1:0] snp_req_addr, - input wire snp_req_inv, - input wire [TAG_IN_WIDTH-1:0] snp_req_tag, - output wire snp_req_ready, - - // Snoop response - output wire snp_rsp_valid, - output wire [SRC_ADDR_WIDTH-1:0] snp_rsp_addr, - output wire snp_rsp_inv, - output wire [TAG_IN_WIDTH-1:0] snp_rsp_tag, - input wire snp_rsp_ready, - - // Snoop Forwarding out - output wire [NUM_REQS-1:0] snp_fwdout_valid, - output wire [NUM_REQS-1:0][DST_ADDR_WIDTH-1:0] snp_fwdout_addr, - output wire [NUM_REQS-1:0] snp_fwdout_inv, - output wire [NUM_REQS-1:0][TAG_OUT_WIDTH-1:0] snp_fwdout_tag, - input wire [NUM_REQS-1:0] snp_fwdout_ready, - - // Snoop forwarding in - input wire [NUM_REQS-1:0] snp_fwdin_valid, - input wire [NUM_REQS-1:0][TAG_OUT_WIDTH-1:0] snp_fwdin_tag, - output wire [NUM_REQS-1:0] snp_fwdin_ready -); - localparam ADDR_DIFF = DST_ADDR_WIDTH - SRC_ADDR_WIDTH; - localparam NUM_REQUESTS_QUAL = NUM_REQS * (1 << ADDR_DIFF); - localparam REQ_QUAL_BITS = `LOG2UP(NUM_REQUESTS_QUAL); - - if (NUM_REQS > 1) begin - - reg [REQ_QUAL_BITS:0] pending_cntrs [SREQ_SIZE-1:0]; - - wire [TAG_IN_WIDTH-1:0] snp_rsp_tag_unqual; - wire [SRC_ADDR_WIDTH-1:0] snp_rsp_addr_unqual; - wire snp_rsp_inv_unqual; - wire snp_rsp_ready_unqual; - - wire [TAG_OUT_WIDTH-1:0] sfq_write_addr, sfq_read_addr; - wire sfq_full; - - wire [TAG_OUT_WIDTH-1:0] fwdin_tag; - wire fwdin_valid; - - wire fwdin_ready = snp_rsp_ready_unqual || (1 != pending_cntrs[sfq_read_addr]); - wire fwdin_fire = fwdin_valid && fwdin_ready; - - wire snp_rsp_valid_unqual = fwdin_valid && (1 == pending_cntrs[sfq_read_addr]); - - assign sfq_read_addr = fwdin_tag; - - wire sfq_acquire = snp_req_valid && snp_req_ready; - wire sfq_release = snp_rsp_valid_unqual && snp_rsp_ready_unqual; - - VX_index_buffer #( - .DATAW (SRC_ADDR_WIDTH + 1 + TAG_IN_WIDTH), - .SIZE (SREQ_SIZE), - .FASTRAM (1) - ) req_metadata_buf ( - .clk (clk), - .reset (reset), - .write_addr (sfq_write_addr), - .acquire_slot (sfq_acquire), - .read_addr (sfq_read_addr), - .write_data ({snp_req_tag, snp_req_addr, snp_req_inv}), - .read_data ({snp_rsp_tag_unqual, snp_rsp_addr_unqual, snp_rsp_inv_unqual}), - .release_addr (sfq_read_addr), - .release_slot (sfq_release), - .full (sfq_full) - ); - - wire fwdout_valid; - wire [TAG_OUT_WIDTH-1:0] fwdout_tag; - wire [DST_ADDR_WIDTH-1:0] fwdout_addr; - wire fwdout_inv; - wire fwdout_ready; - wire dispatch_hold; - - if (ADDR_DIFF != 0) begin - reg [TAG_OUT_WIDTH-1:0] fwdout_tag_r; - reg [DST_ADDR_WIDTH-1:0] fwdout_addr_r; - reg fwdout_inv_r; - reg dispatch_hold_r; - - always @(posedge clk) begin - if (reset) begin - dispatch_hold_r <= 0; - end else begin - if (snp_req_valid && snp_req_ready) begin - dispatch_hold_r <= 1; - end - - if (dispatch_hold_r - && fwdout_ready - && (fwdout_addr[ADDR_DIFF-1:0] == ((1 << ADDR_DIFF)-1))) begin - dispatch_hold_r <= 0; - end - end - - if (fwdout_valid && fwdout_ready) begin - fwdout_addr_r <= fwdout_addr + DST_ADDR_WIDTH'(1'b1); - end - - if (snp_req_valid && snp_req_ready) begin - fwdout_inv_r <= snp_req_inv; - fwdout_tag_r <= sfq_write_addr; - end - end - assign fwdout_valid = dispatch_hold_r || (snp_req_valid && !sfq_full); - assign fwdout_tag = dispatch_hold_r ? fwdout_tag_r : sfq_write_addr; - assign fwdout_addr = dispatch_hold_r ? fwdout_addr_r : {snp_req_addr, ADDR_DIFF'(0)}; - assign fwdout_inv = dispatch_hold_r ? fwdout_inv_r : snp_req_inv; - assign dispatch_hold = dispatch_hold_r; - end else begin - assign fwdout_valid = snp_req_valid && !sfq_full; - assign fwdout_tag = sfq_write_addr; - assign fwdout_addr = snp_req_addr; - assign fwdout_inv = snp_req_inv; - assign dispatch_hold = 1'b0; - end - - always @(posedge clk) begin - if (sfq_acquire) begin - pending_cntrs[sfq_write_addr] <= NUM_REQUESTS_QUAL; - end - if (fwdin_fire) begin - pending_cntrs[sfq_read_addr] <= pending_cntrs[sfq_read_addr] - 1; - end - end - - reg [NUM_REQS-1:0] snp_fwdout_ready_other; - wire [NUM_REQS-1:0] fwdout_ready_unqual; - - for (genvar i = 0; i < NUM_REQS; i++) begin - VX_skid_buffer #( - .DATAW (DST_ADDR_WIDTH + 1 + TAG_OUT_WIDTH), - .PASSTHRU (!BUFFERED) - ) fwdout_buffer ( - .clk (clk), - .reset (reset), - .valid_in (fwdout_valid && snp_fwdout_ready_other[i]), - .data_in ({fwdout_addr, fwdout_inv, fwdout_tag}), - .ready_in (fwdout_ready_unqual[i]), - .valid_out (snp_fwdout_valid[i]), - .data_out ({snp_fwdout_addr[i], snp_fwdout_inv[i], snp_fwdout_tag[i]}), - .ready_out (snp_fwdout_ready[i]) - ); - end - - always @(*) begin - snp_fwdout_ready_other = {NUM_REQS{1'b1}}; - for (integer i = 0; i < NUM_REQS; i++) begin - for (integer j = 0; j < NUM_REQS; j++) begin - if (i != j) - snp_fwdout_ready_other[i] &= fwdout_ready_unqual[j]; - end - end - end - - assign fwdout_ready = (& fwdout_ready_unqual); - - assign snp_req_ready = fwdout_ready && !sfq_full && !dispatch_hold; - - VX_stream_arbiter #( - .NUM_REQS (NUM_REQS), - .DATAW (TAG_OUT_WIDTH) - ) snp_fwdin_arb ( - .clk (clk), - .reset (reset), - .valid_in (snp_fwdin_valid), - .data_in (snp_fwdin_tag), - .ready_in (snp_fwdin_ready), - .valid_out (fwdin_valid), - .data_out (fwdin_tag), - .ready_out (fwdin_ready) - ); - - VX_skid_buffer #( - .DATAW (TAG_IN_WIDTH + SRC_ADDR_WIDTH + 1), - .PASSTHRU (!BUFFERED) - ) rsp_buffer ( - .clk (clk), - .reset (reset), - .valid_in (snp_rsp_valid_unqual), - .data_in ({snp_rsp_tag_unqual, snp_rsp_addr_unqual, snp_rsp_inv_unqual}), - .ready_in (snp_rsp_ready_unqual), - .valid_out (snp_rsp_valid), - .data_out ({snp_rsp_tag, snp_rsp_addr, snp_rsp_inv}), - .ready_out (snp_rsp_ready) - ); - - `ifdef DBG_PRINT_CACHE_SNP - always @(posedge clk) begin - if (fwdin_valid && fwdin_ready) begin - $display("%t: cache%0d snp-fwd-in: tag=%0h", $time, CACHE_ID, fwdin_tag); - end - end - `endif - - end else begin - - `UNUSED_VAR (clk) - `UNUSED_VAR (reset) - - assign snp_fwdout_valid = snp_req_valid; - assign snp_fwdout_addr = snp_req_addr; - assign snp_fwdout_inv = snp_req_inv; - assign snp_fwdout_tag = snp_req_tag; - assign snp_req_ready = snp_fwdout_ready; - - assign snp_rsp_valid = snp_fwdin_valid; - assign snp_rsp_addr = snp_req_addr; - assign snp_rsp_inv = snp_req_inv; - assign snp_rsp_tag = snp_fwdin_tag; - assign snp_fwdin_ready = snp_rsp_ready; - - end - -`ifdef DBG_PRINT_CACHE_SNP - always @(posedge clk) begin - if (snp_req_valid && snp_req_ready) begin - $display("%t: cache%0d snp-fwd-req: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, `TO_FULL_ADDR(snp_req_addr), snp_req_inv, snp_req_tag); - end - if (snp_fwdout_valid[0] && snp_fwdout_ready[0]) begin - $display("%t: cache%0d snp-fwd-out: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, `TO_FULL_ADDR(snp_fwdout_addr[0]), snp_fwdout_inv[0], snp_fwdout_tag[0]); - end - if (snp_rsp_valid && snp_rsp_ready) begin - $display("%t: cache%0d snp-fwd-rsp: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, snp_rsp_addr, snp_rsp_inv, snp_rsp_tag); - end - end -`endif - -endmodule \ No newline at end of file diff --git a/hw/rtl/cache/VX_tag_access.v b/hw/rtl/cache/VX_tag_access.v index 7050403e..33b1f0b0 100644 --- a/hw/rtl/cache/VX_tag_access.v +++ b/hw/rtl/cache/VX_tag_access.v @@ -16,9 +16,6 @@ module VX_tag_access #( // Enable cache writeable parameter WRITE_ENABLE = 0, - // Enable cache flush - parameter FLUSH_ENABLE = 1, - // size of tag id in core request tag parameter CORE_TAG_ID_BITS = 0 ) ( @@ -41,8 +38,6 @@ module VX_tag_access #( input wire[`LINE_ADDR_WIDTH-1:0] addr_in, input wire is_write_in, input wire is_fill_in, - input wire is_snp_in, - input wire snp_inv_in, input wire force_miss_in, // Outputs @@ -90,7 +85,6 @@ module VX_tag_access #( assign do_write = WRITE_ENABLE && valid_in && tags_match - && !is_snp_in && !is_fill_in && is_write_in && !force_miss_in @@ -100,17 +94,10 @@ module VX_tag_access #( && is_fill_in && !stall; - assign do_invalidate = FLUSH_ENABLE - && valid_in - && tags_match - && is_snp_in - && (read_dirty || snp_inv_in) - && !force_miss_in - && !stall; + assign do_invalidate = 0; assign miss_out = valid_in && !tags_match - && !is_snp_in && !is_fill_in; assign dirty_out = WRITE_ENABLE diff --git a/hw/rtl/interfaces/VX_cache_snp_req_if.v b/hw/rtl/interfaces/VX_cache_snp_req_if.v deleted file mode 100644 index bc154568..00000000 --- a/hw/rtl/interfaces/VX_cache_snp_req_if.v +++ /dev/null @@ -1,19 +0,0 @@ -`ifndef VX_CACHE_SNP_REQ_IF -`define VX_CACHE_SNP_REQ_IF - -`include "../cache/VX_cache_config.vh" - -interface VX_cache_snp_req_if #( - parameter DRAM_ADDR_WIDTH = 0, - parameter SNP_TAG_WIDTH = 0 -) (); - - wire valid; - wire [DRAM_ADDR_WIDTH-1:0] addr; - wire invalidate; - wire [SNP_TAG_WIDTH-1:0] tag; - wire ready; - -endinterface - -`endif \ No newline at end of file diff --git a/hw/rtl/interfaces/VX_cache_snp_rsp_if.v b/hw/rtl/interfaces/VX_cache_snp_rsp_if.v deleted file mode 100644 index c9235ebe..00000000 --- a/hw/rtl/interfaces/VX_cache_snp_rsp_if.v +++ /dev/null @@ -1,16 +0,0 @@ -`ifndef VX_CACHE_SNP_RSP_IF -`define VX_CACHE_SNP_RSP_IF - -`include "../cache/VX_cache_config.vh" - -interface VX_cache_snp_rsp_if #( - parameter SNP_TAG_WIDTH = 0 -) (); - - wire valid; - wire [SNP_TAG_WIDTH-1:0] tag; - wire ready; - -endinterface - -`endif \ No newline at end of file diff --git a/hw/simulate/simulator.cpp b/hw/simulate/simulator.cpp index b94d31d9..dff4bbd5 100644 --- a/hw/simulate/simulator.cpp +++ b/hw/simulate/simulator.cpp @@ -62,21 +62,15 @@ void Simulator::reset() { dram_rsp_vec_.clear(); dram_rsp_active_ = false; - snp_req_active_ = false; csr_req_active_ = false; - - snp_req_size_ = 0; - pending_snp_reqs_ = 0; csr_rsp_value_ = nullptr; vortex_->dram_rsp_valid = 0; vortex_->dram_req_ready = 0; //vortex_->io_req_ready = 0; //vortex_->io_rsp_valid = 0; - vortex_->snp_req_valid = 0; - vortex_->snp_rsp_ready = 0; - vortex_->csr_io_req_valid = 0; - vortex_->csr_io_rsp_ready = 0; + vortex_->csr_req_valid = 0; + vortex_->csr_rsp_ready = 0; vortex_->reset = 1; @@ -97,8 +91,7 @@ void Simulator::step() { this->eval(); dram_rsp_ready_ = vortex_->dram_rsp_ready; - snp_req_ready_ = vortex_->snp_req_ready; - csr_io_req_ready_ = vortex_->csr_io_req_ready; + csr_req_ready_ = vortex_->csr_req_ready; vortex_->clk = 1; this->eval(); @@ -106,7 +99,6 @@ void Simulator::step() { this->eval_dram_bus(); this->eval_io_bus(); this->eval_csr_bus(); - this->eval_snp_bus(); #ifndef NDEBUG fflush(stdout); @@ -220,63 +212,30 @@ void Simulator::eval_io_bus() { vortex_->io_rsp_valid = 0;*/ } -void Simulator::eval_snp_bus() { - if (snp_req_active_) { - if (vortex_->snp_req_valid && snp_req_ready_) { - assert(snp_req_size_); - #ifdef DBG_PRINT_CACHE_SNP - std::cout << std::dec << timestamp << ": [sim] SNP Req: addr=" << std::hex << vortex_->snp_req_addr << " tag=" << vortex_->snp_req_tag << " remain=" << (snp_req_size_-1) << std::endl; - #endif - ++vortex_->snp_req_addr; - ++vortex_->snp_req_tag; - ++pending_snp_reqs_; - --snp_req_size_; - if (0 == snp_req_size_) { - vortex_->snp_req_valid = false; - } - } - - if (vortex_->snp_rsp_valid && vortex_->snp_rsp_ready) { - assert(pending_snp_reqs_ > 0); - --pending_snp_reqs_; - if (!vortex_->snp_req_valid && 0 == pending_snp_reqs_) { - vortex_->snp_rsp_ready = false; - snp_req_active_ = false; - } - #ifdef DBG_PRINT_CACHE_SNP - std::cout << std::dec << timestamp << ": [sim] SNP Rsp: tag=" << std::hex << vortex_->snp_rsp_tag << " pending=" << pending_snp_reqs_ << std::endl; - #endif - } - } else { - vortex_->snp_req_valid = 0; - vortex_->snp_rsp_ready = 0; - } -} - void Simulator::eval_csr_bus() { if (csr_req_active_) { - if (vortex_->csr_io_req_valid && csr_io_req_ready_) { + if (vortex_->csr_req_valid && csr_req_ready_) { #ifndef NDEBUG - if (vortex_->csr_io_req_rw) - std::cout << std::dec << timestamp << ": [sim] CSR Wr Req: core=" << (int)vortex_->csr_io_req_coreid << ", addr=" << std::hex << vortex_->csr_io_req_addr << ", value=" << vortex_->csr_io_req_data << std::endl; + if (vortex_->csr_req_rw) + std::cout << std::dec << timestamp << ": [sim] CSR Wr Req: core=" << (int)vortex_->csr_req_coreid << ", addr=" << std::hex << vortex_->csr_req_addr << ", value=" << vortex_->csr_req_data << std::endl; else - std::cout << std::dec << timestamp << ": [sim] CSR Rd Req: core=" << (int)vortex_->csr_io_req_coreid << ", addr=" << std::hex << vortex_->csr_io_req_addr << std::endl; + std::cout << std::dec << timestamp << ": [sim] CSR Rd Req: core=" << (int)vortex_->csr_req_coreid << ", addr=" << std::hex << vortex_->csr_req_addr << std::endl; #endif - vortex_->csr_io_req_valid = 0; - if (vortex_->csr_io_req_rw) + vortex_->csr_req_valid = 0; + if (vortex_->csr_req_rw) csr_req_active_ = false; } - if (vortex_->csr_io_rsp_valid && vortex_->csr_io_rsp_ready) { - *csr_rsp_value_ = vortex_->csr_io_rsp_data; - vortex_->csr_io_rsp_ready = 0; + if (vortex_->csr_rsp_valid && vortex_->csr_rsp_ready) { + *csr_rsp_value_ = vortex_->csr_rsp_data; + vortex_->csr_rsp_ready = 0; csr_req_active_ = false; #ifndef NDEBUG - std::cout << std::dec << timestamp << ": [sim] CSR Rsp: value=" << vortex_->csr_io_rsp_data << std::endl; + std::cout << std::dec << timestamp << ": [sim] CSR Rsp: value=" << vortex_->csr_rsp_data << std::endl; #endif } } else { - vortex_->csr_io_req_valid = 0; - vortex_->csr_io_rsp_ready = 0; + vortex_->csr_req_valid = 0; + vortex_->csr_rsp_ready = 0; } } @@ -290,48 +249,27 @@ bool Simulator::is_busy() const { return vortex_->busy; } -bool Simulator::snp_req_active() const { - return snp_req_active_; -} - bool Simulator::csr_req_active() const { return csr_req_active_; } -void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) { - if (0 == size) - return; - - assert(!vortex_->snp_rsp_valid); - - vortex_->snp_req_addr = mem_addr / GLOBAL_BLOCK_SIZE; - vortex_->snp_req_tag = 0; - vortex_->snp_req_valid = 1; - vortex_->snp_rsp_ready = 1; - - snp_req_size_ = (size + GLOBAL_BLOCK_SIZE - 1) / GLOBAL_BLOCK_SIZE; - pending_snp_reqs_ = 0; - - snp_req_active_ = true; -} - void Simulator::set_csr(int core_id, int addr, unsigned value) { - vortex_->csr_io_req_valid = 1; - vortex_->csr_io_req_coreid = core_id; - vortex_->csr_io_req_addr = addr; - vortex_->csr_io_req_rw = 1; - vortex_->csr_io_req_data = value; - vortex_->csr_io_rsp_ready = 0; + vortex_->csr_req_valid = 1; + vortex_->csr_req_coreid = core_id; + vortex_->csr_req_addr = addr; + vortex_->csr_req_rw = 1; + vortex_->csr_req_data = value; + vortex_->csr_rsp_ready = 0; csr_req_active_ = true; } void Simulator::get_csr(int core_id, int addr, unsigned *value) { - vortex_->csr_io_req_valid = 1; - vortex_->csr_io_req_coreid = core_id; - vortex_->csr_io_req_addr = addr; - vortex_->csr_io_req_rw = 0; - vortex_->csr_io_rsp_ready = 1; + vortex_->csr_req_valid = 1; + vortex_->csr_req_coreid = core_id; + vortex_->csr_req_addr = addr; + vortex_->csr_req_rw = 0; + vortex_->csr_rsp_ready = 1; csr_rsp_value_ = value; diff --git a/hw/simulate/simulator.h b/hw/simulate/simulator.h index 2532b99a..626474bd 100644 --- a/hw/simulate/simulator.h +++ b/hw/simulate/simulator.h @@ -30,14 +30,12 @@ public: bool is_busy() const; - bool snp_req_active() const; bool csr_req_active() const; void reset(); void step(); void wait(uint32_t cycles); - void flush_caches(uint32_t mem_addr, uint32_t size); void set_csr(int core_id, int addr, unsigned value); void get_csr(int core_id, int addr, unsigned *value); @@ -62,20 +60,13 @@ private: void eval_dram_bus(); void eval_io_bus(); void eval_csr_bus(); - void eval_snp_bus(); std::list dram_rsp_vec_; bool dram_rsp_active_; bool dram_rsp_ready_; - bool snp_req_ready_; - bool csr_io_req_ready_; - - bool snp_req_active_; + bool csr_req_ready_; bool csr_req_active_; - - uint32_t snp_req_size_; - uint32_t pending_snp_reqs_; uint32_t* csr_rsp_value_; RAM *ram_;