From c41855ee42fc9f41dd7974c85cc49a267af6a88b Mon Sep 17 00:00:00 2001
From: Blaise Tine
Date: Mon, 16 Mar 2020 08:02:23 -0400
Subject: [PATCH] added driver sim

---
 driver/sim/Makefile      |  44 +++
 driver/sim/valgrind.log  | 767 +++++++++++++++++++++++++++++++++++++++
 driver/sim/vx_driver.cpp | 270 ++++++++++++++
 driver/sw/Makefile       |  71 ++++
 driver/sw/test.cpp       | 142 ++++++++
 driver/sw/utils.cpp      |  71 ++++
 driver/sw/utils.h        |  15 +
 driver/sw/vx_driver.c    | 259 +++++++++++++
 driver/sw/vx_driver.h    |  47 +++
 emulator/include/mem.h   |   4 +-
 simX/core.cpp            |  14 +-
 simX/include/mem.h       | 140 +------
 simX/mem.cpp             | 114 ++++++
 simX/simX.cpp            |  49 ++-
 14 files changed, 1837 insertions(+), 170 deletions(-)
 create mode 100644 driver/sim/Makefile
 create mode 100644 driver/sim/valgrind.log
 create mode 100644 driver/sim/vx_driver.cpp
 create mode 100644 driver/sw/Makefile
 create mode 100644 driver/sw/test.cpp
 create mode 100644 driver/sw/utils.cpp
 create mode 100644 driver/sw/utils.h
 create mode 100644 driver/sw/vx_driver.c
 create mode 100644 driver/sw/vx_driver.h

diff --git a/driver/sim/Makefile b/driver/sim/Makefile
new file mode 100644
index 00000000..c6351c41
--- /dev/null
+++ b/driver/sim/Makefile
@@ -0,0 +1,44 @@
+# Builds $(PROJECT) with Verilator and the 'test' program that links against it.
+CXXFLAGS += -O2 -g -Wall -Wextra -pedantic -Wfatal-errors
+
+CXXFLAGS += -I../sw
+
+LDFLAGS += -L./obj_dir
+
+DRV_CFLAGS += -O2 -g -Wall -Wextra -pedantic -Wfatal-errors
+
+DRV_CFLAGS += -I../../sw
+
+DRV_CFLAGS += -fPIC
+
+DRV_LDFLAGS += -shared -pthread
+
+DRV_SRCS = vx_driver.cpp ../../simX/args.cpp ../../simX/mem.cpp ../../simX/core.cpp ../../simX/instruction.cpp ../../simX/enc.cpp ../../simX/util.cpp
+
+RTL_TOP = ../../simX/cache_simX.v
+
+RTL_INCLUDE = -I../../rtl/shared_memory -I../../rtl/cache -I../../rtl/interfaces -Isimulate -I../../rtl
+
+VL_FLAGS += -Wno-UNOPTFLAT -Wno-WIDTH
+
+VL_FLAGS += --trace -DVL_DEBUG=1
+
+PROJECT = libvxdrv_sim.so
+.PHONY: all clean
+all: $(PROJECT) test
+# Prerequisites are the driver sources handed to Verilator below (DRV_SRCS).
+$(PROJECT): $(DRV_SRCS)
+	verilator --exe --cc $(RTL_TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(DRV_SRCS) -CFLAGS '$(DRV_CFLAGS)' -LDFLAGS '$(DRV_LDFLAGS)' -o $(PROJECT)
+	make -j -C obj_dir -f Vcache_simX.mk OPT='-DVL_DEBUG' VL_DEBUG=1 DVL_DEBUG=1
+
+test: $(PROJECT) test.o utils.o
+	$(CXX) $(CXXFLAGS) test.o utils.o $(LDFLAGS) -lvxdrv_sim -o $@
+
+utils.o: ../sw/utils.cpp
+	$(CXX) $(CXXFLAGS) -c ../sw/utils.cpp -o $@
+
+test.o: ../sw/test.cpp
+	$(CXX) $(CXXFLAGS) -c ../sw/test.cpp -o $@
+
+clean:
+	rm -rf $(PROJECT) test *.so *.o obj_dir
\ No newline at end of file
diff --git a/driver/sim/valgrind.log b/driver/sim/valgrind.log
new file mode 100644
index 00000000..e7641f00
--- /dev/null
+++ b/driver/sim/valgrind.log
@@ -0,0 +1,767 @@
+==5409== Memcheck, a memory error detector
+==5409== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
+==5409== Using Valgrind-3.13.0 and LibVEX; rerun with -h for copyright info
+==5409== Command: ./test -f ../../benchmarks/opencl/vecadd/vecadd.hex
+==5409== 
+==5409== error calling PR_SET_PTRACER, vgdb might block
+Device ready...
+Device running...
+==5409== +==5409== Process terminating with default action of signal 2 (SIGINT) +==5409== at 0x58A19CE: nanosleep (nanosleep.c:28) +==5409== by 0x4EE34CA: void std::this_thread::sleep_for >(std::chrono::duration > const&) (thread:373) +==5409== by 0x4EE2AF7: vx_device::wait(long long) (vx_driver.cpp:130) +==5409== by 0x4EE1DEE: vx_ready_wait (vx_driver.cpp:267) +==5409== by 0x1094CE: main (test.cpp:133) +==5409== +==5409== HEAP SUMMARY: +==5409== in use at exit: 3,165,230 bytes in 1,401 blocks +==5409== total heap usage: 8,550 allocs, 7,149 frees, 7,258,340 bytes allocated +==5409== +==5409== 4 bytes in 1 blocks are still reachable in loss record 1 of 54 +==5409== at 0x4C2FB0F: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x585A9B9: strdup (strdup.c:42) +==5409== by 0x4EEB465: VerilatedModule::VerilatedModule(char const*) (verilated.cpp:1566) +==5409== by 0x4EFC7FB: Vcache_simX::Vcache_simX(char const*) (Vcache_simX.cpp:45) +==5409== by 0x4EB8589: Harp::Core::Core(Harp::ArchDef const&, Harp::Decoder&, Harp::MemoryUnit&, unsigned int) (core.cpp:140) +==5409== by 0x4EE2C64: vx_device::run() (vx_driver.cpp:141) +==5409== by 0x4EE2E31: vx_device::thread_proc() (vx_driver.cpp:163) +==5409== by 0x4EE2EA9: vx_device::__thread_proc__(vx_device*) (vx_driver.cpp:175) +==5409== by 0x4EE37C0: void std::__invoke_impl(std::__invoke_other, void (*&&)(vx_device*), vx_device*&&) (invoke.h:60) +==5409== by 0x4EE2F96: std::__invoke_result::type std::__invoke(void (*&&)(vx_device*), vx_device*&&) (invoke.h:95) +==5409== by 0x4EE3D2E: decltype (__invoke((_S_declval<0ul>)(), (_S_declval<1ul>)())) std::thread::_Invoker >::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (thread:234) +==5409== by 0x4EE3CCF: std::thread::_Invoker >::operator()() (thread:243) +==5409== +==5409== 8 bytes in 1 blocks are still reachable in loss record 2 of 54 +==5409== at 0x4C3017F: operator new(unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== 
by 0x4EC5233: __gnu_cxx::new_allocator::allocate(unsigned long, void const*) (new_allocator.h:111) +==5409== by 0x4EC2DA0: std::allocator_traits >::allocate(std::allocator&, unsigned long) (alloc_traits.h:436) +==5409== by 0x4EC0247: std::_Bvector_base >::_M_allocate(unsigned long) (stl_bvector.h:492) +==5409== by 0x4EBE39E: std::vector >::_M_initialize(unsigned long) (stl_bvector.h:1094) +==5409== by 0x4EBEE5F: std::vector >::vector(unsigned long, bool const&, std::allocator const&) (stl_bvector.h:597) +==5409== by 0x4EBD072: std::vector >::vector(unsigned long, std::allocator const&) (stl_bvector.h:590) +==5409== by 0x4EB88EB: Harp::Core::getCacheDelays(Harp::trace_inst_t*) (core.cpp:224) +==5409== by 0x4EB9444: Harp::Core::fetch() (core.cpp:423) +==5409== by 0x4EB8843: Harp::Core::step() (core.cpp:205) +==5409== by 0x4EE2CD3: vx_device::run() (vx_driver.cpp:146) +==5409== by 0x4EE2E31: vx_device::thread_proc() (vx_driver.cpp:163) +==5409== +==5409== 24 bytes in 1 blocks are still reachable in loss record 3 of 54 +==5409== at 0x4C3017F: operator new(unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x4EF3D73: __gnu_cxx::new_allocator<_IO_FILE*>::allocate(unsigned long, void const*) (new_allocator.h:111) +==5409== by 0x4EF2BAF: std::allocator_traits >::allocate(std::allocator<_IO_FILE*>&, unsigned long) (alloc_traits.h:436) +==5409== by 0x4EF0D8F: std::_Vector_base<_IO_FILE*, std::allocator<_IO_FILE*> >::_M_allocate(unsigned long) (stl_vector.h:172) +==5409== by 0x4EEF66B: std::vector<_IO_FILE*, std::allocator<_IO_FILE*> >::_M_default_append(unsigned long) (vector.tcc:571) +==5409== by 0x4EEE3B4: std::vector<_IO_FILE*, std::allocator<_IO_FILE*> >::resize(unsigned long) (stl_vector.h:692) +==5409== by 0x4EEC826: VerilatedImp::VerilatedImp() (verilated_imp.h:204) +==5409== by 0x4EEC229: __static_initialization_and_destruction_0(int, int) (verilated.cpp:45) +==5409== by 0x4EEC25F: _GLOBAL__sub_I_verilated.cpp (verilated.cpp:1719) 
+==5409== by 0x4010732: call_init (dl-init.c:72) +==5409== by 0x4010732: _dl_init (dl-init.c:119) +==5409== by 0x40010C9: ??? (in /lib/x86_64-linux-gnu/ld-2.27.so) +==5409== by 0x2: ??? +==5409== +==5409== 24 bytes in 1 blocks are still reachable in loss record 4 of 54 +==5409== at 0x4C3017F: operator new(unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x4EE3802: std::unique_ptr > std::thread::_S_make_state > >(std::thread::_Invoker >&&) (thread:197) +==5409== by 0x4EE3037: std::thread::thread(void (&)(vx_device*), vx_device*&&) (thread:126) +==5409== by 0x4EE282A: vx_device::vx_device() (vx_driver.cpp:85) +==5409== by 0x4EE1AF8: vx_dev_open (vx_driver.cpp:189) +==5409== by 0x109458: main (test.cpp:118) +==5409== +==5409== 29 bytes in 1 blocks are still reachable in loss record 5 of 54 +==5409== at 0x4C2FB0F: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x585A9B9: strdup (strdup.c:42) +==5409== by 0x4EEB465: VerilatedModule::VerilatedModule(char const*) (verilated.cpp:1566) +==5409== by 0x4F089D5: Vcache_simX_VX_dcache_request_inter::Vcache_simX_VX_dcache_request_inter(char const*) (Vcache_simX_VX_dcache_request_inter.cpp:15) +==5409== by 0x4F634EE: Vcache_simX__Syms::Vcache_simX__Syms(Vcache_simX*, char const*) (Vcache_simX__Syms.cpp:28) +==5409== by 0x4EFC82E: Vcache_simX::Vcache_simX(char const*) (Vcache_simX.cpp:46) +==5409== by 0x4EB8589: Harp::Core::Core(Harp::ArchDef const&, Harp::Decoder&, Harp::MemoryUnit&, unsigned int) (core.cpp:140) +==5409== by 0x4EE2C64: vx_device::run() (vx_driver.cpp:141) +==5409== by 0x4EE2E31: vx_device::thread_proc() (vx_driver.cpp:163) +==5409== by 0x4EE2EA9: vx_device::__thread_proc__(vx_device*) (vx_driver.cpp:175) +==5409== by 0x4EE37C0: void std::__invoke_impl(std::__invoke_other, void (*&&)(vx_device*), vx_device*&&) (invoke.h:60) +==5409== by 0x4EE2F96: std::__invoke_result::type std::__invoke(void (*&&)(vx_device*), vx_device*&&) (invoke.h:95) +==5409== 
+==5409== 31 bytes in 1 blocks are still reachable in loss record 6 of 54 +==5409== at 0x4C2FB0F: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x585A9B9: strdup (strdup.c:42) +==5409== by 0x4EEB465: VerilatedModule::VerilatedModule(char const*) (verilated.cpp:1566) +==5409== by 0x4F088F9: Vcache_simX_VX_dram_req_rsp_inter__N4_NB4::Vcache_simX_VX_dram_req_rsp_inter__N4_NB4(char const*) (Vcache_simX_VX_dram_req_rsp_inter__N4_NB4.cpp:15) +==5409== by 0x4F6351F: Vcache_simX__Syms::Vcache_simX__Syms(Vcache_simX*, char const*) (Vcache_simX__Syms.cpp:28) +==5409== by 0x4EFC82E: Vcache_simX::Vcache_simX(char const*) (Vcache_simX.cpp:46) +==5409== by 0x4EB8589: Harp::Core::Core(Harp::ArchDef const&, Harp::Decoder&, Harp::MemoryUnit&, unsigned int) (core.cpp:140) +==5409== by 0x4EE2C64: vx_device::run() (vx_driver.cpp:141) +==5409== by 0x4EE2E31: vx_device::thread_proc() (vx_driver.cpp:163) +==5409== by 0x4EE2EA9: vx_device::__thread_proc__(vx_device*) (vx_driver.cpp:175) +==5409== by 0x4EE37C0: void std::__invoke_impl(std::__invoke_other, void (*&&)(vx_device*), vx_device*&&) (invoke.h:60) +==5409== by 0x4EE2F96: std::__invoke_result::type std::__invoke(void (*&&)(vx_device*), vx_device*&&) (invoke.h:95) +==5409== +==5409== 38 bytes in 1 blocks are still reachable in loss record 7 of 54 +==5409== at 0x4C2FB0F: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x585A9B9: strdup (strdup.c:42) +==5409== by 0x4EEB465: VerilatedModule::VerilatedModule(char const*) (verilated.cpp:1566) +==5409== by 0x4F088F9: Vcache_simX_VX_dram_req_rsp_inter__N4_NB4::Vcache_simX_VX_dram_req_rsp_inter__N4_NB4(char const*) (Vcache_simX_VX_dram_req_rsp_inter__N4_NB4.cpp:15) +==5409== by 0x4F63550: Vcache_simX__Syms::Vcache_simX__Syms(Vcache_simX*, char const*) (Vcache_simX__Syms.cpp:28) +==5409== by 0x4EFC82E: Vcache_simX::Vcache_simX(char const*) (Vcache_simX.cpp:46) +==5409== by 0x4EB8589: Harp::Core::Core(Harp::ArchDef const&, 
Harp::Decoder&, Harp::MemoryUnit&, unsigned int) (core.cpp:140) +==5409== by 0x4EE2C64: vx_device::run() (vx_driver.cpp:141) +==5409== by 0x4EE2E31: vx_device::thread_proc() (vx_driver.cpp:163) +==5409== by 0x4EE2EA9: vx_device::__thread_proc__(vx_device*) (vx_driver.cpp:175) +==5409== by 0x4EE37C0: void std::__invoke_impl(std::__invoke_other, void (*&&)(vx_device*), vx_device*&&) (invoke.h:60) +==5409== by 0x4EE2F96: std::__invoke_result::type std::__invoke(void (*&&)(vx_device*), vx_device*&&) (invoke.h:95) +==5409== +==5409== 64 bytes in 1 blocks are still reachable in loss record 8 of 54 +==5409== at 0x4C3017F: operator new(unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x4EF41DF: __gnu_cxx::new_allocator::allocate(unsigned long, void const*) (new_allocator.h:111) +==5409== by 0x4EF3C59: std::allocator_traits >::allocate(std::allocator&, unsigned long) (alloc_traits.h:436) +==5409== by 0x4EF29F1: std::_Deque_base >::_M_allocate_map(unsigned long) (stl_deque.h:616) +==5409== by 0x4EF0A57: std::_Deque_base >::_M_initialize_map(unsigned long) (stl_deque.h:689) +==5409== by 0x4EEF433: std::_Deque_base >::_Deque_base() (stl_deque.h:492) +==5409== by 0x4EEE2D1: std::deque >::deque() (stl_deque.h:888) +==5409== by 0x4EEC7F6: VerilatedImp::VerilatedImp() (verilated_imp.h:203) +==5409== by 0x4EEC229: __static_initialization_and_destruction_0(int, int) (verilated.cpp:45) +==5409== by 0x4EEC25F: _GLOBAL__sub_I_verilated.cpp (verilated.cpp:1719) +==5409== by 0x4010732: call_init (dl-init.c:72) +==5409== by 0x4010732: _dl_init (dl-init.c:119) +==5409== by 0x40010C9: ??? 
(in /lib/x86_64-linux-gnu/ld-2.27.so) +==5409== +==5409== 64 bytes in 1 blocks are still reachable in loss record 9 of 54 +==5409== at 0x4C3089F: operator new[](unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x4EEAF1E: Verilated::catName(char const*, char const*) (verilated.cpp:1451) +==5409== by 0x4F63576: Vcache_simX__Syms::Vcache_simX__Syms(Vcache_simX*, char const*) (Vcache_simX__Syms.cpp:28) +==5409== by 0x4EFC82E: Vcache_simX::Vcache_simX(char const*) (Vcache_simX.cpp:46) +==5409== by 0x4EB8589: Harp::Core::Core(Harp::ArchDef const&, Harp::Decoder&, Harp::MemoryUnit&, unsigned int) (core.cpp:140) +==5409== by 0x4EE2C64: vx_device::run() (vx_driver.cpp:141) +==5409== by 0x4EE2E31: vx_device::thread_proc() (vx_driver.cpp:163) +==5409== by 0x4EE2EA9: vx_device::__thread_proc__(vx_device*) (vx_driver.cpp:175) +==5409== by 0x4EE37C0: void std::__invoke_impl(std::__invoke_other, void (*&&)(vx_device*), vx_device*&&) (invoke.h:60) +==5409== by 0x4EE2F96: std::__invoke_result::type std::__invoke(void (*&&)(vx_device*), vx_device*&&) (invoke.h:95) +==5409== by 0x4EE3D2E: decltype (__invoke((_S_declval<0ul>)(), (_S_declval<1ul>)())) std::thread::_Invoker >::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (thread:234) +==5409== by 0x4EE3CCF: std::thread::_Invoker >::operator()() (thread:243) +==5409== +==5409== 64 bytes in 1 blocks are still reachable in loss record 10 of 54 +==5409== at 0x4C2FB0F: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x585A9B9: strdup (strdup.c:42) +==5409== by 0x4EEB465: VerilatedModule::VerilatedModule(char const*) (verilated.cpp:1566) +==5409== by 0x4F1C58B: Vcache_simX_VX_Cache_Bank__pi8::Vcache_simX_VX_Cache_Bank__pi8(char const*) (Vcache_simX_VX_Cache_Bank__pi8.cpp:15) +==5409== by 0x4F63581: Vcache_simX__Syms::Vcache_simX__Syms(Vcache_simX*, char const*) (Vcache_simX__Syms.cpp:28) +==5409== by 0x4EFC82E: Vcache_simX::Vcache_simX(char const*) (Vcache_simX.cpp:46) 
+==5409== by 0x4EB8589: Harp::Core::Core(Harp::ArchDef const&, Harp::Decoder&, Harp::MemoryUnit&, unsigned int) (core.cpp:140) +==5409== by 0x4EE2C64: vx_device::run() (vx_driver.cpp:141) +==5409== by 0x4EE2E31: vx_device::thread_proc() (vx_driver.cpp:163) +==5409== by 0x4EE2EA9: vx_device::__thread_proc__(vx_device*) (vx_driver.cpp:175) +==5409== by 0x4EE37C0: void std::__invoke_impl(std::__invoke_other, void (*&&)(vx_device*), vx_device*&&) (invoke.h:60) +==5409== by 0x4EE2F96: std::__invoke_result::type std::__invoke(void (*&&)(vx_device*), vx_device*&&) (invoke.h:95) +==5409== +==5409== 64 bytes in 1 blocks are still reachable in loss record 11 of 54 +==5409== at 0x4C2FB0F: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x585A9B9: strdup (strdup.c:42) +==5409== by 0x4EEB465: VerilatedModule::VerilatedModule(char const*) (verilated.cpp:1566) +==5409== by 0x4F1C58B: Vcache_simX_VX_Cache_Bank__pi8::Vcache_simX_VX_Cache_Bank__pi8(char const*) (Vcache_simX_VX_Cache_Bank__pi8.cpp:15) +==5409== by 0x4F635B2: Vcache_simX__Syms::Vcache_simX__Syms(Vcache_simX*, char const*) (Vcache_simX__Syms.cpp:28) +==5409== by 0x4EFC82E: Vcache_simX::Vcache_simX(char const*) (Vcache_simX.cpp:46) +==5409== by 0x4EB8589: Harp::Core::Core(Harp::ArchDef const&, Harp::Decoder&, Harp::MemoryUnit&, unsigned int) (core.cpp:140) +==5409== by 0x4EE2C64: vx_device::run() (vx_driver.cpp:141) +==5409== by 0x4EE2E31: vx_device::thread_proc() (vx_driver.cpp:163) +==5409== by 0x4EE2EA9: vx_device::__thread_proc__(vx_device*) (vx_driver.cpp:175) +==5409== by 0x4EE37C0: void std::__invoke_impl(std::__invoke_other, void (*&&)(vx_device*), vx_device*&&) (invoke.h:60) +==5409== by 0x4EE2F96: std::__invoke_result::type std::__invoke(void (*&&)(vx_device*), vx_device*&&) (invoke.h:95) +==5409== +==5409== 64 bytes in 1 blocks are still reachable in loss record 12 of 54 +==5409== at 0x4C2FB0F: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x585A9B9: 
strdup (strdup.c:42) +==5409== by 0x4EEB465: VerilatedModule::VerilatedModule(char const*) (verilated.cpp:1566) +==5409== by 0x4F1C58B: Vcache_simX_VX_Cache_Bank__pi8::Vcache_simX_VX_Cache_Bank__pi8(char const*) (Vcache_simX_VX_Cache_Bank__pi8.cpp:15) +==5409== by 0x4F635E3: Vcache_simX__Syms::Vcache_simX__Syms(Vcache_simX*, char const*) (Vcache_simX__Syms.cpp:28) +==5409== by 0x4EFC82E: Vcache_simX::Vcache_simX(char const*) (Vcache_simX.cpp:46) +==5409== by 0x4EB8589: Harp::Core::Core(Harp::ArchDef const&, Harp::Decoder&, Harp::MemoryUnit&, unsigned int) (core.cpp:140) +==5409== by 0x4EE2C64: vx_device::run() (vx_driver.cpp:141) +==5409== by 0x4EE2E31: vx_device::thread_proc() (vx_driver.cpp:163) +==5409== by 0x4EE2EA9: vx_device::__thread_proc__(vx_device*) (vx_driver.cpp:175) +==5409== by 0x4EE37C0: void std::__invoke_impl(std::__invoke_other, void (*&&)(vx_device*), vx_device*&&) (invoke.h:60) +==5409== by 0x4EE2F96: std::__invoke_result::type std::__invoke(void (*&&)(vx_device*), vx_device*&&) (invoke.h:95) +==5409== +==5409== 64 bytes in 1 blocks are still reachable in loss record 13 of 54 +==5409== at 0x4C2FB0F: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x585A9B9: strdup (strdup.c:42) +==5409== by 0x4EEB465: VerilatedModule::VerilatedModule(char const*) (verilated.cpp:1566) +==5409== by 0x4F1C58B: Vcache_simX_VX_Cache_Bank__pi8::Vcache_simX_VX_Cache_Bank__pi8(char const*) (Vcache_simX_VX_Cache_Bank__pi8.cpp:15) +==5409== by 0x4F63614: Vcache_simX__Syms::Vcache_simX__Syms(Vcache_simX*, char const*) (Vcache_simX__Syms.cpp:28) +==5409== by 0x4EFC82E: Vcache_simX::Vcache_simX(char const*) (Vcache_simX.cpp:46) +==5409== by 0x4EB8589: Harp::Core::Core(Harp::ArchDef const&, Harp::Decoder&, Harp::MemoryUnit&, unsigned int) (core.cpp:140) +==5409== by 0x4EE2C64: vx_device::run() (vx_driver.cpp:141) +==5409== by 0x4EE2E31: vx_device::thread_proc() (vx_driver.cpp:163) +==5409== by 0x4EE2EA9: vx_device::__thread_proc__(vx_device*) 
(vx_driver.cpp:175) +==5409== by 0x4EE37C0: void std::__invoke_impl(std::__invoke_other, void (*&&)(vx_device*), vx_device*&&) (invoke.h:60) +==5409== by 0x4EE2F96: std::__invoke_result::type std::__invoke(void (*&&)(vx_device*), vx_device*&&) (invoke.h:95) +==5409== +==5409== 64 bytes in 1 blocks are still reachable in loss record 14 of 54 +==5409== at 0x4C2FB0F: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x585A9B9: strdup (strdup.c:42) +==5409== by 0x4EEB465: VerilatedModule::VerilatedModule(char const*) (verilated.cpp:1566) +==5409== by 0x4F08AC5: Vcache_simX_VX_Cache_Bank__pi9::Vcache_simX_VX_Cache_Bank__pi9(char const*) (Vcache_simX_VX_Cache_Bank__pi9.cpp:15) +==5409== by 0x4F63645: Vcache_simX__Syms::Vcache_simX__Syms(Vcache_simX*, char const*) (Vcache_simX__Syms.cpp:28) +==5409== by 0x4EFC82E: Vcache_simX::Vcache_simX(char const*) (Vcache_simX.cpp:46) +==5409== by 0x4EB8589: Harp::Core::Core(Harp::ArchDef const&, Harp::Decoder&, Harp::MemoryUnit&, unsigned int) (core.cpp:140) +==5409== by 0x4EE2C64: vx_device::run() (vx_driver.cpp:141) +==5409== by 0x4EE2E31: vx_device::thread_proc() (vx_driver.cpp:163) +==5409== by 0x4EE2EA9: vx_device::__thread_proc__(vx_device*) (vx_driver.cpp:175) +==5409== by 0x4EE37C0: void std::__invoke_impl(std::__invoke_other, void (*&&)(vx_device*), vx_device*&&) (invoke.h:60) +==5409== by 0x4EE2F96: std::__invoke_result::type std::__invoke(void (*&&)(vx_device*), vx_device*&&) (invoke.h:95) +==5409== +==5409== 64 bytes in 1 blocks are still reachable in loss record 15 of 54 +==5409== at 0x4C2FB0F: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x585A9B9: strdup (strdup.c:42) +==5409== by 0x4EEB465: VerilatedModule::VerilatedModule(char const*) (verilated.cpp:1566) +==5409== by 0x4F08AC5: Vcache_simX_VX_Cache_Bank__pi9::Vcache_simX_VX_Cache_Bank__pi9(char const*) (Vcache_simX_VX_Cache_Bank__pi9.cpp:15) +==5409== by 0x4F63676: 
Vcache_simX__Syms::Vcache_simX__Syms(Vcache_simX*, char const*) (Vcache_simX__Syms.cpp:28) +==5409== by 0x4EFC82E: Vcache_simX::Vcache_simX(char const*) (Vcache_simX.cpp:46) +==5409== by 0x4EB8589: Harp::Core::Core(Harp::ArchDef const&, Harp::Decoder&, Harp::MemoryUnit&, unsigned int) (core.cpp:140) +==5409== by 0x4EE2C64: vx_device::run() (vx_driver.cpp:141) +==5409== by 0x4EE2E31: vx_device::thread_proc() (vx_driver.cpp:163) +==5409== by 0x4EE2EA9: vx_device::__thread_proc__(vx_device*) (vx_driver.cpp:175) +==5409== by 0x4EE37C0: void std::__invoke_impl(std::__invoke_other, void (*&&)(vx_device*), vx_device*&&) (invoke.h:60) +==5409== by 0x4EE2F96: std::__invoke_result::type std::__invoke(void (*&&)(vx_device*), vx_device*&&) (invoke.h:95) +==5409== +==5409== 64 bytes in 1 blocks are still reachable in loss record 16 of 54 +==5409== at 0x4C2FB0F: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x585A9B9: strdup (strdup.c:42) +==5409== by 0x4EEB465: VerilatedModule::VerilatedModule(char const*) (verilated.cpp:1566) +==5409== by 0x4F08AC5: Vcache_simX_VX_Cache_Bank__pi9::Vcache_simX_VX_Cache_Bank__pi9(char const*) (Vcache_simX_VX_Cache_Bank__pi9.cpp:15) +==5409== by 0x4F636A7: Vcache_simX__Syms::Vcache_simX__Syms(Vcache_simX*, char const*) (Vcache_simX__Syms.cpp:28) +==5409== by 0x4EFC82E: Vcache_simX::Vcache_simX(char const*) (Vcache_simX.cpp:46) +==5409== by 0x4EB8589: Harp::Core::Core(Harp::ArchDef const&, Harp::Decoder&, Harp::MemoryUnit&, unsigned int) (core.cpp:140) +==5409== by 0x4EE2C64: vx_device::run() (vx_driver.cpp:141) +==5409== by 0x4EE2E31: vx_device::thread_proc() (vx_driver.cpp:163) +==5409== by 0x4EE2EA9: vx_device::__thread_proc__(vx_device*) (vx_driver.cpp:175) +==5409== by 0x4EE37C0: void std::__invoke_impl(std::__invoke_other, void (*&&)(vx_device*), vx_device*&&) (invoke.h:60) +==5409== by 0x4EE2F96: std::__invoke_result::type std::__invoke(void (*&&)(vx_device*), vx_device*&&) (invoke.h:95) +==5409== +==5409== 64 
bytes in 1 blocks are still reachable in loss record 17 of 54 +==5409== at 0x4C2FB0F: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x585A9B9: strdup (strdup.c:42) +==5409== by 0x4EEB465: VerilatedModule::VerilatedModule(char const*) (verilated.cpp:1566) +==5409== by 0x4F08AC5: Vcache_simX_VX_Cache_Bank__pi9::Vcache_simX_VX_Cache_Bank__pi9(char const*) (Vcache_simX_VX_Cache_Bank__pi9.cpp:15) +==5409== by 0x4F636D8: Vcache_simX__Syms::Vcache_simX__Syms(Vcache_simX*, char const*) (Vcache_simX__Syms.cpp:28) +==5409== by 0x4EFC82E: Vcache_simX::Vcache_simX(char const*) (Vcache_simX.cpp:46) +==5409== by 0x4EB8589: Harp::Core::Core(Harp::ArchDef const&, Harp::Decoder&, Harp::MemoryUnit&, unsigned int) (core.cpp:140) +==5409== by 0x4EE2C64: vx_device::run() (vx_driver.cpp:141) +==5409== by 0x4EE2E31: vx_device::thread_proc() (vx_driver.cpp:163) +==5409== by 0x4EE2EA9: vx_device::__thread_proc__(vx_device*) (vx_driver.cpp:175) +==5409== by 0x4EE37C0: void std::__invoke_impl(std::__invoke_other, void (*&&)(vx_device*), vx_device*&&) (invoke.h:60) +==5409== by 0x4EE2F96: std::__invoke_result::type std::__invoke(void (*&&)(vx_device*), vx_device*&&) (invoke.h:95) +==5409== +==5409== 128 bytes in 1 blocks are still reachable in loss record 18 of 54 +==5409== at 0x4C2FB0F: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x4EB80C5: Harp::Core::Core(Harp::ArchDef const&, Harp::Decoder&, Harp::MemoryUnit&, unsigned int) (core.cpp:125) +==5409== by 0x4EE2C64: vx_device::run() (vx_driver.cpp:141) +==5409== by 0x4EE2E31: vx_device::thread_proc() (vx_driver.cpp:163) +==5409== by 0x4EE2EA9: vx_device::__thread_proc__(vx_device*) (vx_driver.cpp:175) +==5409== by 0x4EE37C0: void std::__invoke_impl(std::__invoke_other, void (*&&)(vx_device*), vx_device*&&) (invoke.h:60) +==5409== by 0x4EE2F96: std::__invoke_result::type std::__invoke(void (*&&)(vx_device*), vx_device*&&) (invoke.h:95) +==5409== by 0x4EE3D2E: decltype 
(__invoke((_S_declval<0ul>)(), (_S_declval<1ul>)())) std::thread::_Invoker >::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (thread:234) +==5409== by 0x4EE3CCF: std::thread::_Invoker >::operator()() (thread:243) +==5409== by 0x4EE3C9F: std::thread::_State_impl > >::_M_run() (thread:186) +==5409== by 0x52D966E: ??? (in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.25) +==5409== by 0x5F536DA: start_thread (pthread_create.c:463) +==5409== +==5409== 128 bytes in 1 blocks are still reachable in loss record 19 of 54 +==5409== at 0x4C2FB0F: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x4EB8267: Harp::Core::Core(Harp::ArchDef const&, Harp::Decoder&, Harp::MemoryUnit&, unsigned int) (core.cpp:126) +==5409== by 0x4EE2C64: vx_device::run() (vx_driver.cpp:141) +==5409== by 0x4EE2E31: vx_device::thread_proc() (vx_driver.cpp:163) +==5409== by 0x4EE2EA9: vx_device::__thread_proc__(vx_device*) (vx_driver.cpp:175) +==5409== by 0x4EE37C0: void std::__invoke_impl(std::__invoke_other, void (*&&)(vx_device*), vx_device*&&) (invoke.h:60) +==5409== by 0x4EE2F96: std::__invoke_result::type std::__invoke(void (*&&)(vx_device*), vx_device*&&) (invoke.h:95) +==5409== by 0x4EE3D2E: decltype (__invoke((_S_declval<0ul>)(), (_S_declval<1ul>)())) std::thread::_Invoker >::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (thread:234) +==5409== by 0x4EE3CCF: std::thread::_Invoker >::operator()() (thread:243) +==5409== by 0x4EE3C9F: std::thread::_State_impl > >::_M_run() (thread:186) +==5409== by 0x52D966E: ??? 
(in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.25) +==5409== by 0x5F536DA: start_thread (pthread_create.c:463) +==5409== +==5409== 128 bytes in 1 blocks are still reachable in loss record 20 of 54 +==5409== at 0x4C2FB0F: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x4EBA7E0: Harp::Core::writeback() (core.cpp:648) +==5409== by 0x4EB8807: Harp::Core::step() (core.cpp:195) +==5409== by 0x4EE2CD3: vx_device::run() (vx_driver.cpp:146) +==5409== by 0x4EE2E31: vx_device::thread_proc() (vx_driver.cpp:163) +==5409== by 0x4EE2EA9: vx_device::__thread_proc__(vx_device*) (vx_driver.cpp:175) +==5409== by 0x4EE37C0: void std::__invoke_impl(std::__invoke_other, void (*&&)(vx_device*), vx_device*&&) (invoke.h:60) +==5409== by 0x4EE2F96: std::__invoke_result::type std::__invoke(void (*&&)(vx_device*), vx_device*&&) (invoke.h:95) +==5409== by 0x4EE3D2E: decltype (__invoke((_S_declval<0ul>)(), (_S_declval<1ul>)())) std::thread::_Invoker >::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (thread:234) +==5409== by 0x4EE3CCF: std::thread::_Invoker >::operator()() (thread:243) +==5409== by 0x4EE3C9F: std::thread::_State_impl > >::_M_run() (thread:186) +==5409== by 0x52D966E: ??? 
(in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.25) +==5409== +==5409== 128 bytes in 1 blocks are still reachable in loss record 21 of 54 +==5409== at 0x4C2FB0F: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x4EBA5DB: Harp::Core::execute_unit() (core.cpp:611) +==5409== by 0x4EB881F: Harp::Core::step() (core.cpp:199) +==5409== by 0x4EE2CD3: vx_device::run() (vx_driver.cpp:146) +==5409== by 0x4EE2E31: vx_device::thread_proc() (vx_driver.cpp:163) +==5409== by 0x4EE2EA9: vx_device::__thread_proc__(vx_device*) (vx_driver.cpp:175) +==5409== by 0x4EE37C0: void std::__invoke_impl(std::__invoke_other, void (*&&)(vx_device*), vx_device*&&) (invoke.h:60) +==5409== by 0x4EE2F96: std::__invoke_result::type std::__invoke(void (*&&)(vx_device*), vx_device*&&) (invoke.h:95) +==5409== by 0x4EE3D2E: decltype (__invoke((_S_declval<0ul>)(), (_S_declval<1ul>)())) std::thread::_Invoker >::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (thread:234) +==5409== by 0x4EE3CCF: std::thread::_Invoker >::operator()() (thread:243) +==5409== by 0x4EE3C9F: std::thread::_State_impl > >::_M_run() (thread:186) +==5409== by 0x52D966E: ??? 
(in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.25) +==5409== +==5409== 128 bytes in 1 blocks are still reachable in loss record 22 of 54 +==5409== at 0x4C2FB0F: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x4EB9BB8: Harp::Core::scheduler() (core.cpp:491) +==5409== by 0x4EB882B: Harp::Core::step() (core.cpp:201) +==5409== by 0x4EE2CD3: vx_device::run() (vx_driver.cpp:146) +==5409== by 0x4EE2E31: vx_device::thread_proc() (vx_driver.cpp:163) +==5409== by 0x4EE2EA9: vx_device::__thread_proc__(vx_device*) (vx_driver.cpp:175) +==5409== by 0x4EE37C0: void std::__invoke_impl(std::__invoke_other, void (*&&)(vx_device*), vx_device*&&) (invoke.h:60) +==5409== by 0x4EE2F96: std::__invoke_result::type std::__invoke(void (*&&)(vx_device*), vx_device*&&) (invoke.h:95) +==5409== by 0x4EE3D2E: decltype (__invoke((_S_declval<0ul>)(), (_S_declval<1ul>)())) std::thread::_Invoker >::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (thread:234) +==5409== by 0x4EE3CCF: std::thread::_Invoker >::operator()() (thread:243) +==5409== by 0x4EE3C9F: std::thread::_State_impl > >::_M_run() (thread:186) +==5409== by 0x52D966E: ??? 
(in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.25) +==5409== +==5409== 128 bytes in 1 blocks are still reachable in loss record 23 of 54 +==5409== at 0x4C2FB0F: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x4EB9305: Harp::Core::fetch() (core.cpp:413) +==5409== by 0x4EB8843: Harp::Core::step() (core.cpp:205) +==5409== by 0x4EE2CD3: vx_device::run() (vx_driver.cpp:146) +==5409== by 0x4EE2E31: vx_device::thread_proc() (vx_driver.cpp:163) +==5409== by 0x4EE2EA9: vx_device::__thread_proc__(vx_device*) (vx_driver.cpp:175) +==5409== by 0x4EE37C0: void std::__invoke_impl(std::__invoke_other, void (*&&)(vx_device*), vx_device*&&) (invoke.h:60) +==5409== by 0x4EE2F96: std::__invoke_result::type std::__invoke(void (*&&)(vx_device*), vx_device*&&) (invoke.h:95) +==5409== by 0x4EE3D2E: decltype (__invoke((_S_declval<0ul>)(), (_S_declval<1ul>)())) std::thread::_Invoker >::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (thread:234) +==5409== by 0x4EE3CCF: std::thread::_Invoker >::operator()() (thread:243) +==5409== by 0x4EE3C9F: std::thread::_State_impl > >::_M_run() (thread:186) +==5409== by 0x52D966E: ??? 
(in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.25) +==5409== +==5409== 128 bytes in 1 blocks are still reachable in loss record 24 of 54 +==5409== at 0x4C3017F: operator new(unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x4EC6CEB: __gnu_cxx::new_allocator::allocate(unsigned long, void const*) (new_allocator.h:111) +==5409== by 0x4EC5902: std::allocator_traits >::allocate(std::allocator&, unsigned long) (alloc_traits.h:436) +==5409== by 0x4EC3A6D: std::_Vector_base >::_M_allocate(unsigned long) (stl_vector.h:172) +==5409== by 0x4EC0DD0: std::_Vector_base >::_M_create_storage(unsigned long) (stl_vector.h:187) +==5409== by 0x4EBEF2A: std::_Vector_base >::_Vector_base(unsigned long, std::allocator const&) (stl_vector.h:138) +==5409== by 0x4EBD0ED: std::vector >::vector(unsigned long, std::allocator const&) (stl_vector.h:284) +==5409== by 0x4EB892E: Harp::Core::getCacheDelays(Harp::trace_inst_t*) (core.cpp:225) +==5409== by 0x4EB9444: Harp::Core::fetch() (core.cpp:423) +==5409== by 0x4EB8843: Harp::Core::step() (core.cpp:205) +==5409== by 0x4EE2CD3: vx_device::run() (vx_driver.cpp:146) +==5409== by 0x4EE2E31: vx_device::thread_proc() (vx_driver.cpp:163) +==5409== +==5409== 128 bytes in 16 blocks are still reachable in loss record 25 of 54 +==5409== at 0x4C3017F: operator new(unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x4EC5233: __gnu_cxx::new_allocator::allocate(unsigned long, void const*) (new_allocator.h:111) +==5409== by 0x4EC2DA0: std::allocator_traits >::allocate(std::allocator&, unsigned long) (alloc_traits.h:436) +==5409== by 0x4EC0247: std::_Bvector_base >::_M_allocate(unsigned long) (stl_bvector.h:492) +==5409== by 0x4EBE097: std::vector >::_M_insert_aux(std::_Bit_iterator, bool) (vector.tcc:807) +==5409== by 0x4EBCB31: std::vector >::push_back(bool) (stl_bvector.h:928) +==5409== by 0x4EBB575: Harp::Warp::Warp(Harp::Core*, unsigned int) (core.cpp:756) +==5409== by 0x4EB865E: 
Harp::Core::Core(Harp::ArchDef const&, Harp::Decoder&, Harp::MemoryUnit&, unsigned int) (core.cpp:158) +==5409== by 0x4EE2C64: vx_device::run() (vx_driver.cpp:141) +==5409== by 0x4EE2E31: vx_device::thread_proc() (vx_driver.cpp:163) +==5409== by 0x4EE2EA9: vx_device::__thread_proc__(vx_device*) (vx_driver.cpp:175) +==5409== by 0x4EE37C0: void std::__invoke_impl(std::__invoke_other, void (*&&)(vx_device*), vx_device*&&) (invoke.h:60) +==5409== +==5409== 128 bytes in 16 blocks are still reachable in loss record 26 of 54 +==5409== at 0x4C3017F: operator new(unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x4EC5233: __gnu_cxx::new_allocator::allocate(unsigned long, void const*) (new_allocator.h:111) +==5409== by 0x4EC2DA0: std::allocator_traits >::allocate(std::allocator&, unsigned long) (alloc_traits.h:436) +==5409== by 0x4EC0247: std::_Bvector_base >::_M_allocate(unsigned long) (stl_bvector.h:492) +==5409== by 0x4EBE097: std::vector >::_M_insert_aux(std::_Bit_iterator, bool) (vector.tcc:807) +==5409== by 0x4EBCB31: std::vector >::push_back(bool) (stl_bvector.h:928) +==5409== by 0x4EBB58E: Harp::Warp::Warp(Harp::Core*, unsigned int) (core.cpp:757) +==5409== by 0x4EB865E: Harp::Core::Core(Harp::ArchDef const&, Harp::Decoder&, Harp::MemoryUnit&, unsigned int) (core.cpp:158) +==5409== by 0x4EE2C64: vx_device::run() (vx_driver.cpp:141) +==5409== by 0x4EE2E31: vx_device::thread_proc() (vx_driver.cpp:163) +==5409== by 0x4EE2EA9: vx_device::__thread_proc__(vx_device*) (vx_driver.cpp:175) +==5409== by 0x4EE37C0: void std::__invoke_impl(std::__invoke_other, void (*&&)(vx_device*), vx_device*&&) (invoke.h:60) +==5409== +==5409== 128 bytes in 16 blocks are still reachable in loss record 27 of 54 +==5409== at 0x4C3017F: operator new(unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x4EC5233: __gnu_cxx::new_allocator::allocate(unsigned long, void const*) (new_allocator.h:111) +==5409== by 0x4EC2DA0: 
std::allocator_traits >::allocate(std::allocator&, unsigned long) (alloc_traits.h:436) +==5409== by 0x4EC0247: std::_Bvector_base >::_M_allocate(unsigned long) (stl_bvector.h:492) +==5409== by 0x4EBE39E: std::vector >::_M_initialize(unsigned long) (stl_bvector.h:1094) +==5409== by 0x4EBCBE7: std::vector >::vector(std::vector > const&) (stl_bvector.h:616) +==5409== by 0x4EC892B: Harp::Warp::Warp(Harp::Warp const&) (core.h:154) +==5409== by 0x4EC8BF6: void std::_Construct(Harp::Warp*, Harp::Warp const&) (stl_construct.h:75) +==5409== by 0x4EC7AA2: Harp::Warp* std::__uninitialized_copy::__uninit_copy(Harp::Warp const*, Harp::Warp const*, Harp::Warp*) (stl_uninitialized.h:83) +==5409== by 0x4EC6CA9: Harp::Warp* std::uninitialized_copy(Harp::Warp const*, Harp::Warp const*, Harp::Warp*) (stl_uninitialized.h:134) +==5409== by 0x4EC58A8: Harp::Warp* std::__uninitialized_copy_a(Harp::Warp const*, Harp::Warp const*, Harp::Warp*, std::allocator&) (stl_uninitialized.h:289) +==5409== by 0x4EC39F0: Harp::Warp* std::__uninitialized_move_if_noexcept_a >(Harp::Warp*, Harp::Warp*, Harp::Warp*, std::allocator&) (stl_uninitialized.h:312) +==5409== +==5409== 128 bytes in 16 blocks are still reachable in loss record 28 of 54 +==5409== at 0x4C3017F: operator new(unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x4EC5233: __gnu_cxx::new_allocator::allocate(unsigned long, void const*) (new_allocator.h:111) +==5409== by 0x4EC2DA0: std::allocator_traits >::allocate(std::allocator&, unsigned long) (alloc_traits.h:436) +==5409== by 0x4EC0247: std::_Bvector_base >::_M_allocate(unsigned long) (stl_bvector.h:492) +==5409== by 0x4EBE39E: std::vector >::_M_initialize(unsigned long) (stl_bvector.h:1094) +==5409== by 0x4EBCBE7: std::vector >::vector(std::vector > const&) (stl_bvector.h:616) +==5409== by 0x4EC894B: Harp::Warp::Warp(Harp::Warp const&) (core.h:154) +==5409== by 0x4EC8BF6: void std::_Construct(Harp::Warp*, Harp::Warp const&) (stl_construct.h:75) 
+==5409== by 0x4EC7AA2: Harp::Warp* std::__uninitialized_copy::__uninit_copy(Harp::Warp const*, Harp::Warp const*, Harp::Warp*) (stl_uninitialized.h:83) +==5409== by 0x4EC6CA9: Harp::Warp* std::uninitialized_copy(Harp::Warp const*, Harp::Warp const*, Harp::Warp*) (stl_uninitialized.h:134) +==5409== by 0x4EC58A8: Harp::Warp* std::__uninitialized_copy_a(Harp::Warp const*, Harp::Warp const*, Harp::Warp*, std::allocator&) (stl_uninitialized.h:289) +==5409== by 0x4EC39F0: Harp::Warp* std::__uninitialized_move_if_noexcept_a >(Harp::Warp*, Harp::Warp*, Harp::Warp*, std::allocator&) (stl_uninitialized.h:312) +==5409== +==5409== 288 bytes in 1 blocks are possibly lost in loss record 29 of 54 +==5409== at 0x4C31B25: calloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x40134A6: allocate_dtv (dl-tls.c:286) +==5409== by 0x40134A6: _dl_allocate_tls (dl-tls.c:530) +==5409== by 0x5F54227: allocate_stack (allocatestack.c:627) +==5409== by 0x5F54227: pthread_create@@GLIBC_2.2.5 (pthread_create.c:644) +==5409== by 0x52D9924: std::thread::_M_start_thread(std::unique_ptr >, void (*)()) (in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.25) +==5409== by 0x4EE304E: std::thread::thread(void (&)(vx_device*), vx_device*&&) (thread:126) +==5409== by 0x4EE282A: vx_device::vx_device() (vx_driver.cpp:85) +==5409== by 0x4EE1AF8: vx_dev_open (vx_driver.cpp:189) +==5409== by 0x109458: main (test.cpp:118) +==5409== +==5409== 512 bytes in 1 blocks are still reachable in loss record 30 of 54 +==5409== at 0x4C3017F: operator new(unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x4EC6CEB: __gnu_cxx::new_allocator::allocate(unsigned long, void const*) (new_allocator.h:111) +==5409== by 0x4EC5902: std::allocator_traits >::allocate(std::allocator&, unsigned long) (alloc_traits.h:436) +==5409== by 0x4EF2137: std::_Deque_base >::_M_allocate_node() (stl_deque.h:602) +==5409== by 0x4EF2A6E: std::_Deque_base >::_M_create_nodes(unsigned int**, unsigned 
int**) (stl_deque.h:727) +==5409== by 0x4EF0AB0: std::_Deque_base >::_M_initialize_map(unsigned long) (stl_deque.h:701) +==5409== by 0x4EEF433: std::_Deque_base >::_Deque_base() (stl_deque.h:492) +==5409== by 0x4EEE2D1: std::deque >::deque() (stl_deque.h:888) +==5409== by 0x4EEC7F6: VerilatedImp::VerilatedImp() (verilated_imp.h:203) +==5409== by 0x4EEC229: __static_initialization_and_destruction_0(int, int) (verilated.cpp:45) +==5409== by 0x4EEC25F: _GLOBAL__sub_I_verilated.cpp (verilated.cpp:1719) +==5409== by 0x4010732: call_init (dl-init.c:72) +==5409== by 0x4010732: _dl_init (dl-init.c:119) +==5409== +==5409== 1,008 bytes in 18 blocks are still reachable in loss record 31 of 54 +==5409== at 0x4C3017F: operator new(unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x4EC920B: __gnu_cxx::new_allocator > >::allocate(unsigned long, void const*) (new_allocator.h:111) +==5409== by 0x4EC8256: std::allocator_traits > > >::allocate(std::allocator > >&, unsigned long) (alloc_traits.h:436) +==5409== by 0x4EC735C: std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_M_get_node() (stl_tree.h:588) +==5409== by 0x4EC63EB: std::_Rb_tree_node >* std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_M_create_node const&>(std::pair const&) (stl_tree.h:642) +==5409== by 0x4EC509A: std::_Rb_tree_node >* std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_Alloc_node::operator() const&>(std::pair const&) const (stl_tree.h:556) +==5409== by 0x4EC2A93: std::_Rb_tree_iterator > std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_M_insert_ const&, std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_Alloc_node>(std::_Rb_tree_node_base*, std::_Rb_tree_node_base*, std::pair const&, std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_Alloc_node&) (stl_tree.h:1753) +==5409== by 0x4EBFEDD: std::_Rb_tree_iterator > std::_Rb_tree, std::_Select1st >, std::less, std::allocator > 
>::_M_insert_unique_ const&, std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_Alloc_node>(std::_Rb_tree_const_iterator >, std::pair const&, std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_Alloc_node&) (stl_tree.h:2206) +==5409== by 0x4EBDDE4: void std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_M_insert_unique const*>(std::pair const*, std::pair const*) (stl_tree.h:2452) +==5409== by 0x4EBC881: std::map, std::allocator > >::map(std::initializer_list >, std::less const&, std::allocator > const&) (stl_map.h:224) +==5409== by 0x4EBBDC0: __static_initialization_and_destruction_0(int, int) (instruction.h:49) +==5409== by 0x4EBBE37: _GLOBAL__sub_I_core.cpp (core.cpp:871) +==5409== +==5409== 1,008 bytes in 18 blocks are still reachable in loss record 32 of 54 +==5409== at 0x4C3017F: operator new(unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x4EC920B: __gnu_cxx::new_allocator > >::allocate(unsigned long, void const*) (new_allocator.h:111) +==5409== by 0x4EC8256: std::allocator_traits > > >::allocate(std::allocator > >&, unsigned long) (alloc_traits.h:436) +==5409== by 0x4EC735C: std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_M_get_node() (stl_tree.h:588) +==5409== by 0x4EC63EB: std::_Rb_tree_node >* std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_M_create_node const&>(std::pair const&) (stl_tree.h:642) +==5409== by 0x4EC509A: std::_Rb_tree_node >* std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_Alloc_node::operator() const&>(std::pair const&) const (stl_tree.h:556) +==5409== by 0x4EC2A93: std::_Rb_tree_iterator > std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_M_insert_ const&, std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_Alloc_node>(std::_Rb_tree_node_base*, std::_Rb_tree_node_base*, std::pair const&, std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_Alloc_node&) 
(stl_tree.h:1753) +==5409== by 0x4EBFEDD: std::_Rb_tree_iterator > std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_M_insert_unique_ const&, std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_Alloc_node>(std::_Rb_tree_const_iterator >, std::pair const&, std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_Alloc_node&) (stl_tree.h:2206) +==5409== by 0x4EBDDE4: void std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_M_insert_unique const*>(std::pair const*, std::pair const*) (stl_tree.h:2452) +==5409== by 0x4EBC881: std::map, std::allocator > >::map(std::initializer_list >, std::less const&, std::allocator > const&) (stl_map.h:224) +==5409== by 0x4ECE691: __static_initialization_and_destruction_0(int, int) (instruction.h:49) +==5409== by 0x4ECE708: _GLOBAL__sub_I_enc.cpp (enc.cpp:330) +==5409== +==5409== 1,008 bytes in 18 blocks are still reachable in loss record 33 of 54 +==5409== at 0x4C3017F: operator new(unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x4EC920B: __gnu_cxx::new_allocator > >::allocate(unsigned long, void const*) (new_allocator.h:111) +==5409== by 0x4EC8256: std::allocator_traits > > >::allocate(std::allocator > >&, unsigned long) (alloc_traits.h:436) +==5409== by 0x4EC735C: std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_M_get_node() (stl_tree.h:588) +==5409== by 0x4EC63EB: std::_Rb_tree_node >* std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_M_create_node const&>(std::pair const&) (stl_tree.h:642) +==5409== by 0x4EC509A: std::_Rb_tree_node >* std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_Alloc_node::operator() const&>(std::pair const&) const (stl_tree.h:556) +==5409== by 0x4EC2A93: std::_Rb_tree_iterator > std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_M_insert_ const&, std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_Alloc_node>(std::_Rb_tree_node_base*, 
std::_Rb_tree_node_base*, std::pair const&, std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_Alloc_node&) (stl_tree.h:1753) +==5409== by 0x4EBFEDD: std::_Rb_tree_iterator > std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_M_insert_unique_ const&, std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_Alloc_node>(std::_Rb_tree_const_iterator >, std::pair const&, std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_Alloc_node&) (stl_tree.h:2206) +==5409== by 0x4EBDDE4: void std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_M_insert_unique const*>(std::pair const*, std::pair const*) (stl_tree.h:2452) +==5409== by 0x4EBC881: std::map, std::allocator > >::map(std::initializer_list >, std::less const&, std::allocator > const&) (stl_map.h:224) +==5409== by 0x4EDAF5C: __static_initialization_and_destruction_0(int, int) (instruction.h:49) +==5409== by 0x4EDAFD3: _GLOBAL__sub_I_instruction.cpp (instruction.cpp:2462) +==5409== +==5409== 1,008 bytes in 18 blocks are still reachable in loss record 34 of 54 +==5409== at 0x4C3017F: operator new(unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x4EC920B: __gnu_cxx::new_allocator > >::allocate(unsigned long, void const*) (new_allocator.h:111) +==5409== by 0x4EC8256: std::allocator_traits > > >::allocate(std::allocator > >&, unsigned long) (alloc_traits.h:436) +==5409== by 0x4EC735C: std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_M_get_node() (stl_tree.h:588) +==5409== by 0x4EC63EB: std::_Rb_tree_node >* std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_M_create_node const&>(std::pair const&) (stl_tree.h:642) +==5409== by 0x4EC509A: std::_Rb_tree_node >* std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_Alloc_node::operator() const&>(std::pair const&) const (stl_tree.h:556) +==5409== by 0x4EC2A93: std::_Rb_tree_iterator > std::_Rb_tree, std::_Select1st >, std::less, 
std::allocator > >::_M_insert_ const&, std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_Alloc_node>(std::_Rb_tree_node_base*, std::_Rb_tree_node_base*, std::pair const&, std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_Alloc_node&) (stl_tree.h:1753) +==5409== by 0x4EBFEDD: std::_Rb_tree_iterator > std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_M_insert_unique_ const&, std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_Alloc_node>(std::_Rb_tree_const_iterator >, std::pair const&, std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_Alloc_node&) (stl_tree.h:2206) +==5409== by 0x4EBDDE4: void std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_M_insert_unique const*>(std::pair const*, std::pair const*) (stl_tree.h:2452) +==5409== by 0x4EBC881: std::map, std::allocator > >::map(std::initializer_list >, std::less const&, std::allocator > const&) (stl_map.h:224) +==5409== by 0x4EDF494: __static_initialization_and_destruction_0(int, int) (instruction.h:49) +==5409== by 0x4EDF50B: _GLOBAL__sub_I_mem.cpp (mem.cpp:401) +==5409== +==5409== 1,008 bytes in 18 blocks are still reachable in loss record 35 of 54 +==5409== at 0x4C3017F: operator new(unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x4EC920B: __gnu_cxx::new_allocator > >::allocate(unsigned long, void const*) (new_allocator.h:111) +==5409== by 0x4EC8256: std::allocator_traits > > >::allocate(std::allocator > >&, unsigned long) (alloc_traits.h:436) +==5409== by 0x4EC735C: std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_M_get_node() (stl_tree.h:588) +==5409== by 0x4EC63EB: std::_Rb_tree_node >* std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_M_create_node const&>(std::pair const&) (stl_tree.h:642) +==5409== by 0x4EC509A: std::_Rb_tree_node >* std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_Alloc_node::operator() const&>(std::pair const&) 
const (stl_tree.h:556) +==5409== by 0x4EC2A93: std::_Rb_tree_iterator > std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_M_insert_ const&, std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_Alloc_node>(std::_Rb_tree_node_base*, std::_Rb_tree_node_base*, std::pair const&, std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_Alloc_node&) (stl_tree.h:1753) +==5409== by 0x4EBFEDD: std::_Rb_tree_iterator > std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_M_insert_unique_ const&, std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_Alloc_node>(std::_Rb_tree_const_iterator >, std::pair const&, std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_Alloc_node&) (stl_tree.h:2206) +==5409== by 0x4EBDDE4: void std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_M_insert_unique const*>(std::pair const*, std::pair const*) (stl_tree.h:2452) +==5409== by 0x4EBC881: std::map, std::allocator > >::map(std::initializer_list >, std::less const&, std::allocator > const&) (stl_map.h:224) +==5409== by 0x4EE1E91: __static_initialization_and_destruction_0(int, int) (instruction.h:49) +==5409== by 0x4EE1F08: _GLOBAL__sub_I_vx_driver.cpp (vx_driver.cpp:268) +==5409== +==5409== 1,024 bytes in 16 blocks are still reachable in loss record 36 of 54 +==5409== at 0x4C3017F: operator new(unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x4EC7BFB: __gnu_cxx::new_allocator::allocate(unsigned long, void const*) (new_allocator.h:111) +==5409== by 0x4EC6E48: std::allocator_traits >::allocate(std::allocator&, unsigned long) (alloc_traits.h:436) +==5409== by 0x4EC5B07: std::_Deque_base >::_M_allocate_map(unsigned long) (stl_deque.h:616) +==5409== by 0x4EC3CB7: std::_Deque_base >::_M_initialize_map(unsigned long) (stl_deque.h:689) +==5409== by 0x4EC1081: std::_Deque_base >::_Deque_base() (stl_deque.h:492) +==5409== by 0x4EBF155: std::deque >::deque() (stl_deque.h:888) 
+==5409== by 0x4EBD38F: std::stack > >::stack >, void>() (stl_stack.h:149) +==5409== by 0x4EBB26C: Harp::Warp::Warp(Harp::Core*, unsigned int) (core.cpp:738) +==5409== by 0x4EB865E: Harp::Core::Core(Harp::ArchDef const&, Harp::Decoder&, Harp::MemoryUnit&, unsigned int) (core.cpp:158) +==5409== by 0x4EE2C64: vx_device::run() (vx_driver.cpp:141) +==5409== by 0x4EE2E31: vx_device::thread_proc() (vx_driver.cpp:163) +==5409== +==5409== 1,024 bytes in 16 blocks are still reachable in loss record 37 of 54 +==5409== at 0x4C3017F: operator new(unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x4EC7BFB: __gnu_cxx::new_allocator::allocate(unsigned long, void const*) (new_allocator.h:111) +==5409== by 0x4EC6E48: std::allocator_traits >::allocate(std::allocator&, unsigned long) (alloc_traits.h:436) +==5409== by 0x4EC5B07: std::_Deque_base >::_M_allocate_map(unsigned long) (stl_deque.h:616) +==5409== by 0x4EC3CB7: std::_Deque_base >::_M_initialize_map(unsigned long) (stl_deque.h:689) +==5409== by 0x4ECA61A: std::_Deque_base >::_Deque_base(std::allocator const&, unsigned long) (stl_deque.h:500) +==5409== by 0x4EC9A0A: std::deque >::deque(std::deque > const&) (stl_deque.h:949) +==5409== by 0x4EC8842: std::stack > >::stack(std::stack > > const&) (stl_stack.h:99) +==5409== by 0x4EC896B: Harp::Warp::Warp(Harp::Warp const&) (core.h:154) +==5409== by 0x4EC8BF6: void std::_Construct(Harp::Warp*, Harp::Warp const&) (stl_construct.h:75) +==5409== by 0x4EC7AA2: Harp::Warp* std::__uninitialized_copy::__uninit_copy(Harp::Warp const*, Harp::Warp const*, Harp::Warp*) (stl_uninitialized.h:83) +==5409== by 0x4EC6CA9: Harp::Warp* std::uninitialized_copy(Harp::Warp const*, Harp::Warp const*, Harp::Warp*) (stl_uninitialized.h:134) +==5409== +==5409== 2,048 bytes in 16 blocks are still reachable in loss record 38 of 54 +==5409== at 0x4C3017F: operator new(unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x4EC6CEB: 
__gnu_cxx::new_allocator::allocate(unsigned long, void const*) (new_allocator.h:111) +==5409== by 0x4EC5902: std::allocator_traits >::allocate(std::allocator&, unsigned long) (alloc_traits.h:436) +==5409== by 0x4EC3A6D: std::_Vector_base >::_M_allocate(unsigned long) (stl_vector.h:172) +==5409== by 0x4EC0DD0: std::_Vector_base >::_M_create_storage(unsigned long) (stl_vector.h:187) +==5409== by 0x4EBEF2A: std::_Vector_base >::_Vector_base(unsigned long, std::allocator const&) (stl_vector.h:138) +==5409== by 0x4EBD0ED: std::vector >::vector(unsigned long, std::allocator const&) (stl_vector.h:284) +==5409== by 0x4EBB2AE: Harp::Warp::Warp(Harp::Core*, unsigned int) (core.cpp:738) +==5409== by 0x4EB865E: Harp::Core::Core(Harp::ArchDef const&, Harp::Decoder&, Harp::MemoryUnit&, unsigned int) (core.cpp:158) +==5409== by 0x4EE2C64: vx_device::run() (vx_driver.cpp:141) +==5409== by 0x4EE2E31: vx_device::thread_proc() (vx_driver.cpp:163) +==5409== by 0x4EE2EA9: vx_device::__thread_proc__(vx_device*) (vx_driver.cpp:175) +==5409== +==5409== 2,048 bytes in 16 blocks are still reachable in loss record 39 of 54 +==5409== at 0x4C3017F: operator new(unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x4EC6CEB: __gnu_cxx::new_allocator::allocate(unsigned long, void const*) (new_allocator.h:111) +==5409== by 0x4EC5902: std::allocator_traits >::allocate(std::allocator&, unsigned long) (alloc_traits.h:436) +==5409== by 0x4EC3A6D: std::_Vector_base >::_M_allocate(unsigned long) (stl_vector.h:172) +==5409== by 0x4EC0DD0: std::_Vector_base >::_M_create_storage(unsigned long) (stl_vector.h:187) +==5409== by 0x4EBEF2A: std::_Vector_base >::_Vector_base(unsigned long, std::allocator const&) (stl_vector.h:138) +==5409== by 0x4EC9B62: std::vector >::vector(std::vector > const&) (stl_vector.h:328) +==5409== by 0x4EC898B: Harp::Warp::Warp(Harp::Warp const&) (core.h:154) +==5409== by 0x4EC8BF6: void std::_Construct(Harp::Warp*, Harp::Warp const&) 
(stl_construct.h:75) +==5409== by 0x4EC7AA2: Harp::Warp* std::__uninitialized_copy::__uninit_copy(Harp::Warp const*, Harp::Warp const*, Harp::Warp*) (stl_uninitialized.h:83) +==5409== by 0x4EC6CA9: Harp::Warp* std::uninitialized_copy(Harp::Warp const*, Harp::Warp const*, Harp::Warp*) (stl_uninitialized.h:134) +==5409== by 0x4EC58A8: Harp::Warp* std::__uninitialized_copy_a(Harp::Warp const*, Harp::Warp const*, Harp::Warp*, std::allocator&) (stl_uninitialized.h:289) +==5409== +==5409== 7,680 bytes in 16 blocks are still reachable in loss record 40 of 54 +==5409== at 0x4C3017F: operator new(unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x4EC8CB2: __gnu_cxx::new_allocator::allocate(unsigned long, void const*) (new_allocator.h:111) +==5409== by 0x4EC7C25: std::allocator_traits >::allocate(std::allocator&, unsigned long) (alloc_traits.h:436) +==5409== by 0x4EC6E73: std::_Deque_base >::_M_allocate_node() (stl_deque.h:602) +==5409== by 0x4EC5B84: std::_Deque_base >::_M_create_nodes(Harp::DomStackEntry**, Harp::DomStackEntry**) (stl_deque.h:727) +==5409== by 0x4EC3D10: std::_Deque_base >::_M_initialize_map(unsigned long) (stl_deque.h:701) +==5409== by 0x4EC1081: std::_Deque_base >::_Deque_base() (stl_deque.h:492) +==5409== by 0x4EBF155: std::deque >::deque() (stl_deque.h:888) +==5409== by 0x4EBD38F: std::stack > >::stack >, void>() (stl_stack.h:149) +==5409== by 0x4EBB26C: Harp::Warp::Warp(Harp::Core*, unsigned int) (core.cpp:738) +==5409== by 0x4EB865E: Harp::Core::Core(Harp::ArchDef const&, Harp::Decoder&, Harp::MemoryUnit&, unsigned int) (core.cpp:158) +==5409== by 0x4EE2C64: vx_device::run() (vx_driver.cpp:141) +==5409== +==5409== 7,680 bytes in 16 blocks are still reachable in loss record 41 of 54 +==5409== at 0x4C3017F: operator new(unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x4EC8CB2: __gnu_cxx::new_allocator::allocate(unsigned long, void const*) (new_allocator.h:111) +==5409== by 
0x4EC7C25: std::allocator_traits >::allocate(std::allocator&, unsigned long) (alloc_traits.h:436) +==5409== by 0x4EC6E73: std::_Deque_base >::_M_allocate_node() (stl_deque.h:602) +==5409== by 0x4EC5B84: std::_Deque_base >::_M_create_nodes(Harp::DomStackEntry**, Harp::DomStackEntry**) (stl_deque.h:727) +==5409== by 0x4EC3D10: std::_Deque_base >::_M_initialize_map(unsigned long) (stl_deque.h:701) +==5409== by 0x4ECA61A: std::_Deque_base >::_Deque_base(std::allocator const&, unsigned long) (stl_deque.h:500) +==5409== by 0x4EC9A0A: std::deque >::deque(std::deque > const&) (stl_deque.h:949) +==5409== by 0x4EC8842: std::stack > >::stack(std::stack > > const&) (stl_stack.h:99) +==5409== by 0x4EC896B: Harp::Warp::Warp(Harp::Warp const&) (core.h:154) +==5409== by 0x4EC8BF6: void std::_Construct(Harp::Warp*, Harp::Warp const&) (stl_construct.h:75) +==5409== by 0x4EC7AA2: Harp::Warp* std::__uninitialized_copy::__uninit_copy(Harp::Warp const*, Harp::Warp const*, Harp::Warp*) (stl_uninitialized.h:83) +==5409== +==5409== 9,088 bytes in 1 blocks are still reachable in loss record 42 of 54 +==5409== at 0x4C320A6: memalign (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x52AF34C: operator new(unsigned long, std::align_val_t) (in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.25) +==5409== by 0x4EB8577: Harp::Core::Core(Harp::ArchDef const&, Harp::Decoder&, Harp::MemoryUnit&, unsigned int) (core.cpp:140) +==5409== by 0x4EE2C64: vx_device::run() (vx_driver.cpp:141) +==5409== by 0x4EE2E31: vx_device::thread_proc() (vx_driver.cpp:163) +==5409== by 0x4EE2EA9: vx_device::__thread_proc__(vx_device*) (vx_driver.cpp:175) +==5409== by 0x4EE37C0: void std::__invoke_impl(std::__invoke_other, void (*&&)(vx_device*), vx_device*&&) (invoke.h:60) +==5409== by 0x4EE2F96: std::__invoke_result::type std::__invoke(void (*&&)(vx_device*), vx_device*&&) (invoke.h:95) +==5409== by 0x4EE3D2E: decltype (__invoke((_S_declval<0ul>)(), (_S_declval<1ul>)())) std::thread::_Invoker 
>::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (thread:234) +==5409== by 0x4EE3CCF: std::thread::_Invoker >::operator()() (thread:243) +==5409== by 0x4EE3C9F: std::thread::_State_impl > >::_M_run() (thread:186) +==5409== by 0x52D966E: ??? (in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.25) +==5409== +==5409== 12,288 bytes in 16 blocks are still reachable in loss record 43 of 54 +==5409== at 0x4C3017F: operator new(unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x4EC6D6E: __gnu_cxx::new_allocator, std::allocator > > >::allocate(unsigned long, void const*) (new_allocator.h:111) +==5409== by 0x4EC599A: std::allocator_traits, std::allocator > > > >::allocate(std::allocator, std::allocator > > >&, unsigned long) (alloc_traits.h:436) +==5409== by 0x4EC3B0B: std::_Vector_base, std::allocator > >, std::allocator, std::allocator > > > >::_M_allocate(unsigned long) (stl_vector.h:172) +==5409== by 0x4EC1317: void std::vector, std::allocator > >, std::allocator, std::allocator > > > >::_M_realloc_insert, std::allocator > > >(__gnu_cxx::__normal_iterator, std::allocator > >*, std::vector, std::allocator > >, std::allocator, std::allocator > > > > >, std::vector, std::allocator > >&&) (vector.tcc:406) +==5409== by 0x4EBF385: void std::vector, std::allocator > >, std::allocator, std::allocator > > > >::emplace_back, std::allocator > > >(std::vector, std::allocator > >&&) (vector.tcc:105) +==5409== by 0x4EBD4C9: std::vector, std::allocator > >, std::allocator, std::allocator > > > >::push_back(std::vector, std::allocator > >&&) (stl_vector.h:954) +==5409== by 0x4EBB406: Harp::Warp::Warp(Harp::Core*, unsigned int) (core.cpp:744) +==5409== by 0x4EB865E: Harp::Core::Core(Harp::ArchDef const&, Harp::Decoder&, Harp::MemoryUnit&, unsigned int) (core.cpp:158) +==5409== by 0x4EE2C64: vx_device::run() (vx_driver.cpp:141) +==5409== by 0x4EE2E31: vx_device::thread_proc() (vx_driver.cpp:163) +==5409== by 0x4EE2EA9: 
vx_device::__thread_proc__(vx_device*) (vx_driver.cpp:175) +==5409== +==5409== 12,288 bytes in 16 blocks are still reachable in loss record 44 of 54 +==5409== at 0x4C3017F: operator new(unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x4EC6DE6: __gnu_cxx::new_allocator, std::allocator > > >::allocate(unsigned long, void const*) (new_allocator.h:111) +==5409== by 0x4EC5A18: std::allocator_traits, std::allocator > > > >::allocate(std::allocator, std::allocator > > >&, unsigned long) (alloc_traits.h:436) +==5409== by 0x4EC3B8D: std::_Vector_base, std::allocator > >, std::allocator, std::allocator > > > >::_M_allocate(unsigned long) (stl_vector.h:172) +==5409== by 0x4EC1A9F: void std::vector, std::allocator > >, std::allocator, std::allocator > > > >::_M_realloc_insert, std::allocator > > >(__gnu_cxx::__normal_iterator, std::allocator > >*, std::vector, std::allocator > >, std::allocator, std::allocator > > > > >, std::vector, std::allocator > >&&) (vector.tcc:406) +==5409== by 0x4EBF645: void std::vector, std::allocator > >, std::allocator, std::allocator > > > >::emplace_back, std::allocator > > >(std::vector, std::allocator > >&&) (vector.tcc:105) +==5409== by 0x4EBD681: std::vector, std::allocator > >, std::allocator, std::allocator > > > >::push_back(std::vector, std::allocator > >&&) (stl_vector.h:954) +==5409== by 0x4EBB4C6: Harp::Warp::Warp(Harp::Core*, unsigned int) (core.cpp:749) +==5409== by 0x4EB865E: Harp::Core::Core(Harp::ArchDef const&, Harp::Decoder&, Harp::MemoryUnit&, unsigned int) (core.cpp:158) +==5409== by 0x4EE2C64: vx_device::run() (vx_driver.cpp:141) +==5409== by 0x4EE2E31: vx_device::thread_proc() (vx_driver.cpp:163) +==5409== by 0x4EE2EA9: vx_device::__thread_proc__(vx_device*) (vx_driver.cpp:175) +==5409== +==5409== 12,288 bytes in 16 blocks are still reachable in loss record 45 of 54 +==5409== at 0x4C3017F: operator new(unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 
0x4EC6D6E: __gnu_cxx::new_allocator, std::allocator > > >::allocate(unsigned long, void const*) (new_allocator.h:111) +==5409== by 0x4EC599A: std::allocator_traits, std::allocator > > > >::allocate(std::allocator, std::allocator > > >&, unsigned long) (alloc_traits.h:436) +==5409== by 0x4EC3B0B: std::_Vector_base, std::allocator > >, std::allocator, std::allocator > > > >::_M_allocate(unsigned long) (stl_vector.h:172) +==5409== by 0x4EC0EDA: std::_Vector_base, std::allocator > >, std::allocator, std::allocator > > > >::_M_create_storage(unsigned long) (stl_vector.h:187) +==5409== by 0x4EBF000: std::_Vector_base, std::allocator > >, std::allocator, std::allocator > > > >::_Vector_base(unsigned long, std::allocator, std::allocator > > > const&) (stl_vector.h:138) +==5409== by 0x4EC96B8: std::vector, std::allocator > >, std::allocator, std::allocator > > > >::vector(std::vector, std::allocator > >, std::allocator, std::allocator > > > > const&) (stl_vector.h:328) +==5409== by 0x4EC88DA: Harp::Warp::Warp(Harp::Warp const&) (core.h:154) +==5409== by 0x4EC8BF6: void std::_Construct(Harp::Warp*, Harp::Warp const&) (stl_construct.h:75) +==5409== by 0x4EC7AA2: Harp::Warp* std::__uninitialized_copy::__uninit_copy(Harp::Warp const*, Harp::Warp const*, Harp::Warp*) (stl_uninitialized.h:83) +==5409== by 0x4EC6CA9: Harp::Warp* std::uninitialized_copy(Harp::Warp const*, Harp::Warp const*, Harp::Warp*) (stl_uninitialized.h:134) +==5409== by 0x4EC58A8: Harp::Warp* std::__uninitialized_copy_a(Harp::Warp const*, Harp::Warp const*, Harp::Warp*, std::allocator&) (stl_uninitialized.h:289) +==5409== +==5409== 12,288 bytes in 16 blocks are still reachable in loss record 46 of 54 +==5409== at 0x4C3017F: operator new(unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x4EC6DE6: __gnu_cxx::new_allocator, std::allocator > > >::allocate(unsigned long, void const*) (new_allocator.h:111) +==5409== by 0x4EC5A18: std::allocator_traits, std::allocator > > > 
>::allocate(std::allocator, std::allocator > > >&, unsigned long) (alloc_traits.h:436) +==5409== by 0x4EC3B8D: std::_Vector_base, std::allocator > >, std::allocator, std::allocator > > > >::_M_allocate(unsigned long) (stl_vector.h:172) +==5409== by 0x4EC0FB0: std::_Vector_base, std::allocator > >, std::allocator, std::allocator > > > >::_M_create_storage(unsigned long) (stl_vector.h:187) +==5409== by 0x4EBF0BA: std::_Vector_base, std::allocator > >, std::allocator, std::allocator > > > >::_Vector_base(unsigned long, std::allocator, std::allocator > > > const&) (stl_vector.h:138) +==5409== by 0x4EC97CE: std::vector, std::allocator > >, std::allocator, std::allocator > > > >::vector(std::vector, std::allocator > >, std::allocator, std::allocator > > > > const&) (stl_vector.h:328) +==5409== by 0x4EC88F5: Harp::Warp::Warp(Harp::Warp const&) (core.h:154) +==5409== by 0x4EC8BF6: void std::_Construct(Harp::Warp*, Harp::Warp const&) (stl_construct.h:75) +==5409== by 0x4EC7AA2: Harp::Warp* std::__uninitialized_copy::__uninit_copy(Harp::Warp const*, Harp::Warp const*, Harp::Warp*) (stl_uninitialized.h:83) +==5409== by 0x4EC6CA9: Harp::Warp* std::uninitialized_copy(Harp::Warp const*, Harp::Warp const*, Harp::Warp*) (stl_uninitialized.h:134) +==5409== by 0x4EC58A8: Harp::Warp* std::__uninitialized_copy_a(Harp::Warp const*, Harp::Warp const*, Harp::Warp*, std::allocator&) (stl_uninitialized.h:289) +==5409== +==5409== 14,080 bytes in 1 blocks are still reachable in loss record 47 of 54 +==5409== at 0x4C3017F: operator new(unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x4EC6C78: __gnu_cxx::new_allocator::allocate(unsigned long, void const*) (new_allocator.h:111) +==5409== by 0x4EC5869: std::allocator_traits >::allocate(std::allocator&, unsigned long) (alloc_traits.h:436) +==5409== by 0x4EC398B: std::_Vector_base >::_M_allocate(unsigned long) (stl_vector.h:172) +==5409== by 0x4EC0B51: void std::vector 
>::_M_realloc_insert(__gnu_cxx::__normal_iterator > >, Harp::Warp&&) (vector.tcc:406) +==5409== by 0x4EBEDF0: void std::vector >::emplace_back(Harp::Warp&&) (vector.tcc:105) +==5409== by 0x4EBCFD3: std::vector >::push_back(Harp::Warp&&) (stl_vector.h:954) +==5409== by 0x4EB8670: Harp::Core::Core(Harp::ArchDef const&, Harp::Decoder&, Harp::MemoryUnit&, unsigned int) (core.cpp:158) +==5409== by 0x4EE2C64: vx_device::run() (vx_driver.cpp:141) +==5409== by 0x4EE2E31: vx_device::thread_proc() (vx_driver.cpp:163) +==5409== by 0x4EE2EA9: vx_device::__thread_proc__(vx_device*) (vx_driver.cpp:175) +==5409== by 0x4EE37C0: void std::__invoke_impl(std::__invoke_other, void (*&&)(vx_device*), vx_device*&&) (invoke.h:60) +==5409== +==5409== 15,872 bytes in 1 blocks are still reachable in loss record 48 of 54 +==5409== at 0x4C320A6: memalign (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x52AF34C: operator new(unsigned long, std::align_val_t) (in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.25) +==5409== by 0x4EFC819: Vcache_simX::Vcache_simX(char const*) (Vcache_simX.cpp:46) +==5409== by 0x4EB8589: Harp::Core::Core(Harp::ArchDef const&, Harp::Decoder&, Harp::MemoryUnit&, unsigned int) (core.cpp:140) +==5409== by 0x4EE2C64: vx_device::run() (vx_driver.cpp:141) +==5409== by 0x4EE2E31: vx_device::thread_proc() (vx_driver.cpp:163) +==5409== by 0x4EE2EA9: vx_device::__thread_proc__(vx_device*) (vx_driver.cpp:175) +==5409== by 0x4EE37C0: void std::__invoke_impl(std::__invoke_other, void (*&&)(vx_device*), vx_device*&&) (invoke.h:60) +==5409== by 0x4EE2F96: std::__invoke_result::type std::__invoke(void (*&&)(vx_device*), vx_device*&&) (invoke.h:95) +==5409== by 0x4EE3D2E: decltype (__invoke((_S_declval<0ul>)(), (_S_declval<1ul>)())) std::thread::_Invoker >::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (thread:234) +==5409== by 0x4EE3CCF: std::thread::_Invoker >::operator()() (thread:243) +==5409== by 0x4EE3C9F: std::thread::_State_impl > >::_M_run() 
(thread:186) +==5409== +==5409== 32,832 bytes in 1 blocks are still reachable in loss record 49 of 54 +==5409== at 0x4C3017F: operator new(unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x4EE1AED: vx_dev_open (vx_driver.cpp:189) +==5409== by 0x109458: main (test.cpp:118) +==5409== +==5409== 196,608 bytes in 512 blocks are still reachable in loss record 50 of 54 +==5409== at 0x4C3017F: operator new(unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x4EC6ECE: __gnu_cxx::new_allocator >::allocate(unsigned long, void const*) (new_allocator.h:111) +==5409== by 0x4EC5C76: std::allocator_traits > >::allocate(std::allocator >&, unsigned long) (alloc_traits.h:436) +==5409== by 0x4EC3E6B: std::_Vector_base, std::allocator > >::_M_allocate(unsigned long) (stl_vector.h:172) +==5409== by 0x4EC1641: void std::vector, std::allocator > >::_M_realloc_insert >(__gnu_cxx::__normal_iterator*, std::vector, std::allocator > > >, Harp::Reg&&) (vector.tcc:406) +==5409== by 0x4EBF42D: void std::vector, std::allocator > >::emplace_back >(Harp::Reg&&) (vector.tcc:105) +==5409== by 0x4EBD563: std::vector, std::allocator > >::push_back(Harp::Reg&&) (stl_vector.h:954) +==5409== by 0x4EBB488: Harp::Warp::Warp(Harp::Core*, unsigned int) (core.cpp:746) +==5409== by 0x4EB865E: Harp::Core::Core(Harp::ArchDef const&, Harp::Decoder&, Harp::MemoryUnit&, unsigned int) (core.cpp:158) +==5409== by 0x4EE2C64: vx_device::run() (vx_driver.cpp:141) +==5409== by 0x4EE2E31: vx_device::thread_proc() (vx_driver.cpp:163) +==5409== by 0x4EE2EA9: vx_device::__thread_proc__(vx_device*) (vx_driver.cpp:175) +==5409== +==5409== 196,608 bytes in 512 blocks are still reachable in loss record 51 of 54 +==5409== at 0x4C3017F: operator new(unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x4EC6ECE: __gnu_cxx::new_allocator >::allocate(unsigned long, void const*) (new_allocator.h:111) +==5409== by 0x4EC5C76: 
std::allocator_traits > >::allocate(std::allocator >&, unsigned long) (alloc_traits.h:436) +==5409== by 0x4EC3E6B: std::_Vector_base, std::allocator > >::_M_allocate(unsigned long) (stl_vector.h:172) +==5409== by 0x4EC1150: std::_Vector_base, std::allocator > >::_M_create_storage(unsigned long) (stl_vector.h:187) +==5409== by 0x4EBF1E4: std::_Vector_base, std::allocator > >::_Vector_base(unsigned long, std::allocator > const&) (stl_vector.h:138) +==5409== by 0x4ECBF16: std::vector, std::allocator > >::vector(std::vector, std::allocator > > const&) (stl_vector.h:328) +==5409== by 0x4ECB8CC: void std::_Construct, std::allocator > >, std::vector, std::allocator > > const&>(std::vector, std::allocator > >*, std::vector, std::allocator > > const&) (stl_construct.h:75) +==5409== by 0x4ECB378: std::vector, std::allocator > >* std::__uninitialized_copy::__uninit_copy<__gnu_cxx::__normal_iterator, std::allocator > > const*, std::vector, std::allocator > >, std::allocator, std::allocator > > > > >, std::vector, std::allocator > >*>(__gnu_cxx::__normal_iterator, std::allocator > > const*, std::vector, std::allocator > >, std::allocator, std::allocator > > > > >, __gnu_cxx::__normal_iterator, std::allocator > > const*, std::vector, std::allocator > >, std::allocator, std::allocator > > > > >, std::vector, std::allocator > >*) (stl_uninitialized.h:83) +==5409== by 0x4ECACF7: std::vector, std::allocator > >* std::uninitialized_copy<__gnu_cxx::__normal_iterator, std::allocator > > const*, std::vector, std::allocator > >, std::allocator, std::allocator > > > > >, std::vector, std::allocator > >*>(__gnu_cxx::__normal_iterator, std::allocator > > const*, std::vector, std::allocator > >, std::allocator, std::allocator > > > > >, __gnu_cxx::__normal_iterator, std::allocator > > const*, std::vector, std::allocator > >, std::allocator, std::allocator > > > > >, std::vector, std::allocator > >*) (stl_uninitialized.h:134) +==5409== by 0x4ECA2BC: std::vector, std::allocator > >* 
std::__uninitialized_copy_a<__gnu_cxx::__normal_iterator, std::allocator > > const*, std::vector, std::allocator > >, std::allocator, std::allocator > > > > >, std::vector, std::allocator > >*, std::vector, std::allocator > > >(__gnu_cxx::__normal_iterator, std::allocator > > const*, std::vector, std::allocator > >, std::allocator, std::allocator > > > > >, __gnu_cxx::__normal_iterator, std::allocator > > const*, std::vector, std::allocator > >, std::allocator, std::allocator > > > > >, std::vector, std::allocator > >*, std::allocator, std::allocator > > >&) (stl_uninitialized.h:289) +==5409== by 0x4EC9706: std::vector, std::allocator > >, std::allocator, std::allocator > > > >::vector(std::vector, std::allocator > >, std::allocator, std::allocator > > > > const&) (stl_vector.h:331) +==5409== +==5409== 786,432 bytes in 16 blocks are still reachable in loss record 52 of 54 +==5409== at 0x4C3017F: operator new(unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x4EC7272: __gnu_cxx::new_allocator >::allocate(unsigned long, void const*) (new_allocator.h:111) +==5409== by 0x4EC61E6: std::allocator_traits > >::allocate(std::allocator >&, unsigned long) (alloc_traits.h:436) +==5409== by 0x4EC4BA9: std::_Vector_base, std::allocator > >::_M_allocate(unsigned long) (stl_vector.h:172) +==5409== by 0x4EC20F3: void std::vector, std::allocator > >::_M_realloc_insert >(__gnu_cxx::__normal_iterator*, std::vector, std::allocator > > >, Harp::Reg&&) (vector.tcc:406) +==5409== by 0x4EBF795: void std::vector, std::allocator > >::emplace_back >(Harp::Reg&&) (vector.tcc:105) +==5409== by 0x4EBD785: std::vector, std::allocator > >::push_back(Harp::Reg&&) (stl_vector.h:954) +==5409== by 0x4EBB5DF: Harp::Warp::Warp(Harp::Core*, unsigned int) (core.cpp:763) +==5409== by 0x4EB865E: Harp::Core::Core(Harp::ArchDef const&, Harp::Decoder&, Harp::MemoryUnit&, unsigned int) (core.cpp:158) +==5409== by 0x4EE2C64: vx_device::run() (vx_driver.cpp:141) +==5409== by 
0x4EE2E31: vx_device::thread_proc() (vx_driver.cpp:163) +==5409== by 0x4EE2EA9: vx_device::__thread_proc__(vx_device*) (vx_driver.cpp:175) +==5409== +==5409== 786,432 bytes in 16 blocks are still reachable in loss record 53 of 54 +==5409== at 0x4C3017F: operator new(unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x4EC7272: __gnu_cxx::new_allocator >::allocate(unsigned long, void const*) (new_allocator.h:111) +==5409== by 0x4EC61E6: std::allocator_traits > >::allocate(std::allocator >&, unsigned long) (alloc_traits.h:436) +==5409== by 0x4EC4BA9: std::_Vector_base, std::allocator > >::_M_allocate(unsigned long) (stl_vector.h:172) +==5409== by 0x4ECAE08: std::_Vector_base, std::allocator > >::_M_create_storage(unsigned long) (stl_vector.h:187) +==5409== by 0x4ECA46C: std::_Vector_base, std::allocator > >::_Vector_base(unsigned long, std::allocator > const&) (stl_vector.h:138) +==5409== by 0x4EC98E4: std::vector, std::allocator > >::vector(std::vector, std::allocator > > const&) (stl_vector.h:328) +==5409== by 0x4EC8910: Harp::Warp::Warp(Harp::Warp const&) (core.h:154) +==5409== by 0x4EC8BF6: void std::_Construct(Harp::Warp*, Harp::Warp const&) (stl_construct.h:75) +==5409== by 0x4EC7AA2: Harp::Warp* std::__uninitialized_copy::__uninit_copy(Harp::Warp const*, Harp::Warp const*, Harp::Warp*) (stl_uninitialized.h:83) +==5409== by 0x4EC6CA9: Harp::Warp* std::uninitialized_copy(Harp::Warp const*, Harp::Warp const*, Harp::Warp*) (stl_uninitialized.h:134) +==5409== by 0x4EC58A8: Harp::Warp* std::__uninitialized_copy_a(Harp::Warp const*, Harp::Warp const*, Harp::Warp*, std::allocator&) (stl_uninitialized.h:289) +==5409== +==5409== 1,048,576 bytes in 1 blocks are still reachable in loss record 54 of 54 +==5409== at 0x4C3089F: operator new[](unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) +==5409== by 0x4EDF76E: Harp::RAM::get(unsigned int) (mem.h:199) +==5409== by 0x4EDF961: Harp::RAM::operator[](unsigned int) 
(mem.h:306) +==5409== by 0x4EE250A: Harp::RAM::write(unsigned int, unsigned int, unsigned char*) (mem.h:219) +==5409== by 0x4EE2945: vx_device::upload(void*, unsigned long, unsigned long, unsigned long) (vx_driver.cpp:99) +==5409== by 0x4EE1CF4: vx_copy_to_fpga (vx_driver.cpp:241) +==5409== by 0x109334: upload_program(void*, char const*, unsigned int) (test.cpp:92) +==5409== by 0x109477: main (test.cpp:121) +==5409== +==5409== LEAK SUMMARY: +==5409== definitely lost: 0 bytes in 0 blocks +==5409== indirectly lost: 0 bytes in 0 blocks +==5409== possibly lost: 288 bytes in 1 blocks +==5409== still reachable: 3,164,942 bytes in 1,400 blocks +==5409== suppressed: 0 bytes in 0 blocks +==5409== +==5409== For counts of detected and suppressed errors, rerun with: -v +==5409== ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0) diff --git a/driver/sim/vx_driver.cpp b/driver/sim/vx_driver.cpp new file mode 100644 index 00000000..7e1606ea --- /dev/null +++ b/driver/sim/vx_driver.cpp @@ -0,0 +1,270 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "../../simX/include/debug.h" +#include "../../simX/include/types.h" +#include "../../simX/include/core.h" +#include "../../simX/include/enc.h" +#include "../../simX/include/instruction.h" +#include "../../simX/include/mem.h" +#include "../../simX/include/obj.h" +#include "../../simX/include/archdef.h" +#include "../../simX/include/help.h" + +#define CACHE_LINESIZE 64 + +#define PAGE_SIZE 4096 + +#define CHECK_RES(_expr) \ + do { \ + fpga_result res = _expr; \ + if (res == FPGA_OK) \ + break; \ + printf("OPAE Error: '%s' returned %d!\n", #_expr, (int)res); \ + return -1; \ + } while (false) + +/////////////////////////////////////////////////////////////////////////////// + +static size_t align_size(size_t size) { + return CACHE_LINESIZE * ((size + CACHE_LINESIZE - 1) / CACHE_LINESIZE); +} + 
+/////////////////////////////////////////////////////////////////////////////// + +class vx_device; + +class vx_buffer { +public: + vx_buffer(size_t size, vx_device* device) + : size_(size) + , device_(device) { + auto aligned_asize = align_size(size); + data_ = malloc(aligned_asize); + } + + ~vx_buffer() { + if (data_) { + free(data_); + } + } + + auto data() const { + return data_; + } + + auto size() const { + return size_; + } + + auto device() const { + return device_; + } + +private: + size_t size_; + vx_device* device_; + void* data_; +}; + +/////////////////////////////////////////////////////////////////////////////// + +class vx_device { +public: + vx_device() + : is_done_(false) + , is_running_(false) + , thread_(__thread_proc__, this) + {} + + ~vx_device() { + mutex_.lock(); + is_done_ = false; + mutex_.unlock(); + + thread_.join(); + } + + int upload(void* src, size_t dest_addr, size_t size, size_t src_offset) { + if (dest_addr + size > ram_.size()) + return -1; + ram_.write(dest_addr, size, (uint8_t*)src + src_offset); + return 0; + } + + int download(const void* dest, size_t src_addr, size_t size, size_t dest_offset) { + if (src_addr + size > ram_.size()) + return -1; + ram_.read(src_addr, size, (uint8_t*)dest + dest_offset); + return 0; + } + + int start() { + if (this->wait(-1) != 0) + return -1; + + mutex_.lock(); + is_running_ = true; + mutex_.unlock(); + + return 0; + } + + int wait(long long timeout) { + for (;;) { + mutex_.lock(); + bool is_running = is_running_; + mutex_.unlock(); + + if (!is_running || 0 == timeout--) + break; + + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } + return 0; + } + +private: + + void run() { + Harp::ArchDef arch("rv32i", false); + Harp::WordDecoder dec(arch); + Harp::MemoryUnit mu(PAGE_SIZE, arch.getWordSize(), true); + Harp::Core core(arch, dec, mu); + mu.attach(ram_, 0); + + while (core.running()) { + core.step(); + } + core.printStats(); + } + + void thread_proc() { + std::cout << "Device 
ready..." << std::endl; + + for (;;) { + mutex_.lock(); + bool is_done = is_done_; + bool is_running = is_running_; + mutex_.unlock(); + + if (is_done) + break; + + if (is_running) { + std::cout << "Device running..." << std::endl; + + this->run(); + + mutex_.lock(); + is_running_ = false; + mutex_.unlock(); + + std::cout << "Device ready..." << std::endl; + } + } + } + + static void __thread_proc__(vx_device* device) { + device->thread_proc(); + } + + bool is_done_; + bool is_running_; + std::thread thread_; + Harp::RAM ram_; + std::mutex mutex_; +}; + +/////////////////////////////////////////////////////////////////////////////// + +extern vx_device_h vx_dev_open() { + + auto device = new vx_device(); + + return (vx_device_h)device; +} + +extern int vx_dev_close(vx_device_h hdevice) { + if (nullptr == hdevice) + return -1; + + delete (vx_device*)hdevice; + + return 0; +} + +extern vx_buffer_h vx_buf_alloc(vx_device_h hdevice, size_t size) { + if (nullptr == hdevice) + return nullptr; + + auto buffer = new vx_buffer(size, (vx_device*)hdevice); + if (nullptr == buffer->data()) { + delete buffer; + return nullptr; + } + + return (vx_buffer*)buffer; +} + +extern void* vs_buf_ptr(vx_buffer_h hbuffer) { + if (nullptr == hbuffer) + return nullptr; + + return ((vx_buffer*)hbuffer)->data(); +} + +extern int vx_buf_release(vx_buffer_h hbuffer) { + if (nullptr == hbuffer) + return -1; + + delete (vx_buffer*)hbuffer; + + return 0; +} + +extern int vx_copy_to_fpga(vx_buffer_h hbuffer, size_t dest_addr, size_t size, size_t src_offset) { + if (nullptr == hbuffer) + return -1; + + auto buffer = (vx_buffer*)hbuffer; + + if (size + src_offset > buffer->size()) + return -1; + + return buffer->device()->upload(buffer->data(), dest_addr, size, src_offset); +} + +extern int vx_copy_from_fpga(vx_buffer_h hbuffer, size_t src_addr, size_t size, size_t dest_offset) { + if (nullptr == hbuffer) + return -1; + + auto buffer = (vx_buffer*)hbuffer; + + if (size + dest_offset > buffer->size()) 
+ return -1; + + return buffer->device()->download(buffer->data(), src_addr, size, dest_offset); +} + +extern int vx_start(vx_device_h hdevice) { + if (nullptr == hdevice) + return -1; + + return ((vx_device*)hdevice)->start(); +} + +extern int vx_ready_wait(vx_device_h hdevice, long long timeout) { + if (nullptr == hdevice) + return -1; + + return ((vx_device*)hdevice)->wait(timeout); +} diff --git a/driver/sw/Makefile b/driver/sw/Makefile new file mode 100644 index 00000000..1355750a --- /dev/null +++ b/driver/sw/Makefile @@ -0,0 +1,71 @@ + +DRV_CFLAGS += -O0 -g -Wall -Wextra -pedantic -Wfatal-errors + +DRV_CFLAGS += -I/tools/opae/1.4.0/include + +DRV_LDFLAGS += -L/tools/opae/1.4.0/lib + +# stack execution protection +DRV_LDFLAGS +=-z noexecstack + +# data relocation and projection +DRV_LDFLAGS +=-z relro -z now + +# stack buffer overrun detection +# Note that CentOS 7 has gcc 4.8 by default. When we switch +# to a system with gcc 4.9 or newer this should be changed to +# CFLAGS="-fstack-protector-strong" +DRV_CFLAGS +=-fstack-protector + +# Position independent code +DRV_CFLAGS += -fPIC + +DRV_LDFLAGS += -luuid + +DRV_LDFLAGS += -shared + +FPGA_LIBS += -lopae-c +ASE_LIBS += -lopae-c-ase + +CXXFLAGS += -std=c++17 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors + +LDFLAGS += -L. 
+ +PROJECT = libvxdrv.so +PROJECT_ASE = libvxdrv_ase.so + +AFU_JSON_INFO = vortex_afu.h + +all: $(PROJECT) $(PROJECT_ASE) test test_ase + +# AFU info from JSON file, including AFU UUID +$(AFU_JSON_INFO): ../hw/vortex_afu.json + afu_json_mgr json-info --afu-json=$^ --c-hdr=$@ + +$(PROJECT): vx_driver.o + $(CC) $(DRV_CFLAGS) $^ $(DRV_LDFLAGS) $(FPGA_LIBS) -o $@ + +$(PROJECT_ASE): vx_driver.o + $(CC) $(DRV_CFLAGS) -DUSE_ASE $^ $(DRV_LDFLAGS) $(ASE_LIBS) -o $@ + +test: test.o $(PROJECT) + $(CXX) $(CXXFLAGS) test.o $(LDFLAGS) -lvxdrv -o $@ + +test_ase: test.o $(PROJECT_ASE) + $(CXX) $(CXXFLAGS) -DUSE_ASE test.o $(LDFLAGS) -lvxdrv_ase -o $@ + +vx_driver.o: vx_driver.c + $(CC) $(DRV_CFLAGS) -c $^ -o $@ + +test.o: test.cpp $(AFU_JSON_INFO) + $(CXX) $(CXXFLAGS) -c test.cpp -o $@ + +.depend: vx_driver.c test.cpp + $(CXX) $(CXXFLAGS) -MM $^ > .depend; + +clean: + rm -rf $(PROJECT) $(PROJECT_ASE) test test_ase $(AFU_JSON_INFO) *.so *.o .depend + +ifneq ($(MAKECMDGOALS),clean) + -include .depend +endif \ No newline at end of file diff --git a/driver/sw/test.cpp b/driver/sw/test.cpp new file mode 100644 index 00000000..97a690a2 --- /dev/null +++ b/driver/sw/test.cpp @@ -0,0 +1,142 @@ +#include +#include +#include +#include + +#include "utils.h" + +#define CACHE_LINESIZE 64 + +const char* program_file = nullptr; + +static void show_usage() { + std::cout << "Vortex Driver Test." 
<< std::endl; + std::cout << "Usage: [-f: program] [-h: help]" << std::endl; +} + +static void parse_args(int argc, char **argv) { + int c; + while ((c = getopt(argc, argv, "f:h?")) != -1) { + switch (c) { + case 'f': { + program_file = optarg; + } break; + case 'h': + case '?': { + show_usage(); + exit(0); + } break; + default: + show_usage(); + exit(-1); + } + } + + if (nullptr == program_file) { + show_usage(); + exit(-1); + } +} + +static int upload_program(vx_device_h device, const char* filename, uint32_t transfer_size = 16 * VX_CACHE_LINESIZE) { + std::ifstream ifs(filename); + if (!ifs) { + std::cout << "error: " << filename << " not found" << std::endl; + return -1; + } + + // allocate device buffer + auto buffer = vx_buf_alloc(device, transfer_size); + if (nullptr == buffer) + return -1; + + // get buffer address + auto buf_ptr = (uint8_t*)vs_buf_ptr(buffer); + + // + // copy hex program + // + + char line[ihex_t::MAX_LINE_SIZE]; + uint32_t hex_offset = 0; + uint32_t prev_hex_address = 0; + uint32_t dest_address = -1; + uint32_t src_offset = 0; + + while (true) { + ifs.getline(line, ihex_t::MAX_LINE_SIZE); + if (!ifs) + break; + + ihex_t ihex; + parse_ihex_line(line, &ihex); + if (ihex.is_eof) + break; + + if (ihex.has_offset) { + hex_offset = ihex.offset; + } + + if (ihex.data_size != 0) { + auto hex_address = ihex.address + hex_offset; + if (dest_address == (uint32_t)-1) { + dest_address = (hex_address / VX_CACHE_LINESIZE) * VX_CACHE_LINESIZE; + src_offset = hex_address - dest_address; + } else { + auto delta = hex_address - prev_hex_address; + src_offset += delta; + } + for (uint32_t i = 0; i < ihex.data_size; ++i) { + if (src_offset >= transfer_size) { + // flush current batch to FPGA + vx_copy_to_fpga(buffer, dest_address, transfer_size, 0); + dest_address = (hex_address/ VX_CACHE_LINESIZE) * VX_CACHE_LINESIZE; + src_offset = hex_address - dest_address; + } + buf_ptr[src_offset++] = ihex.data[i]; + ++hex_address; + } + prev_hex_address = hex_address; 
+ } + } + + // flush last batch to FPGA + if (src_offset) { + vx_copy_to_fpga(buffer, dest_address, src_offset, 0); + } + + vx_buf_release(buffer); + + return 0; +} + +int main(int argc, char *argv[]) { + // parse command arguments + parse_args(argc, argv); + + // open device connection + auto device = vx_dev_open(); + + // upload program + if (0 != upload_program(device, program_file)) { + vx_dev_close(device); + return -1; + } + + // start device + if (0 != vx_start(device)) { + vx_dev_close(device); + return -1; + } + + // wait for completion + if (0 != vx_ready_wait(device, -1)) { + vx_dev_close(device); + return -1; + } + + // close device + vx_dev_close(device); + + return 0; +} \ No newline at end of file diff --git a/driver/sw/utils.cpp b/driver/sw/utils.cpp new file mode 100644 index 00000000..262c6ee1 --- /dev/null +++ b/driver/sw/utils.cpp @@ -0,0 +1,71 @@ + + +#include +#include "utils.h" + +static uint32_t hti_old(char c) { + if (c >= 'A' && c <= 'F') + return c - 'A' + 10; + if (c >= 'a' && c <= 'f') + return c - 'a' + 10; + return c - '0'; + } + +static uint32_t hToI_old(char *c, uint32_t size) { + uint32_t value = 0; + for (uint32_t i = 0; i < size; i++) { + value += hti_old(c[i]) << ((size - i - 1) * 4); + } + return value; +} + + +int parse_ihex_line(char* line, ihex_t* out) { + if (line[0] != ':') { + std::cout << "error: invalid line entry!" 
<< std::endl; + return -1; + } + + uint32_t data_size = 0; + uint32_t address = 0; + uint32_t offset = 0; + bool has_offset = false; + bool is_eof = false; + + auto record_type = hToI_old(line + 7, 2); + + switch (record_type) { + case 0: { // data + data_size = hToI_old(line + 1, 2); + address = hToI_old(line + 3, 4); + for (uint32_t i = 0; i < data_size; i++) { + out->data[i] = hToI_old(line + 9 + i * 2, 2); + } + } break; + case 1: // end of file + is_eof = true; + break; + case 2: // extended segment address + offset = hToI_old(line + 9, 4) << 4; + has_offset = true; + break; + case 3: // start segment address + break; + case 4: // extended linear address + offset = hToI_old(line + 9, 4) << 16; + has_offset = true; + break; + case 5: // start linear address + break; + default: + return -1; + } + + out->address = address; + out->data_size = data_size; + out->offset = offset; + out->has_offset = has_offset; + out->is_eof = is_eof; + + return 0; + } \ No newline at end of file diff --git a/driver/sw/utils.h b/driver/sw/utils.h new file mode 100644 index 00000000..cc82ea63 --- /dev/null +++ b/driver/sw/utils.h @@ -0,0 +1,15 @@ + +#pragma once + +struct ihex_t { + static constexpr int MAX_LINE_SIZE = 524; + static constexpr int MAX_DATA_SIZE = 255; + uint8_t data[MAX_DATA_SIZE]; + uint32_t address; + uint32_t data_size; + uint32_t offset; + bool has_offset; + bool is_eof; +}; + +int parse_ihex_line(char* line, ihex_t* out); \ No newline at end of file diff --git a/driver/sw/vx_driver.c b/driver/sw/vx_driver.c new file mode 100644 index 00000000..4de6ea4b --- /dev/null +++ b/driver/sw/vx_driver.c @@ -0,0 +1,259 @@ +#include "vx_driver.h" + +#include +#include +#include +#include +#include +#include + +#include + +// MMIO Address Mappings +#define AFU_ID AFU_ACCEL_UUID + +#define MMIO_COPY_IO_ADDRESS 0X120 +#define MMIO_COPY_AVM_ADDRESS 0x100 +#define MMIO_COPY_DATA_SIZE 0X118 + +#define MMIO_CMD_TYPE 0X110 // MMIO location set by SW to denote read/write. 
read: 3; write: 1; vortex: 7 +#define MMIO_READY_FOR_CMD 0X198 + +#define CHECK_RES(_expr) \ + do { \ + fpga_result res = _expr; \ + if (res == FPGA_OK) \ + break; \ + printf("OPAE Error: '%s' returned %d!\n", #_expr, (int)res); \ + return -1; \ + } while (false) + +/////////////////////////////////////////////////////////////////////////////// + +typedef struct vx_buffer_ { + uint64_t wsid; + volatile void* host_ptr; + uint64_t io_addr; + fpga_handle hdevice; + size_t size; +} vx_buffer_t; + +static size_t align_size(size_t size) { + return VX_CACHE_LINESIZE * ((size + VX_CACHE_LINESIZE - 1) / VX_CACHE_LINESIZE); +} + +/////////////////////////////////////////////////////////////////////////////// + +// Search for an accelerator matching the requested UUID and connect to it +// Convert this to void if required as storing the fpga_handle to params variable +extern vx_device_h vx_dev_open(const char *accel_uuid) { + fpga_properties filter = NULL; + fpga_result res; + fpga_guid guid; + fpga_token accel_token; + uint32_t num_matches; + fpga_handle accel_handle; + + // Set up a filter that will search for an accelerator + fpgaGetProperties(NULL, &filter); + fpgaPropertiesSetObjectType(filter, FPGA_ACCELERATOR); + + // Add the desired UUID to the filter + uuid_parse(accel_uuid, guid); + fpgaPropertiesSetGUID(filter, guid); + + // Do the search across the available FPGA contexts + num_matches = 1; + fpgaEnumerate(&filter, 1, &accel_token, 1, &num_matches); + + // Not needed anymore + fpgaDestroyProperties(&filter); + + if (num_matches < 1) { + fprintf(stderr, "Accelerator %s not found!\n", accel_uuid); + return NULL; + } + + // Open accelerator + res = fpgaOpen(accel_token, &accel_handle, 0); + if (FPGA_OK != res) { + return NULL; + } + + // Done with token + fpgaDestroyToken(&accel_token); + + return accel_handle; +} + +// Close the fpga when all the operations are done +extern int vx_dev_close(vx_device_h hdevice) { + if (NULL == hdevice) + return -1; + + 
fpgaClose(hdevice); + + return 0; +} + +extern vx_buffer_h vx_buf_alloc(vx_device_h hdevice, size_t size) { + fpga_result res; + void* host_ptr; + uint64_t wsid; + uint64_t io_addr; + vx_buffer_t* buffer; + + if (NULL == hdevice) + return NULL; + + size_t asize = align_size(size); + + res = fpgaPrepareBuffer(hdevice, asize, &host_ptr, &wsid, 0); + if (FPGA_OK != res) { + return NULL; + } + + // Get the physical address of the buffer in the accelerator + res = fpgaGetIOAddress(hdevice, wsid, &io_addr); + if (FPGA_OK != res) { + fpgaReleaseBuffer(hdevice, wsid); + return NULL; + } + + buffer = (vx_buffer_t*)malloc(sizeof(vx_buffer_t)); + if (NULL == buffer) { + fpgaReleaseBuffer(hdevice, wsid); + return NULL; + } + + buffer->wsid = wsid; + buffer->host_ptr = host_ptr; + buffer->io_addr = io_addr; + buffer->hdevice = hdevice; + buffer->size = size; + + return (vx_buffer_h)buffer; +} + +extern volatile void* vs_buf_ptr(vx_buffer_h hbuffer) { + vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer); + if (NULL == buffer) + return NULL; + + return buffer->host_ptr; +} + +extern int vx_buf_release(vx_buffer_h hbuffer) { + vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer); + if (NULL == buffer) + return -1; + + fpgaReleaseBuffer(buffer->hdevice, buffer->wsid); + + free(hbuffer); + + return 0; +} + +// Check if HW is ready for SW +static int ready_for_sw(fpga_handle hdevice) { + uint64_t data = 0; + struct timespec sleep_time; + +#ifdef USE_ASE + sleep_time.tv_sec = 1; + sleep_time.tv_nsec = 0; +#else + sleep_time.tv_sec = 0; + sleep_time.tv_nsec = 1000000; +#endif + + do { + CHECK_RES(fpgaReadMMIO64(hdevice, 0, MMIO_READY_FOR_CMD, &data)); + nanosleep(&sleep_time, NULL); + } while (data != 0x1); + + return 0; +} + +extern int vx_copy_to_fpga(vx_buffer_h hbuffer, size_t dest_addr, size_t size, size_t src_offset) { + vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer); + + // bound checking + if (size + src_offset > buffer->size) + return -1; + + // Ensure ready for new command + if 
(ready_for_sw(buffer->hdevice) != 0) + return -1; + + CHECK_RES(fpgaWriteMMIO64(buffer->hdevice, 0, MMIO_COPY_AVM_ADDRESS, dest_addr)); + CHECK_RES(fpgaWriteMMIO64(buffer->hdevice, 0, MMIO_COPY_IO_ADDRESS, (buffer->io_addr + src_offset)/VX_CACHE_LINESIZE)); + CHECK_RES(fpgaWriteMMIO64(buffer->hdevice, 0, MMIO_COPY_DATA_SIZE, size)); + CHECK_RES(fpgaWriteMMIO64(buffer->hdevice, 0, MMIO_CMD_TYPE, 1)); // WRITE CMD + + // Wait for the write operation to finish + return ready_for_sw(buffer->hdevice); +} + +extern int vx_copy_from_fpga(vx_buffer_h hbuffer, size_t src_addr, size_t size, size_t dest_offset) { + vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer); + + // bound checking + if (size + dest_offset > buffer->size) + return -1; + + // Ensure ready for new command + if (ready_for_sw(buffer->hdevice) != 0) + return -1; + + CHECK_RES(fpgaWriteMMIO64(buffer->hdevice, 0, MMIO_COPY_AVM_ADDRESS, src_addr)); + CHECK_RES(fpgaWriteMMIO64(buffer->hdevice, 0, MMIO_COPY_IO_ADDRESS, (buffer->io_addr + dest_offset)/VX_CACHE_LINESIZE)); + CHECK_RES(fpgaWriteMMIO64(buffer->hdevice, 0, MMIO_COPY_DATA_SIZE, size)); + CHECK_RES(fpgaWriteMMIO64(buffer->hdevice, 0, MMIO_CMD_TYPE, 3)); // READ CMD + + // Wait for the write operation to finish + return ready_for_sw(buffer->hdevice); +} + +extern int vx_start(vx_device_h hdevice) { + if (NULL == hdevice) + return -1; + + // Ensure ready for new command + if (ready_for_sw(hdevice) != 0) + return -1; + + CHECK_RES(fpgaWriteMMIO64(hdevice, 0, MMIO_CMD_TYPE, 7)); // START CMD + + return 0; +} + +extern int vx_ready_wait(vx_device_h hdevice, long long timeout) { + if (NULL == hdevice) + return -1; + + uint64_t data = 0; + struct timespec sleep_time; + +#ifdef USE_ASE + sleep_time.tv_sec = 1; + sleep_time.tv_nsec = 0; +#else + sleep_time.tv_sec = 0; + sleep_time.tv_nsec = 1000000; +#endif + + // to milliseconds + long long sleep_time_ms = (sleep_time.tv_sec * 1000) + (sleep_time.tv_nsec / 1000000); + + do { + CHECK_RES(fpgaReadMMIO64(hdevice, 0, 
MMIO_READY_FOR_CMD, &data)); + nanosleep(&sleep_time, NULL); + sleep_time_ms -= sleep_time_ms; + if (timeout <= sleep_time_ms) + break; + } while (data != 0x1); + + return 0; +} diff --git a/driver/sw/vx_driver.h b/driver/sw/vx_driver.h new file mode 100644 index 00000000..c1da9ece --- /dev/null +++ b/driver/sw/vx_driver.h @@ -0,0 +1,47 @@ +#ifndef __VX_DRIVER_H__ +#define __VX_DRIVER_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef void* vx_device_h; + +typedef void* vx_buffer_h; + +#define VX_CACHE_LINESIZE 64 + +// open the device and connect to it +vx_device_h vx_dev_open(); + +// Close the device when all the operations are done +int vx_dev_close(vx_device_h hdevice); + +// Allocate shared buffer with device +vx_buffer_h vx_buf_alloc(vx_device_h hdevice, size_t size); + +// Get host pointer address +void* vs_buf_ptr(vx_buffer_h hbuffer); + +// release buffer +int vx_buf_release(vx_buffer_h hbuffer); + +// Copy bytes from buffer to device local memory +int vx_copy_to_fpga(vx_buffer_h hbuffer, size_t dest_addr, size_t size, size_t src_offset); + +// Copy bytes from device local memory to buffer +int vx_copy_from_fpga(vx_buffer_h hbuffer, size_t src_addr, size_t size, size_t dst_offset); + +// Start device execution +int vx_start(vx_device_h hdevice); + +// Wait for device ready with milliseconds timeout +int vx_ready_wait(vx_device_h hdevice, long long timeout); + +#ifdef __cplusplus +} +#endif + +#endif // __VX_DRIVER_H__ diff --git a/emulator/include/mem.h b/emulator/include/mem.h index 3d1776ac..63e1b215 100644 --- a/emulator/include/mem.h +++ b/emulator/include/mem.h @@ -334,6 +334,7 @@ namespace Harp { if(fp == 0){ std::cout << path << " not found" << std::endl; } + //Preload 0x0 <-> 0x80000000 jumps ((uint32_t*)this->get(0))[0] = 0xf1401073; ((uint32_t*)this->get(0))[1] = 0xf1401073; @@ -349,10 +350,7 @@ namespace Harp { ((uint32_t*)this->get(0xb0000000))[0] = 0x01C02023; // F00FFF10 ((uint32_t*)this->get(0xf00fff10))[0] = 0x12345678; 
- - - fseek(fp, 0, SEEK_END); uint32_t size = ftell(fp); fseek(fp, 0, SEEK_SET); diff --git a/simX/core.cpp b/simX/core.cpp index 7ba684a4..73400647 100644 --- a/simX/core.cpp +++ b/simX/core.cpp @@ -126,17 +126,14 @@ Core::Core(const ArchDef &a, Decoder &d, MemoryUnit &mem, Word id): INIT_TRACE(inst_in_lsu); INIT_TRACE(inst_in_wb); - for (int i = 0; i < 32; i++) - { + for (int i = 0; i < 32; i++) { stallWarp[i] = false; - for (int j = 0; j < 32; j++) - { + for (int j = 0; j < 32; j++) { renameTable[i][j] = true; } } - for(int i = 0; i < 32; i++) - { + for(int i = 0; i < 32; i++) { vecRenameTable[i] = true; } @@ -157,8 +154,9 @@ Core::Core(const ArchDef &a, Decoder &d, MemoryUnit &mem, Word id): cache_simulator->reset = 0; cache_simulator->clk = 0; - for (unsigned i = 0; i < a.getNWarps(); ++i) + for (unsigned i = 0; i < a.getNWarps(); ++i) { w.push_back(Warp(this, i)); + } w[0].activeThreads = 1; w[0].spawned = true; @@ -634,8 +632,6 @@ void Core::execute_unit() void Core::writeback() { - - if (inst_in_wb.rd > 0) renameTable[inst_in_wb.wid][inst_in_wb.rd] = true; if (inst_in_wb.vd > 0) vecRenameTable[inst_in_wb.vd] = true; diff --git a/simX/include/mem.h b/simX/include/mem.h index f0f340e7..96d1d38c 100644 --- a/simX/include/mem.h +++ b/simX/include/mem.h @@ -168,16 +168,18 @@ namespace Harp { bool disableVm; }; - class RAM : public MemDevice { public: uint8_t* mem[1 << 12]; RAM(){ - for(uint32_t i = 0;i < (1 << 12);i++) mem[i] = NULL; + for(uint32_t i = 0;i < (1 << 12);i++) + mem[i] = NULL; } ~RAM(){ - for(uint32_t i = 0;i < (1 << 12);i++) if(mem[i]) delete [] mem[i]; + for(uint32_t i = 0;i < (1 << 12);i++) + if(mem[i]) + delete [] mem[i]; } void clear(){ @@ -218,7 +220,7 @@ namespace Harp { } } - virtual Size size() const { return (1<<31); }; + virtual Size size() const { return -1; } void getBlock(uint32_t address, uint8_t *data) { @@ -325,137 +327,9 @@ namespace Harp { // MEMORY UTILS - uint32_t hti_old(char c) { - if (c >= 'A' && c <= 'F') - return c - 'A' + 
10; - if (c >= 'a' && c <= 'f') - return c - 'a' + 10; - return c - '0'; - } - - uint32_t hToI_old(char *c, uint32_t size) { - uint32_t value = 0; - for (uint32_t i = 0; i < size; i++) { - value += hti_old(c[i]) << ((size - i - 1) * 4); - } - return value; - } - - - - void loadHexImpl(std::string path) { - this->clear(); - FILE *fp = fopen(&path[0], "r"); - if(fp == 0){ - std::cout << path << " not found" << std::endl; - } - //Preload 0x0 <-> 0x80000000 jumps - ((uint32_t*)this->get(0))[0] = 0xf1401073; - ((uint32_t*)this->get(0))[1] = 0xf1401073; - - // ((uint32_t*)this->get(0))[1] = 0xf1401073; - ((uint32_t*)this->get(0))[2] = 0x30101073; - - ((uint32_t*)this->get(0))[3] = 0x800000b7; - ((uint32_t*)this->get(0))[4] = 0x000080e7; - - ((uint32_t*)this->get(0x80000000))[0] = 0x00000097; - - ((uint32_t*)this->get(0xb0000000))[0] = 0x01C02023; - // F00FFF10 - ((uint32_t*)this->get(0xf00fff10))[0] = 0x12345678; - - - - ((uint32_t*)this->get(0x70000000))[0] = 0x00008067; - - { - uint32_t init_addr = 0x70000004; - for (int off = 0; off < 1024; off+=4) - { - uint32_t new_addr = init_addr+off; - ((uint32_t*)this->get(new_addr))[0] = 0x00000000; - } - } - - { - uint32_t init_addr = 0x71000000; - for (int off = 0; off < 1024; off+=4) - { - uint32_t new_addr = init_addr+off; - ((uint32_t*)this->get(new_addr))[0] = 0x00000000; - } - } - - { - uint32_t init_addr = 0x72000000; - for (int off = 0; off < 1024; off+=4) - { - uint32_t new_addr = init_addr+off; - ((uint32_t*)this->get(new_addr))[0] = 0x00000000; - } - } - - - fseek(fp, 0, SEEK_END); - uint32_t size = ftell(fp); - fseek(fp, 0, SEEK_SET); - char* content = new char[size]; - int x = fread(content, 1, size, fp); - - if (!x) { std::cout << "COULD NOT READ FILE\n"; std::abort();} - - int offset = 0; - char* line = content; - // std::cout << "WHTA\n"; - while (1) { - if (line[0] == ':') { - uint32_t byteCount = hToI_old(line + 1, 2); - uint32_t nextAddr = hToI_old(line + 3, 4) + offset; - uint32_t key = hToI_old(line + 7, 
2); - switch (key) { - case 0: - for (uint32_t i = 0; i < byteCount; i++) { - - unsigned add = nextAddr + i; - - *(this->get(add)) = hToI_old(line + 9 + i * 2, 2); - // std::cout << "lhi: Address: " << std::hex <<(add) << "\tValue: " << std::hex << hToI_old(line + 9 + i * 2, 2) << std::endl; - } - break; - case 2: - // cout << offset << endl; - offset = hToI_old(line + 9, 4) << 4; - break; - case 4: - // cout << offset << endl; - offset = hToI_old(line + 9, 4) << 16; - break; - default: - // cout << "??? " << key << endl; - break; - } - } - - while (*line != '\n' && size != 0) { - line++; - size--; - } - if (size <= 1) - break; - line++; - size--; - } - - - if (content) delete[] content; - } + void loadHexImpl(std::string path); }; - - - - } diff --git a/simX/mem.cpp b/simX/mem.cpp index b39a0ee4..0b826ac5 100644 --- a/simX/mem.cpp +++ b/simX/mem.cpp @@ -280,6 +280,120 @@ void DiskControllerMemDevice::write(Addr a, Word w) { } } +static uint32_t hti_old(char c) { + if (c >= 'A' && c <= 'F') + return c - 'A' + 10; + if (c >= 'a' && c <= 'f') + return c - 'a' + 10; + return c - '0'; + } +static uint32_t hToI_old(char *c, uint32_t size) { + uint32_t value = 0; + for (uint32_t i = 0; i < size; i++) { + value += hti_old(c[i]) << ((size - i - 1) * 4); + } + return value; +} +void RAM::loadHexImpl(std::string path) { + this->clear(); + FILE *fp = fopen(&path[0], "r"); + if(fp == 0){ + std::cout << path << " not found" << std::endl; + } + //Preload 0x0 <-> 0x80000000 jumps + ((uint32_t*)this->get(0))[0] = 0xf1401073; + ((uint32_t*)this->get(0))[1] = 0xf1401073; + ((uint32_t*)this->get(0))[2] = 0x30101073; + ((uint32_t*)this->get(0))[3] = 0x800000b7; + ((uint32_t*)this->get(0))[4] = 0x000080e7; + + ((uint32_t*)this->get(0x80000000))[0] = 0x00000097; + + ((uint32_t*)this->get(0xb0000000))[0] = 0x01C02023; + + ((uint32_t*)this->get(0xf00fff10))[0] = 0x12345678; + + ((uint32_t*)this->get(0x70000000))[0] = 0x00008067; + + { + uint32_t init_addr = 0x70000004; + for (int off = 0; 
off < 1024; off+=4) { + uint32_t new_addr = init_addr+off; + ((uint32_t*)this->get(new_addr))[0] = 0x00000000; + } + } + + { + uint32_t init_addr = 0x71000000; + for (int off = 0; off < 1024; off+=4) { + uint32_t new_addr = init_addr+off; + ((uint32_t*)this->get(new_addr))[0] = 0x00000000; + } + } + + { + uint32_t init_addr = 0x72000000; + for (int off = 0; off < 1024; off+=4) { + uint32_t new_addr = init_addr+off; + ((uint32_t*)this->get(new_addr))[0] = 0x00000000; + } + } + + fseek(fp, 0, SEEK_END); + uint32_t size = ftell(fp); + fseek(fp, 0, SEEK_SET); + char* content = new char[size]; + int x = fread(content, 1, size, fp); + + if (!x) { + std::cout << "COULD NOT READ FILE\n"; std::abort(); + } + + int offset = 0; + char* line = content; + // std::cout << "WHTA\n"; + while (1) { + if (line[0] == ':') { + uint32_t byteCount = hToI_old(line + 1, 2); + uint32_t nextAddr = hToI_old(line + 3, 4) + offset; + uint32_t key = hToI_old(line + 7, 2); + switch (key) { + case 0: + for (uint32_t i = 0; i < byteCount; i++) { + unsigned add = nextAddr + i; + *(this->get(add)) = hToI_old(line + 9 + i * 2, 2); + // std::cout << "lhi: Address: " << std::hex <<(add) << "\tValue: " << std::hex << hToI_old(line + 9 + i * 2, 2) << std::endl; + } + break; + case 2: + // cout << offset << endl; + offset = hToI_old(line + 9, 4) << 4; + break; + case 4: + // cout << offset << endl; + offset = hToI_old(line + 9, 4) << 16; + break; + default: + // cout << "??? 
" << key << endl; + break; + } + } + + while (*line != '\n' && size != 0) { + line++; + size--; + } + + if (size <= 1) + break; + + line++; + size--; + } + + if (content) + delete[] content; + } \ No newline at end of file diff --git a/simX/simX.cpp b/simX/simX.cpp index 5ef0cfae..103b0693 100644 --- a/simX/simX.cpp +++ b/simX/simX.cpp @@ -55,44 +55,43 @@ HarpToolMode findMode(int argc, char** argv) { } int emu_main(int argc, char **argv) { - string archString("rv32i"); - string imgFileName("a.dsfsdout.bin"); - bool showHelp, showStats, basicMachine, batch; - bool cpu_mode(false); + string archString("rv32i"); + string imgFileName("a.dsfsdout.bin"); + bool showHelp, showStats, basicMachine, batch; + bool cpu_mode(false); - /* Read the command line arguments. */ - CommandLineArgFlag fh("-h", "--help", "", showHelp); - CommandLineArgSetterfc("-c", "--core", "", imgFileName); - CommandLineArgSetterfa("-a", "--arch", "", archString); - CommandLineArgFlag fs("-s", "--stats", "", showStats); - CommandLineArgFlag fb("-b", "--basic", "", basicMachine); - CommandLineArgFlag fi("-i", "--batch", "", batch); - CommandLineArgFlag fx("-x", "--cpu", "", cpu_mode); - - CommandLineArg::readArgs(argc, argv); - - if (showHelp) { - cout << Help::emuHelp; - return 0; - } + /* Read the command line arguments. */ + CommandLineArgFlag fh("-h", "--help", "", showHelp); + CommandLineArgSetterfc("-c", "--core", "", imgFileName); + CommandLineArgSetterfa("-a", "--arch", "", archString); + CommandLineArgFlag fs("-s", "--stats", "", showStats); + CommandLineArgFlag fb("-b", "--basic", "", basicMachine); + CommandLineArgFlag fi("-i", "--batch", "", batch); + CommandLineArgFlag fx("-x", "--cpu", "", cpu_mode); + + CommandLineArg::readArgs(argc, argv); + + if (showHelp) { + cout << Help::emuHelp; + return 0; + } - /* Instantiate a Core, RAM, and console output. */ - ArchDef arch(archString, cpu_mode); + /* Instantiate a Core, RAM, and console output. 
*/ + ArchDef arch(archString, cpu_mode); - Decoder *dec; + Decoder *dec; - switch (arch.getEncChar()) { + switch (arch.getEncChar()) { case 'b': dec = new WordDecoder(arch); break; case 'w': dec = new WordDecoder(arch); break; case 'r': dec = new WordDecoder(arch); break; default: cout << "Unrecognized decoder type: '" << arch.getEncChar() << "'.\n"; return 1; - } + } // std::cout << "TESTING: " << tests[t] << "\n"; - MemoryUnit mu(4096, arch.getWordSize(), basicMachine); Core core(arch, *dec, mu/*, ID in multicore implementations*/);