added support for write-through cache, removed cache snooping support

This commit is contained in:
Blaise Tine
2020-12-23 23:51:02 -08:00
parent d956e268b9
commit 703a861fe9
55 changed files with 1077 additions and 2178 deletions

View File

@@ -43,9 +43,6 @@ int vx_buf_release(vx_buffer_h hbuffer);
// allocate device memory and return address
int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr);
// Copy bytes from device local memory to buffer
int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size);
// Copy bytes from buffer to device local memory
int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t src_offset);

View File

@@ -41,7 +41,6 @@
#define CMD_MEM_READ AFU_IMAGE_CMD_MEM_READ
#define CMD_MEM_WRITE AFU_IMAGE_CMD_MEM_WRITE
#define CMD_RUN AFU_IMAGE_CMD_RUN
#define CMD_CLFLUSH AFU_IMAGE_CMD_CLFLUSH
#define CMD_CSR_READ AFU_IMAGE_CMD_CSR_READ
#define CMD_CSR_WRITE AFU_IMAGE_CMD_CSR_WRITE
@@ -462,36 +461,6 @@ extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size,
return 0;
}
// Issue a cache-flush command (CMD_CLFLUSH) to the device for the region
// starting at dev_maddr and spanning size bytes.
//
// hdevice   - device handle; cast to vx_device_t* to reach the fpga handle.
// dev_maddr - start address; must be aligned to CACHE_BLOCK_SIZE.
// size      - region length in bytes; rounded up to a multiple of
//             CACHE_BLOCK_SIZE via align_size() before being sent.
//
// Returns 0 on success. Returns -1 when hdevice is null, size is zero,
// dev_maddr is not block-aligned, or vx_ready_wait() reports non-zero.
// NOTE(review): CHECK_RES is defined elsewhere; presumably it bails out of
// this function when an MMIO write fails - confirm against its definition.
extern int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size) {
// size is a size_t, so "0 >= size" only rejects size == 0
if (nullptr == hdevice
|| 0 >= size)
return -1;
vx_device_t* device = ((vx_device_t*)hdevice);
// round the byte count up to a whole number of cache blocks
size_t asize = align_size(size, CACHE_BLOCK_SIZE);
// check alignment
if (!is_aligned(dev_maddr, CACHE_BLOCK_SIZE))
return -1;
// Ensure ready for new command
if (vx_ready_wait(hdevice, -1) != 0)
return -1;
// address and size are written in units of cache blocks, hence the shift
// by log2(CACHE_BLOCK_SIZE)
auto ls_shift = (int)std::log2(CACHE_BLOCK_SIZE);
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_MEM_ADDR, dev_maddr >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_DATA_SIZE, asize >> ls_shift));
// the command type is written last, after its address and size operands
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_CLFLUSH));
// Wait for the device to report ready again after the flush command was issued
if (vx_ready_wait(hdevice, -1) != 0)
return -1;
return 0;
}
extern int vx_start(vx_device_h hdevice) {
if (nullptr == hdevice)
return -1;

View File

@@ -7,9 +7,8 @@
#define AFU_ACCEL_NAME "vortex_afu"
#define AFU_ACCEL_UUID "35F9452B-25C2-434C-93D5-6F8C60DB361C"
#define AFU_IMAGE_CMD_CLFLUSH 4
#define AFU_IMAGE_CMD_CSR_READ 5
#define AFU_IMAGE_CMD_CSR_WRITE 6
#define AFU_IMAGE_CMD_CSR_READ 4
#define AFU_IMAGE_CMD_CSR_WRITE 5
#define AFU_IMAGE_CMD_MEM_READ 1
#define AFU_IMAGE_CMD_MEM_WRITE 2
#define AFU_IMAGE_CMD_RUN 3

View File

@@ -140,19 +140,6 @@ public:
return 0;
}
// Ask the simulator to flush its caches for the region [dev_maddr, dev_maddr + size)
// and step it until the request has drained.
//
// dev_maddr - start address passed through to simulator_.flush_caches().
// size      - region length passed through to simulator_.flush_caches().
//
// Always returns 0.
int flush_caches(size_t dev_maddr, size_t size) {
  if (future_.valid()) {
    future_.wait(); // ensure prior run completed
  }
  // The RAM is attached only for the duration of the flush.
  simulator_.attach_ram(&ram_);
  simulator_.flush_caches(dev_maddr, size);
  // Keep stepping until snp_req_active() reports that nothing is pending.
  while (simulator_.snp_req_active()) {
    simulator_.step();
  }
  simulator_.attach_ram(nullptr);
  return 0;
}
int set_csr(int core_id, int addr, unsigned value) {
if (future_.valid()) {
future_.wait(); // ensure prior run completed
@@ -257,16 +244,6 @@ extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr)
return device->alloc_local_mem(size, dev_maddr);
}
// C API wrapper: validates the arguments, then forwards the request to
// vx_device::flush_caches() on the object behind the handle.
// Returns -1 for a null handle or a zero size; otherwise returns whatever
// the device's flush_caches() returns.
extern int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size) {
  // A missing device handle cannot be serviced.
  if (hdevice == nullptr) {
    return -1;
  }
  // size is unsigned, so zero is the only value to reject.
  if (size == 0) {
    return -1;
  }
  vx_device* const target = (vx_device*)hdevice;
  return target->flush_caches(dev_maddr, size);
}
extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer) {
if (nullptr == hdevice

View File

@@ -267,14 +267,6 @@ extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr)
return device->alloc_local_mem(size, dev_maddr);
}
// Cache flush entry point for simX. Only the arguments are validated:
// returns -1 for a null handle or a zero size, and 0 otherwise.
extern int vx_flush_caches(vx_device_h hdevice, size_t /*dev_maddr*/, size_t size) {
  // size is unsigned, so zero is the only invalid length.
  const bool bad_args = (hdevice == nullptr) || (size == 0);
  // this functionality is not needed by simX, so a valid request is a no-op
  return bad_args ? -1 : 0;
}
extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer) {
if (nullptr == hdevice
|| 0 >= size

View File

@@ -16,10 +16,6 @@ extern int vx_alloc_dev_mem(vx_device_h /*hdevice*/, size_t /*size*/, size_t* /*
return -1;
}
// Stub implementation: ignores every argument and unconditionally returns -1.
extern int vx_flush_caches(vx_device_h /*hdevice*/,
                           size_t /*dev_maddr*/,
                           size_t /*size*/) {
  const int status = -1;
  return status;
}
extern int vx_alloc_shared_mem(vx_device_h /*hdevice*/, size_t /*size*/, vx_buffer_h* /*hbuffer*/) {
return -1;
}

View File

@@ -13,3 +13,8 @@ clean:
$(MAKE) -C demo clean
$(MAKE) -C dogfood clean
# Run the clean-all target in each of the basic, demo and dogfood subdirectories.
clean-all:
$(MAKE) -C basic clean-all
$(MAKE) -C demo clean-all
$(MAKE) -C dogfood clean-all

View File

@@ -171,17 +171,11 @@ int run_kernel_test(const kernel_arg_t& kernel_arg,
RT_CHECK(vx_ready_wait(device, -1));
auto t3 = std::chrono::high_resolution_clock::now();
// flush the caches
std::cout << "flush the caches" << std::endl;
auto t4 = std::chrono::high_resolution_clock::now();
RT_CHECK(vx_flush_caches(device, kernel_arg.dst_ptr, buf_size));
auto t5 = std::chrono::high_resolution_clock::now();
// read buffer from local memory
std::cout << "read buffer from local memory" << std::endl;
auto t6 = std::chrono::high_resolution_clock::now();
auto t4 = std::chrono::high_resolution_clock::now();
RT_CHECK(vx_copy_from_dev(buffer, kernel_arg.dst_ptr, buf_size, 0));
auto t7 = std::chrono::high_resolution_clock::now();
auto t5 = std::chrono::high_resolution_clock::now();
// verify result
@@ -210,8 +204,6 @@ int run_kernel_test(const kernel_arg_t& kernel_arg,
elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(t3 - t2).count();
printf("execute time: %lg ms\n", elapsed);
elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(t5 - t4).count();
printf("flush time: %lg ms\n", elapsed);
elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(t7 - t6).count();
printf("download time: %lg ms\n", elapsed);
elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(time_end - time_start).count();
printf("Total elapsed time: %lg ms\n", elapsed);

Binary file not shown.

View File

@@ -142,11 +142,11 @@ Disassembly of section .text:
80000180: 00008067 ret
80000184 <vx_num_cycles>:
80000184: c0002573 rdcycle a0
80000184: b0002573 csrr a0,mcycle
80000188: 00008067 ret
8000018c <vx_num_instrs>:
8000018c: c0202573 rdinstret a0
8000018c: b0202573 csrr a0,minstret
80000190: 00008067 ret
80000194 <atexit>:
@@ -390,7 +390,7 @@ Disassembly of section .text:
80000518: 00492703 lw a4,4(s2)
8000051c: 148a2783 lw a5,328(s4)
80000520: 01871463 bne a4,s8,80000528 <__call_exitprocs+0xe4>
80000524: f8f904e3 beq s2,a5,800004ac <__call_exitprocs+0x68>
80000524: f92784e3 beq a5,s2,800004ac <__call_exitprocs+0x68>
80000528: f80788e3 beqz a5,800004b8 <__call_exitprocs+0x74>
8000052c: 00078913 mv s2,a5
80000530: f5dff06f j 8000048c <__call_exitprocs+0x48>
@@ -450,21 +450,20 @@ Disassembly of section .comment:
0: 3a434347 fmsub.d ft6,ft6,ft4,ft7,rmm
4: 2820 fld fs0,80(s0)
6: 29554e47 fmsub.s ft8,fa0,fs5,ft5,rmm
a: 3920 fld fs0,112(a0)
c: 322e fld ft4,232(sp)
e: 302e fld ft0,232(sp)
...
a: 3120 fld fs0,96(a0)
c: 2e30 fld fa2,88(a2)
e: 2e32 fld ft8,264(sp)
10: 0030 addi a2,sp,8
Disassembly of section .riscv.attributes:
00000000 <.riscv.attributes>:
0: 2541 jal 680 <_start-0x7ffff980>
0: 2941 jal 490 <_start-0x7ffffb70>
2: 0000 unimp
4: 7200 flw fs0,32(a2)
6: 7369 lui t1,0xffffa
8: 01007663 bgeu zero,a6,14 <_start-0x7fffffec>
c: 0000001b 0x1b
10: 1004 addi s1,sp,32
c: 001f 0000 1004 0x10040000001f
12: 7205 lui tp,0xfffe1
14: 3376 fld ft6,376(sp)
16: 6932 flw fs2,12(sp)
@@ -473,3 +472,5 @@ Disassembly of section .riscv.attributes:
1c: 326d jal fffff9c6 <__global_pointer$+0x7fffdc5e>
1e: 3070 fld fa2,224(s0)
20: 665f 7032 0030 0x307032665f
26: 0108 addi a0,sp,128
28: 0b0a slli s6,s6,0x2

Binary file not shown.

View File

@@ -69,10 +69,6 @@ int run_test(const kernel_arg_t& kernel_arg,
std::cout << "wait for completion" << std::endl;
RT_CHECK(vx_ready_wait(device, -1));
// flush the destination buffer caches
std::cout << "flush the destination buffer caches" << std::endl;
RT_CHECK(vx_flush_caches(device, kernel_arg.dst_ptr, buf_size));
// download destination buffer
std::cout << "download destination buffer" << std::endl;
RT_CHECK(vx_copy_from_dev(buffer, kernel_arg.dst_ptr, buf_size, 0));

Binary file not shown.

View File

@@ -165,11 +165,11 @@ Disassembly of section .text:
800001d4: 00008067 ret
800001d8 <vx_num_cycles>:
800001d8: c0002573 rdcycle a0
800001d8: b0002573 csrr a0,mcycle
800001dc: 00008067 ret
800001e0 <vx_num_instrs>:
800001e0: c0202573 rdinstret a0
800001e0: b0202573 csrr a0,minstret
800001e4: 00008067 ret
800001e8 <spawn_warp_runonce>:
@@ -461,7 +461,7 @@ Disassembly of section .text:
8000061c: 00492703 lw a4,4(s2)
80000620: 148a2783 lw a5,328(s4)
80000624: 01871463 bne a4,s8,8000062c <__call_exitprocs+0xe4>
80000628: f8f904e3 beq s2,a5,800005b0 <__call_exitprocs+0x68>
80000628: f92784e3 beq a5,s2,800005b0 <__call_exitprocs+0x68>
8000062c: f80788e3 beqz a5,800005bc <__call_exitprocs+0x74>
80000630: 00078913 mv s2,a5
80000634: f5dff06f j 80000590 <__call_exitprocs+0x48>
@@ -527,22 +527,20 @@ Disassembly of section .comment:
0: 3a434347 fmsub.d ft6,ft6,ft4,ft7,rmm
4: 2820 fld fs0,80(s0)
6: 29554e47 fmsub.s ft8,fa0,fs5,ft5,rmm
a: 3920 fld fs0,112(a0)
c: 322e fld ft4,232(sp)
e: 302e fld ft0,232(sp)
...
a: 3120 fld fs0,96(a0)
c: 2e30 fld fa2,88(a2)
e: 2e32 fld ft8,264(sp)
10: 0030 addi a2,sp,8
Disassembly of section .riscv.attributes:
00000000 <.riscv.attributes>:
0: 2041 jal 80 <_start-0x7fffff80>
0: 2941 jal 490 <_start-0x7ffffb70>
2: 0000 unimp
4: 7200 flw fs0,32(a2)
6: 7369 lui t1,0xffffa
8: 01007663 bgeu zero,a6,14 <_start-0x7fffffec>
c: 0016 c.slli zero,0x5
e: 0000 unimp
10: 1004 addi s1,sp,32
c: 001f 0000 1004 0x10040000001f
12: 7205 lui tp,0xfffe1
14: 3376 fld ft6,376(sp)
16: 6932 flw fs2,12(sp)
@@ -550,4 +548,6 @@ Disassembly of section .riscv.attributes:
1a: 5f30 lw a2,120(a4)
1c: 326d jal fffff9c6 <__global_pointer$+0x7fffdb56>
1e: 3070 fld fa2,224(s0)
...
20: 665f 7032 0030 0x307032665f
26: 0108 addi a0,sp,128
28: 0b0a slli s6,s6,0x2

Binary file not shown.

View File

@@ -245,10 +245,6 @@ int main(int argc, char *argv[]) {
std::cout << "wait for completion" << std::endl;
RT_CHECK(vx_ready_wait(device, -1));
// flush the destination buffer caches
std::cout << "flush the destination buffer caches" << std::endl;
RT_CHECK(vx_flush_caches(device, kernel_arg.dst_ptr, buf_size));
// download destination buffer
std::cout << "download destination buffer" << std::endl;
RT_CHECK(vx_copy_from_dev(dst_buf, kernel_arg.dst_ptr, buf_size, 0));

Binary file not shown.

File diff suppressed because it is too large Load Diff

Binary file not shown.