Repeat vx_wmma issue & hardcode dst address
This commit is contained in:
@@ -138,6 +138,7 @@ void test_maxu_reduce() {
|
||||
y = reduced;
|
||||
}
|
||||
|
||||
// assumes NUM_THREADS == 4
|
||||
unsigned bit_vectors[4] = {0b11010110000111001100010100100110, 0b10010100011010001010000000001110, 0b10001001010111110001110000000010, 0b00010011010100101101110111001111};
|
||||
|
||||
void test_and_reduce() {
|
||||
@@ -213,4 +214,4 @@ int main()
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -65,6 +65,7 @@ float results[32*8];
|
||||
void store_wmma_result() {
|
||||
int tid = vx_thread_id();
|
||||
|
||||
float *results = reinterpret_cast<float *>(0xc0000000UL);
|
||||
asm volatile ("fsw f16, %0" :: "m"(results[tid*8+0]));
|
||||
asm volatile ("fsw f17, %0" :: "m"(results[tid*8+1]));
|
||||
asm volatile ("fsw f18, %0" :: "m"(results[tid*8+2]));
|
||||
@@ -87,10 +88,13 @@ int main()
|
||||
{
|
||||
vx_tmc(-1);
|
||||
vx_wmma_load();
|
||||
vx_wmma();
|
||||
#pragma GCC unroll 100
|
||||
for (int i = 0; i < 100; i++) {
|
||||
vx_wmma();
|
||||
}
|
||||
store_wmma_result();
|
||||
vx_tmc(1);
|
||||
// print_wmma_result();
|
||||
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user