Proper SIMT with fine-grain scheduler implemented

This commit is contained in:
felsabbagh3
2019-05-10 00:49:54 -07:00
parent 96dac5e1ce
commit 48468ed26a
27 changed files with 6080 additions and 3375 deletions

File diff suppressed because it is too large Load Diff

Binary file not shown.

View File

@@ -1,393 +1,375 @@
:0200000480007A :0200000480007A
:10000000130510007310050213052000731015026C :10000000130520007310050213052000731015025C
:10001000731040F17310103037F1FF7FEF0080193B :10001000731040F17310103037F1FF7FEF00001BB9
:10002000EF10C06D73000000938B0600130D0700E6 :10002000EF10806A73000000938B0600130D070029
:10003000130F01009303050013051000635C7500A6 :10003000130F01009303050013051000635C7500A6
:1000400013010180130305006B5003001305150015 :1000400013010180130305006B5003001305150015
:100050006FF0DFFE13010F0013050000930F060081 :100050006FF0DFFE1300000013000000130000002B
:10006000938D0300EBE0BF01170500001305051B8E :1000600013000000130000001300000013010F0034
:100070006B40050017030000130343FB6B000300F4 :1000700013050000930F0600938D0300EBE0BF0112
:1000800067800000170200011302022623200200ED :10008000170500001305051B6B4005001703000052
:100090002322120023242200232632002328420098 :100090001303C3F96B00030067800000170200011F
:1000A000232A5200232C6200232E72002320820276 :1000A0001302022023200200232212002324220014
:1000B000232292022324A2022326B2022328C20270 :1000B0002326320023284200232A5200232C6200E8
:1000C000232AD202232CE202232EF202232002054D :1000C000232E720023208202232292022324A202E2
:1000D0002322120523242205232632052328420544 :1000D0002326B2022328C202232AD202232CE202C0
:1000E000232A5205232C6205232E72052320820722 :1000E000232EF202232002052322120523242205B7
:1000F000232292072324A2072326B2072328C2071C :1000F0002326320523284205232A5205232C620594
:10010000232AD207232CE207232EF2071302100022 :10010000232E720523208207232292072324A2078D
:1001100067800000170200011302021D0320020085 :100110002326B2072328C207232AD207232CE2076B
:1001200083204200032182008321C20003220201B6 :10012000232EF2071302100067800000170200015F
:1001300083224201032382018323C201032402029A :1001300013020217032002008320420003218200E1
:1001400083244202032582028325C202032602037E :100140008321C20003220201832242010323820190
:1001500083264203032782038327C2030328020462 :100150008323C20103240202832442020325820274
:1001600083284204032982048329C204032A020546 :100160008325C20203260203832642030327820358
:10017000832A4205032B8205832BC205032C02062A :100170008327C2030328020483284204032982043C
:10018000832C4206032D8206832DC206032E02070E :100180008329C204032A0205832A4205032B820520
:10019000832E4207032F8207832FC207130200001A :10019000832BC205032C0206832C4206032D820604
:1001A000678000007325000267800000732510023D :1001A000832DC206032E0207832E4207032F8207E8
:1001B00067800000130101FE232E1100232C810013 :1001B000832FC2071302000067800000732500022E
:1001C00013040102232604FE6F0000030327C4FE6C :1001C000678000007325100267800000130101FEA4
:1001D0009307404C3307F702B707008193874736F0 :1001D000232E1100232C810013040102232604FE88
:1001E000B307F70013850700EF0040408327C4FEE4 :1001E0006F0000030327C4FE9307404C3307F70258
:1001F000938717002326F4FE0327C4FE930770009D :1001F000B70700819387C731B307F700138507005E
:10020000E3D6E7FC130000008320C1010324810131 :10020000EF0040408327C4FE938717002326F4FEA7
:100210001301010267800000130101FD2326110272 :100210000327C4FE93077000E3D6E7FC1300000039
:10022000232481022322A1031304010313070D00D9 :100220008320C101032481011301010267800000C2
:100230009307404C3307F702B7070081938747368F :10023000130101FD23261102232481022322A1039D
:10024000B307F70013850700EF00804B9307050005 :100240001304010313070D009307404C3307F70213
:10025000638A070213070D00B707008113172700F1 :10025000B70700819387C731B307F70013850700FD
:100260009387472CB307F7001307100023A0E7007C :10026000EF00804B93070500638A070213070D0018
:1002700093070D0063960700EFF0DFE96F0080053C :10027000B7070081131727009387C727B307F70035
:100280007300000013070D009307404C3307F7027B :100280001307100023A0E70093070D0063960700F3
:10029000B707008193874736B307F700130784FD3C :10029000EFF0DFE96F0080057300000013070D0029
:1002A0009305070013850700EF00003D832704FE38 :1002A0009307404C3307F702B70700819387C731A4
:1002B000138107008327C4FD832584FD032644FEA4 :1002B000B307F700130784FD9305070013850700B4
:1002C000832684FE0327C4FE13850700EFF0DFD5E5 :1002C000EF00003D832704FE138107008327C4FD50
:1002D000730000008320C10203248102032D410228 :1002D000832584FD032644FE832684FE0327C4FE73
:1002E0001301010367800000130101FB23261104A1 :1002E00013850700EFF05FD4730000008320C10284
:1002F0002324810413040105EFF0DFEA2324A4FE84 :1002F00003248102032D41021301010367800000E2
:1003000093090100930710002326F4FE6F004008B4 :10030000130101FB23261104232481041304010596
:100310000327C4FE9307404C3307F702B707008159 :10031000EFF0DFEA2324A4FE930901009307100005
:1003200093874736B307F70013850700EF00403D7A :100320002326F4FE6F0040080327C4FE9307404CC9
:1003300093070500639807040327C4FE9307404C06 :100330003307F702B70700819387C731B307F70088
:100340003307F702B707008193874736B307F700F3 :1003400013850700EF00403D9307050063980704FD
:10035000130704FD9305070013850700EF00C03164 :100350000327C4FE9307404C3307F702B707008119
:10036000832784FD13810700832744FD832504FD33 :100360009387C731B307F700130704FD9305070010
:100370000326C4FD832604FE032744FE13850700DD :1003700013850700EF00C031832784FD1381070038
:10038000EFF05FCF8327C4FE938717002326F4FE88 :10038000832744FD832504FD0326C4FD832604FE44
:100390008327C4FE032784FEE3ECE7F613810900FC :10039000032744FE13850700EFF05FCF8327C4FED9
:1003A000EFF05FCE9307020063880704B707008170 :1003A000938717002326F4FE8327C4FE032784FEC9
:1003B00013854736EF00C03493070500639E07029C :1003B000E3ECE7F613810900EFF05FCE930702004C
:1003C000930784FB93850700B707008113854736A1 :1003C00063880704B70700811385C731EF00C03485
:1003D000EF00802A832704FC138107008327C4FBD6 :1003D00093070500639E0702930784FB938507003C
:1003E000832584FB032644FC832684FC0327C4FC6A :1003E000B70700811385C731EF00802A832704FCFB
:1003F00013850700EFF05FC3130000008320C104E2 :1003F000138107008327C4FB832584FB032644FC69
:10040000032481041301010567800000130101FB2F :10040000832684FC0327C4FC13850700EFF0DFC1BB
:10041000232611042324810413040105232EA4FAA6 :10041000130000008320C10403248104130101059B
:10042000232CB4FA232AC4FA2328D4FAEFF09FD855 :1004200067800000130101FB2326110423248104AB
:10043000EFF05FD72322A4FE13090100232604FE58 :1004300013040105232EA4FA232CB4FA232AC4FAA8
:10044000232404FE6F00C008B709FFFF3301310108 :100440002328D4FAEFF09FD8EFF05FD72322A4FE41
:10045000832784FE2326F4FC832784FB2328F4FCD3 :1004500013090100232604FE232404FE6F00C008B4
:1004600093070100232AF4FC832744FB232CF4FC8C :10046000B709FFFF33013101832784FE2326F4FC03
:10047000832704FB232EF4FC8327C4FE2320F4FEF1 :10047000832784FB2328F4FC93070100232AF4FC40
:100480000327C4FE9307404C3307F702B7070081E8 :10048000832744FB232CF4FC832704FB232EF4FC5A
:1004900093874736B307F7001307C4FC930507009B :100490008327C4FE2320F4FE0327C4FE9307404CA9
:1004A00013850700EF00C0168327C4FE938717004B :1004A0003307F702B70700819387C731B307F70017
:1004B0002326F4FE8327C4FE032744FE63E4E700FB :1004B0001307C4FC9305070013850700EF00C0165F
:1004C000232604FE832784FE938717002324F4FE4B :1004C0008327C4FE938717002326F4FE8327C4FEE8
:1004D000032784FE8327C4FBE368F7F613010900B2 :1004D000032744FE63E4E700232604FE832784FE0B
:1004E000EFF09FE0130000008320C1040324810487 :1004E000938717002324F4FE032784FE8327C4FB8D
:1004F0001301010567800000130101FD232611028D :1004F000E368F7F613010900EFF09FE01300000036
:100500002324810213040103232EA4FCEFF09FC9CE :100500008320C104032481041301010567800000D6
:100510002320A4FE232604FE6F004005232604FEAC :10051000130101FD23261102232481021304010388
:10052000232404FE6F00C003B7070081032784FE65 :10052000232EA4FCEFF09FC92320A4FE232604FE63
:10053000131727009387472CB307F70003A707007B :100530006F004005232604FE232404FE6F00C00341
:10054000930710006318F7008327C4FE93871700F2 :10054000B7070081032784FE131727009387C72767
:100550002326F4FE832784FE938717002324F4FECA :10055000B307F70003A70700930710006318F7001D
:10056000832784FE032704FEE3E0E7FC0327C4FEA1 :100560008327C4FE938717002326F4FE832784FE87
:100570008327C4FDE314F7FA232204FE6F008002F0 :10057000938717002324F4FE832784FE032704FEB9
:10058000B7070081032744FE131727009387472CE2 :10058000E3E0E7FC0327C4FE8327C4FDE314F7FA86
:10059000B307F70023A00700832744FE93871700C3 :10059000232204FE6F008002B7070081032744FE78
:1005A0002322F4FE832744FE032704FEE3EAE7FC4C :1005A000131727009387C727B307F70023A0070077
:1005B000130000008320C102032481021301010300 :1005B000832744FE938717002322F4FE832744FEFB
:1005C00067800000130101FF2326810023247101AD :1005C000032704FEE3EAE7FC130000008320C102D6
:1005D0001304010193870B00138507000324C10056 :1005D000032481021301010367800000130101FF5E
:1005E000832B810013010101678000009302050045 :1005E00023268100232471011304010193870B004A
:1005F000130300009303700023A0620023A2620093 :1005F000138507000324C100832B8100130101012F
:1006000023A4620023A6720023A862006780000072 :10060000678000009302050013030000930370004D
:100610009302050003A382001303130023A46200C6 :1006100023A0620023A2620023A4620023A672002A
:100620001383420183AE420093935E003303730051 :1006200023A86200678000009302050003A38200F4
:1006300003AE05002320C30103AE45002322C301FE :100630001303130023A462001383420183AE42001C
:1006400003AE85002324C30103AEC5002326C301E6 :1006400093935E003303730003AE05002320C301C0
:1006500003AE05012328C30103AE4501232AC301CC :1006500003AE45002322C30103AE85002324C3015A
:10066000938E1E00130F20036394EE01930E00007F :1006600003AEC5002326C30103AE05012328C30141
:1006700023A2D201678000009302050003A3820039 :1006700003AE4501232AC301938E1E00130F2003EE
:100680001303F3FF23A462001383420183AE02002D :100680006394EE01930E000023A2D2016780000064
:10069000930F2003138F0E00130F1F006314FF012D :100690009302050003A382001303F3FF23A4620067
:1006A000130F000023A0E20193935E003303730055 :1006A0001383420183AE0200930F2003138F0E00C9
:1006B000032E030023A0C501032E430023A2C5017E :1006B000130F1F006314FF01130F000023A0E201BA
:1006C000032E830023A4C501032EC30023A6C50166 :1006C00093935E0033037300032E030023A0C50140
:1006D000032E030123A8C501032E430123AAC5014C :1006D000032E430023A2C501032E830023A4C501DA
:1006E000678000009302050003A382001305000049 :1006E000032EC30023A6C501032E030123A8C501C1
:1006F000130E200363146E001305150067800000BD :1006F000032E430123AAC501678000009302050071
:100700009302050003A3820013050000130E0000EE :1007000003A3820013050000130E200363146E0080
:1007100063146E0013051500678000009302050046 :1007100013051500678000009302050003A3820003
:1007200003A3C20083A3020133B563006780000006 :1007200013050000130E000063146E00130515007E
:10073000130141FF232011002322B100834505004E :10073000678000009302050003A3C20083A30201A7
:1007400063880500EF00C001130515006FF01FFF5F :1007400033B5630067800000130141FF23201100CF
:1007500083200100832541001301C1006780000050 :100750002322B1008345050063880500EF00C00136
:10076000B708010023A0B80067800000130101FD55 :10076000130515006FF01FFF832001008325410052
:10077000232611022324810213040103232EA4FC47 :100770001301C10067800000B708010023A0B80082
:100780000327C4FD9307F00063E4E702B707008185 :1007800067800000130101FD23261102232481024A
:100790000327C4FD1317270093874720B307F700EB :1007900013040103232EA4FC0327C4FD9307F000D8
:1007A00083A7070013850700EFF09FF86F0040074D :1007A00063E4E702B70700810327C4FD131727009E
:1007B000930700022326F4FEA30504FE8327C4FE4C :1007B0009387C71BB307F70083A7070013850700BC
:1007C0009387C7FF0327C4FDB357F70093F7F700DC :1007C000EFF09FF86F004007930700022326F4FE26
:1007D0002322F4FE832744FE63860700930710005C :1007D000A30504FE8327C4FE9387C7FF0327C4FD38
:1007E000A305F4FE8347B4FE63820702B7070081C6 :1007E000B357F70093F7F7002322F4FE832744FE64
:1007F000032744FE1317270093874720B307F7000A :1007F0006386070093071000A305F4FE8347B4FE49
:1008000083A7070013850700EFF09FF28327C4FE3C :1008000063820702B7070081032744FE13172700FE
:100810009387C7FF2326F4FE8327C4FEE340F0FA44 :100810009387C71BB307F70083A70700138507005B
:100820008320C102032481021301010367800000B9 :10082000EFF09FF28327C4FE9387C7FF2326F4FED1
:10083000130101FE232E1100232C81001304010259 :100830008327C4FEE340F0FA8320C102032481022F
:100840002326A4FE2324B4FE0325C4FEEFF05FEEAE :100840001301010367800000130101FE232E110034
:10085000032584FEEFF09FF1B70700811385070899 :10085000232C8100130401022326A4FE2324B4FECA
:10086000EFF01FED130000008320C101032481017C :100860000325C4FEEFF05FEE032584FEEFF09FF159
:100870001301010267800000130101FD232611020C :10087000B707008113850708EFF01FED1300000094
:100880002324810213040103232EA4FC232CB4FC93 :100880008320C1010324810113010102678000005C
:10089000232AC4FC2328D4FCB73700810327C4FDD6 :10089000130101FD23261102232481021304010305
:1008A00023A2E79AB73700819387479A032784FDED :1008A000232EA4FC232CB4FC232AC4FC2328D4FC30
:1008B00023A2E700B73700819387479A032744FDB7 :1008B000B73700810327C4FD23AEE794B737008123
:1008C00023A4E700B73700819387479A032704FDE5 :1008C0009387C795032784FD23A2E700B7370081EC
:1008D00023A6E700EFF09F8D2324A4FE032704FD49 :1008D0009387C795032744FD23A4E700B73700811A
:1008E000832784FEB357F7022326F4FE032704FD73 :1008E0009387C795032704FD23A6E700EFF09F8DAC
:1008F000832784FEB377F702638807008327C4FE4B :1008F0002324A4FE032704FD832784FEB357F702B5
:10090000938717002326F4FE8325C4FEB7070081D2 :100900002326F4FE032704FD832784FEB377F70232
:100910001385470CEFF0DFF1B73700819387479AD3 :10091000638807008327C4FE938717002326F4FE0D
:100920000327C4FE23A8E700032704FD832784FED2 :100920008325C4FEB70700811385470CEFF0DFF184
:100930006362F702B73700819386479AB717008042 :10093000B73700819387C7950327C4FE23A8E70034
:100940001386C79A832584FE032504FDEFF01FACB0 :10094000032704FD832784FE6362F702B737008123
:100950006F000002B73700819386479AB71700806F :100950009386C795B71700801386479C832584FE2E
:100960001386C79A832504FD032504FDEFF01FAA13 :10096000032504FDEFF01FAC6F000002B7370081D4
:10097000EFF05F832322A4FE032704FD832744FEB8 :100970009386C795B71700801386479C832504FD8F
:1009800063F8E700032544FEEFF01FB76F00C000D7 :10098000032504FDEFF01FAAEFF05F832322A4FEEE
:10099000032504FDEFF05FB6130000008320C102C1 :10099000032704FD832744FE63F8E700032544FE94
:1009A000032481021301010367800000130101FA8F :1009A000EFF01FB76F00C000032504FDEFF05FB646
:1009B000232E1104232C8104130401062326A4FAF8 :1009B000130000008320C1020324810213010103FC
:1009C0002324B4FAEFF01FC0232CA4FC832784FD5A :1009C00067800000130101FA232E1104232C8104F7
:1009D00083A70700232AF4FC832784FD83A747000D :1009D000130401062326A4FA2324B4FAEFF01FC05F
:1009E0002328F4FC832784FD83A787002326F4FCB7 :1009E000232CA4FC832784FD83A70700232AF4FC7F
:1009F000832784FD83A707012326F4FE0327C4FE73 :1009F000832784FD83A747002328F4FC832784FDF5
:100A00008327C4FAB307F7022324F4FE8327C4FE26 :100A000083A787002326F4FC832784FD83A707019F
:100A1000639A0700930710002326F4FE8327C4FA85 :100A10002326F4FE0327C4FE8327C4FAB307F70294
:100A20002324F4FE832784FD83A7C7002324F4FC3A :100A20002324F4FE8327C4FE639A07009307100073
:100A3000232204FE6F004012232004FE232E04FC18 :100A30002326F4FE8327C4FA2324F4FE832784FDAF
:100A40006F008007032784FA832784FCB307F7022B :100A400083A7C7002324F4FC232204FE6F00800D3B
:100A50000327C4FDB307F7002322F4FC032784FC1B :100A5000232004FE232E04FC6F008007032784FA62
:100A60008327C4FDB307F702032784FEB307F7000B :100A6000832784FCB307F7020327C4FDB307F7000D
:100A70002320F4FC832744FC93972700032744FD9D :100A70002322F4FC032784FC8327C4FDB307F70279
:100A8000B307F70003A70700832704FC9397270009 :100A8000032784FEB307F7002320F4FC832744FCEC
:100A9000832604FDB387F60083A70700B307F70298 :100A900093972700032744FDB307F70003A7070038
:100AA000032704FEB307F7002320F4FE8327C4FDC9 :100AA000832704FC93972700832604FDB387F60071
:100AB00093871700232EF4FC0327C4FD832784FCAF :100AB00083A70700B307F702032704FEB307F70075
:100AC000E362F7F8032784FA832784FC3307F702ED :100AC0002320F4FE8327C4FD93871700232EF4FC14
:100AD000832784FEB307F700232EF4FA032784FE4E :100AD0000327C4FD832784FCE362F7F8032784FA25
:100AE000832784FCB337F70093F7F70F232CF4FA2E :100AE000832784FC3307F702832784FEB307F700CC
:100AF000832784FB93B71700A30BF4FA834774FB97 :100AF000232EF4FA8327C4FB939727000327C4FC13
:100B0000138F0700B7170080938F47B46B200F0037 :100B0000B307F700032704FE23A0E700832784FE32
:100B10007B70FF018327C4FB939727000327C4FC46 :100B1000938717002324F4FE832744FE938717004E
:100B2000B307F700032704FE23A0E700832784FE12 :100B20002322F4FE832744FE0327C4FEE3E2E7F218
:100B3000938717002324F4FEB7170080138E87B421 :100B3000130000008320C105032481051301010671
:100B400067000E00130000006B300000832744FE96 :100B400067800000130101FC232E1102232C810277
:100B5000938717002322F4FE832744FE0327C4FE55 :100B500013040104232EA4FC232CB4FC232AC4FC7C
:100B6000E3ECE7EC130000008320C10503248105BA :100B60002328D4FC2326E4FCB73700810327C4FDE7
:100B70001301010667800000130101FC232E1102FE :100B700023A8E796B737008193870797032784FD5B
:100B8000232C810213040104232EA4FC232CB4FC87 :100B800023A2E700B737008193870797032744FD27
:100B9000232AC4FC2328D4FC2326E4FCB737008195 :100B900023A4E700B7370081938707970327C4FC96
:100BA0000327C4FD23ACE79AB73700819387879B5F :100BA00023A6E700B737008193870797032704FD43
:100BB000032784FD23A2E700B73700819387879B33 :100BB00023A8E700EFF00FE12324A4FE0327C4FCE1
:100BC000032744FD23A4E700B73700819387879B61 :100BC000832784FEB357F7022326F4FE0327C4FCD1
:100BD0000327C4FC23A6E700B73700819387879BD0 :100BD000832784FEB377F702638807008327C4FE68
:100BE000032704FD23A8E700EFF04FDC2324A4FE35 :100BE000938717002326F4FEB73700819387079772
:100BF0000327C4FC832784FEB357F7022326F4FEA1 :100BF0000327C4FE23AAE7000327C4FC832784FE3F
:100C00000327C4FC832784FEB377F70263880700B9 :100C00006362F702B737008193860797B7170080B2
:100C10008327C4FE938717002326F4FEB73700818D :100C10001386C7C7832584FE032504FDEFF09F805C
:100C20009387879B0327C4FE23AAE7000327C4FCFE :100C20006F000002B737008193860797B7170080DF
:100C3000832784FE6362F702B73700819386879B20 :100C30001386C7C78325C4FC032504FDEFF08FFE90
:100C4000B7170080138607CB832584FE032504FD98 :100C4000EFF0CFD72322A4FE032704FD832744FE21
:100C5000EFF0CFFB6F000002B73700819386879BD0 :100C500063F8E700032544FEEFF09F8B6F00C000B0
:100C6000B7170080138607CB8325C4FC032504FD3A :100C6000032504FDEFF0DF8A130000008320C10399
:100C7000EFF0CFF9EFF00FD32322A4FE032704FDFA :100C7000032481031301010467800000130101FBB9
:100C8000832744FE63F8E700032544FEEFF0DF8688 :100C8000232611042324810413040105232EA4FA2E
:100C90006F00C000032504FDEFF01F861300000065 :100C9000232CB4FAEFF09F942320A4FE832704FEB4
:100CA0008320C10303248103130101046780000032 :100CA00083A70700232EF4FC832704FE83A74700B5
:100CB000130101FB232611042324810413040105DD :100CB000232CF4FC832704FE83A78700232AF4FC5B
:100CC000232EA4FA232CB4FAEFF0DF8F2320A4FE06 :100CC000832704FE83A747012326F4FE0327C4FEDF
:100CD000832704FE83A70700232EF4FC832704FE4A :100CD0008327C4FBB307F7022324F4FE8327C4FE53
:100CE00083A74700232CF4FC832704FE83A78700F7 :100CE000639A0700930710002326F4FE8327C4FBB2
:100CF000232AF4FC832704FE83A747012326F4FE5E :100CF0002324F4FE832704FE83A7C7002328F4FCE3
:100D00000327C4FE8327C4FBB307F7022324F4FEA2 :100D0000232204FE6F00000C032784FB832704FDCD
:100D10008327C4FE639A0700930710002326F4FE7E :100D10003307F702832784FEB307F7002326F4FC8A
:100D20008327C4FB2324F4FE832704FE83A7C70084 :100D2000032784FE832704FDB337F70093F7F70FFB
:100D30002328F4FC232204FE6F00000C032784FB0D :100D30002324F4FC832784FC93B71700A303F4FC5B
:100D4000832704FD3307F702832784FEB307F700E8 :100D4000834774FC138F0700B7170080938F07DB6E
:100D50002326F4FC032784FE832704FDB337F70022 :100D50006B200F007B70FF018327C4FC9397270053
:100D600093F7F70F2324F4FC832784FC93B7170031 :100D60000327C4FDB307F70083A607008327C4FC4D
:100D7000A303F4FC834774FC138F0700B7170080AC :100D700093972700032784FDB307F70003A7070015
:100D8000938F47DE6B200F007B70FF018327C4FC2D :100D80008327C4FC93972700032644FDB307F6008E
:100D9000939727000327C4FDB307F70083A6070036 :100D90003387E60023A0E700832784FE93871700AC
:100DA0008327C4FC93972700032784FDB307F7002C :100DA0002324F4FEB7170080138E47DB67000E0084
:100DB00003A707008327C4FC93972700032644FD5D :100DB000130000006B300000832744FE9387170068
:100DC000B307F6003387E60023A0E700832784FEFD :100DC0002322F4FE832744FE0327C4FEE3EEE7F26A
:100DD000938717002324F4FEB7170080138E87DE55 :100DD000130000008320C1040324810413010105D2
:100DE00067000E00130000006B300000832744FEF4 :100DE00067800000130101FC232E1102232C8102D5
:100DF000938717002322F4FE832744FE0327C4FEB3 :100DF00013040104232EA4FC232CB4FC232AC4FCDA
:100E0000E3EEE7F2130000008320C1040324810411 :100E00002328D4FC2326E4FCB73700810327C4FD44
:100E10001301010567800000130101FC232E11025C :100E100023A8E796B737008193870797032784FDB8
:100E2000232C810213040104232EA4FC232CB4FCE4 :100E200023A2E700B737008193870797032744FD84
:100E3000232AC4FC2328D4FC2326E4FCB7370081F2 :100E300023A4E700B7370081938707970327C4FCF3
:100E40000327C4FD23ACE79AB73700819387879BBC :100E400023A6E700B737008193870797032704FDA0
:100E5000032784FD23A2E700B73700819387879B90 :100E500023A8E700EFF00FB72324A4FE0327C4FC68
:100E6000032744FD23A4E700B73700819387879BBE :100E6000832784FEB357F7022326F4FE0327C4FC2E
:100E70000327C4FC23A6E700B73700819387879B2D :100E7000832784FEB377F702638807008327C4FEC5
:100E8000032704FD23A8E700EFF04FB22324A4FEBC :100E8000938717002326F4FEB737008193870797CF
:100E90000327C4FC832784FEB357F7022326F4FEFE :100E90000327C4FE23AAE7000327C4FC832784FE9C
:100EA0000327C4FC832784FEB377F7026388070017 :100EA0006362F702B737008193860797B717008010
:100EB0008327C4FE938717002326F4FEB7370081EB :100EB0001386C7F1832584FE032504FDEFF08FD64A
:100EC0009387879B0327C4FE23AAE7000327C4FC5C :100EC0006F000002B737008193860797B71700803D
:100ED000832784FE6362F702B73700819386879B7E :100ED0001386C7F18325C4FC032504FDEFF08FD4EE
:100EE000B7170080138607F5832584FE032504FDCC :100EE000EFF0CFAD2322A4FE032704FD832744FEA9
:100EF000EFF0CFD16F000002B73700819386879B58 :100EF00063F8E700032544FEEFF08FE16F00C000C8
:100F0000B7170080138607F58325C4FC032504FD6D :100F0000032504FDEFF0CFE0130000008320C103B0
:100F1000EFF0CFCFEFF00FA92322A4FE032704FDAB :100F1000032481031301010467800000130101FB16
:100F2000832744FE63F8E700032544FEEFF0CFDC9F :100F2000232611042324810413040105232EA4FA8B
:100F30006F00C000032504FDEFF00FDC130000007C :100F3000232CB4FAEFF08FEA2320A4FE832704FECB
:100F40008320C1030324810313010104678000008F :100F400083A70700232EF4FC832704FE83A7470012
:100F5000130101FB2326110423248104130401053A :100F5000232CF4FC832704FE83A78700232AF4FCB8
:100F6000232EA4FA232CB4FAEFF0CFE52320A4FE1D :100F6000832704FE83A747012326F4FE0327C4FE3C
:100F7000832704FE83A70700232EF4FC832704FEA7 :100F70008327C4FBB307F7022324F4FE8327C4FEB0
:100F800083A74700232CF4FC832704FE83A7870054 :100F8000639A0700930710002326F4FE8327C4FB0F
:100F9000232AF4FC832704FE83A747012326F4FEBB :100F90002324F4FE832704FE83A7C7002328F4FC40
:100FA0000327C4FE8327C4FBB307F7022324F4FE00 :100FA000232204FE6F00000C032784FB832704FD2B
:100FB0008327C4FE639A0700930710002326F4FEDC :100FB0003307F702832784FEB307F7002326F4FCE8
:100FC0008327C4FB2324F4FE832704FE83A7C700E2 :100FC000032784FE832704FDB337F70093F7F70F59
:100FD0002328F4FC232204FE6F00000C032784FB6B :100FD0002324F4FC832784FC93B71700A303F4FCB9
:100FE000832704FD3307F702832784FEB307F70046 :100FE000834774FC138F0700B7170080938F0705A2
:100FF0002326F4FC032784FE832704FDB337F70080 :100FF0006B200F007B70FF018327C4FC93972700B1
:1010000093F7F70F2324F4FC832784FC93B717008E :101000000327C4FDB307F70083A607008327C4FCAA
:10101000A303F4FC834774FC138F0700B717008009 :1010100093972700032784FDB307F70003A7070072
:10102000938F47086B200F007B70FF018327C4FC60 :101020008327C4FC93972700032644FDB307F600EB
:10103000939727000327C4FDB307F70083A6070093 :101030003387E64023A0E700832784FE93871700C9
:101040008327C4FC93972700032784FDB307F70089 :101040002324F4FEB7170080138E470567000E00B7
:1010500003A707008327C4FC93972700032644FDBA :10105000130000006B300000832744FE93871700C5
:10106000B307F6003387E64023A0E700832784FE1A :101060002322F4FE832744FE0327C4FEE3EEE7F2C7
:10107000938717002324F4FEB7170080138E870888 :10107000130000008320C10403248104130101052F
:1010800067000E00130000006B300000832744FE51 :1010800067800000130101FC232E1102232C810232
:10109000938717002322F4FE832744FE0327C4FE10 :1010900013040104232EA4FC232CB4FC232AC4FC37
:1010A000E3EEE7F2130000008320C104032481046F :1010A0002328D4FC2326E4FCB73700810327C4FDA2
:1010B0001301010567800000130101FC232E1102BA :1010B00023A4E798B737008193878798032784FD97
:1010C000232C810213040104232EA4FC232CB4FC42 :1010C00023A2E700B737008193878798032744FD61
:1010D000232AC4FC2328D4FC2326E4FCB737008150 :1010D00023A4E700B7370081938787980327C4FCD0
:1010E0000327C4FD23A8E79CB73700819387079D9A :1010E00023A6E700B737008193878798032704FD7D
:1010F000032784FD23A2E700B73700819387079D6C :1010F00023A8E700EFF00F8D2324A4FE0327C4FCF0
:10110000032744FD23A4E700B73700819387079D99 :10110000832784FEB357F7022326F4FE0327C4FC8B
:101110000327C4FC23A6E700B73700819387079D08 :10111000832784FEB377F702638807008327C4FE22
:10112000032704FD23A8E700EFF04F882324A4FE43 :10112000938717002326F4FEB737008193878798AB
:101130000327C4FC832784FEB357F7022326F4FE5B :101130000327C4FE23AAE7000327C4FC832784FEF9
:101140000327C4FC832784FEB377F7026388070074 :101140006362F702B737008193868798B7170080EC
:101150008327C4FE938717002326F4FEB737008148 :101150001386C71B832584FE032504FDEFF08FACA7
:101160009387079D0327C4FE23AAE7000327C4FC37 :101160006F000002B737008193868798B717008019
:10117000832784FE6362F702B73700819386079D59 :101170001386C71B8325C4FC032504FDEFF08FAA4B
:10118000B71700801386071F832584FE032504FDFF :10118000EFF0CF832322A4FE032704FD832744FE30
:10119000EFF0CFA76F000002B73700819386079D5D :1011900063F8E700032544FEEFF08FB76F00C0004F
:1011A000B71700801386071F8325C4FC032504FDA1 :1011A000032504FDEFF0CFB6130000008320C10338
:1011B000EFF0CFA5EFE01FFF2322A4FE032704FDDD :1011B000032481031301010467800000130101FB74
:1011C000832744FE63F8E700032544FEEFF0CFB227 :1011C000232611042324810413040105232EA4FAE9
:1011D0006F00C000032504FDEFF00FB21300000004 :1011D000232CB4FAEFF08FC02320A4FE832704FE53
:1011E0008320C103032481031301010467800000ED :1011E00083A70700232EF4FC832704FE83A7470070
:1011F000130101FB23261104232481041304010598 :1011F00083A70700232CF4FC832704FE83A7870022
:10120000232EA4FA232CB4FAEFF0CFBB2320A4FEA4 :10120000232AF4FC832704FE83A747012326F4FE48
:10121000832704FE83A70700232EF4FC832704FE04 :101210000327C4FE8327C4FBB307F7022324F4FE8D
:1012200083A7470083A70700232CF4FC832704FE31 :101220008327C4FE639A0700930710002326F4FE69
:1012300083A78700232AF4FC832704FE83A74701A2 :101230008327C4FB2324F4FE832704FE83A7C7006F
:101240002326F4FE0327C4FE8327C4FBB307F7025B :101240002328F4FC232204FE6F00000B032784FBF9
:101250002324F4FE8327C4FE639A0700930710003B :10125000832704FD3307F702832784FEB307F700D3
:101260002326F4FE8327C4FB2324F4FE832704FEF5 :101260002326F4FC032784FE832704FDB337F7000D
:1012700083A7C7002328F4FC232204FE6F00000B81 :1012700093F7F70F2324F4FC832784FC93B717001C
:10128000032784FB832704FD3307F702832784FEAB :10128000A303F4FC834774FC138F0700B717008097
:10129000B307F7002326F4FC032784FE832704FD0D :10129000938F472E6B200F007B70FF018327C4FCC8
:1012A000B337F70093F7F70F2324F4FC832784FC6C :1012A000939727000327C4FDB307F70083A6070021
:1012B00093B71700A303F4FC834774FC138F070054 :1012B0008327C4FC93972700032744FDB307F70057
:1012C000B7170080938F87316B200F007B70FF0171 :1012C000032784FD3387E60023A0E700832784FEFD
:1012D0008327C4FC939727000327C4FDB307F700B7 :1012D000938717002324F4FEB7170080138E872E00
:1012E00083A607008327C4FC93972700032744FDA8 :1012E00067000E00130000006B300000832744FEEF
:1012F000B307F700032784FD3387E60023A0E70048 :1012F000938717002322F4FE832744FE0327C4FEAE
:10130000832784FE938717002324F4FEB7170080F9 :10130000E3E6E7F4130000008320C1040324810412
:10131000138EC73167000E00130000006B30000011 :101310001301010567800000130101FC232E110257
:10132000832744FE938717002322F4FE832744FE7D :10132000232C810213040104232EA4FC232CB4FCDF
:101330000327C4FEE3E6E7F4130000008320C104A2 :10133000232AC4FC2328D4FC2326E4FCB7370081ED
:10134000032481041301010567800000130101FCDF :101340000327C4FD23A4E798B737008193878798C4
:10135000232E1102232C810213040104232EA4FC4A :10135000032784FD23A2E700B7370081938787988E
:10136000232CB4FC232AC4FC2328D4FC2326E4FC2D :10136000032744FD23A4E700B737008193878798BC
:10137000B73700810327C4FD23A8E79CB737008156 :101370000327C4FC23A6E700B7370081938787982B
:101380009387079D032784FD23A2E700B7370081D9 :10138000032704FD23A8E700EFE0DFE32324A4FE06
:101390009387079D032744FD23A4E700B737008107 :101390000327C4FC832784FEB357F7022326F4FEF9
:1013A0009387079D0327C4FC23A6E700B737008176 :1013A0000327C4FC832784FEB377F7026388070012
:1013B0009387079D032704FD23A8E700EFE01FDFC5 :1013B0008327C4FE938717002326F4FEB7370081E6
:1013C0002324A4FE0327C4FC832784FEB357F7021B :1013C000938787980327C4FE23AAE7000327C4FC5A
:1013D0002326F4FE0327C4FC832784FEB377F70299 :1013D000832784FE6362F702B7370081938687987C
:1013E000638807008327C4FE938717002326F4FE33 :1013E000B717008013860745832584FE032504FD77
:1013F000B73700819387079D0327C4FE23AAE70020 :1013F000EFF04F836F000002B73700819386879824
:101400000327C4FC832784FE6362F702B737008199 :10140000B7170080138607458325C4FC032504FD18
:101410009386079DB717008013864748832584FE6F :10141000EFF04F81EFE09FDA2322A4FE032704FDC3
:10142000032504FDEFE09FFE6F000002B737008147 :10142000832744FE63F8E700032544FEEFF04F8E68
:101430009386079DB7170080138647488325C4FC11 :101430006F00C000032504FDEFF08F8D1300000046
:10144000032504FDEFE09FFCEFE0DFD52322A4FE9F :101440008320C1030324810313010104678000008A
:10145000032704FD832744FE63F8E700032544FEC9 :10145000130101FB23261104232481041304010535
:10146000EFF08F896F00C000032504FDEFF0CF88F7 :10146000232EA4FA232CB4FAEFF04F972320A4FEE6
:10147000130000008320C10303248103130101042E :10147000832704FE83A70700232EF4FC832704FEA2
:1014800067800000130101FB23261104232481043B :1014800083A7470083A70700232CF4FC832704FECF
:1014900013040105232EA4FA232CB4FAEFF08F9243 :1014900083A78700232AF4FC832704FE83A7470140
:1014A0002320A4FE832704FE83A70700232EF4FC39 :1014A0002326F4FE0327C4FE8327C4FBB307F702F9
:1014B000832704FE83A7470083A70700232CF4FC9F :1014B0002324F4FE8327C4FE639A070093071000D9
:1014C000832704FE83A78700232AF4FC832704FED6 :1014C0002326F4FE8327C4FB2324F4FE832704FE93
:1014D00083A747012326F4FE0327C4FE8327C4FB0A :1014D00083A7C7002328F4FC232204FE6F00000B1F
:1014E000B307F7022324F4FE8327C4FE639A0700A0 :1014E000032784FB832704FD3307F702832784FE49
:1014F000930710002326F4FE8327C4FB2324F4FE65 :1014F000B307F7002326F4FC032784FE832704FDAB
:10150000832704FE83A7C7002328F4FC232204FEBC :10150000B337F70093F7F70F2324F4FC832784FC09
:101510006F00000B032784FB832704FD3307F702CA :1015100093B71700A303F4FC834774FC138F0700F1
:10152000832784FEB307F7002326F4FC032784FEF9 :10152000B7170080938F87576B200F007B70FF01E8
:10153000832704FDB337F70093F7F70F2324F4FC58 :101530008327C4FC939727000327C4FDB307F70054
:10154000832784FC93B71700A303F4FC834774FC40 :1015400083A607008327C4FC93972700032744FD45
:10155000138F0700B7170080938FC75A6B200F00B7 :10155000B307F700032784FD3387E60223A0E700E3
:101560007B70FF018327C4FC939727000327C4FDEA :10156000832784FE938717002324F4FEB717008097
:10157000B307F70083A607008327C4FC93972700CF :10157000138EC75767000E00130000006B30000089
:10158000032744FDB307F700032784FD3387E602F2 :10158000832744FE938717002322F4FE832744FE1B
:1015900023A0E700832784FE938717002324F4FE0B :101590000327C4FEE3E6E7F4130000008320C10440
:1015A000B7170080138E075B67000E001300000062 :1015A000032481041301010567800000130101FE7B
:1015B0006B300000832744FE938717002322F4FE3C :1015B000232E810013040102232604FE6F00800401
:1015C000832744FE0327C4FEE3E6E7F4130000008C :1015C000B73700810327C4FE131727009387079AB4
:1015D0008320C104032481041301010567800000F6 :1015D000B307F7001307300023A0E700B7470081E7
:1015E000130101FE232E810013040102232604FEB1 :1015E0000327C4FE131727009387079AB307F70052
:1015F0006F008004B73700810327C4FE131727004C :1015F0001307200023A0E7008327C4FE938717006A
:101600009387879EB307F7001307300023A0E700F6 :101600002326F4FE0327C4FE9307F00FE3DAE7FA7C
:10161000B74700810327C4FE131727009387879ECF :10161000130000000324C1011301010267800000D0
:10162000B307F7001307200023A0E7008327C4FEB9 :10162000130101FD23261102232481021304010367
:10163000938717002326F4FE0327C4FE9307F003C5 :10163000232EA4FCB707008113850711EFF0CF908C
:10164000E3DAE7FA130000000324C10113010102E9 :10164000232604FE6F0080058327C4FE638E0700F7
:1016500067800000130101FD23261102232481026B :101650008327C4FE93F7F70063980700B70700815C
:1016600013040103232EA4FCB7070081138507117F :1016600013854713EFF04F8E8327C4FE939727000F
:10167000EFF00F8C232604FE6F0080058327C4FE45 :101670000327C4FDB307F70083A7070013850700FE
:10168000638E07008327C4FE93F7770063980700F3 :10168000EFF04F90B707008113858713EFF0CF8BF2
:10169000B707008113854713EFF08F898327C4FEB6 :101690008327C4FE938717002326F4FE0327C4FE86
:1016A000939727000327C4FDB307F70083A707001C :1016A0009307F00FE3D2E7FAB70700811385C7135A
:1016B00013850700EFF08F8BB70700811385871321 :1016B000EFF08F89130000008320C1020324810210
:1016C000EFF00F878327C4FE938717002326F4FECD :1016C0001301010367800000130101FF23261100AD
:1016D0000327C4FE9307F003E3D2E7FAB7070081BC :1016D0002324810013040101EFF05FED9306000164
:1016E0001385C713EFF0CF84130000008320C102DD :1016E000B75700811386079AB74700819385079AF9
:1016F000032481021301010367800000130101FE2E :1016F000B73700811385079AEFF08F99B7070081FC
:10170000232E1100232C810013040102EFF05FED62 :1017000013850716EFF04F84B75700811385079AAA
:1017100093068000B75700811386879EB7470081E4 :10171000EFF01FF193070000138507008320C1003D
:101720009385879EB73700811385879EEFF0CF940E :0C17200003248100130101016780000018
:10173000B707008113850716EFE09FFFB7570081B9
:101740001385879EEFF01FF113078000930680003A
:10175000B75700811386879EB74700819385879E80
:10176000B73700811385879EEFF00FC1B70700815F
:101770001385C717EFE0DFFBB75700811385879EFE
:10178000EFF05FED1307800093068000B7570081EC
:101790001386879EB74700819385879EB737008160
:1017A0001385879EEFF04FE7B70700811385071970
:1017B000EFE01FF8B75700811385879EEFF09FE990
:1017C000930730002326F4FE9305C4FE1307800020
:1017D00093068000B75700811386879EB757008114
:1017E0001385879EEFF05F8DB70700811385871AF9
:1017F000EFE01FF4B75700811385879EEFF09FE558
:101800009305C4FE1307800093068000B75700813C
:101810001386879EB75700811385879EEFF01FB30D
:10182000B70700811385871AEFE09FF0B757008153
:101830001385879EEFF01FE29307000013850700D2
:101840008320C1010324810113010102678000008C
:02000004810079 :02000004810079
:10000000300000003100000032000000330000002A :10000000300000003100000032000000330000002A
:10001000340000003500000036000000370000000A :10001000340000003500000036000000370000000A
@@ -412,27 +394,22 @@
:100140002D2D2D2D2D2D2D2D2D2D2D2D2D2D2D2DDF :100140002D2D2D2D2D2D2D2D2D2D2D2D2D2D2D2DDF
:100150002D2D2D2D2D2D2D2D2D2D2D2D0A00000079 :100150002D2D2D2D2D2D2D2D2D2D2D2D0A00000079
:100160000A0A4D6174726978206D756C7469706CDF :100160000A0A4D6174726978206D756C7469706CDF
:1001700069636174696F6E0A000000000A0A4D61CC :0901700069636174696F6E0A0095
:1001800074726978204164646974696F6E0A000052 :10017C000000008104000081080000810C00008157
:100190000A0A4D61747269782053756274726163E2 :10018C001000008114000081180000811C00008107
:1001A00074696F6E0A0000000A0A4D617472697802 :10019C002000008124000081280000812C000081B7
:1001B00020456C656D656E74204164646974696F77 :1001AC003000008134000081380000813C00008167
:0301C0006E0A00C4 :1001BC004000008144000081480000814C00008117
:1001C4000000008104000081080000810C0000810F :1001CC005000008154000081580000815C000081C7
:1001D4001000008114000081180000811C000081BF :1001DC006000008164000081680000816C00008177
:1001E4002000008124000081280000812C0000816F :1001EC007000008174000081780000817C00008127
:1001F4003000008134000081380000813C0000811F :1001FC0084000081880000818C00008190000081C7
:100204004000008144000081480000814C000081CE :10020C0094000081980000819C000081A000008176
:100214005000008154000081580000815C0000817E :10021C00A4000081A8000081AC000081B000008126
:100224006000008164000081680000816C0000812E :10022C00B4000081B8000081BC000081C0000081D6
:100234007000008174000081780000817C000081DE :10023C00D0000081D4000081D8000081DC00008156
:1002440084000081880000818C000081900000817E :10024C00E0000081E4000081E8000081EC00008106
:1002540094000081980000819C000081A00000812E :10025C00F0000081F4000081F8000081FC000081B6
:10026400A4000081A8000081AC000081B0000081DE :10026C000001008104010081080100810C01008162
:10027400B4000081B8000081BC000081C00000818E
:10028400D0000081D4000081D8000081DC0000810E
:10029400E0000081E4000081E8000081EC000081BE
:1002A400F0000081F4000081F8000081FC0000816E
:1002B4000001008104010081080100810C0100811A
:040000058000000077 :040000058000000077
:00000001FF :00000001FF

View File

@@ -74,21 +74,21 @@ void _vx_mat_mult(unsigned tid, unsigned wid)
unsigned total = 0; unsigned total = 0;
for (unsigned place = 0; place < mat_dim; ++place) for (unsigned place = 0; place < mat_dim; ++place)
{ {
unsigned x_i = (wid * mat_dim) + place; unsigned x_i = (wid * mat_dim) + place;
unsigned y_i = (mat_dim * place) + i_index; unsigned y_i = (mat_dim * place ) + i_index;
total += (x_ptr[x_i] * y_ptr[y_i]); total += (x_ptr[x_i] * y_ptr[y_i]);
} }
int final_i = (wid * mat_dim) + i_index; int final_i = (wid * mat_dim) + i_index;
unsigned cond = i_index < mat_dim; // unsigned cond = i_index < mat_dim;
__if(cond) // __if(cond)
{ // {
z_ptr[final_i] = total; z_ptr[final_i] = total;
i_index++; i_index++;
} // }
__else // __else
__end_if // __end_if
} }
// for (int z = 0; z < ((1000 * wid) + 1000); z++); // for (int z = 0; z < ((1000 * wid) + 1000); z++);

View File

@@ -5,10 +5,10 @@ unsigned x[1024] = {0};
unsigned y[1024] = {0}; unsigned y[1024] = {0};
unsigned z[1024] = {0}; unsigned z[1024] = {0};
#define MAT_DIM 8 #define MAT_DIM 16
#define NUM_COLS 8 #define NUM_COLS 16
#define NUM_ROWS 8 #define NUM_ROWS 16
void initialize_mats() void initialize_mats()
{ {
@@ -42,28 +42,28 @@ int main()
print_matrix(z); print_matrix(z);
// matrix addition // // matrix addition
vx_mat_add(x, y, z, NUM_ROWS, NUM_COLS); // vx_mat_add(x, y, z, NUM_ROWS, NUM_COLS);
vx_print_str("\n\nMatrix Addition\n"); // vx_print_str("\n\nMatrix Addition\n");
print_matrix(z); // print_matrix(z);
// matrix sub // // matrix sub
vx_mat_sub(x, y, z, NUM_ROWS, NUM_COLS); // vx_mat_sub(x, y, z, NUM_ROWS, NUM_COLS);
vx_print_str("\n\nMatrix Subtraction\n"); // vx_print_str("\n\nMatrix Subtraction\n");
print_matrix(z); // print_matrix(z);
unsigned scal = 3; // unsigned scal = 3;
// matrix element add // // matrix element add
vx_e_mat_add(z, &scal, z, NUM_ROWS, NUM_COLS); // vx_e_mat_add(z, &scal, z, NUM_ROWS, NUM_COLS);
vx_print_str("\n\nMatrix Element Addition\n"); // vx_print_str("\n\nMatrix Element Addition\n");
print_matrix(z); // print_matrix(z);
// matrix element add // // matrix element add
vx_e_mat_mult(z, &scal, z, NUM_ROWS, NUM_COLS); // vx_e_mat_mult(z, &scal, z, NUM_ROWS, NUM_COLS);
vx_print_str("\n\nMatrix Element Addition\n"); // vx_print_str("\n\nMatrix Element Addition\n");
print_matrix(z); // print_matrix(z);
return 0; return 0;

View File

@@ -6,7 +6,7 @@
.type _start, @function .type _start, @function
.global _start .global _start
_start: _start:
li a0, 1 # Num Warps li a0, 2 # Num Warps
csrw 0x20, a0 # Setting the number of available warps csrw 0x20, a0 # Setting the number of available warps
li a0, 2 # Num Threads li a0, 2 # Num Threads
csrw 0x21, a0 # Setting the number of available threads csrw 0x21, a0 # Setting the number of available threads
@@ -31,11 +31,17 @@ loop_cond:
loop_body: loop_body:
addi sp,sp,-2048 # Allocate 2k stack for new thread addi sp,sp,-2048 # Allocate 2k stack for new thread
mv t1, a0 # #lane = i mv t1, a0 # #lane = i
.word 0x3506b # clone register state .word 0x3506b # clone register state
loop_inc: loop_inc:
addi a0, a0, 1 addi a0, a0, 1
j loop_cond j loop_cond
loop_done: loop_done:
nop
nop
nop
nop
nop
nop
mv sp,t5 # Restoring the stack mv sp,t5 # Restoring the stack
li a0,0 # setting tid = 0 for main thread li a0,0 # setting tid = 0 for main thread
mv t6,a2 # setting func_addr mv t6,a2 # setting func_addr

View File

@@ -4,6 +4,7 @@
module VX_context ( module VX_context (
input wire clk, input wire clk,
input wire in_warp, input wire in_warp,
input wire in_wb_warp,
input wire in_valid[`NT_M1:0], input wire in_valid[`NT_M1:0],
input wire in_write_register, input wire in_write_register,
input wire[4:0] in_rd, input wire[4:0] in_rd,
@@ -20,18 +21,26 @@ module VX_context (
output reg[31:0] out_a_reg_data[`NT_M1:0], output reg[31:0] out_a_reg_data[`NT_M1:0],
output reg[31:0] out_b_reg_data[`NT_M1:0], output reg[31:0] out_b_reg_data[`NT_M1:0],
output wire out_clone_stall output wire out_clone_stall,
output wire[31:0] w0_t0_registers[31:0]
); );
reg[5:0] state_stall;
initial begin
state_stall = 0;
end
wire[31:0] rd1_register[`NT_M1:0]; wire[31:0] rd1_register[`NT_M1:0];
wire[31:0] rd2_register[`NT_M1:0]; wire[31:0] rd2_register[`NT_M1:0];
/* verilator lint_off UNUSED */ /* verilator lint_off UNUSED */
wire[31:0] clone_regsiters[31:0]; wire[31:0] clone_regsiters[31:0];
/* verilator lint_on UNUSED */ /* verilator lint_on UNUSED */
assign w0_t0_registers = clone_regsiters;
VX_register_file vx_register_file_master( VX_register_file vx_register_file_master(
.clk (clk), .clk (clk),
.in_warp (in_warp), .in_wb_warp (in_wb_warp),
.in_valid (in_valid[0]), .in_valid (in_valid[0]),
.in_write_register (in_write_register), .in_write_register (in_write_register),
.in_rd (in_rd), .in_rd (in_rd),
@@ -52,6 +61,7 @@ module VX_context (
VX_register_file_slave vx_register_file_slave( VX_register_file_slave vx_register_file_slave(
.clk (clk), .clk (clk),
.in_warp (in_warp), .in_warp (in_warp),
.in_wb_warp (in_wb_warp),
.in_valid (in_valid[index]), .in_valid (in_valid[index]),
.in_write_register (in_write_register), .in_write_register (in_write_register),
.in_rd (in_rd), .in_rd (in_rd),
@@ -64,11 +74,10 @@ module VX_context (
.out_src1_data (rd1_register[index]), .out_src1_data (rd1_register[index]),
.out_src2_data (rd2_register[index]) .out_src2_data (rd2_register[index])
); );
end end
endgenerate endgenerate
reg[5:0] state_stall = 0;
always @(posedge clk) begin always @(posedge clk) begin
if ((in_is_clone) && state_stall == 0) begin if ((in_is_clone) && state_stall == 0) begin
state_stall <= 10; state_stall <= 10;

145
rtl/VX_context_slave.v Normal file
View File

@@ -0,0 +1,145 @@
`include "VX_define.v"
// VX_context_slave
//
// Register context of one warp that can be populated by a warp-spawn.
// It contains one register file per thread lane:
//   * lane 0 is a VX_register_file_master_slave, which can load a complete
//     32-register image from in_wspawn_regs when a wspawn is performed;
//   * lanes 1 .. `NT-1 are VX_register_file_slave instances, which can be
//     loaded from lane 0's registers when a clone is performed.
//
// Ports
//   clk               clock; the stall counters advance on the rising edge.
//   in_warp           forwarded unchanged to every VX_register_file_slave.
//   in_wb_warp        forwarded to every register file as the write-back
//                     warp select.
//   in_valid          per-lane valid flag accompanying the write-back.
//   in_write_register write-back request.
//   in_rd             write-back destination register index.
//   in_write_data     per-lane write-back data.
//   in_src1, in_src2  register indices to read.
//   in_curr_PC        value placed on every out_a_reg_data lane when
//                     in_is_jal is set.
//   in_is_clone       a clone instruction is being decoded.
//   in_is_jal         selects in_curr_PC for operand A.
//   in_src1_fwd/_data forwarding select and per-lane data for operand A.
//   in_src2_fwd/_data forwarding select and per-lane data for operand B.
//   in_wspawn_regs    register image offered to lane 0 on wspawn.
//   in_wspawn         a wspawn instruction is being decoded.
//   out_a_reg_data    per-lane operand A.
//   out_b_reg_data    per-lane operand B.
//   out_clone_stall   high while a clone or wspawn sequence is in progress.
//
// NOTE(review): out_a_reg_data / out_b_reg_data are declared `output reg`
// but are driven by continuous assignments. That is accepted in
// SystemVerilog mode (which the unpacked-array ports already require);
// a strict Verilog-2001 tool would reject it - confirm the tool flow before
// changing the port kind.
module VX_context_slave (
    input  wire        clk,
    input  wire        in_warp,
    input  wire        in_wb_warp,
    input  wire        in_valid[`NT_M1:0],
    input  wire        in_write_register,
    input  wire[4:0]   in_rd,
    input  wire[31:0]  in_write_data[`NT_M1:0],
    input  wire[4:0]   in_src1,
    input  wire[4:0]   in_src2,
    input  wire[31:0]  in_curr_PC,
    input  wire        in_is_clone,
    input  wire        in_is_jal,
    input  wire        in_src1_fwd,
    input  wire[31:0]  in_src1_fwd_data[`NT_M1:0],
    input  wire        in_src2_fwd,
    input  wire[31:0]  in_src2_fwd_data[`NT_M1:0],
    input  wire[31:0]  in_wspawn_regs[31:0],
    input  wire        in_wspawn,

    output reg[31:0]   out_a_reg_data[`NT_M1:0],
    output reg[31:0]   out_b_reg_data[`NT_M1:0],
    output wire        out_clone_stall
);

    // Raw register-file read data, one entry per lane.
    wire[31:0] rd1_register[`NT_M1:0];
    wire[31:0] rd2_register[`NT_M1:0];

    // Complete register image of lane 0, offered to the other lanes as the
    // source of a clone.
    /* verilator lint_off UNUSED */
    wire[31:0] clone_registers[31:0];
    /* verilator lint_on UNUSED */

    // Down-counters that sequence the multi-cycle clone / wspawn operations.
    // 0 means idle. They are initialised once, in the initial block below.
    reg[5:0] clone_state_stall;
    reg[5:0] wspawn_state_stall;

    initial begin
        clone_state_stall  = 0;
        wspawn_state_stall = 0;
    end

    // Lane 0 loads in_wspawn_regs during the cycle in which the wspawn
    // counter equals 2.
    wire to_wspawn = (wspawn_state_stall == 2);

    // Lane 0: ordinary write-back plus whole-file load on wspawn.
    VX_register_file_master_slave vx_register_file_master(
        .clk               (clk),
        .in_wb_warp        (in_wb_warp),
        .in_valid          (in_valid[0]),
        .in_write_register (in_write_register),
        .in_rd             (in_rd),
        .in_data           (in_write_data[0]),
        .in_src1           (in_src1),
        .in_src2           (in_src2),
        .in_wspawn         (in_wspawn),
        .in_to_wspawn      (to_wspawn),
        .in_wspawn_regs    (in_wspawn_regs),
        .out_regs          (clone_registers),
        .out_src1_data     (rd1_register[0]),
        .out_src2_data     (rd2_register[0])
    );

    // Lanes 1 .. `NT-1: ordinary write-back plus whole-file load on clone.
    genvar index;
    generate
        for (index = 1; index < `NT; index = index + 1)
        begin: gen_code_label
            // This lane is the clone target when lane 0's src1 read value
            // equals the lane index, sampled while the clone counter is 1.
            wire to_clone;
            assign to_clone = (index == rd1_register[0]) && (clone_state_stall == 1);

            VX_register_file_slave vx_register_file_slave(
                .clk               (clk),
                .in_warp           (in_warp),
                .in_wb_warp        (in_wb_warp),
                .in_valid          (in_valid[index]),
                .in_write_register (in_write_register),
                .in_rd             (in_rd),
                .in_data           (in_write_data[index]),
                .in_src1           (in_src1),
                .in_src2           (in_src2),
                .in_clone          (in_is_clone),
                .in_to_clone       (to_clone),
                .in_regs           (clone_registers),
                .out_src1_data     (rd1_register[index]),
                .out_src2_data     (rd2_register[index])
            );
        end
    endgenerate

    // Clone sequencing: load 10 when a clone is seen while idle, count down
    // by one per clock, and return to idle after the counter reaches 1.
    always @(posedge clk) begin
        if ((in_is_clone) && clone_state_stall == 0) begin
            clone_state_stall <= 10;
        end else if (clone_state_stall == 1) begin
            clone_state_stall <= 0;
        end else if (clone_state_stall > 0) begin
            clone_state_stall <= clone_state_stall - 1;
        end
    end

    // Wspawn sequencing: same countdown scheme as the clone counter.
    always @(posedge clk) begin
        if ((in_wspawn) && wspawn_state_stall == 0) begin
            wspawn_state_stall <= 10;
        end else if (wspawn_state_stall == 1) begin
            wspawn_state_stall <= 0;
        end else if (wspawn_state_stall > 0) begin
            wspawn_state_stall <= wspawn_state_stall - 1;
        end
    end

    // Operand selection per lane.
    //   A: current PC for jal, else forwarded data, else register read.
    //   B: forwarded data, else register read.
    genvar index_out_reg;
    generate
        for (index_out_reg = 0; index_out_reg < `NT; index_out_reg = index_out_reg + 1)
        begin
            assign out_a_reg_data[index_out_reg] =
                (in_is_jal == 1'b1)   ? in_curr_PC :
                (in_src1_fwd == 1'b1) ? in_src1_fwd_data[index_out_reg] :
                                        rd1_register[index_out_reg];

            assign out_b_reg_data[index_out_reg] =
                (in_src2_fwd == 1'b1) ? in_src2_fwd_data[index_out_reg] :
                                        rd2_register[index_out_reg];
        end
    endgenerate

    // A clone stalls for every cycle it is present except the one in which
    // the counter equals 1. (The former extra term
    // "(clone_state_stall == 0) && in_is_clone" was already implied by
    // "clone_state_stall != 1" and has been folded away.)
    wire clone_stall  = in_is_clone && (clone_state_stall != 1);

    // A wspawn stalls from the cycle it is first seen until the counter has
    // dropped to 1.
    wire wspawn_stall = ((wspawn_state_stall == 0) && in_wspawn) || (wspawn_state_stall > 1);

    assign out_clone_stall = clone_stall || wspawn_stall;

endmodule

View File

@@ -3,55 +3,55 @@
module VX_decode( module VX_decode(
// Fetch Inputs // Fetch Inputs
input wire clk, input wire clk,
input wire[31:0] in_instruction, input wire[31:0] in_instruction,
input wire[31:0] in_curr_PC, input wire[31:0] in_curr_PC,
input wire in_valid[`NT_M1:0], input wire in_valid[`NT_M1:0],
// WriteBack inputs // WriteBack inputs
input wire[31:0] in_write_data[`NT_M1:0], input wire[31:0] in_write_data[`NT_M1:0],
input wire[4:0] in_rd, input wire[4:0] in_rd,
input wire[1:0] in_wb, input wire[1:0] in_wb,
input wire in_wb_valid[`NT_M1:0], input wire in_wb_valid[`NT_M1:0],
input wire[`NW_M1:0] in_wb_warp_num, input wire[`NW_M1:0] in_wb_warp_num,
// FORWARDING INPUTS // FORWARDING INPUTS
input wire in_src1_fwd, input wire in_src1_fwd,
input wire[31:0] in_src1_fwd_data[`NT_M1:0], input wire[31:0] in_src1_fwd_data[`NT_M1:0],
input wire in_src2_fwd, input wire in_src2_fwd,
input wire[31:0] in_src2_fwd_data[`NT_M1:0], input wire[31:0] in_src2_fwd_data[`NT_M1:0],
input wire[`NW_M1:0] in_warp_num,
output wire[11:0] out_csr_address,
output wire out_is_csr,
output wire[31:0] out_csr_mask,
input wire[`NW_M1:0] in_warp_num,
output wire[11:0] out_csr_address,
output wire out_is_csr,
output wire[31:0] out_csr_mask,
// Outputs // Outputs
output wire[4:0] out_rd, output wire[4:0] out_rd,
output wire[4:0] out_rs1, output wire[4:0] out_rs1,
output wire[4:0] out_rs2, output wire[4:0] out_rs2,
output wire[31:0] out_a_reg_data[`NT_M1:0], output wire[31:0] out_a_reg_data[`NT_M1:0],
output wire[31:0] out_b_reg_data[`NT_M1:0], output wire[31:0] out_b_reg_data[`NT_M1:0],
output wire[1:0] out_wb, output wire[1:0] out_wb,
output wire[4:0] out_alu_op, output wire[4:0] out_alu_op,
output wire out_rs2_src, output wire out_rs2_src,
output reg[31:0] out_itype_immed, output reg[31:0] out_itype_immed,
output wire[2:0] out_mem_read, output wire[2:0] out_mem_read,
output wire[2:0] out_mem_write, output wire[2:0] out_mem_write,
output reg[2:0] out_branch_type, output reg[2:0] out_branch_type,
output reg out_branch_stall, output reg out_branch_stall,
output reg out_jal, output reg out_jal,
output reg[31:0] out_jal_offset, output reg[31:0] out_jal_offset,
output reg[19:0] out_upper_immed, output reg[19:0] out_upper_immed,
output wire[31:0] out_PC_next, output wire[31:0] out_PC_next,
output reg out_clone_stall, output reg out_clone_stall,
output wire out_change_mask, output wire out_change_mask,
output wire out_thread_mask[`NT_M1:0], output wire out_thread_mask[`NT_M1:0],
output wire out_valid[`NT_M1:0], output wire out_valid[`NT_M1:0],
output wire[`NW_M1:0] out_warp_num output wire[`NW_M1:0] out_warp_num,
output wire out_wspawn,
output wire[31:0] out_wspawn_pc,
output wire out_ebreak
); );
wire[6:0] curr_opcode; wire[6:0] curr_opcode;
@@ -73,6 +73,7 @@ module VX_decode(
wire is_clone; wire is_clone;
wire is_jalrs; wire is_jalrs;
wire is_jmprt; wire is_jmprt;
wire is_wspawn;
wire write_register; wire write_register;
@@ -110,11 +111,28 @@ module VX_decode(
reg[4:0] alu_op; reg[4:0] alu_op;
reg[4:0] mul_alu; reg[4:0] mul_alu;
wire context_zero_valid = (in_wb_warp_num == 0); wire[31:0] w0_t0_registers[31:0];
VX_context VX_Context( wire context_zero_valid = (in_wb_warp_num == 0);
wire[31:0] zero_a_reg_data[`NT_M1:0];
wire[31:0] zero_b_reg_data[`NT_M1:0];
reg zero_clone_stall;
// always @(*) begin
// $display("DECODE WARP: %h", in_warp_num);
// end
wire curr_warp_zero = in_warp_num == 0;
wire curr_warp_one = in_warp_num == 1;
// always @(*) begin
// $display("DECODE WARP: %h PC: %h",in_warp_num, in_curr_PC);
// end
VX_context VX_Context_zero(
.clk (clk), .clk (clk),
.in_warp (context_zero_valid), .in_warp (curr_warp_zero),
.in_wb_warp (context_zero_valid),
.in_valid (in_wb_valid), .in_valid (in_wb_valid),
.in_rd (in_rd), .in_rd (in_rd),
.in_src1 (out_rs1), .in_src1 (out_rs1),
@@ -128,13 +146,52 @@ module VX_decode(
.in_src2_fwd_data (in_src2_fwd_data), .in_src2_fwd_data (in_src2_fwd_data),
.in_write_register(write_register), .in_write_register(write_register),
.in_write_data (in_write_data), .in_write_data (in_write_data),
.out_a_reg_data (out_a_reg_data), .out_a_reg_data (zero_a_reg_data),
.out_b_reg_data (out_b_reg_data), .out_b_reg_data (zero_b_reg_data),
.out_clone_stall (out_clone_stall) .out_clone_stall (zero_clone_stall),
); .w0_t0_registers (w0_t0_registers)
);
wire context_one_valid = (in_wb_warp_num == 1);
wire[31:0] one_a_reg_data[`NT_M1:0];
wire[31:0] one_b_reg_data[`NT_M1:0];
reg one_clone_stall;
VX_context_slave VX_Context_one(
.clk (clk),
.in_warp (curr_warp_one),
.in_wb_warp (context_one_valid),
.in_valid (in_wb_valid),
.in_rd (in_rd),
.in_src1 (out_rs1),
.in_src2 (out_rs2),
.in_curr_PC (in_curr_PC),
.in_is_clone (is_clone),
.in_is_jal (is_jal),
.in_src1_fwd (in_src1_fwd),
.in_src1_fwd_data (in_src1_fwd_data),
.in_src2_fwd (in_src2_fwd),
.in_src2_fwd_data (in_src2_fwd_data),
.in_write_register(write_register),
.in_write_data (in_write_data),
.in_wspawn_regs (w0_t0_registers),
.in_wspawn (is_wspawn),
.out_a_reg_data (one_a_reg_data),
.out_b_reg_data (one_b_reg_data),
.out_clone_stall (one_clone_stall)
);
assign out_a_reg_data = curr_warp_zero ? zero_a_reg_data : one_a_reg_data;
assign out_b_reg_data = curr_warp_zero ? zero_b_reg_data : one_b_reg_data;
assign out_clone_stall = zero_clone_stall || one_clone_stall;
// always @(*) begin
// if (context_one_valid) begin
// $display("PC: %h -> src1: %h\tsrc2: %h",in_curr_PC, one_a_reg_data[0], one_b_reg_data[0]);
// end
// end
assign out_warp_num = in_warp_num; assign out_warp_num = in_warp_num;
assign out_valid = in_valid; assign out_valid = in_valid;
assign write_register = (in_wb != 2'h0) ? (1'b1) : (1'b0); assign write_register = (in_wb != 2'h0) ? (1'b1) : (1'b0);
@@ -171,6 +228,10 @@ module VX_decode(
assign is_clone = is_gpgpu && (func3 == 5); assign is_clone = is_gpgpu && (func3 == 5);
assign is_jalrs = is_gpgpu && (func3 == 6); assign is_jalrs = is_gpgpu && (func3 == 6);
assign is_jmprt = is_gpgpu && (func3 == 4); assign is_jmprt = is_gpgpu && (func3 == 4);
assign is_wspawn = is_gpgpu && (func3 == 0);
assign out_wspawn = is_wspawn;
assign out_wspawn_pc = out_a_reg_data[0];
// always @(*) begin // always @(*) begin
// if (is_jalrs) begin // if (is_jalrs) begin
@@ -259,7 +320,7 @@ module VX_decode(
case(curr_opcode) case(curr_opcode)
`LUI_INST: out_upper_immed = {func7, out_rs2, out_rs1, func3}; `LUI_INST: out_upper_immed = {func7, out_rs2, out_rs1, func3};
`AUIPC_INST: out_upper_immed = {func7, out_rs2, out_rs1, func3}; `AUIPC_INST: out_upper_immed = {func7, out_rs2, out_rs1, func3};
default: out_upper_immed = 20'h0; default: out_upper_immed = 20'h0;
endcase // curr_opcode endcase // curr_opcode
end end
@@ -306,6 +367,7 @@ module VX_decode(
end end
`SYS_INST: `SYS_INST:
begin begin
// $display("SYS EBREAK %h", (jal_sys_jal && in_valid[0]) );
out_jal = jal_sys_jal && in_valid[0]; out_jal = jal_sys_jal && in_valid[0];
out_jal_offset = jal_sys_off; out_jal_offset = jal_sys_off;
end end
@@ -317,6 +379,13 @@ module VX_decode(
endcase endcase
end end
wire is_ebreak;
assign is_ebreak = (curr_opcode == `SYS_INST) && (jal_sys_jal && in_valid[0]);
assign out_ebreak = is_ebreak;
// CSR // CSR

View File

@@ -68,6 +68,13 @@ module VX_execute (
end end
endgenerate endgenerate
// always @(*) begin
// if ((in_alu_op == `MUL) && (in_warp_num == 1)) begin
// $display("@PC: %h ---> %d * %d = %d\t%d * %d = %d", in_curr_PC, in_a_reg_data[0], in_b_reg_data[0], out_alu_result[0], in_a_reg_data[1], in_b_reg_data[1], out_alu_result[1]);
// end
// end
assign out_jal_dest = $signed(in_a_reg_data[0]) + $signed(in_jal_offset); assign out_jal_dest = $signed(in_a_reg_data[0]) + $signed(in_jal_offset);
assign out_jal = in_jal; assign out_jal = in_jal;

View File

@@ -2,30 +2,34 @@
`include "VX_define.v" `include "VX_define.v"
module VX_fetch ( module VX_fetch (
input wire clk, input wire clk,
input wire reset, input wire reset,
input wire in_branch_dir, input wire in_branch_dir,
input wire in_freeze, input wire in_freeze,
input wire[31:0] in_branch_dest, input wire[31:0] in_branch_dest,
input wire in_branch_stall, input wire in_branch_stall,
input wire in_fwd_stall, input wire in_fwd_stall,
input wire in_branch_stall_exe, input wire in_branch_stall_exe,
input wire in_clone_stall, input wire in_clone_stall,
input wire in_jal, input wire in_jal,
input wire[31:0] in_jal_dest, input wire[31:0] in_jal_dest,
input wire in_interrupt, input wire in_interrupt,
input wire in_debug, input wire in_debug,
input wire[31:0] in_instruction, input wire[31:0] in_instruction,
input wire in_thread_mask[`NT_M1:0], input wire in_thread_mask[`NT_M1:0],
input wire in_change_mask, input wire in_change_mask,
input wire[`NW_M1:0] in_decode_warp_num, input wire[`NW_M1:0] in_decode_warp_num,
input wire[`NW_M1:0] in_memory_warp_num, input wire[`NW_M1:0] in_memory_warp_num,
input wire in_wspawn,
input wire[31:0] in_wspawn_pc,
input wire in_ebreak,
output wire[31:0] out_instruction, output wire[31:0] out_instruction,
output wire out_delay, output wire out_delay,
output wire[`NW_M1:0] out_warp_num, output wire[`NW_M1:0] out_warp_num,
output wire[31:0] out_curr_PC, output wire[31:0] out_curr_PC,
output wire out_valid[`NT_M1:0] output wire out_valid[`NT_M1:0],
output wire out_ebreak
); );
reg stall; reg stall;
@@ -39,42 +43,98 @@ module VX_fetch (
warp_state = 0; warp_state = 0;
end end
wire add_warp = in_wspawn && !in_ebreak && !in_clone_stall;
wire remove_warp = in_ebreak && !in_wspawn && !in_clone_stall;
always @(posedge clk or posedge reset) begin always @(posedge clk or posedge reset) begin
if (reset || (warp_num == warp_state)) begin if (reset || (warp_num == warp_state) || remove_warp || add_warp) begin
warp_num <= 0; warp_num <= 0;
end else begin end else begin
warp_num <= warp_num + 1; warp_num <= warp_num + 1;
end end
if (add_warp) begin
// $display("Adding a new warp %h", warp_state);
warp_state <= warp_state + 1;
end else if (remove_warp) begin
// $display("Removing a warp %h", warp_state);
warp_state <= warp_state - 1;
end
end end
assign out_ebreak = (warp_state == 0) && in_ebreak;
assign stall = in_clone_stall || in_branch_stall || in_fwd_stall || in_branch_stall_exe || in_interrupt || in_freeze || in_debug; assign stall = in_clone_stall || in_branch_stall || in_fwd_stall || in_branch_stall_exe || in_interrupt || in_freeze || in_debug;
wire[31:0] warp_pc;
wire warp_valid[`NT_M1:0];
wire warp_zero_change_mask = in_change_mask && (in_decode_warp_num == 0);
wire warp_zero_jal = in_jal && (in_memory_warp_num == 0);
wire warp_zero_branch = in_branch_dir && (in_memory_warp_num == 0);
wire warp_zero_stall = stall || (warp_num == 1);
wire warp_zero_wspawn = 0;
wire[31:0] warp_zero_wspawn_pc = 32'h0;
wire warp_zero_change_mask = in_change_mask && (in_decode_warp_num == 0); wire[31:0] warp_zero_pc;
wire warp_zero_jal = in_jal && (in_memory_warp_num == 0); wire warp_zero_valid[`NT_M1:0];
wire warp_zero_branch = in_branch_dir && (in_memory_warp_num == 0); VX_warp VX_Warp_zero(
VX_warp VX_Warp(
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.stall (stall), .stall (warp_zero_stall),
.in_thread_mask(in_thread_mask), .in_thread_mask(in_thread_mask),
.in_change_mask(warp_zero_change_mask), .in_change_mask(warp_zero_change_mask),
.in_jal (warp_zero_jal), .in_jal (warp_zero_jal),
.in_jal_dest (in_jal_dest), .in_jal_dest (in_jal_dest),
.in_branch_dir (warp_zero_branch), .in_branch_dir (warp_zero_branch),
.in_branch_dest(in_branch_dest), .in_branch_dest(in_branch_dest),
.out_PC (warp_pc), .in_wspawn (warp_zero_wspawn),
.out_valid (warp_valid) .in_wspawn_pc (warp_zero_wspawn_pc),
.out_PC (warp_zero_pc),
.out_valid (warp_zero_valid)
); );
assign out_PC = warp_pc; wire warp_one_change_mask = in_change_mask && (in_decode_warp_num == 1);
wire warp_one_jal = in_jal && (in_memory_warp_num == 1);
wire warp_one_branch = in_branch_dir && (in_memory_warp_num == 1);
wire warp_one_stall = stall || (warp_num == 0);
wire[31:0] warp_one_pc;
wire warp_one_valid[`NT_M1:0];
VX_warp VX_Warp_one(
.clk (clk),
.reset (reset),
.stall (warp_one_stall),
.in_thread_mask(in_thread_mask),
.in_change_mask(warp_one_change_mask),
.in_jal (warp_one_jal),
.in_jal_dest (in_jal_dest),
.in_branch_dir (warp_one_branch),
.in_branch_dest(in_branch_dest),
.in_wspawn (in_wspawn),
.in_wspawn_pc (in_wspawn_pc),
.out_PC (warp_one_pc),
.out_valid (warp_one_valid)
);
// always @(*) begin
// if (in_wspawn) begin
// $display("Spawning a warp @ %h",in_wspawn_pc);
// end
// end
// always @(posedge clk) begin
// $display("curr warp: %h Threads:%d%d PC: %h", warp_num, out_valid[0],out_valid[1], out_PC);
// end
// always @(*) begin
// if (warp_num == 1) begin
// $display("Going to PC: %h", warp_one_pc);
// end
// end
assign out_PC = (warp_num == 0) ? warp_zero_pc : warp_one_pc;
assign out_valid = (warp_num == 0) ? warp_zero_valid : warp_one_valid;
// always @(*) begin // always @(*) begin
// $display("FETCH PC: %h (%h, %h, %h)",delete, delete, in_jal_dest, in_branch_dest); // $display("FETCH PC: %h (%h, %h, %h)",delete, delete, in_jal_dest, in_branch_dest);
@@ -82,9 +142,9 @@ module VX_fetch (
assign out_curr_PC = out_PC; assign out_curr_PC = out_PC;
assign out_valid = warp_valid;
assign out_warp_num = warp_num; assign out_warp_num = warp_num;
assign out_delay = 0; assign out_delay = 0;
assign out_instruction = stall ? 32'b0 : in_instruction; assign out_instruction = stall ? 32'b0 : in_instruction;

View File

@@ -65,6 +65,12 @@ module VX_memory (
assign out_cache_driver_in_data = in_rd2; assign out_cache_driver_in_data = in_rd2;
assign out_cache_driver_in_valid = in_valid; assign out_cache_driver_in_valid = in_valid;
// always @(*) begin
// if (in_valid[0] && (in_mem_write == `SW_MEM_WRITE) && (in_alu_result[0] >= 32'h810049a0)) begin
// $display("SW$ PC: %h - Warp: %h -> [%h]%h = %h || [%h]%h = %h",in_curr_PC, in_warp_num, in_valid[0], in_alu_result[0], in_rd2[0], in_valid[1], in_alu_result[1], in_rd2[1]);
// end
// end
// wire[31:0] sm_out_data[`NT_M1:0]; // wire[31:0] sm_out_data[`NT_M1:0];
@@ -113,7 +119,13 @@ module VX_memory (
end end
`BLT: out_branch_dir = (in_alu_result[0][31] == 0) ? `NOT_TAKEN : `TAKEN; `BLT: out_branch_dir = (in_alu_result[0][31] == 0) ? `NOT_TAKEN : `TAKEN;
`BGT: out_branch_dir = (in_alu_result[0][31] == 0) ? `TAKEN : `NOT_TAKEN; `BGT: out_branch_dir = (in_alu_result[0][31] == 0) ? `TAKEN : `NOT_TAKEN;
`BLTU: out_branch_dir = (in_alu_result[0][31] == 0) ? `NOT_TAKEN : `TAKEN; `BLTU:
begin
out_branch_dir = (in_alu_result[0][31] == 0) ? `NOT_TAKEN : `TAKEN;
if (in_warp_num == 1) begin
// $display("BLTU PC:%h : %d < %d = %d", in_curr_PC, in_rs1, in_rs2, (in_alu_result[0][31] == 0));
end
end
`BGTU: out_branch_dir = (in_alu_result[0][31] == 0) ? `TAKEN : `NOT_TAKEN; `BGTU: out_branch_dir = (in_alu_result[0][31] == 0) ? `TAKEN : `NOT_TAKEN;
`NO_BRANCH: out_branch_dir = `NOT_TAKEN; `NO_BRANCH: out_branch_dir = `NOT_TAKEN;
default: out_branch_dir = `NOT_TAKEN; default: out_branch_dir = `NOT_TAKEN;

View File

@@ -2,7 +2,7 @@
module VX_register_file ( module VX_register_file (
input wire clk, input wire clk,
input wire in_warp, input wire in_wb_warp,
input wire in_valid, input wire in_valid,
input wire in_write_register, input wire in_write_register,
input wire[4:0] in_rd, input wire[4:0] in_rd,
@@ -30,6 +30,11 @@ module VX_register_file (
// end // end
// end // end
// always @(*) begin
// $display("TID: %d: %h",10,registers[10]);
// $display("WID: %d: %h",11,registers[11]);
// end
assign out_regs = registers; assign out_regs = registers;
assign write_data = in_data; assign write_data = in_data;
@@ -38,7 +43,7 @@ module VX_register_file (
assign write_enable = (in_write_register && (in_rd != 5'h0)) && in_valid; assign write_enable = (in_write_register && (in_rd != 5'h0)) && in_valid;
always @(posedge clk) begin always @(posedge clk) begin
if(write_enable && in_warp) begin if(write_enable && in_wb_warp) begin
// $display("RF: Writing %h to %d",write_data, write_register); // $display("RF: Writing %h to %d",write_data, write_register);
registers[write_register] <= write_data; registers[write_register] <= write_data;
end end

View File

@@ -0,0 +1,72 @@
// VX_register_file_master_slave
//
// A 32 x 32-bit register file with two ways of being written on the rising
// clock edge:
//   * a normal single-register write-back (in_data -> registers[in_rd]);
//   * a whole-file load from in_wspawn_regs while a wspawn is in progress.
// Both source operands are sampled on the falling clock edge, and the whole
// register array is exported on out_regs.
//
// Ports
//   clk               clock.
//   in_wb_warp        write-back targets this register file's warp.
//   in_valid          write-back is valid for this lane.
//   in_write_register write-back request.
//   in_rd             destination register index; index 0 is never written
//                     by a write-back.
//   in_data           write-back data.
//   in_src1, in_src2  read indices.
//   in_wspawn         a wspawn is in progress; blocks normal write-back.
//   in_to_wspawn      together with in_wspawn, triggers the whole-file load.
//   in_wspawn_regs    register image to load.
//   out_src1_data     registers[in_src1], updated on negedge clk.
//   out_src2_data     registers[in_src2], updated on negedge clk.
//   out_regs          the complete register array.
module VX_register_file_master_slave (
    input  wire        clk,
    input  wire        in_wb_warp,
    input  wire        in_valid,
    input  wire        in_write_register,
    input  wire[4:0]   in_rd,
    input  wire[31:0]  in_data,
    input  wire[4:0]   in_src1,
    input  wire[4:0]   in_src2,
    input  wire        in_wspawn,
    input  wire        in_to_wspawn,
    input  wire[31:0]  in_wspawn_regs[31:0],

    output reg[31:0]   out_src1_data,
    output reg[31:0]   out_src2_data,
    output wire[31:0]  out_regs[31:0]
);

    reg[31:0] registers[31:0];

    // A normal write-back requires a write request to a register other than
    // index 0, on a valid lane, aimed at this warp.
    wire rd_is_zero   = (in_rd == 5'h0);
    wire write_enable = in_write_register && !rd_is_zero && in_valid && in_wb_warp;

    // The two write conditions below can never hold together, because one
    // requires in_wspawn and the other requires its absence.
    wire load_image   = in_wspawn && in_to_wspawn;
    wire write_single = write_enable && !in_wspawn;

    assign out_regs = registers;

    // Write port.
    always @(posedge clk) begin
        if (load_image) begin
            registers <= in_wspawn_regs;
        end else if (write_single) begin
            registers[in_rd] <= in_data;
        end
    end

    // Read ports, sampled half a cycle after the write port.
    always @(negedge clk) begin
        out_src1_data <= registers[in_src1];
        out_src2_data <= registers[in_src2];
    end

endmodule

View File

@@ -6,6 +6,7 @@
module VX_register_file_slave ( module VX_register_file_slave (
input wire clk, input wire clk,
input wire in_warp, input wire in_warp,
input wire in_wb_warp,
input wire in_valid, input wire in_valid,
input wire in_write_register, input wire in_write_register,
input wire[4:0] in_rd, input wire[4:0] in_rd,
@@ -37,17 +38,23 @@ module VX_register_file_slave (
// integer i; // integer i;
// always @(*) begin
// if (in_warp) begin
// $display("TID: %d: %h",10,registers[10]);
// $display("WID: %d: %h",11,registers[11]);
// end
// end
assign write_data = in_data; assign write_data = in_data;
assign write_register = in_rd; assign write_register = in_rd;
assign write_enable = (in_write_register && (in_rd != 5'h0)) && in_valid; assign write_enable = (in_write_register && (in_rd != 5'h0)) && in_valid && in_wb_warp;
always @(posedge clk) begin always @(posedge clk) begin
if(write_enable && !in_clone && in_warp) begin if(write_enable && !in_clone) begin
// $display("RF: Writing %h to %d",write_data, write_register); // $display("RF: Writing %h to %d",write_data, write_register);
registers[write_register] <= write_data; registers[write_register] <= write_data;
end else if (in_clone && in_to_clone) begin end else if (in_clone && in_to_clone && in_warp) begin
// $display("CLONING IN SLAVE");
registers <= in_regs; registers <= in_regs;
end end
end end

View File

@@ -11,7 +11,8 @@ module VX_warp (
input wire[31:0] in_jal_dest, input wire[31:0] in_jal_dest,
input wire in_branch_dir, input wire in_branch_dir,
input wire[31:0] in_branch_dest, input wire[31:0] in_branch_dest,
input wire in_wspawn,
input wire[31:0] in_wspawn_pc,
output wire[31:0] out_PC, output wire[31:0] out_PC,
output wire out_valid[`NT_M1:0] output wire out_valid[`NT_M1:0]
@@ -62,7 +63,10 @@ module VX_warp (
always @(posedge clk or posedge reset) begin always @(posedge clk or posedge reset) begin
if (reset) begin if (reset) begin
real_PC <= 0; real_PC <= 0;
end else if (stall == 1'b0) begin end else if (in_wspawn == 1'b1) begin
// $display("Inside warp ***** Spawn @ %H",in_wspawn_pc);
real_PC <= in_wspawn_pc;
end else if (!stall) begin
real_PC <= use_PC + 32'h4; real_PC <= use_PC + 32'h4;
end else begin end else begin
real_PC <= use_PC; real_PC <= use_PC;

View File

@@ -13,7 +13,8 @@ module Vortex(
output wire[2:0] out_cache_driver_in_mem_read, output wire[2:0] out_cache_driver_in_mem_read,
output wire[2:0] out_cache_driver_in_mem_write, output wire[2:0] out_cache_driver_in_mem_write,
output wire out_cache_driver_in_valid[`NT_M1:0], output wire out_cache_driver_in_valid[`NT_M1:0],
output wire[31:0] out_cache_driver_in_data[`NT_M1:0] output wire[31:0] out_cache_driver_in_data[`NT_M1:0],
output wire out_ebreak
); );
// wire[31:0] in_cache_driver_out_data[`NT_M1:0]; // wire[31:0] in_cache_driver_out_data[`NT_M1:0];
@@ -25,11 +26,12 @@ module Vortex(
assign curr_PC = fetch_curr_PC; assign curr_PC = fetch_curr_PC;
// From fetch // From fetch
wire[31:0] fetch_instruction; wire[31:0] fetch_instruction;
wire fetch_delay; wire fetch_delay;
wire[31:0] fetch_curr_PC; wire[31:0] fetch_curr_PC;
wire fetch_valid[`NT_M1:0]; wire fetch_valid[`NT_M1:0];
wire[`NW_M1:0] fetch_warp_num; wire[`NW_M1:0] fetch_warp_num;
wire fetch_ebreak;
// From f_d_register // From f_d_register
wire[31:0] f_d_instruction; wire[31:0] f_d_instruction;
@@ -62,7 +64,10 @@ wire decode_valid[`NT_M1:0];
wire decode_clone_stall; wire decode_clone_stall;
wire decode_change_mask; wire decode_change_mask;
wire decode_thread_mask[`NT_M1:0]; wire decode_thread_mask[`NT_M1:0];
wire[`NW_M1:0] decode_warp_num; wire[`NW_M1:0] decode_warp_num;
wire decode_wspawn;
wire[31:0] decode_wspawn_pc;
wire decode_ebreak;
// From d_e_register // From d_e_register
wire[11:0] d_e_csr_address; wire[11:0] d_e_csr_address;
@@ -193,7 +198,7 @@ wire debug;
assign debug = 1'b0; assign debug = 1'b0;
assign interrupt = 1'b0; assign interrupt = 1'b0;
assign total_freeze = fetch_delay || memory_delay; assign total_freeze = fetch_delay || memory_delay;
assign out_ebreak = fetch_ebreak;
VX_fetch vx_fetch( VX_fetch vx_fetch(
.clk (clk), .clk (clk),
@@ -214,12 +219,16 @@ VX_fetch vx_fetch(
.in_change_mask (decode_change_mask), .in_change_mask (decode_change_mask),
.in_decode_warp_num (decode_warp_num), .in_decode_warp_num (decode_warp_num),
.in_memory_warp_num (memory_warp_num), .in_memory_warp_num (memory_warp_num),
.in_wspawn (decode_wspawn),
.in_wspawn_pc (decode_wspawn_pc),
.in_ebreak (decode_ebreak),
.out_instruction (fetch_instruction), .out_instruction (fetch_instruction),
.out_delay (fetch_delay), .out_delay (fetch_delay),
.out_curr_PC (fetch_curr_PC), .out_curr_PC (fetch_curr_PC),
.out_warp_num (fetch_warp_num), .out_warp_num (fetch_warp_num),
.out_valid (fetch_valid) .out_valid (fetch_valid),
.out_ebreak (fetch_ebreak)
); );
@@ -280,7 +289,10 @@ VX_decode vx_decode(
.out_clone_stall (decode_clone_stall), .out_clone_stall (decode_clone_stall),
.out_change_mask (decode_change_mask), .out_change_mask (decode_change_mask),
.out_thread_mask (decode_thread_mask), .out_thread_mask (decode_thread_mask),
.out_warp_num (decode_warp_num) .out_warp_num (decode_warp_num),
.out_wspawn (decode_wspawn),
.out_wspawn_pc (decode_wspawn_pc),
.out_ebreak (decode_ebreak)
); );

Binary file not shown.

File diff suppressed because it is too large Load Diff

View File

@@ -25,6 +25,7 @@ VL_MODULE(VVortex) {
VL_IN8(reset,0,0); VL_IN8(reset,0,0);
VL_OUT8(out_cache_driver_in_mem_read,2,0); VL_OUT8(out_cache_driver_in_mem_read,2,0);
VL_OUT8(out_cache_driver_in_mem_write,2,0); VL_OUT8(out_cache_driver_in_mem_write,2,0);
VL_OUT8(out_ebreak,0,0);
VL_IN(fe_instruction,31,0); VL_IN(fe_instruction,31,0);
VL_OUT(curr_PC,31,0); VL_OUT(curr_PC,31,0);
VL_IN(in_cache_driver_out_data[2],31,0); VL_IN(in_cache_driver_out_data[2],31,0);
@@ -41,23 +42,35 @@ VL_MODULE(VVortex) {
VL_SIG8(Vortex__DOT__decode_branch_type,2,0); VL_SIG8(Vortex__DOT__decode_branch_type,2,0);
VL_SIG8(Vortex__DOT__decode_jal,0,0); VL_SIG8(Vortex__DOT__decode_jal,0,0);
VL_SIG8(Vortex__DOT__decode_clone_stall,0,0); VL_SIG8(Vortex__DOT__decode_clone_stall,0,0);
VL_SIG8(Vortex__DOT__decode_change_mask,0,0);
VL_SIG8(Vortex__DOT__execute_branch_stall,0,0); VL_SIG8(Vortex__DOT__execute_branch_stall,0,0);
VL_SIG8(Vortex__DOT__memory_branch_dir,0,0);
VL_SIG8(Vortex__DOT__forwarding_fwd_stall,0,0); VL_SIG8(Vortex__DOT__forwarding_fwd_stall,0,0);
VL_SIG8(Vortex__DOT__forwarding_src1_fwd,0,0); VL_SIG8(Vortex__DOT__forwarding_src1_fwd,0,0);
VL_SIG8(Vortex__DOT__forwarding_src2_fwd,0,0); VL_SIG8(Vortex__DOT__forwarding_src2_fwd,0,0);
VL_SIG8(Vortex__DOT__vx_fetch__DOT__stall,0,0); VL_SIG8(Vortex__DOT__vx_fetch__DOT__stall,0,0);
VL_SIG8(Vortex__DOT__vx_fetch__DOT__warp_num,1,0); VL_SIG8(Vortex__DOT__vx_fetch__DOT__warp_num,1,0);
VL_SIG8(Vortex__DOT__vx_fetch__DOT__warp_state,1,0); VL_SIG8(Vortex__DOT__vx_fetch__DOT__warp_state,1,0);
VL_SIG8(Vortex__DOT__vx_fetch__DOT__add_warp,0,0);
VL_SIG8(Vortex__DOT__vx_fetch__DOT__remove_warp,0,0);
VL_SIG8(Vortex__DOT__vx_fetch__DOT__warp_zero_change_mask,0,0); VL_SIG8(Vortex__DOT__vx_fetch__DOT__warp_zero_change_mask,0,0);
VL_SIG8(Vortex__DOT__vx_fetch__DOT__warp_zero_stall,0,0);
VL_SIG8(Vortex__DOT__vx_fetch__DOT__warp_one_change_mask,0,0);
VL_SIG8(Vortex__DOT__vx_fetch__DOT__warp_one_stall,0,0);
VL_SIG8(Vortex__DOT__vx_f_d_reg__DOT__warp_num,1,0); VL_SIG8(Vortex__DOT__vx_f_d_reg__DOT__warp_num,1,0);
VL_SIG8(Vortex__DOT__vx_decode__DOT__is_itype,0,0); VL_SIG8(Vortex__DOT__vx_decode__DOT__is_itype,0,0);
VL_SIG8(Vortex__DOT__vx_decode__DOT__is_csr,0,0); VL_SIG8(Vortex__DOT__vx_decode__DOT__is_csr,0,0);
VL_SIG8(Vortex__DOT__vx_decode__DOT__is_clone,0,0); VL_SIG8(Vortex__DOT__vx_decode__DOT__is_clone,0,0);
VL_SIG8(Vortex__DOT__vx_decode__DOT__is_jalrs,0,0); VL_SIG8(Vortex__DOT__vx_decode__DOT__is_jalrs,0,0);
VL_SIG8(Vortex__DOT__vx_decode__DOT__is_jmprt,0,0); VL_SIG8(Vortex__DOT__vx_decode__DOT__is_jmprt,0,0);
VL_SIG8(Vortex__DOT__vx_decode__DOT__is_wspawn,0,0);
VL_SIG8(Vortex__DOT__vx_decode__DOT__jal_sys_jal,0,0);
VL_SIG8(Vortex__DOT__vx_decode__DOT__mul_alu,4,0); VL_SIG8(Vortex__DOT__vx_decode__DOT__mul_alu,4,0);
VL_SIG8(Vortex__DOT__vx_decode__DOT__is_ebreak,0,0);
VL_SIG8(Vortex__DOT__vx_decode__DOT__temp_final_alu,4,0); VL_SIG8(Vortex__DOT__vx_decode__DOT__temp_final_alu,4,0);
VL_SIG8(Vortex__DOT__vx_decode__DOT__VX_Context__DOT__state_stall,5,0); VL_SIG8(Vortex__DOT__vx_decode__DOT__VX_Context_zero__DOT__state_stall,5,0);
VL_SIG8(Vortex__DOT__vx_decode__DOT__VX_Context_one__DOT__clone_state_stall,5,0);
VL_SIG8(Vortex__DOT__vx_decode__DOT__VX_Context_one__DOT__wspawn_state_stall,5,0);
VL_SIG8(Vortex__DOT__vx_d_e_reg__DOT__rd,4,0); VL_SIG8(Vortex__DOT__vx_d_e_reg__DOT__rd,4,0);
VL_SIG8(Vortex__DOT__vx_d_e_reg__DOT__alu_op,4,0); VL_SIG8(Vortex__DOT__vx_d_e_reg__DOT__alu_op,4,0);
VL_SIG8(Vortex__DOT__vx_d_e_reg__DOT__wb,1,0); VL_SIG8(Vortex__DOT__vx_d_e_reg__DOT__wb,1,0);
@@ -89,20 +102,24 @@ VL_MODULE(VVortex) {
VL_SIG16(Vortex__DOT__decode_csr_address,11,0); VL_SIG16(Vortex__DOT__decode_csr_address,11,0);
VL_SIG16(Vortex__DOT__vx_decode__DOT__alu_tempp,11,0); VL_SIG16(Vortex__DOT__vx_decode__DOT__alu_tempp,11,0);
VL_SIG16(Vortex__DOT__vx_d_e_reg__DOT__csr_address,11,0); VL_SIG16(Vortex__DOT__vx_d_e_reg__DOT__csr_address,11,0);
};
struct {
VL_SIG16(Vortex__DOT__vx_e_m_reg__DOT__csr_address,11,0); VL_SIG16(Vortex__DOT__vx_e_m_reg__DOT__csr_address,11,0);
VL_SIG16(Vortex__DOT__vx_csr_handler__DOT__decode_csr_address,11,0); VL_SIG16(Vortex__DOT__vx_csr_handler__DOT__decode_csr_address,11,0);
VL_SIG(Vortex__DOT__decode_itype_immed,31,0); VL_SIG(Vortex__DOT__decode_itype_immed,31,0);
VL_SIG(Vortex__DOT__decode_jal_offset,31,0); VL_SIG(Vortex__DOT__decode_jal_offset,31,0);
VL_SIG(Vortex__DOT__memory_branch_dest,31,0);
VL_SIG(Vortex__DOT__csr_decode_csr_data,31,0); VL_SIG(Vortex__DOT__csr_decode_csr_data,31,0);
VL_SIG(Vortex__DOT__vx_fetch__DOT__VX_Warp__DOT__real_PC,31,0); VL_SIG(Vortex__DOT__vx_fetch__DOT__out_PC,31,0);
VL_SIG(Vortex__DOT__vx_fetch__DOT__VX_Warp__DOT__temp_PC,31,0); VL_SIG(Vortex__DOT__vx_fetch__DOT__VX_Warp_zero__DOT__real_PC,31,0);
VL_SIG(Vortex__DOT__vx_fetch__DOT__VX_Warp_zero__DOT__temp_PC,31,0);
VL_SIG(Vortex__DOT__vx_fetch__DOT__VX_Warp_one__DOT__real_PC,31,0);
VL_SIG(Vortex__DOT__vx_fetch__DOT__VX_Warp_one__DOT__temp_PC,31,0);
VL_SIG(Vortex__DOT__vx_f_d_reg__DOT__instruction,31,0); VL_SIG(Vortex__DOT__vx_f_d_reg__DOT__instruction,31,0);
VL_SIG(Vortex__DOT__vx_f_d_reg__DOT__curr_PC,31,0); VL_SIG(Vortex__DOT__vx_f_d_reg__DOT__curr_PC,31,0);
VL_SIG(Vortex__DOT__vx_d_e_reg__DOT__PC_next_out,31,0); VL_SIG(Vortex__DOT__vx_d_e_reg__DOT__PC_next_out,31,0);
VL_SIG(Vortex__DOT__vx_d_e_reg__DOT__itype_immed,31,0); VL_SIG(Vortex__DOT__vx_d_e_reg__DOT__itype_immed,31,0);
VL_SIG(Vortex__DOT__vx_d_e_reg__DOT__upper_immed,19,0); VL_SIG(Vortex__DOT__vx_d_e_reg__DOT__upper_immed,19,0);
};
struct {
VL_SIG(Vortex__DOT__vx_d_e_reg__DOT__csr_mask,31,0); VL_SIG(Vortex__DOT__vx_d_e_reg__DOT__csr_mask,31,0);
VL_SIG(Vortex__DOT__vx_d_e_reg__DOT__curr_PC,31,0); VL_SIG(Vortex__DOT__vx_d_e_reg__DOT__curr_PC,31,0);
VL_SIG(Vortex__DOT__vx_d_e_reg__DOT__jal_offset,31,0); VL_SIG(Vortex__DOT__vx_d_e_reg__DOT__jal_offset,31,0);
@@ -142,16 +159,30 @@ VL_MODULE(VVortex) {
VL_SIG(Vortex__DOT__writeback_write_data[2],31,0); VL_SIG(Vortex__DOT__writeback_write_data[2],31,0);
VL_SIG(Vortex__DOT__forwarding_src1_fwd_data[2],31,0); VL_SIG(Vortex__DOT__forwarding_src1_fwd_data[2],31,0);
VL_SIG(Vortex__DOT__forwarding_src2_fwd_data[2],31,0); VL_SIG(Vortex__DOT__forwarding_src2_fwd_data[2],31,0);
VL_SIG8(Vortex__DOT__vx_fetch__DOT__warp_valid[2],0,0); VL_SIG8(Vortex__DOT__vx_fetch__DOT__warp_zero_valid[2],0,0);
VL_SIG8(Vortex__DOT__vx_fetch__DOT__VX_Warp__DOT__valid[2],0,0); VL_SIG8(Vortex__DOT__vx_fetch__DOT__warp_one_valid[2],0,0);
VL_SIG8(Vortex__DOT__vx_fetch__DOT__VX_Warp_zero__DOT__valid[2],0,0);
VL_SIG8(Vortex__DOT__vx_fetch__DOT__VX_Warp_one__DOT__valid[2],0,0);
VL_SIG8(Vortex__DOT__vx_f_d_reg__DOT__valid[2],0,0); VL_SIG8(Vortex__DOT__vx_f_d_reg__DOT__valid[2],0,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__w0_t0_registers[32],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__zero_a_reg_data[2],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__zero_b_reg_data[2],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__one_a_reg_data[2],31,0);
};
struct {
VL_SIG(Vortex__DOT__vx_decode__DOT__one_b_reg_data[2],31,0);
VL_SIG8(Vortex__DOT__vx_decode__DOT__jalrs_thread_mask[2],0,0); VL_SIG8(Vortex__DOT__vx_decode__DOT__jalrs_thread_mask[2],0,0);
VL_SIG8(Vortex__DOT__vx_decode__DOT__jmprt_thread_mask[2],0,0); VL_SIG8(Vortex__DOT__vx_decode__DOT__jmprt_thread_mask[2],0,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context__DOT__rd1_register[2],31,0); VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_zero__DOT__rd1_register[2],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context__DOT__rd2_register[2],31,0); VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_zero__DOT__rd2_register[2],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context__DOT__clone_regsiters[32],31,0); VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_zero__DOT__clone_regsiters[32],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context__DOT__vx_register_file_master__DOT__registers[32],31,0); VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_zero__DOT__vx_register_file_master__DOT__registers[32],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context__DOT__gen_code_label__BRA__1__KET____DOT__vx_register_file_slave__DOT__registers[32],31,0); VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_zero__DOT__gen_code_label__BRA__1__KET____DOT__vx_register_file_slave__DOT__registers[32],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_one__DOT__rd1_register[2],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_one__DOT__rd2_register[2],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_one__DOT__clone_regsiters[32],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_one__DOT__vx_register_file_master__DOT__registers[32],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_one__DOT__gen_code_label__BRA__1__KET____DOT__vx_register_file_slave__DOT__registers[32],31,0);
VL_SIG(Vortex__DOT__vx_d_e_reg__DOT__a_reg_data[2],31,0); VL_SIG(Vortex__DOT__vx_d_e_reg__DOT__a_reg_data[2],31,0);
VL_SIG(Vortex__DOT__vx_d_e_reg__DOT__b_reg_data[2],31,0); VL_SIG(Vortex__DOT__vx_d_e_reg__DOT__b_reg_data[2],31,0);
VL_SIG8(Vortex__DOT__vx_d_e_reg__DOT__valid[2],0,0); VL_SIG8(Vortex__DOT__vx_d_e_reg__DOT__valid[2],0,0);
@@ -167,8 +198,6 @@ VL_MODULE(VVortex) {
VL_SIG(Vortex__DOT__vx_writeback__DOT__out_pc_data[2],31,0); VL_SIG(Vortex__DOT__vx_writeback__DOT__out_pc_data[2],31,0);
VL_SIG(Vortex__DOT__vx_forwarding__DOT__use_execute_PC_next[2],31,0); VL_SIG(Vortex__DOT__vx_forwarding__DOT__use_execute_PC_next[2],31,0);
VL_SIG(Vortex__DOT__vx_forwarding__DOT__use_memory_PC_next[2],31,0); VL_SIG(Vortex__DOT__vx_forwarding__DOT__use_memory_PC_next[2],31,0);
};
struct {
VL_SIG(Vortex__DOT__vx_forwarding__DOT__use_writeback_PC_next[2],31,0); VL_SIG(Vortex__DOT__vx_forwarding__DOT__use_writeback_PC_next[2],31,0);
VL_SIG16(Vortex__DOT__vx_csr_handler__DOT__csr[4096],11,0); VL_SIG16(Vortex__DOT__vx_csr_handler__DOT__csr[4096],11,0);
}; };
@@ -179,13 +208,16 @@ VL_MODULE(VVortex) {
struct { struct {
// Begin mtask footprint all: // Begin mtask footprint all:
VL_SIG8(__Vtableidx1,2,0); VL_SIG8(__Vtableidx1,2,0);
VL_SIG8(__Vdly__Vortex__DOT__vx_fetch__DOT__warp_num,1,0);
VL_SIG8(__Vclklast__TOP__clk,0,0); VL_SIG8(__Vclklast__TOP__clk,0,0);
VL_SIG8(__Vclklast__TOP__reset,0,0); VL_SIG8(__Vclklast__TOP__reset,0,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context__DOT____Vcellout__vx_register_file_master__out_src2_data,31,0); VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_zero__DOT____Vcellout__vx_register_file_master__out_src2_data,31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context__DOT____Vcellout__vx_register_file_master__out_src1_data,31,0); VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_zero__DOT____Vcellout__vx_register_file_master__out_src1_data,31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context__DOT____Vcellout__gen_code_label__BRA__1__KET____DOT__vx_register_file_slave__out_src2_data,31,0); VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_zero__DOT____Vcellout__gen_code_label__BRA__1__KET____DOT__vx_register_file_slave__out_src2_data,31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context__DOT____Vcellout__gen_code_label__BRA__1__KET____DOT__vx_register_file_slave__out_src1_data,31,0); VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_zero__DOT____Vcellout__gen_code_label__BRA__1__KET____DOT__vx_register_file_slave__out_src1_data,31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_one__DOT____Vcellout__vx_register_file_master__out_src2_data,31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_one__DOT____Vcellout__vx_register_file_master__out_src1_data,31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_one__DOT____Vcellout__gen_code_label__BRA__1__KET____DOT__vx_register_file_slave__out_src2_data,31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_one__DOT____Vcellout__gen_code_label__BRA__1__KET____DOT__vx_register_file_slave__out_src1_data,31,0);
VL_SIG(Vortex__DOT__vx_execute__DOT____Vcellout__genblk1__BRA__0__KET____DOT__vx_alu__out_alu_result,31,0); VL_SIG(Vortex__DOT__vx_execute__DOT____Vcellout__genblk1__BRA__0__KET____DOT__vx_alu__out_alu_result,31,0);
VL_SIG(Vortex__DOT__vx_execute__DOT____Vcellout__genblk1__BRA__1__KET____DOT__vx_alu__out_alu_result,31,0); VL_SIG(Vortex__DOT__vx_execute__DOT____Vcellout__genblk1__BRA__1__KET____DOT__vx_alu__out_alu_result,31,0);
VL_SIG8(Vortex__DOT____Vcellout__vx_fetch__out_valid[2],0,0); VL_SIG8(Vortex__DOT____Vcellout__vx_fetch__out_valid[2],0,0);
@@ -239,24 +271,37 @@ VL_MODULE(VVortex) {
VL_SIG(Vortex__DOT____Vcellinp__vx_writeback__in_mem_result[2],31,0); VL_SIG(Vortex__DOT____Vcellinp__vx_writeback__in_mem_result[2],31,0);
VL_SIG(Vortex__DOT____Vcellinp__vx_writeback__in_alu_result[2],31,0); VL_SIG(Vortex__DOT____Vcellinp__vx_writeback__in_alu_result[2],31,0);
VL_SIG(Vortex__DOT____Vcellout__vx_forwarding__out_src2_fwd_data[2],31,0); VL_SIG(Vortex__DOT____Vcellout__vx_forwarding__out_src2_fwd_data[2],31,0);
};
struct {
VL_SIG(Vortex__DOT____Vcellout__vx_forwarding__out_src1_fwd_data[2],31,0); VL_SIG(Vortex__DOT____Vcellout__vx_forwarding__out_src1_fwd_data[2],31,0);
VL_SIG(Vortex__DOT____Vcellinp__vx_forwarding__in_writeback_mem_data[2],31,0); VL_SIG(Vortex__DOT____Vcellinp__vx_forwarding__in_writeback_mem_data[2],31,0);
VL_SIG(Vortex__DOT____Vcellinp__vx_forwarding__in_writeback_alu_result[2],31,0); VL_SIG(Vortex__DOT____Vcellinp__vx_forwarding__in_writeback_alu_result[2],31,0);
};
struct {
VL_SIG(Vortex__DOT____Vcellinp__vx_forwarding__in_memory_mem_data[2],31,0); VL_SIG(Vortex__DOT____Vcellinp__vx_forwarding__in_memory_mem_data[2],31,0);
VL_SIG(Vortex__DOT____Vcellinp__vx_forwarding__in_memory_alu_result[2],31,0); VL_SIG(Vortex__DOT____Vcellinp__vx_forwarding__in_memory_alu_result[2],31,0);
VL_SIG(Vortex__DOT____Vcellinp__vx_forwarding__in_execute_alu_result[2],31,0); VL_SIG(Vortex__DOT____Vcellinp__vx_forwarding__in_execute_alu_result[2],31,0);
VL_SIG8(Vortex__DOT__vx_fetch__DOT____Vcellout__VX_Warp__out_valid[2],0,0); VL_SIG8(Vortex__DOT__vx_fetch__DOT____Vcellout__VX_Warp_zero__out_valid[2],0,0);
VL_SIG8(Vortex__DOT__vx_fetch__DOT____Vcellinp__VX_Warp__in_thread_mask[2],0,0); VL_SIG8(Vortex__DOT__vx_fetch__DOT____Vcellinp__VX_Warp_zero__in_thread_mask[2],0,0);
VL_SIG(Vortex__DOT__vx_decode__DOT____Vcellout__VX_Context__out_b_reg_data[2],31,0); VL_SIG8(Vortex__DOT__vx_fetch__DOT____Vcellout__VX_Warp_one__out_valid[2],0,0);
VL_SIG(Vortex__DOT__vx_decode__DOT____Vcellout__VX_Context__out_a_reg_data[2],31,0); VL_SIG8(Vortex__DOT__vx_fetch__DOT____Vcellinp__VX_Warp_one__in_thread_mask[2],0,0);
VL_SIG(Vortex__DOT__vx_decode__DOT____Vcellinp__VX_Context__in_write_data[2],31,0); VL_SIG(Vortex__DOT__vx_decode__DOT____Vcellout__VX_Context_zero__w0_t0_registers[32],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT____Vcellinp__VX_Context__in_src2_fwd_data[2],31,0); VL_SIG(Vortex__DOT__vx_decode__DOT____Vcellout__VX_Context_zero__out_b_reg_data[2],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT____Vcellinp__VX_Context__in_src1_fwd_data[2],31,0); VL_SIG(Vortex__DOT__vx_decode__DOT____Vcellout__VX_Context_zero__out_a_reg_data[2],31,0);
VL_SIG8(Vortex__DOT__vx_decode__DOT____Vcellinp__VX_Context__in_valid[2],0,0); VL_SIG(Vortex__DOT__vx_decode__DOT____Vcellinp__VX_Context_zero__in_write_data[2],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context__DOT____Vcellout__vx_register_file_master__out_regs[32],31,0); VL_SIG(Vortex__DOT__vx_decode__DOT____Vcellinp__VX_Context_zero__in_src2_fwd_data[2],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context__DOT____Vcellinp__gen_code_label__BRA__1__KET____DOT__vx_register_file_slave__in_regs[32],31,0); VL_SIG(Vortex__DOT__vx_decode__DOT____Vcellinp__VX_Context_zero__in_src1_fwd_data[2],31,0);
VL_SIG8(Vortex__DOT__vx_decode__DOT____Vcellinp__VX_Context_zero__in_valid[2],0,0);
VL_SIG(Vortex__DOT__vx_decode__DOT____Vcellout__VX_Context_one__out_b_reg_data[2],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT____Vcellout__VX_Context_one__out_a_reg_data[2],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT____Vcellinp__VX_Context_one__in_wspawn_regs[32],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT____Vcellinp__VX_Context_one__in_write_data[2],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT____Vcellinp__VX_Context_one__in_src2_fwd_data[2],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT____Vcellinp__VX_Context_one__in_src1_fwd_data[2],31,0);
VL_SIG8(Vortex__DOT__vx_decode__DOT____Vcellinp__VX_Context_one__in_valid[2],0,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_zero__DOT____Vcellout__vx_register_file_master__out_regs[32],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_zero__DOT____Vcellinp__gen_code_label__BRA__1__KET____DOT__vx_register_file_slave__in_regs[32],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_one__DOT____Vcellout__vx_register_file_master__out_regs[32],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_one__DOT____Vcellinp__vx_register_file_master__in_wspawn_regs[32],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_one__DOT____Vcellinp__gen_code_label__BRA__1__KET____DOT__vx_register_file_slave__in_regs[32],31,0);
}; };
static VL_ST_SIG8(__Vtable1_Vortex__DOT__vx_decode__DOT__mul_alu[8],4,0); static VL_ST_SIG8(__Vtable1_Vortex__DOT__vx_decode__DOT__mul_alu[8],4,0);

Binary file not shown.

Binary file not shown.

View File

@@ -1 +1 @@
obj_dir/VVortex.cpp obj_dir/VVortex.h obj_dir/VVortex.mk obj_dir/VVortex__Syms.cpp obj_dir/VVortex__Syms.h obj_dir/VVortex__ver.d obj_dir/VVortex_classes.mk : /usr/local/Cellar/verilator/4.010/bin/verilator_bin /usr/local/Cellar/verilator/4.010/bin/verilator_bin VX_alu.v VX_context.v VX_csr_handler.v VX_d_e_reg.v VX_decode.v VX_define.v VX_e_m_reg.v VX_execute.v VX_f_d_reg.v VX_fetch.v VX_forwarding.v VX_m_w_reg.v VX_memory.v VX_register_file.v VX_register_file_slave.v VX_warp.v VX_writeback.v Vortex.v obj_dir/VVortex.cpp obj_dir/VVortex.h obj_dir/VVortex.mk obj_dir/VVortex__Syms.cpp obj_dir/VVortex__Syms.h obj_dir/VVortex__ver.d obj_dir/VVortex_classes.mk : /usr/local/Cellar/verilator/4.010/bin/verilator_bin /usr/local/Cellar/verilator/4.010/bin/verilator_bin VX_alu.v VX_context.v VX_context_slave.v VX_csr_handler.v VX_d_e_reg.v VX_decode.v VX_define.v VX_e_m_reg.v VX_execute.v VX_f_d_reg.v VX_fetch.v VX_forwarding.v VX_m_w_reg.v VX_memory.v VX_register_file.v VX_register_file_master_slave.v VX_register_file_slave.v VX_warp.v VX_writeback.v Vortex.v

View File

@@ -2,28 +2,30 @@
C "-Wall -cc Vortex.v --exe test_bench.cpp" C "-Wall -cc Vortex.v --exe test_bench.cpp"
S 4608404 12889046060 1553037052 0 1548678579 0 "/usr/local/Cellar/verilator/4.010/bin/verilator_bin" S 4608404 12889046060 1553037052 0 1548678579 0 "/usr/local/Cellar/verilator/4.010/bin/verilator_bin"
S 2785 12889457986 1554064009 0 1554064009 0 "VX_alu.v" S 2785 12889457986 1554064009 0 1554064009 0 "VX_alu.v"
S 3288 12890338917 1557354788 0 1557354788 0 "VX_context.v" S 3486 12890338917 1557473618 0 1557473618 0 "VX_context.v"
S 4928 12890355578 1557474515 0 1557474515 0 "VX_context_slave.v"
S 1495 12889457987 1554023089 0 1554023089 0 "VX_csr_handler.v" S 1495 12889457987 1554023089 0 1554023089 0 "VX_csr_handler.v"
S 5512 12889457988 1557345046 0 1557345046 0 "VX_d_e_reg.v" S 5512 12889457988 1557345046 0 1557345046 0 "VX_d_e_reg.v"
S 12085 12890307904 1557354665 0 1557354665 0 "VX_decode.v" S 14563 12890307904 1557474495 0 1557474495 0 "VX_decode.v"
S 1574 12890307906 1557343909 0 1557343909 0 "VX_define.v" S 1574 12890307906 1557343909 0 1557343909 0 "VX_define.v"
S 4267 12889457992 1557345117 0 1557345117 0 "VX_e_m_reg.v" S 4267 12889457992 1557345117 0 1557345117 0 "VX_e_m_reg.v"
S 3405 12889457993 1557348460 0 1557348460 0 "VX_execute.v" S 3692 12889457993 1557447660 0 1557447660 0 "VX_execute.v"
S 1751 12889457994 1557344924 0 1557344924 0 "VX_f_d_reg.v" S 1751 12889457994 1557344924 0 1557344924 0 "VX_f_d_reg.v"
S 2362 12890309989 1557358323 0 1557358323 0 "VX_fetch.v" S 4619 12890309989 1557474372 0 1557474372 0 "VX_fetch.v"
S 6293 12889457996 1557348346 0 1557348346 0 "VX_forwarding.v" S 6293 12889457996 1557348346 0 1557348346 0 "VX_forwarding.v"
S 1866 12889457997 1557348551 0 1557348551 0 "VX_m_w_reg.v" S 1866 12889457997 1557348551 0 1557348551 0 "VX_m_w_reg.v"
S 3847 12890309990 1557348518 0 1557348518 0 "VX_memory.v" S 4352 12890309990 1557474440 0 1557474440 0 "VX_memory.v"
S 1118 12889457999 1557354753 0 1557354753 0 "VX_register_file.v" S 1249 12889457999 1557474005 0 1557474005 0 "VX_register_file.v"
S 1428 12889458000 1557354772 0 1557354772 0 "VX_register_file_slave.v" S 1655 12890356143 1557474338 0 1557474338 0 "VX_register_file_master_slave.v"
S 1499 12890308905 1557267602 0 1557267602 0 "VX_warp.v" S 1599 12889458000 1557474345 0 1557474345 0 "VX_register_file_slave.v"
S 1686 12890308905 1557474462 0 1557474462 0 "VX_warp.v"
S 1568 12890307909 1557348531 0 1557348531 0 "VX_writeback.v" S 1568 12890307909 1557348531 0 1557348531 0 "VX_writeback.v"
S 18244 12890307910 1557357447 0 1557357447 0 "Vortex.v" S 18714 12890307910 1557368874 0 1557368874 0 "Vortex.v"
T 277561 12890339974 1557358338 0 1557358338 0 "obj_dir/VVortex.cpp" T 451065 12890356589 1557474518 0 1557474518 0 "obj_dir/VVortex.cpp"
T 16771 12890339973 1557358338 0 1557358338 0 "obj_dir/VVortex.h" T 20559 12890356588 1557474518 0 1557474518 0 "obj_dir/VVortex.h"
T 1800 12890339976 1557358338 0 1557358338 0 "obj_dir/VVortex.mk" T 1800 12890356591 1557474518 0 1557474518 0 "obj_dir/VVortex.mk"
T 530 12890339972 1557358338 0 1557358338 0 "obj_dir/VVortex__Syms.cpp" T 530 12890356587 1557474518 0 1557474518 0 "obj_dir/VVortex__Syms.cpp"
T 711 12890339971 1557358338 0 1557358338 0 "obj_dir/VVortex__Syms.h" T 711 12890356586 1557474518 0 1557474518 0 "obj_dir/VVortex__Syms.h"
T 512 12890339977 1557358338 0 1557358338 0 "obj_dir/VVortex__ver.d" T 563 12890356592 1557474518 0 1557474518 0 "obj_dir/VVortex__ver.d"
T 0 0 1557358338 0 1557358338 0 "obj_dir/VVortex__verFiles.dat" T 0 0 1557474518 0 1557474518 0 "obj_dir/VVortex__verFiles.dat"
T 1159 12890339975 1557358338 0 1557358338 0 "obj_dir/VVortex_classes.mk" T 1159 12890356590 1557474518 0 1557474518 0 "obj_dir/VVortex_classes.mk"

Binary file not shown.

View File

@@ -1,7 +1,7 @@
# Dynamic Instructions: 122612 # Dynamic Instructions: 222955
# of total cycles: 122624 # of total cycles: 222962
# of forwarding stalls: 0 # of forwarding stalls: 0
# of branch stalls: 0 # of branch stalls: 0
# CPI: 1.0001 # CPI: 1.00003
# time to simulate: 6.95312e-310 milliseconds # time to simulate: 6.95312e-310 milliseconds
# GRADE: Failed on test: 0 # GRADE: Failed on test: 4294967295

View File

@@ -326,11 +326,12 @@ bool Vortex::simulate(std::string file_to_simulate)
bool istop; bool istop;
bool dstop; bool dstop;
bool cont = false;
// for (int i = 0; i < 500; i++) // for (int i = 0; i < 500; i++)
// unsigned cycles; // unsigned cycles;
while (this->stop && (!(stop && (counter > 5)))) counter = 0;
while (this->stop && ((counter < 5)))
{ {
// std::cout << "************* Cycle: " << cycle << "\n"; // std::cout << "************* Cycle: " << cycle << "\n";
@@ -347,10 +348,12 @@ bool Vortex::simulate(std::string file_to_simulate)
vortex->eval(); vortex->eval();
stop = istop && dstop; // stop = istop && dstop;
stop = vortex->out_ebreak;
if (stop) if (stop || cont)
{ {
cont = true;
counter++; counter++;
} else } else
{ {