changed args structure
This commit is contained in:
@@ -5,7 +5,7 @@ extern void printc(char);
|
||||
|
||||
|
||||
int main(void);
|
||||
void matAddition (unsigned, unsigned);
|
||||
void matMult (unsigned, unsigned);
|
||||
|
||||
#include "./lib/lib.h"
|
||||
|
||||
@@ -16,16 +16,29 @@ void matAddition (unsigned, unsigned);
|
||||
|
||||
|
||||
|
||||
unsigned x[64] = {0};
|
||||
unsigned x[256] = {0};
|
||||
|
||||
unsigned y[64] = {0};
|
||||
unsigned y[256] = {0};
|
||||
|
||||
unsigned z[64] = {0};
|
||||
unsigned z[256] = {0};
|
||||
|
||||
#define MAT_DIM 8
|
||||
#define MAT_DIM 16
|
||||
#define MAX_THREADS 8
|
||||
|
||||
#define NUM_WARPS MAT_DIM
|
||||
#define NUM_THREADS MAT_DIM
|
||||
#define NUM_THREADS MAX_THREADS
|
||||
|
||||
typedef struct
|
||||
{
|
||||
unsigned * x;
|
||||
unsigned * y;
|
||||
unsigned * z;
|
||||
unsigned mat_dim;
|
||||
unsigned offset;
|
||||
|
||||
} matMult_arg_t;
|
||||
|
||||
matMult_arg_t args;
|
||||
|
||||
int main()
|
||||
{
|
||||
@@ -41,9 +54,16 @@ int main()
|
||||
y[i] = 2;
|
||||
}
|
||||
|
||||
createWarps(NUM_WARPS, NUM_THREADS, matAddition, (void *) x, (void *) y, (void *) z);
|
||||
|
||||
args.x = x;
|
||||
args.y = y;
|
||||
args.z = z;
|
||||
args.mat_dim = MAT_DIM;
|
||||
args.offset = (MAT_DIM/MAX_THREADS);
|
||||
|
||||
wait_for_done(NUM_WARPS);
|
||||
createWarps(NUM_WARPS, NUM_THREADS, matMult, (void *) (&args));
|
||||
|
||||
wait_for_done(8);
|
||||
|
||||
print_consol("-------------------------\n");
|
||||
print_consol("FINAL Z\n");
|
||||
@@ -60,25 +80,35 @@ int main()
|
||||
}
|
||||
|
||||
|
||||
void matAddition(unsigned tid, unsigned wid)
|
||||
void matMult(unsigned tid, unsigned wid)
|
||||
{
|
||||
matMult_arg_t * args = (matMult_arg_t *) get_1st_arg();
|
||||
|
||||
unsigned * x_ptr = (unsigned *) get_1st_arg();
|
||||
unsigned * y_ptr = (unsigned *) get_2nd_arg();
|
||||
unsigned * z_ptr = (unsigned *) get_3rd_arg();
|
||||
unsigned * x_ptr = args->x;
|
||||
unsigned * y_ptr = args->y;
|
||||
unsigned * z_ptr = args->z;
|
||||
|
||||
unsigned off = args->offset;
|
||||
|
||||
unsigned total = 0;
|
||||
for (unsigned place = 0; place < MAT_DIM; place++)
|
||||
unsigned i_index = off * tid;
|
||||
unsigned mat_dim = args->mat_dim;
|
||||
|
||||
for (int iter = 0; iter < off; ++iter)
|
||||
{
|
||||
unsigned x_i = (wid * MAT_DIM) + place;
|
||||
unsigned y_i = (MAT_DIM * place) + tid;
|
||||
unsigned total = 0;
|
||||
for (unsigned place = 0; place < mat_dim; ++place)
|
||||
{
|
||||
unsigned x_i = (wid * mat_dim) + place;
|
||||
unsigned y_i = (mat_dim * place) + i_index;
|
||||
|
||||
total += (x_ptr[x_i] * y_ptr[y_i]);
|
||||
total += (x_ptr[x_i] * y_ptr[y_i]);
|
||||
}
|
||||
|
||||
int final_i = (wid * mat_dim) + i_index;
|
||||
z_ptr[final_i] = total;
|
||||
i_index++;
|
||||
}
|
||||
|
||||
int final_i = (wid * MAT_DIM) + tid;
|
||||
z_ptr[final_i] = total;
|
||||
|
||||
return;
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Binary file not shown.
@@ -1,151 +1,158 @@
|
||||
:0200000480007A
|
||||
:1000000037F1FF7FEF00C00C73000000938B0600F8
|
||||
:10001000130C0700938C0700130D0800130F010049
|
||||
:100020009303050013051000635C75001301018044
|
||||
:10003000130305006B500300130515006FF0DFFE7E
|
||||
:1000400013010F0013050000930F0600938D0300AA
|
||||
:10005000EBE0BF0117050000130545516B4005009B
|
||||
:10006000B708010023A0B800678000001703000054
|
||||
:10007000130303FA6B00030067800000130141FFC4
|
||||
:10008000232011002322B100834505006388050069
|
||||
:10009000EFF01FFD130515006FF01FFF8320010017
|
||||
:1000A000832541001301C10067800000130141FF57
|
||||
:1000B000232011002322B10093050503EFF05FFA1E
|
||||
:1000C00083200100832541001301C10067800000E7
|
||||
:1000D000130101FE232E1100232C810013040102C1
|
||||
:1000E000232604FE6F0000030327C4FE9307406528
|
||||
:1000F0003307F702B707008193870746B307F70076
|
||||
:1001000013850700EF00C0238327C4FE93871700E1
|
||||
:100110002326F4FE0327C4FE93077000E3D6E7FC12
|
||||
:10012000232404FE6F008004B7070081032784FEA8
|
||||
:100130001317270093874715B307F70013073000FD
|
||||
:1001400023A0E700B7070081032784FE13172700C9
|
||||
:1001500093874725B307F7001307200023A0E70084
|
||||
:10016000832784FE938717002324F4FE032784FE4D
|
||||
:100170009307F003E3DAE7FAB7070081938747357F
|
||||
:100180003707008113074725B70600819386461578
|
||||
:10019000370600801306C6249305800013058000EF
|
||||
:1001A000EF00005913058000EF000068B7070081D9
|
||||
:1001B00013850704EFF09FECB70700811385C7058F
|
||||
:1001C000EFF0DFEB232204FE6F004005832744FE9F
|
||||
:1001D00093F7770063980700B707008113858706B8
|
||||
:1001E000EFF0DFE9B7070081032744FE131727006C
|
||||
:1001F00093874735B307F70083A7070013850700E8
|
||||
:10020000EF00402AB70700811385C706EFF01FE70C
|
||||
:10021000832744FE938717002322F4FE032744FE1E
|
||||
:100220009307F003E3D4E7FAB707008113850707C4
|
||||
:10023000EFF0DFE493070000138507008320C1017E
|
||||
:10024000032481011301010267800000130101FCF6
|
||||
:10025000232E1102232C8102130401042326A4FC63
|
||||
:100260002324B4FCEF0080642322A4FEEF00806608
|
||||
:100270002320A4FEEF008068232EA4FC232604FE86
|
||||
:10028000232404FE6F000007832784FC9397370024
|
||||
:10029000032784FEB307F700232CF4FC832784FE96
|
||||
:1002A000939737000327C4FCB307F700232AF4FC15
|
||||
:1002B000832784FD93972700032744FEB307F700A5
|
||||
:1002C00003A70700832744FD93972700832604FE96
|
||||
:1002D000B387F60083A70700B307F7020327C4FE1E
|
||||
:1002E000B307F7002326F4FE832784FE93871700C5
|
||||
:1002F0002324F4FE032784FE93077000E3F6E7F857
|
||||
:10030000832784FC139737008327C4FCB307F700C7
|
||||
:100310002328F4FC832704FD939727000327C4FDBB
|
||||
:10032000B307F7000327C4FE23A0E7001300000073
|
||||
:100330008320C103032481031301010467800000AB
|
||||
:1003400093020500130300009303700023A06200D2
|
||||
:1003500023A2620023A4620023A6720023A86200E5
|
||||
:10036000678000009302050003A3820013031300BB
|
||||
:1003700023A462001383420183AE420093935E0084
|
||||
:100380003303730003AE05002320C30103AE450011
|
||||
:100390002322C30103AE85002324C30103AEC5009D
|
||||
:1003A0002326C30103AE05012328C30103AE450183
|
||||
:1003B000232AC30103AE8501232CC30103AEC5016B
|
||||
:1003C000232EC301938E1E00130F20036394EE01AE
|
||||
:1003D000930E000023A2D201678000009302050063
|
||||
:1003E00003A382001303F3FF23A4620013834201DB
|
||||
:1003F00083AE0200930F2003138F0E00130F1F0014
|
||||
:100400006314FF01130F000023A0E20193935E0029
|
||||
:1004100033037300032E030023A0C501032E430002
|
||||
:1004200023A2C501032E830023A4C501032EC3000C
|
||||
:1004300023A6C501032E030123A8C501032E4301F2
|
||||
:1004400023AAC501032E830123ACC501032EC301DA
|
||||
:1004500023AEC501678000009302050003A382005C
|
||||
:1004600013050000130E200363146E00130515001E
|
||||
:10047000678000009302050003A3820013050000BB
|
||||
:10048000130E000063146E00130515006780000052
|
||||
:100490009302050003A3C20083A3020133B56300E6
|
||||
:1004A00067800000130101FD23261102232481022D
|
||||
:1004B00013040103232EA4FC0327C4FD9307F000BB
|
||||
:1004C00063E4E702B70700810327C4FD1317270081
|
||||
:1004D00093874711B307F70083A707001385070029
|
||||
:1004E000EFF0DFB96F004007930700022326F4FE08
|
||||
:1004F000A30504FE8327C4FE9387C7FF0327C4FD1B
|
||||
:10050000B357F70093F7F7002322F4FE832744FE46
|
||||
:100510006386070093071000A305F4FE8347B4FE2B
|
||||
:1005200063820702B7070081032744FE13172700E1
|
||||
:1005300093874711B307F70083A7070013850700C8
|
||||
:10054000EFF0DFB38327C4FE9387C7FF2326F4FEB3
|
||||
:100550008327C4FEE340F0FA8320C1020324810212
|
||||
:100560001301010367800000130101FD232611021E
|
||||
:10057000232481022322A1031304010313070D0086
|
||||
:10058000930740653307F702B70700819387074653
|
||||
:10059000B307F70013850700EFF0DFED93070500C1
|
||||
:1005A0006380070213070D00B73700819387077038
|
||||
:1005B000B307F700130710002380E7007300000063
|
||||
:1005C00013070D00930740653307F702B707008153
|
||||
:1005D00093870746B307F700130704FD9305070049
|
||||
:1005E00013850700EFF09FDF832784FD1381070049
|
||||
:1005F000032544FD832504FD0326C4FD832604FE54
|
||||
:10060000032744FE832784FE0328C4FEEFF01FA0C7
|
||||
:1006100073000000130000008320C1020324810244
|
||||
:10062000032D41021301010367800000130101FC47
|
||||
:10063000232E1102232C81021304010493090100CB
|
||||
:10064000232604FE6F0080080327C4FE930740653D
|
||||
:100650003307F702B707008193870746B307F70010
|
||||
:1006600013850700EFF01FE193070500639A070465
|
||||
:100670000327C4FE930740653307F702B7070081DD
|
||||
:1006800093870746B307F7001307C4FC93050700D9
|
||||
:1006900013850700EFF09FD4832744FD13810700E3
|
||||
:1006A000032504FD8325C4FC032684FD8326C4FDA5
|
||||
:1006B000032704FE832744FE032884FEEFF01F9BDC
|
||||
:1006C0008327C4FE938717002326F4FE0327C4FE66
|
||||
:1006D00093076000E3DAE7F61381090013000000D6
|
||||
:1006E0008320C103032481031301010467800000F8
|
||||
:1006F000130101FD2326810213040103232EA4FC10
|
||||
:10070000232604FE6F0000018327C4FE9387170091
|
||||
:100710002326F4FE0327C4FE8327C4FDE346F7FE29
|
||||
:10072000130000000324C1021301010367800000CD
|
||||
:10073000130101FA232E1104232C81041304010652
|
||||
:10074000232EA4FA232CB4FA232AC4FA2328D4FA99
|
||||
:100750002326E4FA2324F4FA13090100232604FED5
|
||||
:10076000232404FE6F00C009B709FFFF33013101E4
|
||||
:10077000832784FE2324F4FC832784FB2326F4FCB4
|
||||
:10078000930701002328F4FC832744FB232AF4FC6D
|
||||
:10079000832704FB232CF4FC8327C4FA232EF4FCC8
|
||||
:1007A000832784FA2320F4FE8327C4FE2322F4FE49
|
||||
:1007B0000327C4FE930740653307F702B70700819C
|
||||
:1007C00093870746B307F700130784FC93050700D8
|
||||
:1007D00013850700EFF01FB98327C4FE9387170026
|
||||
:1007E0002326F4FE0327C4FE9307600063D4E700CA
|
||||
:1007F000232604FE832784FE938717002324F4FE18
|
||||
:10080000032784FE8327C4FBE360F7F61301090086
|
||||
:10081000EFF0DFE1130000008320C1050324810510
|
||||
:100820001301010667800000130101FD23268102E8
|
||||
:1008300013040103232EA4FCA30704FE6F0000058C
|
||||
:1008400093071000A307F4FE232404FE6F00400367
|
||||
:100850008347F4FE3737008193060770032784FE31
|
||||
:100860003387E60003470700B3F7E700B337F0002C
|
||||
:10087000A307F4FE832784FE938717002324F4FE46
|
||||
:10088000832784FE0327C4FDE3E4E7FC8347F4FEEB
|
||||
:1008900093C7170093F7F70FE39407FA13000000CC
|
||||
:1008A0000324C1021301010367800000130101FF4B
|
||||
:1008B00023268100232471011304010193870B0077
|
||||
:1008C000138507000324C100832B8100130101015C
|
||||
:1008D00067800000130101FF23268100232481018A
|
||||
:1008E0001304010193070C00138507000324C100C2
|
||||
:1008F000032C81001301010167800000130101FF37
|
||||
:1009000023268100232491011304010193870C0005
|
||||
:10091000138507000324C100832C8100130101010A
|
||||
:0409200067800000EC
|
||||
:1000000037F1FF7FEF00400C73000000938B060078
|
||||
:10001000130D0700130F01009303050013051000D3
|
||||
:10002000635C750013010180130305006B5003002E
|
||||
:10003000130515006FF0DFFE13010F00130500001C
|
||||
:10004000930F0600938D0300EBE0BF01170500003E
|
||||
:100050001305055B6B400500B708010023A0B8003D
|
||||
:100060006780000017030000130383FA6B0003008E
|
||||
:1000700067800000130141FF232011002322B100FB
|
||||
:100080008345050063880500EFF01FFD130515008B
|
||||
:100090006FF01FFF83200100832541001301C10081
|
||||
:1000A00067800000130141FF232011002322B100CB
|
||||
:1000B00093050503EFF05FFA8320010083254100DB
|
||||
:1000C0001301C10067800000130101FE232E1100FF
|
||||
:1000D000232C810013040102232604FE6F00000379
|
||||
:1000E0000327C4FE9307404C3307F702B71700817C
|
||||
:1000F000938747D7B307F70013850700EF00802FDA
|
||||
:100100008327C4FE938717002326F4FE0327C4FE2B
|
||||
:1001100093077000E3D6E7FC232404FE6F008004FD
|
||||
:10012000B7070081032784FE13172700938747151D
|
||||
:10013000B307F7001307300023A0E700B7070081DB
|
||||
:10014000032784FE1317270093874755B307F7004B
|
||||
:100150001307200023A0E700832784FE938717005E
|
||||
:100160002324F4FE032784FE9307F00FE3DAE7FA73
|
||||
:10017000B7170081370700811307471523A0E7D67B
|
||||
:10018000B7170081938707D63707008113074755B4
|
||||
:1001900023A2E700B7170081938707D6371700819E
|
||||
:1001A0001307479523A4E700B7170081938707D665
|
||||
:1001B0001307000123A6E700B7170081938707D62E
|
||||
:1001C0001307200023A8E700B7170081938607D6FE
|
||||
:1001D000B70700801386C728930580001305000128
|
||||
:1001E000EF00C05D13058000EF00406BB707008192
|
||||
:1001F00013850704EFF01FE8B70700811385C705D3
|
||||
:10020000EFF05FE7232204FE6F004005832744FEE2
|
||||
:1002100093F7F70063980700B707008113858706F7
|
||||
:10022000EFF05FE5B7170081032744FE131727009F
|
||||
:1002300093874795B307F70083A707001385070047
|
||||
:10024000EF00802FB70700811385C706EFF09FE20C
|
||||
:10025000832744FE938717002322F4FE032744FEDE
|
||||
:100260009307F00FE3D4E7FAB70700811385070778
|
||||
:10027000EFF05FE093070000138507008320C101C2
|
||||
:10028000032481011301010267800000130101FAB8
|
||||
:10029000232E1104232C8104130401062326A4FA1F
|
||||
:1002A0002324B4FAEF00C067232EA4FC8327C4FDE7
|
||||
:1002B00083A70700232CF4FC8327C4FD83A74700F2
|
||||
:1002C000232AF4FC8327C4FD83A787002328F4FC9A
|
||||
:1002D0008327C4FD83A707012326F4FC0327C4FC5E
|
||||
:1002E0008327C4FAB307F7022326F4FE8327C4FD4D
|
||||
:1002F00083A7C7002324F4FC232404FE6F00800D91
|
||||
:10030000232204FE232004FE6F008007032784FAC3
|
||||
:10031000832784FCB307F702032704FEB307F70023
|
||||
:100320002322F4FC032784FC832704FEB307F7028F
|
||||
:100330000327C4FEB307F7002320F4FC832744FC03
|
||||
:1003400093972700032784FDB307F70003A707004F
|
||||
:10035000832704FC93972700832644FDB387F60088
|
||||
:1003600083A70700B307F702032744FEB307F7008C
|
||||
:100370002322F4FE832704FE938717002320F4FE34
|
||||
:10038000032704FE832784FCE362F7F8032784FA3B
|
||||
:10039000832784FC3307F7028327C4FEB307F700E3
|
||||
:1003A000232EF4FA8327C4FB93972700032704FD29
|
||||
:1003B000B307F700032744FE23A0E7008327C4FE0A
|
||||
:1003C000938717002326F4FE832784FE9387170064
|
||||
:1003D0002324F4FE832784FE0327C4FCE3E2E7F230
|
||||
:1003E000130000008320C1050324810513010106C9
|
||||
:1003F0006780000093020500130300009303700060
|
||||
:1004000023A0620023A2620023A4620023A672003C
|
||||
:1004100023A86200678000009302050003A3820006
|
||||
:100420001303130023A462001383420183AE42002E
|
||||
:1004300093935E003303730003AE05002320C301D2
|
||||
:1004400003AE45002322C30103AE85002324C3016C
|
||||
:1004500003AEC5002326C30103AE05012328C30153
|
||||
:1004600003AE4501232AC301938E1E00130F200300
|
||||
:100470006394EE01930E000023A2D2016780000076
|
||||
:100480009302050003A382001303F3FF23A4620079
|
||||
:100490001383420183AE0200930F2003138F0E00DB
|
||||
:1004A000130F1F006314FF01130F000023A0E201CC
|
||||
:1004B00093935E0033037300032E030023A0C50152
|
||||
:1004C000032E430023A2C501032E830023A4C501EC
|
||||
:1004D000032EC30023A6C501032E030123A8C501D3
|
||||
:1004E000032E430123AAC501678000009302050083
|
||||
:1004F00003A3820013050000130E200363146E0093
|
||||
:1005000013051500678000009302050003A3820015
|
||||
:1005100013050000130E000063146E001305150090
|
||||
:10052000678000009302050003A3C20083A30201B9
|
||||
:1005300033B5630067800000130101FD232611021B
|
||||
:100540002324810213040103232EA4FC0327C4FDEA
|
||||
:100550009307F00063E4E702B70700810327C4FDB7
|
||||
:100560001317270093874711B307F70083A70700E6
|
||||
:1005700013850700EFF01FB06F00400793070002DC
|
||||
:100580002326F4FEA30504FE8327C4FE9387C7FF3A
|
||||
:100590000327C4FDB357F70093F7F7002322F4FEB7
|
||||
:1005A000832744FE6386070093071000A305F4FE2B
|
||||
:1005B0008347B4FE63820702B7070081032744FE26
|
||||
:1005C0001317270093874711B307F70083A7070086
|
||||
:1005D00013850700EFF01FAA8327C4FE9387C7FF88
|
||||
:1005E0002326F4FE8327C4FEE340F0FA8320C102F1
|
||||
:1005F000032481021301010367800000130101FD40
|
||||
:1006000023261102232481022322A10313040103C0
|
||||
:1006100013070D009307404C3307F702B71700810B
|
||||
:10062000938747D7B307F70013850700EFF0DFED97
|
||||
:10063000930705006380070213070D00B737008199
|
||||
:1006400093874739B307F700130710002380E700AB
|
||||
:100650007300000013070D009307404C3307F702A7
|
||||
:10066000B7170081938747D7B307F700130784FDB7
|
||||
:100670009305070013850700EFF09FE0832704FE32
|
||||
:10068000138107008327C4FD832584FD032644FED0
|
||||
:10069000832684FE0327C4FE13850700EFF01F970F
|
||||
:1006A00073000000130000008320C10203248102B4
|
||||
:1006B000032D41021301010367800000130101FDB6
|
||||
:1006C000232611022324810213040103930901004C
|
||||
:1006D000232604FE6F0040080327C4FE9307404C06
|
||||
:1006E0003307F702B7170081938747D7B307F7009F
|
||||
:1006F00013850700EFF05FE1930705006398070497
|
||||
:100700000327C4FE9307404C3307F702B717008155
|
||||
:10071000938747D7B307F700130744FD93050700F6
|
||||
:1007200013850700EFF0DFD58327C4FD1381070091
|
||||
:10073000832784FD832544FD032604FE832644FE8F
|
||||
:10074000032784FE13850700EFF0DF918327C4FEA3
|
||||
:10075000938717002326F4FE0327C4FE9307600047
|
||||
:10076000E3DCE7F613810900130000008320C102D7
|
||||
:10077000032481021301010367800000130101FDBE
|
||||
:100780002326810213040103232EA4FC232604FE46
|
||||
:100790006F0000018327C4FE938717002326F4FE11
|
||||
:1007A0000327C4FE8327C4FDE346F7FE13000000C1
|
||||
:1007B0000324C1021301010367800000130101FC3F
|
||||
:1007C000232E1102232C8102130401042326A4FCEE
|
||||
:1007D0002324B4FC2322C4FC2320D4FC13090100ED
|
||||
:1007E000232604FE232404FE6F00C008B709FFFF80
|
||||
:1007F00033013101832784FE2328F4FC832784FC02
|
||||
:10080000232AF4FC93070100232CF4FC832744FCE7
|
||||
:10081000232EF4FC832704FC2320F4FE8327C4FE4C
|
||||
:100820002322F4FE0327C4FE9307404C3307F7024C
|
||||
:10083000B7170081938747D7B307F700130704FD65
|
||||
:100840009305070013850700EFF01FBD8327C4FE43
|
||||
:10085000938717002326F4FE0327C4FE9307600046
|
||||
:1008600063D4E700232604FE832784FE93871700C2
|
||||
:100870002324F4FE032784FE8327C4FCE368F7F6F1
|
||||
:1008800013010900EFF09FE3130000008320C10370
|
||||
:10089000032481031301010467800000130101FD9B
|
||||
:1008A0002326810213040103232EA4FCA30704FEC4
|
||||
:1008B0006F00000593071000A307F4FE232404FE35
|
||||
:1008C0006F0040038347F4FE3737008193064739B2
|
||||
:1008D000032784FE3387E60003470700B3F7E700EA
|
||||
:1008E000B337F000A307F4FE832784FE9387170035
|
||||
:1008F0002324F4FE832784FE0327C4FDE3E4E7FCFE
|
||||
:100900008347F4FE93C7170093F7F70FE39407FAB2
|
||||
:10091000130000000324C1021301010367800000DB
|
||||
:10092000130101FF23268100232471011304010117
|
||||
:1009300093870B00138507000324C100832B8100DC
|
||||
:100940001301010167800000130101FF23268100CC
|
||||
:10095000232481011304010193070C001385070070
|
||||
:100960000324C100032C81001301010167800000F2
|
||||
:10097000130101FF232681002324910113040101A7
|
||||
:1009800093870C00138507000324C100832C81008A
|
||||
:08099000130101016780000062
|
||||
:02000004810079
|
||||
:10000000300000003100000032000000330000002A
|
||||
:10001000340000003500000036000000370000000A
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
#include "lib.h"
|
||||
|
||||
|
||||
extern void createThreads(unsigned, unsigned, unsigned, void *, void *, void *, unsigned);
|
||||
extern void wspawn(unsigned, unsigned, unsigned, void *, void *, void *, unsigned);
|
||||
extern void createThreads(unsigned, unsigned, unsigned, void *, unsigned);
|
||||
extern void wspawn(unsigned, unsigned, unsigned, void *, unsigned);
|
||||
extern void print_consol(char *);
|
||||
extern void printc(char);
|
||||
|
||||
@@ -33,9 +33,6 @@ void reschedule_warps()
|
||||
|
||||
if (queue_isEmpty(q+curr_warp))
|
||||
{
|
||||
// print_consol("done: ");
|
||||
// int_print(curr_warp);
|
||||
// print_consol("\n");
|
||||
done[curr_warp] = true;
|
||||
ECALL;
|
||||
}
|
||||
@@ -43,7 +40,7 @@ void reschedule_warps()
|
||||
Job j;
|
||||
queue_dequeue(q+curr_warp,&j);
|
||||
asm __volatile__("mv sp,%0"::"r" (j.base_sp):);
|
||||
createThreads(j.n_threads, j.wid, j.func_ptr, j.x, j.y, j.z, j.assigned_warp);
|
||||
createThreads(j.n_threads, j.wid, j.func_ptr, j.args, j.assigned_warp);
|
||||
|
||||
ECALL;
|
||||
|
||||
@@ -60,7 +57,7 @@ void schedule_warps()
|
||||
Job j;
|
||||
queue_dequeue(q+curr_warp,&j);
|
||||
asm __volatile__("mv sp,%0"::"r" (j.base_sp):);
|
||||
wspawn(j.n_threads, j.wid, j.func_ptr, j.x, j.y, j.z, j.assigned_warp);
|
||||
wspawn(j.n_threads, j.wid, j.func_ptr, j.args, j.assigned_warp);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -75,7 +72,7 @@ void sleep(int t)
|
||||
|
||||
|
||||
|
||||
void createWarps(unsigned num_Warps, unsigned num_threads, FUNC, void * x_ptr, void * y_ptr, void * z_ptr)
|
||||
void createWarps(unsigned num_Warps, unsigned num_threads, FUNC, void * args)
|
||||
{
|
||||
asm __volatile__("addi s2, sp, 0");
|
||||
int warp = 0;
|
||||
@@ -90,9 +87,7 @@ void createWarps(unsigned num_Warps, unsigned num_threads, FUNC, void * x_ptr, v
|
||||
j.n_threads = num_threads;
|
||||
j.base_sp = stack_ptr;
|
||||
j.func_ptr = (unsigned) func;
|
||||
j.x = x_ptr;
|
||||
j.y = y_ptr;
|
||||
j.z = z_ptr;
|
||||
j.args = args;
|
||||
j.assigned_warp = warp;
|
||||
|
||||
queue_enqueue(q + warp,&j);
|
||||
|
||||
@@ -35,7 +35,7 @@ static bool done[] = {false, false, false, false, false, false, false};
|
||||
static int main_sp[1];
|
||||
|
||||
#define FUNC void (func)(unsigned, unsigned)
|
||||
void createWarps(unsigned num_Warps, unsigned num_threads, FUNC, void *, void *, void *);
|
||||
void createWarps(unsigned num_Warps, unsigned num_threads, FUNC, void *);
|
||||
void reschedule_warps(void);
|
||||
void int_print(unsigned);
|
||||
void wait_for_done(unsigned);
|
||||
@@ -46,5 +46,10 @@ void * get_3rd_arg(void);
|
||||
void sleep(int);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@@ -13,10 +13,8 @@ _start:
|
||||
.type createThreads, @function
|
||||
.global createThreads
|
||||
createThreads:
|
||||
mv s7 ,a3 # Moving x_ptr to s7
|
||||
mv s8 ,a4 # Moving y_ptr to s8
|
||||
mv s9 ,a5 # Moving z_ptr to s9
|
||||
mv s10,a6 # Moving assigned_warp to s10
|
||||
mv s7 ,a3 # Moving args to s7
|
||||
mv s10,a4 # Moving assigned_warp to s10
|
||||
mv t5 ,sp # Saving the current stack pointer to t5
|
||||
mv t2 , a0 # t2 = num_threads
|
||||
loop_init:
|
||||
|
||||
@@ -1,44 +0,0 @@
|
||||
/* ---- Original Script: /opt/riscv32i/riscv32-unknown-elf/lib/ldscripts/elf32lriscv.x ---- */
|
||||
/* Default linker script, for normal executables */
|
||||
/* Copyright (C) 2014-2017 Free Software Foundation, Inc.
|
||||
Copying and distribution of this script, with or without modification,
|
||||
are permitted in any medium without royalty provided the copyright
|
||||
notice and this notice are preserved. */
|
||||
OUTPUT_FORMAT("elf32-littleriscv", "elf32-littleriscv",
|
||||
"elf32-littleriscv")
|
||||
OUTPUT_ARCH(riscv)
|
||||
ENTRY(main)
|
||||
SECTIONS
|
||||
{
|
||||
. = 0x80000000;
|
||||
.text :
|
||||
{
|
||||
*(.text)
|
||||
*(.text.unlikely .text.*_unlikely .text.unlikely.*)
|
||||
*(.text.exit .text.exit.*)
|
||||
*(.text.startup .text.startup.*)
|
||||
*(.text.hot .text.hot.*)
|
||||
*(.stub .text.* .gnu.linkonce.t.*)
|
||||
/* .gnu.warning sections are handled specially by elf32.em. */
|
||||
*(.gnu.warning)
|
||||
}
|
||||
.init :
|
||||
{
|
||||
KEEP (*(SORT_NONE(.init)))
|
||||
}
|
||||
.plt : { *(.plt) }
|
||||
.iplt : { *(.iplt) }
|
||||
.fini :
|
||||
{
|
||||
KEEP (*(SORT_NONE(.fini)))
|
||||
}
|
||||
PROVIDE (__etext = .);
|
||||
PROVIDE (_etext = .);
|
||||
PROVIDE (etext = .);
|
||||
. = 0x81000000;
|
||||
.rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
|
||||
.rodata1 : { *(.rodata1) }
|
||||
. = 0x82000000;
|
||||
.comment : { *(.comment) }
|
||||
|
||||
}
|
||||
@@ -1,481 +0,0 @@
|
||||
|
||||
queue.elf: file format elf32-littleriscv
|
||||
|
||||
|
||||
Disassembly of section .text:
|
||||
|
||||
80000000 <main>:
|
||||
80000000: fd010113 addi sp,sp,-48
|
||||
80000004: 02112623 sw ra,44(sp)
|
||||
80000008: 02812423 sw s0,40(sp)
|
||||
8000000c: 03010413 addi s0,sp,48
|
||||
80000010: 800007b7 lui a5,0x80000
|
||||
80000014: 5e078793 addi a5,a5,1504 # 800005e0 <y+0xff0004b4>
|
||||
80000018: fef42023 sw a5,-32(s0)
|
||||
8000001c: 810007b7 lui a5,0x81000
|
||||
80000020: 0ec78793 addi a5,a5,236 # 810000ec <y+0xffffffc0>
|
||||
80000024: fef42223 sw a5,-28(s0)
|
||||
80000028: 810007b7 lui a5,0x81000
|
||||
8000002c: 12c78793 addi a5,a5,300 # 8100012c <y+0x0>
|
||||
80000030: fef42423 sw a5,-24(s0)
|
||||
80000034: 810007b7 lui a5,0x81000
|
||||
80000038: 00078793 mv a5,a5
|
||||
8000003c: fef42623 sw a5,-20(s0)
|
||||
80000040: fe042603 lw a2,-32(s0)
|
||||
80000044: fe442683 lw a3,-28(s0)
|
||||
80000048: fe842703 lw a4,-24(s0)
|
||||
8000004c: fec42783 lw a5,-20(s0)
|
||||
80000050: fcc42823 sw a2,-48(s0)
|
||||
80000054: fcd42a23 sw a3,-44(s0)
|
||||
80000058: fce42c23 sw a4,-40(s0)
|
||||
8000005c: fcf42e23 sw a5,-36(s0)
|
||||
80000060: fd040793 addi a5,s0,-48
|
||||
80000064: 00078513 mv a0,a5
|
||||
80000068: 390000ef jal ra,800003f8 <enqueue>
|
||||
8000006c: fe042603 lw a2,-32(s0)
|
||||
80000070: fe442683 lw a3,-28(s0)
|
||||
80000074: fe842703 lw a4,-24(s0)
|
||||
80000078: fec42783 lw a5,-20(s0)
|
||||
8000007c: fcc42823 sw a2,-48(s0)
|
||||
80000080: fcd42a23 sw a3,-44(s0)
|
||||
80000084: fce42c23 sw a4,-40(s0)
|
||||
80000088: fcf42e23 sw a5,-36(s0)
|
||||
8000008c: fd040793 addi a5,s0,-48
|
||||
80000090: 00078513 mv a0,a5
|
||||
80000094: 364000ef jal ra,800003f8 <enqueue>
|
||||
80000098: fe042603 lw a2,-32(s0)
|
||||
8000009c: fe442683 lw a3,-28(s0)
|
||||
800000a0: fe842703 lw a4,-24(s0)
|
||||
800000a4: fec42783 lw a5,-20(s0)
|
||||
800000a8: fcc42823 sw a2,-48(s0)
|
||||
800000ac: fcd42a23 sw a3,-44(s0)
|
||||
800000b0: fce42c23 sw a4,-40(s0)
|
||||
800000b4: fcf42e23 sw a5,-36(s0)
|
||||
800000b8: fd040793 addi a5,s0,-48
|
||||
800000bc: 00078513 mv a0,a5
|
||||
800000c0: 338000ef jal ra,800003f8 <enqueue>
|
||||
800000c4: fe042603 lw a2,-32(s0)
|
||||
800000c8: fe442683 lw a3,-28(s0)
|
||||
800000cc: fe842703 lw a4,-24(s0)
|
||||
800000d0: fec42783 lw a5,-20(s0)
|
||||
800000d4: fcc42823 sw a2,-48(s0)
|
||||
800000d8: fcd42a23 sw a3,-44(s0)
|
||||
800000dc: fce42c23 sw a4,-40(s0)
|
||||
800000e0: fcf42e23 sw a5,-36(s0)
|
||||
800000e4: fd040793 addi a5,s0,-48
|
||||
800000e8: 00078513 mv a0,a5
|
||||
800000ec: 30c000ef jal ra,800003f8 <enqueue>
|
||||
800000f0: fe042603 lw a2,-32(s0)
|
||||
800000f4: fe442683 lw a3,-28(s0)
|
||||
800000f8: fe842703 lw a4,-24(s0)
|
||||
800000fc: fec42783 lw a5,-20(s0)
|
||||
80000100: fcc42823 sw a2,-48(s0)
|
||||
80000104: fcd42a23 sw a3,-44(s0)
|
||||
80000108: fce42c23 sw a4,-40(s0)
|
||||
8000010c: fcf42e23 sw a5,-36(s0)
|
||||
80000110: fd040793 addi a5,s0,-48
|
||||
80000114: 00078513 mv a0,a5
|
||||
80000118: 2e0000ef jal ra,800003f8 <enqueue>
|
||||
8000011c: fe042603 lw a2,-32(s0)
|
||||
80000120: fe442683 lw a3,-28(s0)
|
||||
80000124: fe842703 lw a4,-24(s0)
|
||||
80000128: fec42783 lw a5,-20(s0)
|
||||
8000012c: fcc42823 sw a2,-48(s0)
|
||||
80000130: fcd42a23 sw a3,-44(s0)
|
||||
80000134: fce42c23 sw a4,-40(s0)
|
||||
80000138: fcf42e23 sw a5,-36(s0)
|
||||
8000013c: fd040793 addi a5,s0,-48
|
||||
80000140: 00078513 mv a0,a5
|
||||
80000144: 2b4000ef jal ra,800003f8 <enqueue>
|
||||
80000148: fe040793 addi a5,s0,-32
|
||||
8000014c: 00078513 mv a0,a5
|
||||
80000150: 36c000ef jal ra,800004bc <dequeue>
|
||||
80000154: fd040793 addi a5,s0,-48
|
||||
80000158: 00078513 mv a0,a5
|
||||
8000015c: 360000ef jal ra,800004bc <dequeue>
|
||||
80000160: fd042603 lw a2,-48(s0)
|
||||
80000164: fd442683 lw a3,-44(s0)
|
||||
80000168: fd842703 lw a4,-40(s0)
|
||||
8000016c: fdc42783 lw a5,-36(s0)
|
||||
80000170: fec42023 sw a2,-32(s0)
|
||||
80000174: fed42223 sw a3,-28(s0)
|
||||
80000178: fee42423 sw a4,-24(s0)
|
||||
8000017c: fef42623 sw a5,-20(s0)
|
||||
80000180: fe042603 lw a2,-32(s0)
|
||||
80000184: fe442683 lw a3,-28(s0)
|
||||
80000188: fe842703 lw a4,-24(s0)
|
||||
8000018c: fec42783 lw a5,-20(s0)
|
||||
80000190: fcc42823 sw a2,-48(s0)
|
||||
80000194: fcd42a23 sw a3,-44(s0)
|
||||
80000198: fce42c23 sw a4,-40(s0)
|
||||
8000019c: fcf42e23 sw a5,-36(s0)
|
||||
800001a0: fd040793 addi a5,s0,-48
|
||||
800001a4: 00078513 mv a0,a5
|
||||
800001a8: 250000ef jal ra,800003f8 <enqueue>
|
||||
800001ac: fe042603 lw a2,-32(s0)
|
||||
800001b0: fe442683 lw a3,-28(s0)
|
||||
800001b4: fe842703 lw a4,-24(s0)
|
||||
800001b8: fec42783 lw a5,-20(s0)
|
||||
800001bc: fcc42823 sw a2,-48(s0)
|
||||
800001c0: fcd42a23 sw a3,-44(s0)
|
||||
800001c4: fce42c23 sw a4,-40(s0)
|
||||
800001c8: fcf42e23 sw a5,-36(s0)
|
||||
800001cc: fd040793 addi a5,s0,-48
|
||||
800001d0: 00078513 mv a0,a5
|
||||
800001d4: 224000ef jal ra,800003f8 <enqueue>
|
||||
800001d8: 3a4000ef jal ra,8000057c <isFull>
|
||||
800001dc: 00050793 mv a5,a0
|
||||
800001e0: 02079863 bnez a5,80000210 <main+0x210>
|
||||
800001e4: fe042603 lw a2,-32(s0)
|
||||
800001e8: fe442683 lw a3,-28(s0)
|
||||
800001ec: fe842703 lw a4,-24(s0)
|
||||
800001f0: fec42783 lw a5,-20(s0)
|
||||
800001f4: fcc42823 sw a2,-48(s0)
|
||||
800001f8: fcd42a23 sw a3,-44(s0)
|
||||
800001fc: fce42c23 sw a4,-40(s0)
|
||||
80000200: fcf42e23 sw a5,-36(s0)
|
||||
80000204: fd040793 addi a5,s0,-48
|
||||
80000208: 00078513 mv a0,a5
|
||||
8000020c: 1ec000ef jal ra,800003f8 <enqueue>
|
||||
80000210: 36c000ef jal ra,8000057c <isFull>
|
||||
80000214: 00050793 mv a5,a0
|
||||
80000218: 02079863 bnez a5,80000248 <main+0x248>
|
||||
8000021c: fe042603 lw a2,-32(s0)
|
||||
80000220: fe442683 lw a3,-28(s0)
|
||||
80000224: fe842703 lw a4,-24(s0)
|
||||
80000228: fec42783 lw a5,-20(s0)
|
||||
8000022c: fcc42823 sw a2,-48(s0)
|
||||
80000230: fcd42a23 sw a3,-44(s0)
|
||||
80000234: fce42c23 sw a4,-40(s0)
|
||||
80000238: fcf42e23 sw a5,-36(s0)
|
||||
8000023c: fd040793 addi a5,s0,-48
|
||||
80000240: 00078513 mv a0,a5
|
||||
80000244: 1b4000ef jal ra,800003f8 <enqueue>
|
||||
80000248: 334000ef jal ra,8000057c <isFull>
|
||||
8000024c: 00050793 mv a5,a0
|
||||
80000250: 02079863 bnez a5,80000280 <main+0x280>
|
||||
80000254: fe042603 lw a2,-32(s0)
|
||||
80000258: fe442683 lw a3,-28(s0)
|
||||
8000025c: fe842703 lw a4,-24(s0)
|
||||
80000260: fec42783 lw a5,-20(s0)
|
||||
80000264: fcc42823 sw a2,-48(s0)
|
||||
80000268: fcd42a23 sw a3,-44(s0)
|
||||
8000026c: fce42c23 sw a4,-40(s0)
|
||||
80000270: fcf42e23 sw a5,-36(s0)
|
||||
80000274: fd040793 addi a5,s0,-48
|
||||
80000278: 00078513 mv a0,a5
|
||||
8000027c: 17c000ef jal ra,800003f8 <enqueue>
|
||||
80000280: 2fc000ef jal ra,8000057c <isFull>
|
||||
80000284: 00050793 mv a5,a0
|
||||
80000288: 02079863 bnez a5,800002b8 <main+0x2b8>
|
||||
8000028c: fe042603 lw a2,-32(s0)
|
||||
80000290: fe442683 lw a3,-28(s0)
|
||||
80000294: fe842703 lw a4,-24(s0)
|
||||
80000298: fec42783 lw a5,-20(s0)
|
||||
8000029c: fcc42823 sw a2,-48(s0)
|
||||
800002a0: fcd42a23 sw a3,-44(s0)
|
||||
800002a4: fce42c23 sw a4,-40(s0)
|
||||
800002a8: fcf42e23 sw a5,-36(s0)
|
||||
800002ac: fd040793 addi a5,s0,-48
|
||||
800002b0: 00078513 mv a0,a5
|
||||
800002b4: 144000ef jal ra,800003f8 <enqueue>
|
||||
800002b8: 2c4000ef jal ra,8000057c <isFull>
|
||||
800002bc: 00050793 mv a5,a0
|
||||
800002c0: 02079863 bnez a5,800002f0 <main+0x2f0>
|
||||
800002c4: fe042603 lw a2,-32(s0)
|
||||
800002c8: fe442683 lw a3,-28(s0)
|
||||
800002cc: fe842703 lw a4,-24(s0)
|
||||
800002d0: fec42783 lw a5,-20(s0)
|
||||
800002d4: fcc42823 sw a2,-48(s0)
|
||||
800002d8: fcd42a23 sw a3,-44(s0)
|
||||
800002dc: fce42c23 sw a4,-40(s0)
|
||||
800002e0: fcf42e23 sw a5,-36(s0)
|
||||
800002e4: fd040793 addi a5,s0,-48
|
||||
800002e8: 00078513 mv a0,a5
|
||||
800002ec: 10c000ef jal ra,800003f8 <enqueue>
|
||||
800002f0: 28c000ef jal ra,8000057c <isFull>
|
||||
800002f4: 00050793 mv a5,a0
|
||||
800002f8: 02079863 bnez a5,80000328 <main+0x328>
|
||||
800002fc: fe042603 lw a2,-32(s0)
|
||||
80000300: fe442683 lw a3,-28(s0)
|
||||
80000304: fe842703 lw a4,-24(s0)
|
||||
80000308: fec42783 lw a5,-20(s0)
|
||||
8000030c: fcc42823 sw a2,-48(s0)
|
||||
80000310: fcd42a23 sw a3,-44(s0)
|
||||
80000314: fce42c23 sw a4,-40(s0)
|
||||
80000318: fcf42e23 sw a5,-36(s0)
|
||||
8000031c: fd040793 addi a5,s0,-48
|
||||
80000320: 00078513 mv a0,a5
|
||||
80000324: 0d4000ef jal ra,800003f8 <enqueue>
|
||||
80000328: fd040793 addi a5,s0,-48
|
||||
8000032c: 00078513 mv a0,a5
|
||||
80000330: 18c000ef jal ra,800004bc <dequeue>
|
||||
80000334: fd040793 addi a5,s0,-48
|
||||
80000338: 00078513 mv a0,a5
|
||||
8000033c: 180000ef jal ra,800004bc <dequeue>
|
||||
80000340: fd040793 addi a5,s0,-48
|
||||
80000344: 00078513 mv a0,a5
|
||||
80000348: 174000ef jal ra,800004bc <dequeue>
|
||||
8000034c: fd040793 addi a5,s0,-48
|
||||
80000350: 00078513 mv a0,a5
|
||||
80000354: 168000ef jal ra,800004bc <dequeue>
|
||||
80000358: fd040793 addi a5,s0,-48
|
||||
8000035c: 00078513 mv a0,a5
|
||||
80000360: 15c000ef jal ra,800004bc <dequeue>
|
||||
80000364: fd040793 addi a5,s0,-48
|
||||
80000368: 00078513 mv a0,a5
|
||||
8000036c: 150000ef jal ra,800004bc <dequeue>
|
||||
80000370: fd040793 addi a5,s0,-48
|
||||
80000374: 00078513 mv a0,a5
|
||||
80000378: 144000ef jal ra,800004bc <dequeue>
|
||||
8000037c: fd040793 addi a5,s0,-48
|
||||
80000380: 00078513 mv a0,a5
|
||||
80000384: 138000ef jal ra,800004bc <dequeue>
|
||||
80000388: fd040793 addi a5,s0,-48
|
||||
8000038c: 00078513 mv a0,a5
|
||||
80000390: 12c000ef jal ra,800004bc <dequeue>
|
||||
80000394: fd040793 addi a5,s0,-48
|
||||
80000398: 00078513 mv a0,a5
|
||||
8000039c: 120000ef jal ra,800004bc <dequeue>
|
||||
800003a0: 00000793 li a5,0
|
||||
800003a4: 00078513 mv a0,a5
|
||||
800003a8: 02c12083 lw ra,44(sp)
|
||||
800003ac: 02812403 lw s0,40(sp)
|
||||
800003b0: 03010113 addi sp,sp,48
|
||||
800003b4: 00008067 ret
|
||||
|
||||
800003b8 <initialize_queue>:
|
||||
800003b8: ff010113 addi sp,sp,-16
|
||||
800003bc: 00812623 sw s0,12(sp)
|
||||
800003c0: 01010413 addi s0,sp,16
|
||||
800003c4: 810007b7 lui a5,0x81000
|
||||
800003c8: 04078793 addi a5,a5,64 # 81000040 <y+0xffffff14>
|
||||
800003cc: 0a07a023 sw zero,160(a5)
|
||||
800003d0: 810007b7 lui a5,0x81000
|
||||
800003d4: 04078793 addi a5,a5,64 # 81000040 <y+0xffffff14>
|
||||
800003d8: 0a07a223 sw zero,164(a5)
|
||||
800003dc: 810007b7 lui a5,0x81000
|
||||
800003e0: 04078793 addi a5,a5,64 # 81000040 <y+0xffffff14>
|
||||
800003e4: 0a07a423 sw zero,168(a5)
|
||||
800003e8: 00000013 nop
|
||||
800003ec: 00c12403 lw s0,12(sp)
|
||||
800003f0: 01010113 addi sp,sp,16
|
||||
800003f4: 00008067 ret
|
||||
|
||||
800003f8 <enqueue>:
|
||||
800003f8: ff010113 addi sp,sp,-16
|
||||
800003fc: 00812623 sw s0,12(sp)
|
||||
80000400: 00912423 sw s1,8(sp)
|
||||
80000404: 01010413 addi s0,sp,16
|
||||
80000408: 00050493 mv s1,a0
|
||||
8000040c: 810007b7 lui a5,0x81000
|
||||
80000410: 04078793 addi a5,a5,64 # 81000040 <y+0xffffff14>
|
||||
80000414: 0a87a783 lw a5,168(a5)
|
||||
80000418: 00178713 addi a4,a5,1
|
||||
8000041c: 810007b7 lui a5,0x81000
|
||||
80000420: 04078793 addi a5,a5,64 # 81000040 <y+0xffffff14>
|
||||
80000424: 0ae7a423 sw a4,168(a5)
|
||||
80000428: 810007b7 lui a5,0x81000
|
||||
8000042c: 04078793 addi a5,a5,64 # 81000040 <y+0xffffff14>
|
||||
80000430: 0a47a703 lw a4,164(a5)
|
||||
80000434: 810007b7 lui a5,0x81000
|
||||
80000438: 00471713 slli a4,a4,0x4
|
||||
8000043c: 04078793 addi a5,a5,64 # 81000040 <y+0xffffff14>
|
||||
80000440: 00f707b3 add a5,a4,a5
|
||||
80000444: 0004a583 lw a1,0(s1)
|
||||
80000448: 0044a603 lw a2,4(s1)
|
||||
8000044c: 0084a683 lw a3,8(s1)
|
||||
80000450: 00c4a703 lw a4,12(s1)
|
||||
80000454: 00b7a023 sw a1,0(a5)
|
||||
80000458: 00c7a223 sw a2,4(a5)
|
||||
8000045c: 00d7a423 sw a3,8(a5)
|
||||
80000460: 00e7a623 sw a4,12(a5)
|
||||
80000464: 810007b7 lui a5,0x81000
|
||||
80000468: 04078793 addi a5,a5,64 # 81000040 <y+0xffffff14>
|
||||
8000046c: 0a47a783 lw a5,164(a5)
|
||||
80000470: 00178713 addi a4,a5,1
|
||||
80000474: 00900793 li a5,9
|
||||
80000478: 02e7e263 bltu a5,a4,8000049c <enqueue+0xa4>
|
||||
8000047c: 810007b7 lui a5,0x81000
|
||||
80000480: 04078793 addi a5,a5,64 # 81000040 <y+0xffffff14>
|
||||
80000484: 0a47a783 lw a5,164(a5)
|
||||
80000488: 00178713 addi a4,a5,1
|
||||
8000048c: 810007b7 lui a5,0x81000
|
||||
80000490: 04078793 addi a5,a5,64 # 81000040 <y+0xffffff14>
|
||||
80000494: 0ae7a223 sw a4,164(a5)
|
||||
80000498: 0100006f j 800004a8 <enqueue+0xb0>
|
||||
8000049c: 810007b7 lui a5,0x81000
|
||||
800004a0: 04078793 addi a5,a5,64 # 81000040 <y+0xffffff14>
|
||||
800004a4: 0a07a223 sw zero,164(a5)
|
||||
800004a8: 00000013 nop
|
||||
800004ac: 00c12403 lw s0,12(sp)
|
||||
800004b0: 00812483 lw s1,8(sp)
|
||||
800004b4: 01010113 addi sp,sp,16
|
||||
800004b8: 00008067 ret
|
||||
|
||||
800004bc <dequeue>:
|
||||
800004bc: fd010113 addi sp,sp,-48
|
||||
800004c0: 02812623 sw s0,44(sp)
|
||||
800004c4: 03010413 addi s0,sp,48
|
||||
800004c8: fca42e23 sw a0,-36(s0)
|
||||
800004cc: 810007b7 lui a5,0x81000
|
||||
800004d0: 04078793 addi a5,a5,64 # 81000040 <y+0xffffff14>
|
||||
800004d4: 0a87a783 lw a5,168(a5)
|
||||
800004d8: fff78713 addi a4,a5,-1
|
||||
800004dc: 810007b7 lui a5,0x81000
|
||||
800004e0: 04078793 addi a5,a5,64 # 81000040 <y+0xffffff14>
|
||||
800004e4: 0ae7a423 sw a4,168(a5)
|
||||
800004e8: 810007b7 lui a5,0x81000
|
||||
800004ec: 04078793 addi a5,a5,64 # 81000040 <y+0xffffff14>
|
||||
800004f0: 0a07a703 lw a4,160(a5)
|
||||
800004f4: 810007b7 lui a5,0x81000
|
||||
800004f8: 00471713 slli a4,a4,0x4
|
||||
800004fc: 04078793 addi a5,a5,64 # 81000040 <y+0xffffff14>
|
||||
80000500: 00f707b3 add a5,a4,a5
|
||||
80000504: 0007a603 lw a2,0(a5)
|
||||
80000508: 0047a683 lw a3,4(a5)
|
||||
8000050c: 0087a703 lw a4,8(a5)
|
||||
80000510: 00c7a783 lw a5,12(a5)
|
||||
80000514: fec42023 sw a2,-32(s0)
|
||||
80000518: fed42223 sw a3,-28(s0)
|
||||
8000051c: fee42423 sw a4,-24(s0)
|
||||
80000520: fef42623 sw a5,-20(s0)
|
||||
80000524: 810007b7 lui a5,0x81000
|
||||
80000528: 04078793 addi a5,a5,64 # 81000040 <y+0xffffff14>
|
||||
8000052c: 0a07a783 lw a5,160(a5)
|
||||
80000530: 00178713 addi a4,a5,1
|
||||
80000534: 00900793 li a5,9
|
||||
80000538: 02e7e263 bltu a5,a4,8000055c <dequeue+0xa0>
|
||||
8000053c: 810007b7 lui a5,0x81000
|
||||
80000540: 04078793 addi a5,a5,64 # 81000040 <y+0xffffff14>
|
||||
80000544: 0a07a783 lw a5,160(a5)
|
||||
80000548: 00178713 addi a4,a5,1
|
||||
8000054c: 810007b7 lui a5,0x81000
|
||||
80000550: 04078793 addi a5,a5,64 # 81000040 <y+0xffffff14>
|
||||
80000554: 0ae7a023 sw a4,160(a5)
|
||||
80000558: 0100006f j 80000568 <dequeue+0xac>
|
||||
8000055c: 810007b7 lui a5,0x81000
|
||||
80000560: 04078793 addi a5,a5,64 # 81000040 <y+0xffffff14>
|
||||
80000564: 0a07a023 sw zero,160(a5)
|
||||
80000568: 00000013 nop
|
||||
8000056c: fdc42503 lw a0,-36(s0)
|
||||
80000570: 02c12403 lw s0,44(sp)
|
||||
80000574: 03010113 addi sp,sp,48
|
||||
80000578: 00008067 ret
|
||||
|
||||
8000057c <isFull>:
|
||||
8000057c: ff010113 addi sp,sp,-16
|
||||
80000580: 00812623 sw s0,12(sp)
|
||||
80000584: 01010413 addi s0,sp,16
|
||||
80000588: 810007b7 lui a5,0x81000
|
||||
8000058c: 04078793 addi a5,a5,64 # 81000040 <y+0xffffff14>
|
||||
80000590: 0a87a783 lw a5,168(a5)
|
||||
80000594: ff678793 addi a5,a5,-10
|
||||
80000598: 0017b793 seqz a5,a5
|
||||
8000059c: 0ff7f793 andi a5,a5,255
|
||||
800005a0: 00078513 mv a0,a5
|
||||
800005a4: 00c12403 lw s0,12(sp)
|
||||
800005a8: 01010113 addi sp,sp,16
|
||||
800005ac: 00008067 ret
|
||||
|
||||
800005b0 <isEmpty>:
|
||||
800005b0: ff010113 addi sp,sp,-16
|
||||
800005b4: 00812623 sw s0,12(sp)
|
||||
800005b8: 01010413 addi s0,sp,16
|
||||
800005bc: 810007b7 lui a5,0x81000
|
||||
800005c0: 04078793 addi a5,a5,64 # 81000040 <y+0xffffff14>
|
||||
800005c4: 0a87a783 lw a5,168(a5)
|
||||
800005c8: 0017b793 seqz a5,a5
|
||||
800005cc: 0ff7f793 andi a5,a5,255
|
||||
800005d0: 00078513 mv a0,a5
|
||||
800005d4: 00c12403 lw s0,12(sp)
|
||||
800005d8: 01010113 addi sp,sp,16
|
||||
800005dc: 00008067 ret
|
||||
|
||||
800005e0 <func>:
|
||||
800005e0: ff010113 addi sp,sp,-16
|
||||
800005e4: 00812623 sw s0,12(sp)
|
||||
800005e8: 01010413 addi s0,sp,16
|
||||
800005ec: 00000013 nop
|
||||
800005f0: 00c12403 lw s0,12(sp)
|
||||
800005f4: 01010113 addi sp,sp,16
|
||||
800005f8: 00008067 ret
|
||||
|
||||
Disassembly of section .bss:
|
||||
|
||||
81000000 <z>:
|
||||
...
|
||||
|
||||
81000040 <q>:
|
||||
...
|
||||
|
||||
Disassembly of section .data:
|
||||
|
||||
810000ec <x>:
|
||||
810000ec: 0001 nop
|
||||
810000ee: 0000 unimp
|
||||
810000f0: 0001 nop
|
||||
810000f2: 0000 unimp
|
||||
810000f4: 0006 c.slli zero,0x1
|
||||
810000f6: 0000 unimp
|
||||
810000f8: 0000 unimp
|
||||
810000fa: 0000 unimp
|
||||
810000fc: 00000003 lb zero,0(zero) # 0 <main-0x80000000>
|
||||
81000100: 0001 nop
|
||||
81000102: 0000 unimp
|
||||
81000104: 0001 nop
|
||||
81000106: 0000 unimp
|
||||
81000108: 0002 c.slli64 zero
|
||||
8100010a: 0000 unimp
|
||||
8100010c: 0000 unimp
|
||||
8100010e: 0000 unimp
|
||||
81000110: 00000003 lb zero,0(zero) # 0 <main-0x80000000>
|
||||
81000114: 0006 c.slli zero,0x1
|
||||
81000116: 0000 unimp
|
||||
81000118: 00000007 0x7
|
||||
8100011c: 0005 c.nop 1
|
||||
8100011e: 0000 unimp
|
||||
81000120: 00000007 0x7
|
||||
81000124: 00000007 0x7
|
||||
81000128: 0009 c.nop 2
|
||||
...
|
||||
|
||||
8100012c <y>:
|
||||
8100012c: 0000 unimp
|
||||
8100012e: 0000 unimp
|
||||
81000130: 0002 c.slli64 zero
|
||||
81000132: 0000 unimp
|
||||
81000134: 0002 c.slli64 zero
|
||||
81000136: 0000 unimp
|
||||
81000138: 0000 unimp
|
||||
8100013a: 0000 unimp
|
||||
8100013c: 0005 c.nop 1
|
||||
8100013e: 0000 unimp
|
||||
81000140: 0000 unimp
|
||||
81000142: 0000 unimp
|
||||
81000144: 0001 nop
|
||||
81000146: 0000 unimp
|
||||
81000148: 0001 nop
|
||||
8100014a: 0000 unimp
|
||||
8100014c: 0004 0x4
|
||||
8100014e: 0000 unimp
|
||||
81000150: 0002 c.slli64 zero
|
||||
...
|
||||
8100015a: 0000 unimp
|
||||
8100015c: 00000003 lb zero,0(zero) # 0 <main-0x80000000>
|
||||
81000160: 0002 c.slli64 zero
|
||||
81000162: 0000 unimp
|
||||
81000164: 00000003 lb zero,0(zero) # 0 <main-0x80000000>
|
||||
81000168: 0002 c.slli64 zero
|
||||
...
|
||||
|
||||
Disassembly of section .comment:
|
||||
|
||||
82000000 <.comment>:
|
||||
82000000: 3a434347 fmsub.d ft6,ft6,ft4,ft7,rmm
|
||||
82000004: 2820 fld fs0,80(s0)
|
||||
82000006: 29554e47 fmsub.s ft8,fa0,fs5,ft5,rmm
|
||||
8200000a: 3820 fld fs0,112(s0)
|
||||
8200000c: 322e fld ft4,232(sp)
|
||||
8200000e: 302e fld ft0,232(sp)
|
||||
...
|
||||
@@ -15,9 +15,7 @@ typedef struct Job_t
|
||||
unsigned n_threads;
|
||||
unsigned base_sp;
|
||||
unsigned func_ptr;
|
||||
void * x;
|
||||
void * y;
|
||||
void * z;
|
||||
void * args;
|
||||
unsigned assigned_warp;
|
||||
|
||||
} Job;
|
||||
|
||||
@@ -1,108 +0,0 @@
|
||||
:0200000480007A
|
||||
:10000000130101FD2326110223248102130401039D
|
||||
:10001000B70700809387075E2320F4FEB7070081AF
|
||||
:100020009387C70E2322F4FEB70700819387C71278
|
||||
:100030002324F4FEB7070081938707002326F4FEEC
|
||||
:10004000032604FE832644FE032784FE8327C4FE82
|
||||
:100050002328C4FC232AD4FC232CE4FC232EF4FC08
|
||||
:10006000930704FD13850700EF000039032604FE03
|
||||
:10007000832644FE032784FE8327C4FE2328C4FC72
|
||||
:10008000232AD4FC232CE4FC232EF4FC930704FD48
|
||||
:1000900013850700EF004036032604FE832644FE46
|
||||
:1000A000032784FE8327C4FE2328C4FC232AD4FC10
|
||||
:1000B000232CE4FC232EF4FC930704FD1385070096
|
||||
:1000C000EF008033032604FE832644FE032784FECC
|
||||
:1000D0008327C4FE2328C4FC232AD4FC232CE4FC5D
|
||||
:1000E000232EF4FC930704FD13850700EF00C030B6
|
||||
:1000F000032604FE832644FE032784FE8327C4FED2
|
||||
:100100002328C4FC232AD4FC232CE4FC232EF4FC57
|
||||
:10011000930704FD13850700EF00002E032604FE5D
|
||||
:10012000832644FE032784FE8327C4FE2328C4FCC1
|
||||
:10013000232AD4FC232CE4FC232EF4FC930704FD97
|
||||
:1001400013850700EF00402B930704FE138507007B
|
||||
:10015000EF00C036930704FD13850700EF0000365B
|
||||
:10016000032604FD832644FD032784FD8327C4FD65
|
||||
:100170002320C4FE2322D4FE2324E4FE2326F4FEFF
|
||||
:10018000032604FE832644FE032784FE8327C4FE41
|
||||
:100190002328C4FC232AD4FC232CE4FC232EF4FCC7
|
||||
:1001A000930704FD13850700EF000025032604FED6
|
||||
:1001B000832644FE032784FE8327C4FE2328C4FC31
|
||||
:1001C000232AD4FC232CE4FC232EF4FC930704FD07
|
||||
:1001D00013850700EF004022EF00403A9307050027
|
||||
:1001E00063980702032604FE832644FE032784FE49
|
||||
:1001F0008327C4FE2328C4FC232AD4FC232CE4FC3C
|
||||
:10020000232EF4FC930704FD13850700EF00C01EA6
|
||||
:10021000EF00C0369307050063980702032604FE2B
|
||||
:10022000832644FE032784FE8327C4FE2328C4FCC0
|
||||
:10023000232AD4FC232CE4FC232EF4FC930704FD96
|
||||
:1002400013850700EF00401BEF00403393070500C4
|
||||
:1002500063980702032604FE832644FE032784FED8
|
||||
:100260008327C4FE2328C4FC232AD4FC232CE4FCCB
|
||||
:10027000232EF4FC930704FD13850700EF00C0173D
|
||||
:10028000EF00C02F9307050063980702032604FEC2
|
||||
:10029000832644FE032784FE8327C4FE2328C4FC50
|
||||
:1002A000232AD4FC232CE4FC232EF4FC930704FD26
|
||||
:1002B00013850700EF004014EF00402C9307050062
|
||||
:1002C00063980702032604FE832644FE032784FE68
|
||||
:1002D0008327C4FE2328C4FC232AD4FC232CE4FC5B
|
||||
:1002E000232EF4FC930704FD13850700EF00C010D4
|
||||
:1002F000EF00C0289307050063980702032604FE59
|
||||
:10030000832644FE032784FE8327C4FE2328C4FCDF
|
||||
:10031000232AD4FC232CE4FC232EF4FC930704FDB5
|
||||
:1003200013850700EF00400D930704FD13850700B8
|
||||
:10033000EF00C018930704FD13850700EF000018B5
|
||||
:10034000930704FD13850700EF004017930704FD92
|
||||
:1003500013850700EF008016930704FD138507003F
|
||||
:10036000EF00C015930704FD13850700EF0000158B
|
||||
:10037000930704FD13850700EF004014930704FD65
|
||||
:1003800013850700EF008013930704FD1385070012
|
||||
:10039000EF00C012930704FD13850700EF00001261
|
||||
:1003A00093070000138507008320C1020324810204
|
||||
:1003B0001301010367800000130101FF2326810060
|
||||
:1003C00013040101B70700819387070423A0070ADC
|
||||
:1003D000B70700819387070423A2070AB7070081A4
|
||||
:1003E0009387070423A4070A130000000324C10015
|
||||
:1003F0001301010167800000130101FF2326810022
|
||||
:10040000232491001304010193040500B707008120
|
||||
:100410009387070483A7870A13871700B70700810C
|
||||
:100420009387070423A4E70AB7070081938707048B
|
||||
:1004300003A7470AB70700811317470093870704EC
|
||||
:10044000B307F70083A5040003A6440083A6840035
|
||||
:1004500003A7C40023A0B70023A2C70023A4D7008A
|
||||
:1004600023A6E700B70700819387070483A7470AFD
|
||||
:10047000138717009307900063E2E702B707008134
|
||||
:100480009387070483A7470A13871700B7070081DC
|
||||
:100490009387070423A2E70A6F000001B7070081D2
|
||||
:1004A0009387070423A2070A130000000324C10056
|
||||
:1004B000832481001301010167800000130101FD05
|
||||
:1004C0002326810213040103232EA4FCB707008115
|
||||
:1004D0009387070483A7870A1387F7FFB70700816D
|
||||
:1004E0009387070423A4E70AB707008193870704CB
|
||||
:1004F00003A7070AB707008113174700938707046C
|
||||
:10050000B307F70003A6070083A6470003A78700E9
|
||||
:1005100083A7C7002320C4FE2322D4FE2324E4FEA5
|
||||
:100520002326F4FEB70700819387070483A7070AF1
|
||||
:10053000138717009307900063E2E702B707008173
|
||||
:100540009387070483A7070A13871700B70700815B
|
||||
:100550009387070423A0E70A6F000001B707008113
|
||||
:100560009387070423A0070A130000000325C4FD96
|
||||
:100570000324C1021301010367800000130101FF7E
|
||||
:100580002326810013040101B70700819387070424
|
||||
:1005900083A7870A938767FF93B7170093F7F70F2F
|
||||
:1005A000138507000324C1001301010167800000C7
|
||||
:1005B000130101FF2326810013040101B707008105
|
||||
:1005C0009387070483A7870A93B7170093F7F70F5A
|
||||
:1005D000138507000324C100130101016780000097
|
||||
:1005E000130101FF23268100130401011300000001
|
||||
:0C05F0000324C10013010101678000001A
|
||||
:02000004810079
|
||||
:1000EC0001000000010000000600000000000000FC
|
||||
:1000FC0003000000010000000100000002000000ED
|
||||
:10010C0000000000030000000600000007000000D3
|
||||
:10011C0005000000070000000700000009000000B7
|
||||
:10012C0000000000020000000200000000000000BF
|
||||
:10013C0005000000000000000100000001000000AC
|
||||
:10014C00040000000200000000000000000000009D
|
||||
:10015C000300000002000000030000000200000089
|
||||
:040000058000000077
|
||||
:00000001FF
|
||||
@@ -39,14 +39,10 @@ queue_enqueue:
|
||||
sw t3, 8 (t1) #
|
||||
lw t3, 12(a1) # func_ptr
|
||||
sw t3, 12(t1) #
|
||||
lw t3, 16(a1) # x
|
||||
lw t3, 16(a1) # args
|
||||
sw t3, 16(t1) #
|
||||
lw t3, 20(a1) # y
|
||||
lw t3, 20(a1) # assigned_warp
|
||||
sw t3, 20(t1) #
|
||||
lw t3, 24(a1) # z
|
||||
sw t3, 24(t1) #
|
||||
lw t3, 28(a1) # assigned_warp
|
||||
sw t3, 28(t1) #
|
||||
addi t4, t4, 1 # end_i++
|
||||
li t5, SIZE # size
|
||||
bne t4, t5, ec # if ((q.end_i + 1) == SIZE)
|
||||
@@ -83,14 +79,10 @@ dc:
|
||||
sw t3, 8 (a1) #
|
||||
lw t3, 12(t1) # func_ptr
|
||||
sw t3, 12(a1) #
|
||||
lw t3, 16(t1) # x
|
||||
lw t3, 16(t1) # args
|
||||
sw t3, 16(a1) #
|
||||
lw t3, 20(t1) # y
|
||||
lw t3, 20(t1) # assigned_warp
|
||||
sw t3, 20(a1) #
|
||||
lw t3, 24(t1) # z
|
||||
sw t3, 24(a1) #
|
||||
lw t3, 28(t1) # assigned_warp
|
||||
sw t3, 28(a1) #
|
||||
ret
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user