@@ -418,14 +418,15 @@ static void ei_load(IR *ir) {
418418 assert (0 <= pow && pow < 4 );
419419 const char * * regs = kRegSizeTable [pow ];
420420 dst = regs [ir -> dst -> phys ];
421+ int u = ir -> dst -> flag & VRF_UNSIGNED ;
421422 switch (pow ) {
422423 case 0 :
423- if (ir -> flag & IRF_UNSIGNED ) LDRB (dst , src );
424- else LDRSB (dst , src );
424+ if (u ) LDRB (dst , src );
425+ else LDRSB (dst , src );
425426 break ;
426427 case 1 :
427- if (ir -> flag & IRF_UNSIGNED ) LDRH (dst , src );
428- else LDRSH (dst , src );
428+ if (u ) LDRH (dst , src );
429+ else LDRSH (dst , src );
429430 break ;
430431 case 2 : case 3 :
431432 LDR (dst , src );
@@ -452,7 +453,7 @@ static void ei_store(IR *ir) {
452453 src = kZeroRegTable [pow ];
453454 else
454455 mov_immediate (src = kTmpRegTable [pow ], ir -> opr1 -> fixnum , pow >= 3 ,
455- ir -> flag & IRF_UNSIGNED );
456+ ir -> opr1 -> flag & VRF_UNSIGNED );
456457 } else {
457458 src = kRegSizeTable [pow ][ir -> opr1 -> phys ];
458459 }
@@ -558,7 +559,7 @@ static void ei_div(IR *ir) {
558559 int pow = ir -> dst -> vsize ;
559560 assert (0 <= pow && pow < 4 );
560561 const char * * regs = kRegSizeTable [pow ];
561- if (!(ir -> flag & IRF_UNSIGNED ))
562+ if (!(ir -> opr1 -> flag & VRF_UNSIGNED ))
562563 SDIV (regs [ir -> dst -> phys ], regs [ir -> opr1 -> phys ], regs [ir -> opr2 -> phys ]);
563564 else
564565 UDIV (regs [ir -> dst -> phys ], regs [ir -> opr1 -> phys ], regs [ir -> opr2 -> phys ]);
@@ -573,7 +574,7 @@ static void ei_mod(IR *ir) {
573574 const char * num = regs [ir -> opr1 -> phys ];
574575 const char * div = regs [ir -> opr2 -> phys ];
575576 const char * tmp = kTmpRegTable [pow ];
576- if (!(ir -> flag & IRF_UNSIGNED ))
577+ if (!(ir -> opr1 -> flag & VRF_UNSIGNED ))
577578 SDIV (tmp , num , div );
578579 else
579580 UDIV (tmp , num , div );
@@ -620,25 +621,26 @@ static void ei_lshift(IR *ir) {
620621}
621622
622623static void ei_rshift (IR * ir ) {
623- #define RSHIFT_INST (a , b , c ) do { if (ir->flag & IRF_UNSIGNED ) LSR(a, b, c); else ASR(a, b, c); } while (0)
624+ #define RSHIFT_INST (a , b , c ) do { if (ir->dst-> flag & VRF_UNSIGNED ) LSR(a, b, c); else ASR(a, b, c); } while (0)
624625 assert (!(ir -> opr1 -> flag & VRF_CONST ));
625626 int pow = ir -> dst -> vsize ;
626627 assert (0 <= pow && pow < 4 );
627628 const char * * regs = kRegSizeTable [pow ];
629+ const char * dst = regs [ir -> dst -> phys ], * opr1 = regs [ir -> opr1 -> phys ];
628630 if (ir -> opr2 -> flag & VRF_CONST ) {
629- if (ir -> flag & IRF_UNSIGNED ) {
631+ if (ir -> opr1 -> flag & VRF_UNSIGNED ) {
630632 if ((uint64_t )ir -> opr2 -> fixnum >= (pow < 3 ? 32 : 64 ))
631- mov_immediate (regs [ ir -> dst -> phys ] , 0 , pow >= 3 , true);
633+ mov_immediate (dst , 0 , pow >= 3 , true);
632634 else
633- RSHIFT_INST (regs [ ir -> dst -> phys ], regs [ ir -> opr1 -> phys ] , IM (ir -> opr2 -> fixnum ));
635+ RSHIFT_INST (dst , opr1 , IM (ir -> opr2 -> fixnum ));
634636 } else {
635637 uint64_t shift = ir -> opr2 -> fixnum ;
636638 shift &= pow < 3 ? 31 : 63 ;
637639 const char * opr2 = IM (shift );
638- RSHIFT_INST (regs [ ir -> dst -> phys ], regs [ ir -> opr1 -> phys ] , opr2 );
640+ RSHIFT_INST (dst , opr1 , opr2 );
639641 }
640642 } else {
641- RSHIFT_INST (regs [ ir -> dst -> phys ], regs [ ir -> opr1 -> phys ] , regs [ir -> opr2 -> phys ]);
643+ RSHIFT_INST (dst , opr1 , regs [ir -> opr2 -> phys ]);
642644 }
643645#undef RSHIFT_INST
644646}
@@ -697,14 +699,14 @@ static void ei_cast(IR *ir) {
697699 default : assert (false); break ;
698700 }
699701 const char * src = kRegSizeTable [pows ][ir -> opr1 -> phys ];
700- if (ir -> cast . src_unsigned ) UCVTF (dst , src );
701- else SCVTF (dst , src );
702+ if (ir -> opr1 -> flag & VRF_UNSIGNED ) UCVTF (dst , src );
703+ else SCVTF (dst , src );
702704 }
703705 } else if (ir -> opr1 -> flag & VRF_FLONUM ) {
704706 assert (!(ir -> opr1 -> flag & VRF_CONST ));
705707 // flonum->fix
706708 int powd = ir -> dst -> vsize ;
707- if (ir -> flag & IRF_UNSIGNED ) {
709+ if (ir -> dst -> flag & VRF_UNSIGNED ) {
708710 switch (ir -> opr1 -> vsize ) {
709711 case SZ_FLOAT : FCVTZU (kRegSizeTable [powd ][ir -> dst -> phys ], kFReg32s [ir -> opr1 -> phys ]); break ;
710712 case SZ_DOUBLE : FCVTZU (kRegSizeTable [powd ][ir -> dst -> phys ], kFReg64s [ir -> opr1 -> phys ]); break ;
@@ -719,30 +721,47 @@ static void ei_cast(IR *ir) {
719721 }
720722 } else {
721723 // fix->fix
722- if (ir -> dst -> vsize <= ir -> opr1 -> vsize ) {
724+
725+ // dst \ src | s8 | u8 | s16 | u16 | s32 | u32 | s64 | u64 |
726+ // s8 | \\ | sxtb | sxtb | sxtb | sxtb | sxtb | sxtb | sxtb |
727+ // u8 | uxtb | \\ | uxtb | uxtb | uxtb | uxtb | uxtb | uxtb |
728+ // s16 | ---- | ---- | \\ | sxth | sxth | sxth | sxth | sxth |
729+ // u16 | uxth | ---- | uxth | \\ | uxth | uxth | uxth | uxth |
730+ // s32 | ---- | ---- | ---- | ---- | \\ | ---- | ---- | ---- |
731+ // u32 | ---- | ---- | ---- | ---- | ---- | \\ | ---- | ---- |
732+ // s64 | sxtb | uxtb | sxth | uxth | sxtw | uxtw | \\ | ---- |
733+ // u64 | sxtb | uxtb | sxth | uxth | sxtw | uxtw | ---- | \\ |
734+
735+ int pows = ir -> opr1 -> vsize ;
736+ int powd = ir -> dst -> vsize ;
737+ assert (0 <= pows && pows < 4 );
738+ assert (0 <= powd && powd < 4 );
739+ bool du = ir -> dst -> flag & VRF_UNSIGNED , su = ir -> opr1 -> flag & VRF_UNSIGNED ;
740+ if (powd == VRegSize4 ||
741+ (powd == pows && (du == su || powd == VRegSize8 )) ||
742+ (powd < VRegSize4 && (powd > pows && (!du || su ))) ||
743+ (powd > VRegSize4 && (pows >= VRegSize8 /*|| du == su*/ ))) {
723744 if (ir -> dst -> phys != ir -> opr1 -> phys ) {
724- int pow = ir -> dst -> vsize ;
725- assert (0 <= pow && pow < 4 );
745+ int pow = powd ;
726746 const char * * regs = kRegSizeTable [pow ];
727747 MOV (regs [ir -> dst -> phys ], regs [ir -> opr1 -> phys ]);
728748 }
729749 } else {
730- int pows = ir -> opr1 -> vsize ;
731- int powd = ir -> dst -> vsize ;
732- assert (0 <= pows && pows < 4 );
733- assert (0 <= powd && powd < 4 );
734- if (ir -> cast .src_unsigned ) {
735- switch (pows ) {
736- case 0 : UXTB (kRegSizeTable [powd ][ir -> dst -> phys ], kRegSizeTable [pows ][ir -> opr1 -> phys ]); break ;
737- case 1 : UXTH (kRegSizeTable [powd ][ir -> dst -> phys ], kRegSizeTable [pows ][ir -> opr1 -> phys ]); break ;
738- case 2 : UXTW (kRegSizeTable [powd ][ir -> dst -> phys ], kRegSizeTable [pows ][ir -> opr1 -> phys ]); break ;
750+ const char * dst = kRegSizeTable [powd ][ir -> dst -> phys ];
751+ const char * src = kRegSizeTable [powd < VRegSize4 ? powd : pows ][ir -> opr1 -> phys ];
752+ int pow = powd <= VRegSize4 ? powd : pows ;
753+ if (powd <= VRegSize4 ? du : su ) {
754+ switch (pow ) {
755+ case 0 : UXTB (dst , src ); break ;
756+ case 1 : UXTH (dst , src ); break ;
757+ case 2 : UXTW (dst , src ); break ;
739758 default : assert (false); break ;
740759 }
741760 } else {
742- switch (pows ) {
743- case 0 : SXTB (kRegSizeTable [ powd ][ ir -> dst -> phys ], kRegSizeTable [ pows ][ ir -> opr1 -> phys ] ); break ;
744- case 1 : SXTH (kRegSizeTable [ powd ][ ir -> dst -> phys ], kRegSizeTable [ pows ][ ir -> opr1 -> phys ] ); break ;
745- case 2 : SXTW (kRegSizeTable [ powd ][ ir -> dst -> phys ], kRegSizeTable [ pows ][ ir -> opr1 -> phys ] ); break ;
761+ switch (pow ) {
762+ case 0 : SXTB (dst , src ); break ;
763+ case 1 : SXTH (dst , src ); break ;
764+ case 2 : SXTW (dst , src ); break ;
746765 default : assert (false); break ;
747766 }
748767 }
@@ -776,7 +795,7 @@ static void emit_mov(int dstphys, VReg *opr1, bool is_unsigned) {
776795}
777796
778797static void ei_mov (IR * ir ) {
779- emit_mov (ir -> dst -> phys , ir -> opr1 , ir -> flag & IRF_UNSIGNED );
798+ emit_mov (ir -> dst -> phys , ir -> opr1 , ir -> dst -> flag & VRF_UNSIGNED );
780799}
781800
782801static void ei_result (IR * ir ) {
@@ -790,43 +809,36 @@ static void ei_result(IR *ir) {
790809 bool is_flo = ir -> opr1 -> flag & VRF_FLONUM ;
791810 assert ((size_t )ir -> result .index < ARRAY_SIZE (kRegIndices [is_flo ]));
792811 int dstphys = kRegIndices [is_flo ][ir -> result .index ];
793- emit_mov (dstphys , ir -> opr1 , ir -> flag & IRF_UNSIGNED );
812+ emit_mov (dstphys , ir -> opr1 , ir -> opr1 -> flag & VRF_UNSIGNED );
794813}
795814
815+ static const char * kCondTable [][8 ] = {
816+ { // Signed.
817+ [COND_EQ ] = CEQ , [COND_NE ] = CNE ,
818+ [COND_LT ] = CLT , [COND_GT ] = CGT , [COND_LE ] = CLE , [COND_GE ] = CGE ,
819+ },
820+ { // Unsigned.
821+ [COND_EQ ] = CEQ , [COND_NE ] = CNE ,
822+ [COND_LT ] = CLO , [COND_GT ] = CHI , [COND_LE ] = CLS , [COND_GE ] = CHS ,
823+ },
824+ };
825+
796826static void ei_cond (IR * ir ) {
797827 cmp_vregs (ir -> opr1 , ir -> opr2 );
798828
799829 assert (!(ir -> dst -> flag & VRF_CONST ));
800- const char * dst = kReg32s [ir -> dst -> phys ]; // Assume bool is 4 byte .
830+ const char * dst = kReg32s [ir -> dst -> phys ]; // Minimum register size .
801831 int cond = ir -> cond .kind ;
802832 // On aarch64, flag for comparing flonum is signed.
803833 if (ir -> opr1 -> flag & VRF_FLONUM ) {
804- assert ((cond & ~COND_MASK ) == 0 );
805834 switch (cond ) {
806835 case COND_LT : CSET (dst , CMI ); return ;
807836 case COND_LE : CSET (dst , CLS ); return ;
808837 default : break ;
809838 }
810839 }
811840
812- switch (cond ) {
813- case COND_EQ | COND_UNSIGNED : // Fallthrough
814- case COND_EQ : CSET (dst , CEQ ); break ;
815-
816- case COND_NE | COND_UNSIGNED : // Fallthrough
817- case COND_NE : CSET (dst , CNE ); break ;
818-
819- case COND_LT : CSET (dst , CLT ); break ;
820- case COND_GT : CSET (dst , CGT ); break ;
821- case COND_LE : CSET (dst , CLE ); break ;
822- case COND_GE : CSET (dst , CGE ); break ;
823-
824- case COND_LT | COND_UNSIGNED : CSET (dst , CLO ); break ;
825- case COND_GT | COND_UNSIGNED : CSET (dst , CHI ); break ;
826- case COND_LE | COND_UNSIGNED : CSET (dst , CLS ); break ;
827- case COND_GE | COND_UNSIGNED : CSET (dst , CHS ); break ;
828- default : assert (false); break ;
829- }
841+ CSET (dst , kCondTable [ir -> opr1 -> flag & VRF_UNSIGNED ? 1 : 0 ][cond ]);
830842}
831843
832844static void ei_jmp (IR * ir ) {
@@ -843,7 +855,7 @@ static void ei_jmp(IR *ir) {
843855 assert (0 <= pow && pow < 4 );
844856 const char * * regs = kRegSizeTable [pow ];
845857 const char * opr1 = regs [ir -> opr1 -> phys ];
846- switch (cond & COND_MASK ) {
858+ switch (cond ) {
847859 case COND_EQ : CBZ (opr1 , label ); return ;
848860 case COND_NE : CBNZ (opr1 , label ); return ;
849861 default : break ;
@@ -853,7 +865,6 @@ static void ei_jmp(IR *ir) {
853865 cmp_vregs (ir -> opr1 , ir -> opr2 );
854866
855867 if (ir -> opr1 -> flag & VRF_FLONUM ) {
856- assert ((cond & ~COND_MASK ) == 0 );
857868 switch (cond ) {
858869 case COND_LT : Bcc (CMI , label ); return ;
859870 case COND_LE : Bcc (CLS , label ); return ;
@@ -862,24 +873,7 @@ static void ei_jmp(IR *ir) {
862873 }
863874
864875 // On aarch64, flag for comparing flonum is signed.
865- switch (cond ) {
866- case COND_EQ | COND_UNSIGNED : // Fallthrough
867- case COND_EQ : Bcc (CEQ , label ); break ;
868-
869- case COND_NE | COND_UNSIGNED : // Fallthrough
870- case COND_NE : Bcc (CNE , label ); break ;
871-
872- case COND_LT : Bcc (CLT , label ); break ;
873- case COND_GT : Bcc (CGT , label ); break ;
874- case COND_LE : Bcc (CLE , label ); break ;
875- case COND_GE : Bcc (CGE , label ); break ;
876-
877- case COND_LT | COND_UNSIGNED : Bcc (CLO , label ); break ;
878- case COND_GT | COND_UNSIGNED : Bcc (CHI , label ); break ;
879- case COND_LE | COND_UNSIGNED : Bcc (CLS , label ); break ;
880- case COND_GE | COND_UNSIGNED : Bcc (CHS , label ); break ;
881- default : assert (false); break ;
882- }
876+ Bcc (kCondTable [ir -> opr1 -> flag & VRF_UNSIGNED ? 1 : 0 ][cond ], label );
883877}
884878
885879static void ei_tjmp (IR * ir ) {
@@ -930,7 +924,7 @@ static void ei_pusharg(IR *ir) {
930924#endif
931925 const char * dst = kRegSizeTable [pow ][index ];
932926 if (ir -> opr1 -> flag & VRF_CONST )
933- mov_immediate (dst , ir -> opr1 -> fixnum , pow >= 3 , ir -> flag & IRF_UNSIGNED );
927+ mov_immediate (dst , ir -> opr1 -> fixnum , pow >= 3 , ir -> opr1 -> flag & VRF_UNSIGNED );
934928 else if (ir -> pusharg .index != ir -> opr1 -> phys )
935929 MOV (dst , kRegSizeTable [pow ][ir -> opr1 -> phys ]);
936930 }
@@ -1063,6 +1057,26 @@ void tweak_irs(FuncBackend *fnbe) {
10631057 }
10641058#endif
10651059
1060+ // Arithmetic operation under 4 bytes.
1061+ {
1062+ VReg * dst = ir -> dst ;
1063+ if (dst != NULL && !(dst -> flag & VRF_FLONUM ) && dst -> vsize < VRegSize4 ) {
1064+ switch (ir -> kind ) {
1065+ default : break ;
1066+ case IR_ADD : case IR_SUB : case IR_MUL : // case IR_DIV: case IR_MOD:
1067+ case IR_LSHIFT : // case IR_BITAND: case IR_BITOR: case IR_BITXOR: case IR_RSHIFT:
1068+ {
1069+ // insert_tmp_mov(&ir->opr1, irs, j++);
1070+ VReg * tmp = reg_alloc_spawn (ra , VRegSize4 , dst -> flag & VRF_MASK );
1071+ IR * cast = new_ir_bop_raw (IR_CAST , dst , tmp , NULL ); // new_ir_cast.
1072+ vec_insert (irs , j + 1 , cast );
1073+ ir -> dst = tmp ;
1074+ }
1075+ break ;
1076+ }
1077+ }
1078+ }
1079+
10661080 switch (ir -> kind ) {
10671081 case IR_LOAD :
10681082 if (ir -> opr1 -> flag & VRF_CONST ) {
@@ -1082,8 +1096,7 @@ void tweak_irs(FuncBackend *fnbe) {
10821096 if (ir -> opr2 -> fixnum < 0 ) {
10831097 ir -> kind = IR_SUB ;
10841098 VReg * old = ir -> opr2 ;
1085- ir -> opr2 = reg_alloc_spawn_const (ra , - old -> fixnum , old -> vsize );
1086- ir -> opr2 -> flag = old -> flag ;
1099+ ir -> opr2 = reg_alloc_spawn_const (ra , - old -> fixnum , old -> vsize , old -> flag );
10871100 }
10881101 if (!is_im13_addsubimm (ir -> opr2 -> fixnum ))
10891102 insert_tmp_mov (& ir -> opr2 , irs , j ++ );
@@ -1104,8 +1117,7 @@ void tweak_irs(FuncBackend *fnbe) {
11041117 if (ir -> opr2 -> fixnum < 0 ) {
11051118 ir -> kind = IR_ADD ;
11061119 VReg * old = ir -> opr2 ;
1107- ir -> opr2 = reg_alloc_spawn_const (ra , - old -> fixnum , old -> vsize );
1108- ir -> opr2 -> flag = old -> flag ;
1120+ ir -> opr2 = reg_alloc_spawn_const (ra , - old -> fixnum , old -> vsize , old -> flag );
11091121 }
11101122 if (!is_im13_addsubimm (ir -> opr2 -> fixnum ))
11111123 insert_tmp_mov (& ir -> opr2 , irs , j ++ );
@@ -1140,7 +1152,7 @@ void tweak_irs(FuncBackend *fnbe) {
11401152 {
11411153 assert (!(ir -> opr1 -> flag & VRF_CONST ));
11421154 // Allocate a temporary register to use for the calculation.
1143- VReg * tmp = reg_alloc_spawn (ra , VRegSize8 , 0 );
1155+ VReg * tmp = reg_alloc_spawn (ra , VRegSize8 , VRF_UNSIGNED );
11441156 IR * keep = new_ir_keep (tmp , NULL , NULL ); // Notify the register begins to be used.
11451157 vec_insert (irs , j ++ , keep );
11461158
0 commit comments