/*	$NetBSD: sljitNativeTILEGX_64.c,v 1.4 2019/01/20 23:14:16 alnsn Exp $	*/

/*
 *    Stack-less Just-In-Time compiler
 *
 *    Copyright 2013-2013 Tilera Corporation(jiwang (at) tilera.com). All rights reserved.
 *    Copyright Zoltan Herczeg (hzmester (at) freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* TileGX architecture. */
/* Contributed by Tilera Corporation. */
#include "sljitNativeTILEGX-encoder.c"

#define SIMM_8BIT_MAX (0x7f)
#define SIMM_8BIT_MIN (-0x80)
#define SIMM_16BIT_MAX (0x7fff)
#define SIMM_16BIT_MIN (-0x8000)
#define SIMM_17BIT_MAX (0xffff)
#define SIMM_17BIT_MIN (-0x10000)
#define SIMM_32BIT_MAX (0x7fffffff)
#define SIMM_32BIT_MIN (-0x7fffffff - 1)
#define SIMM_48BIT_MAX (0x7fffffff0000L)
#define SIMM_48BIT_MIN (-0x800000000000L)
#define IMM16(imm) ((imm) & 0xffff)

#define UIMM_16BIT_MAX (0xffff)

#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4)
#define ADDR_TMP (SLJIT_NUMBER_OF_REGISTERS + 5)
#define PIC_ADDR_REG TMP_REG2

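/* Mapping of SLJIT register numbers to TileGX hardware registers: index 0
   is the unused slot (r63, the zero register), the SLJIT scratch and saved
   registers follow, and the trailing entries are SLJIT_LOCALS_REG (r54)
   and the temporary registers defined above. */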
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = {
	63, 0, 1, 2, 3, 4, 30, 31, 32, 33, 34, 54, 5, 16, 6, 7
};

#define SLJIT_LOCALS_REG_mapped 54
#define TMP_REG1_mapped 5
#define TMP_REG2_mapped 16
#define TMP_REG3_mapped 6
#define ADDR_TMP_mapped 7

/* Flags are kept in volatile registers. */
#define EQUAL_FLAG 8
/* And carry flag as well. */
#define ULESS_FLAG 9
#define UGREATER_FLAG 10
#define LESS_FLAG 11
#define GREATER_FLAG 12
#define OVERFLOW_FLAG 13

#define ZERO 63
#define RA 55
#define TMP_EREG1 14
#define TMP_EREG2 15

#define LOAD_DATA 0x01
#define WORD_DATA 0x00
#define BYTE_DATA 0x02
#define HALF_DATA 0x04
#define INT_DATA 0x06
#define SIGNED_DATA 0x08
#define DOUBLE_DATA 0x10

/* Separates integer and floating point registers */
#define GPR_REG 0xf

#define MEM_MASK 0x1f

#define WRITE_BACK 0x00020
#define ARG_TEST 0x00040
#define ALT_KEEP_CACHE 0x00080
#define CUMULATIVE_OP 0x00100
#define LOGICAL_OP 0x00200
#define IMM_OP 0x00400
#define SRC2_IMM 0x00800

#define UNUSED_DEST 0x01000
#define REG_DEST 0x02000
#define REG1_SOURCE 0x04000
#define REG2_SOURCE 0x08000
#define SLOW_SRC1 0x10000
#define SLOW_SRC2 0x20000
#define SLOW_DEST 0x40000

/* Only these flags are set. UNUSED_DEST is not set when no flags should be set.
 */
#define CHECK_FLAGS(list) (!(flags & UNUSED_DEST) || (op & GET_FLAGS(~(list))))

SLJIT_API_FUNC_ATTRIBUTE const char *sljit_get_platform_name(void)
{
	return "TileGX" SLJIT_CPUINFO;
}

/* Length of an instruction word */
typedef sljit_uw sljit_ins;

struct jit_instr {
	const struct tilegx_opcode* opcode;
	tilegx_pipeline pipe;
	unsigned long input_registers;
	unsigned long output_registers;
	int operand_value[4];
	int line;
};

/* Opcode Helper Macros */
#define TILEGX_X_MODE 0

#define X_MODE create_Mode(TILEGX_X_MODE)

#define FNOP_X0 \
	create_Opcode_X0(RRR_0_OPCODE_X0) | \
	create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) | \
	create_UnaryOpcodeExtension_X0(FNOP_UNARY_OPCODE_X0)

#define FNOP_X1 \
	create_Opcode_X1(RRR_0_OPCODE_X1) | \
	create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \
	create_UnaryOpcodeExtension_X1(FNOP_UNARY_OPCODE_X1)

#define NOP \
	create_Mode(TILEGX_X_MODE) | FNOP_X0 | FNOP_X1

#define ANOP_X0 \
	create_Opcode_X0(RRR_0_OPCODE_X0) | \
	create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) | \
	create_UnaryOpcodeExtension_X0(NOP_UNARY_OPCODE_X0)

#define BPT create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
	create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \
	create_UnaryOpcodeExtension_X1(ILL_UNARY_OPCODE_X1) | \
	create_Dest_X1(0x1C) | create_SrcA_X1(0x25) | ANOP_X0

#define ADD_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
	create_RRROpcodeExtension_X1(ADD_RRR_0_OPCODE_X1) | FNOP_X0

#define ADDI_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \
	create_Imm8OpcodeExtension_X1(ADDI_IMM8_OPCODE_X1) | FNOP_X0

#define SUB_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
	create_RRROpcodeExtension_X1(SUB_RRR_0_OPCODE_X1) | FNOP_X0

#define NOR_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
	create_RRROpcodeExtension_X1(NOR_RRR_0_OPCODE_X1) | FNOP_X0

#define OR_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
	create_RRROpcodeExtension_X1(OR_RRR_0_OPCODE_X1) | FNOP_X0

#define AND_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
	create_RRROpcodeExtension_X1(AND_RRR_0_OPCODE_X1) | FNOP_X0

#define XOR_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
	create_RRROpcodeExtension_X1(XOR_RRR_0_OPCODE_X1) | FNOP_X0

#define CMOVNEZ_X0 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X0(RRR_0_OPCODE_X0) | \
	create_RRROpcodeExtension_X0(CMOVNEZ_RRR_0_OPCODE_X0) | FNOP_X1

#define CMOVEQZ_X0 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X0(RRR_0_OPCODE_X0) | \
	create_RRROpcodeExtension_X0(CMOVEQZ_RRR_0_OPCODE_X0) | FNOP_X1

#define ADDLI_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(ADDLI_OPCODE_X1) | FNOP_X0

#define V4INT_L_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
	create_RRROpcodeExtension_X1(V4INT_L_RRR_0_OPCODE_X1) | FNOP_X0

#define BFEXTU_X0 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X0(BF_OPCODE_X0) | \
	create_BFOpcodeExtension_X0(BFEXTU_BF_OPCODE_X0) | FNOP_X1

#define BFEXTS_X0 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X0(BF_OPCODE_X0) | \
	create_BFOpcodeExtension_X0(BFEXTS_BF_OPCODE_X0) | FNOP_X1

#define SHL16INSLI_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHL16INSLI_OPCODE_X1) | FNOP_X0

#define ST_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
	create_RRROpcodeExtension_X1(ST_RRR_0_OPCODE_X1) | create_Dest_X1(0x0) | FNOP_X0

#define LD_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
	create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \
	create_UnaryOpcodeExtension_X1(LD_UNARY_OPCODE_X1) | FNOP_X0

#define JR_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
	create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \
	create_UnaryOpcodeExtension_X1(JR_UNARY_OPCODE_X1) | FNOP_X0

#define JALR_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
	create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \
	create_UnaryOpcodeExtension_X1(JALR_UNARY_OPCODE_X1) | FNOP_X0

#define CLZ_X0 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X0(RRR_0_OPCODE_X0) | \
	create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) | \
	create_UnaryOpcodeExtension_X0(CNTLZ_UNARY_OPCODE_X0) | FNOP_X1

#define CMPLTUI_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \
	create_Imm8OpcodeExtension_X1(CMPLTUI_IMM8_OPCODE_X1) | FNOP_X0

#define CMPLTU_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
	create_RRROpcodeExtension_X1(CMPLTU_RRR_0_OPCODE_X1) | FNOP_X0

#define CMPLTS_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
	create_RRROpcodeExtension_X1(CMPLTS_RRR_0_OPCODE_X1) | FNOP_X0

#define XORI_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \
	create_Imm8OpcodeExtension_X1(XORI_IMM8_OPCODE_X1) | FNOP_X0

#define ORI_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \
	create_Imm8OpcodeExtension_X1(ORI_IMM8_OPCODE_X1) | FNOP_X0

#define ANDI_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \
	create_Imm8OpcodeExtension_X1(ANDI_IMM8_OPCODE_X1) | FNOP_X0

#define SHLI_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHIFT_OPCODE_X1) | \
	create_ShiftOpcodeExtension_X1(SHLI_SHIFT_OPCODE_X1) | FNOP_X0

#define SHL_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
	create_RRROpcodeExtension_X1(SHL_RRR_0_OPCODE_X1) | FNOP_X0

#define SHRSI_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHIFT_OPCODE_X1) | \
	create_ShiftOpcodeExtension_X1(SHRSI_SHIFT_OPCODE_X1) | FNOP_X0

#define SHRS_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
	create_RRROpcodeExtension_X1(SHRS_RRR_0_OPCODE_X1) | FNOP_X0

#define SHRUI_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHIFT_OPCODE_X1) | \
	create_ShiftOpcodeExtension_X1(SHRUI_SHIFT_OPCODE_X1) | FNOP_X0

#define SHRU_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
	create_RRROpcodeExtension_X1(SHRU_RRR_0_OPCODE_X1) | FNOP_X0

#define BEQZ_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(BRANCH_OPCODE_X1) | \
	create_BrType_X1(BEQZ_BRANCH_OPCODE_X1) | FNOP_X0

#define BNEZ_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(BRANCH_OPCODE_X1) | \
	create_BrType_X1(BNEZ_BRANCH_OPCODE_X1) | FNOP_X0

#define J_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(JUMP_OPCODE_X1) | \
	create_JumpOpcodeExtension_X1(J_JUMP_OPCODE_X1) | FNOP_X0

#define JAL_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(JUMP_OPCODE_X1) | \
	create_JumpOpcodeExtension_X1(JAL_JUMP_OPCODE_X1) | FNOP_X0

#define DEST_X0(x) create_Dest_X0(x)
#define SRCA_X0(x) create_SrcA_X0(x)
#define SRCB_X0(x) create_SrcB_X0(x)
#define DEST_X1(x) create_Dest_X1(x)
#define SRCA_X1(x) create_SrcA_X1(x)
#define SRCB_X1(x) create_SrcB_X1(x)
#define IMM16_X1(x) create_Imm16_X1(x)
#define IMM8_X1(x) create_Imm8_X1(x)
#define BFSTART_X0(x) create_BFStart_X0(x)
#define BFEND_X0(x) create_BFEnd_X0(x)
#define SHIFTIMM_X1(x) create_ShAmt_X1(x)
#define JOFF_X1(x) create_JumpOff_X1(x)
#define BOFF_X1(x) create_BrOff_X1(x)

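/* Indexed by the data-type flags above: bit 0 selects load vs. store,
   bits 1-2 the access size (word/byte/half/int) and bit 3 signedness,
   as spelled out by the u/s, w/b/h/i, s/l comments on each entry. */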
static const tilegx_mnemonic data_transfer_insts[16] = {
	/* u w s */ TILEGX_OPC_ST   /* st */,
	/* u w l */ TILEGX_OPC_LD   /* ld */,
	/* u b s */ TILEGX_OPC_ST1  /* st1 */,
	/* u b l */ TILEGX_OPC_LD1U /* ld1u */,
	/* u h s */ TILEGX_OPC_ST2  /* st2 */,
	/* u h l */ TILEGX_OPC_LD2U /* ld2u */,
	/* u i s */ TILEGX_OPC_ST4  /* st4 */,
	/* u i l */ TILEGX_OPC_LD4U /* ld4u */,
	/* s w s */ TILEGX_OPC_ST   /* st */,
	/* s w l */ TILEGX_OPC_LD   /* ld */,
	/* s b s */ TILEGX_OPC_ST1  /* st1 */,
	/* s b l */ TILEGX_OPC_LD1S /* ld1s */,
	/* s h s */ TILEGX_OPC_ST2  /* st2 */,
	/* s h l */ TILEGX_OPC_LD2S /* ld2s */,
	/* s i s */ TILEGX_OPC_ST4  /* st4 */,
	/* s i l */ TILEGX_OPC_LD4S /* ld4s */,
};

#ifdef TILEGX_JIT_DEBUG
static sljit_s32 push_inst_debug(struct sljit_compiler *compiler, sljit_ins ins, int line)
{
	sljit_ins *ptr = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins));
	FAIL_IF(!ptr);
	*ptr = ins;
	compiler->size++;
	printf("|%04d|S0|:\t\t", line);
	print_insn_tilegx(ptr);
	return SLJIT_SUCCESS;
}

static sljit_s32 push_inst_nodebug(struct sljit_compiler *compiler, sljit_ins ins)
{
	sljit_ins *ptr = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins));
	FAIL_IF(!ptr);
	*ptr = ins;
	compiler->size++;
	return SLJIT_SUCCESS;
}

#define push_inst(a, b) push_inst_debug(a, b, __LINE__)
#else
static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
{
	sljit_ins *ptr = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins));
	FAIL_IF(!ptr);
	*ptr = ins;
	compiler->size++;
	return SLJIT_SUCCESS;
}
#endif

#define BUNDLE_FORMAT_MASK(p0, p1, p2) \
	((p0) | ((p1) << 8) | ((p2) << 16))

#define BUNDLE_FORMAT(p0, p1, p2) \
	{ \
		{ \
			(tilegx_pipeline)(p0), \
			(tilegx_pipeline)(p1), \
			(tilegx_pipeline)(p2) \
		}, \
		BUNDLE_FORMAT_MASK(1 << (p0), 1 << (p1), (1 << (p2))) \
	}

#define NO_PIPELINE TILEGX_NUM_PIPELINE_ENCODINGS

#define tilegx_is_x_pipeline(p) ((int)(p) <= (int)TILEGX_PIPELINE_X1)

#define PI(encoding) \
	push_inst(compiler, encoding)

#define PB3(opcode, dst, srca, srcb) \
	push_3_buffer(compiler, opcode, dst, srca, srcb, __LINE__)

#define PB2(opcode, dst, src) \
	push_2_buffer(compiler, opcode, dst, src, __LINE__)

#define JR(reg) \
	push_jr_buffer(compiler, TILEGX_OPC_JR, reg, __LINE__)

#define ADD(dst, srca, srcb) \
	push_3_buffer(compiler, TILEGX_OPC_ADD, dst, srca, srcb, __LINE__)

#define SUB(dst, srca, srcb) \
	push_3_buffer(compiler, TILEGX_OPC_SUB, dst, srca, srcb, __LINE__)

#define MUL(dst, srca, srcb) \
	push_3_buffer(compiler, TILEGX_OPC_MULX, dst, srca, srcb, __LINE__)

#define NOR(dst, srca, srcb) \
	push_3_buffer(compiler, TILEGX_OPC_NOR, dst, srca, srcb, __LINE__)

#define OR(dst, srca, srcb) \
	push_3_buffer(compiler, TILEGX_OPC_OR, dst, srca, srcb, __LINE__)

#define XOR(dst, srca, srcb) \
	push_3_buffer(compiler, TILEGX_OPC_XOR, dst, srca, srcb, __LINE__)

#define AND(dst, srca, srcb) \
	push_3_buffer(compiler, TILEGX_OPC_AND, dst, srca, srcb, __LINE__)

#define CLZ(dst, src) \
	push_2_buffer(compiler, TILEGX_OPC_CLZ, dst, src, __LINE__)

#define SHLI(dst, srca, srcb) \
	push_3_buffer(compiler, TILEGX_OPC_SHLI, dst, srca, srcb, __LINE__)

#define SHRUI(dst, srca, imm) \
	push_3_buffer(compiler, TILEGX_OPC_SHRUI, dst, srca, imm, __LINE__)

#define XORI(dst, srca, imm) \
	push_3_buffer(compiler, TILEGX_OPC_XORI, dst, srca, imm, __LINE__)

#define ORI(dst, srca, imm) \
	push_3_buffer(compiler, TILEGX_OPC_ORI, dst, srca, imm, __LINE__)

#define CMPLTU(dst, srca, srcb) \
	push_3_buffer(compiler, TILEGX_OPC_CMPLTU, dst, srca, srcb, __LINE__)

#define CMPLTS(dst, srca, srcb) \
	push_3_buffer(compiler, TILEGX_OPC_CMPLTS, dst, srca, srcb, __LINE__)

#define CMPLTUI(dst, srca, imm) \
	push_3_buffer(compiler, TILEGX_OPC_CMPLTUI, dst, srca, imm, __LINE__)

#define CMOVNEZ(dst, srca, srcb) \
	push_3_buffer(compiler, TILEGX_OPC_CMOVNEZ, dst, srca, srcb, __LINE__)

#define CMOVEQZ(dst, srca, srcb) \
	push_3_buffer(compiler, TILEGX_OPC_CMOVEQZ, dst, srca, srcb, __LINE__)

#define ADDLI(dst, srca, srcb) \
	push_3_buffer(compiler, TILEGX_OPC_ADDLI, dst, srca, srcb, __LINE__)

#define SHL16INSLI(dst, srca, srcb) \
	push_3_buffer(compiler, TILEGX_OPC_SHL16INSLI, dst, srca, srcb, __LINE__)

#define LD_ADD(dst, addr, adjust) \
	push_3_buffer(compiler, TILEGX_OPC_LD_ADD, dst, addr, adjust, __LINE__)

#define ST_ADD(src, addr, adjust) \
	push_3_buffer(compiler, TILEGX_OPC_ST_ADD, src, addr, adjust, __LINE__)

#define LD(dst, addr) \
	push_2_buffer(compiler, TILEGX_OPC_LD, dst, addr, __LINE__)

#define BFEXTU(dst, src, start, end) \
	push_4_buffer(compiler, TILEGX_OPC_BFEXTU, dst, src, start, end, __LINE__)

#define BFEXTS(dst, src, start, end) \
	push_4_buffer(compiler, TILEGX_OPC_BFEXTS, dst, src, start, end, __LINE__)

#define ADD_SOLO(dest, srca, srcb) \
	push_inst(compiler, ADD_X1 | DEST_X1(dest) | SRCA_X1(srca) | SRCB_X1(srcb))

#define ADDI_SOLO(dest, srca, imm) \
	push_inst(compiler, ADDI_X1 | DEST_X1(dest) | SRCA_X1(srca) | IMM8_X1(imm))

#define ADDLI_SOLO(dest, srca, imm) \
	push_inst(compiler, ADDLI_X1 | DEST_X1(dest) | SRCA_X1(srca) | IMM16_X1(imm))

#define SHL16INSLI_SOLO(dest, srca, imm) \
	push_inst(compiler, SHL16INSLI_X1 | DEST_X1(dest) | SRCA_X1(srca) | IMM16_X1(imm))

#define JALR_SOLO(reg) \
	push_inst(compiler, JALR_X1 | SRCA_X1(reg))

#define JR_SOLO(reg) \
	push_inst(compiler, JR_X1 | SRCA_X1(reg))

struct Format {
	/* Mapping of bundle issue slot to assigned pipe. */
	tilegx_pipeline pipe[TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE];

	/* Mask of pipes used by this bundle. */
	unsigned int pipe_mask;
};

const struct Format formats[] =
{
	/* In Y format we must always have something in Y2, since it has
	* no fnop, so this conveys that Y2 must always be used. */
	BUNDLE_FORMAT(TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y2, NO_PIPELINE),
	BUNDLE_FORMAT(TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y2, NO_PIPELINE),
	BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y0, NO_PIPELINE),
	BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y1, NO_PIPELINE),

	/* Y format has three instructions. */
	BUNDLE_FORMAT(TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y2),
	BUNDLE_FORMAT(TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y1),
	BUNDLE_FORMAT(TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y2),
	BUNDLE_FORMAT(TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y0),
	BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y1),
	BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y0),

	/* X format has only two instructions. */
	BUNDLE_FORMAT(TILEGX_PIPELINE_X0, TILEGX_PIPELINE_X1, NO_PIPELINE),
	BUNDLE_FORMAT(TILEGX_PIPELINE_X1, TILEGX_PIPELINE_X0, NO_PIPELINE)
};


struct jit_instr inst_buf[TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE];
unsigned long inst_buf_index;

tilegx_pipeline get_any_valid_pipe(const struct tilegx_opcode* opcode)
{
	/* FIXME: tile: we could pregenerate this. */
	int pipe;
	for (pipe = 0; ((opcode->pipes & (1 << pipe)) == 0 && pipe < TILEGX_NUM_PIPELINE_ENCODINGS); pipe++)
		;
	return (tilegx_pipeline)(pipe);
}

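/* Prepend a no-op to the pending bundle: shift the queued instructions
   up by one slot and place the (f)nop in slot 0. */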
void insert_nop(tilegx_mnemonic opc, int line)
{
	const struct tilegx_opcode* opcode = NULL;

	memmove(&inst_buf[1], &inst_buf[0], inst_buf_index * sizeof inst_buf[0]);

	opcode = &tilegx_opcodes[opc];
	inst_buf[0].opcode = opcode;
	inst_buf[0].pipe = get_any_valid_pipe(opcode);
	inst_buf[0].input_registers = 0;
	inst_buf[0].output_registers = 0;
	inst_buf[0].line = line;
	++inst_buf_index;
}

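/* Find a bundle format whose pipelines can accommodate every queued
   instruction; returns NULL when no such format exists. */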
const struct Format* compute_format()
{
	unsigned int compatible_pipes = BUNDLE_FORMAT_MASK(
		inst_buf[0].opcode->pipes,
		inst_buf[1].opcode->pipes,
		(inst_buf_index == 3 ? inst_buf[2].opcode->pipes : (1 << NO_PIPELINE)));

	const struct Format* match = NULL;
	const struct Format *b = NULL;
	unsigned int i;
	for (i = 0; i < sizeof formats / sizeof formats[0]; i++) {
		b = &formats[i];
		if ((b->pipe_mask & compatible_pipes) == b->pipe_mask) {
			match = b;
			break;
		}
	}

	return match;
}

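/* Assign an issue pipeline to each queued instruction. Returns 0 on
   success, or -1 when the instructions cannot share a bundle (no
   matching format, or a register produced in one slot is consumed or
   rewritten by a later slot). */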
sljit_s32 assign_pipes()
{
	unsigned long output_registers = 0;
	unsigned int i = 0;

	if (inst_buf_index == 1) {
		tilegx_mnemonic opc = inst_buf[0].opcode->can_bundle
					? TILEGX_OPC_FNOP : TILEGX_OPC_NOP;
		insert_nop(opc, __LINE__);
	}

	const struct Format* match = compute_format();

	if (match == NULL)
		return -1;

	for (i = 0; i < inst_buf_index; i++) {

		if ((i > 0) && ((inst_buf[i].input_registers & output_registers) != 0))
			return -1;

		if ((i > 0) && ((inst_buf[i].output_registers & output_registers) != 0))
			return -1;

		/* Don't include Rzero in the match set, to avoid triggering
		   needlessly on 'prefetch' instrs. */

		output_registers |= inst_buf[i].output_registers & 0xFFFFFFFFFFFFFFL;

		inst_buf[i].pipe = match->pipe[i];
	}

	/* If only 2 instrs, and in Y-mode, insert a nop. */
	if (inst_buf_index == 2 && !tilegx_is_x_pipeline(match->pipe[0])) {
		insert_nop(TILEGX_OPC_FNOP, __LINE__);

		/* Select the yet unassigned pipe. */
		tilegx_pipeline pipe = (tilegx_pipeline)(((TILEGX_PIPELINE_Y0
					+ TILEGX_PIPELINE_Y1 + TILEGX_PIPELINE_Y2)
					- (inst_buf[1].pipe + inst_buf[2].pipe)));

		inst_buf[0].pipe = pipe;
	}

	return 0;
}

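/* Encode one queued instruction into its bundle bits for the pipeline it
   was assigned, inserting each operand value into its field. */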
tilegx_bundle_bits get_bundle_bit(struct jit_instr *inst)
{
	int i, val;
	const struct tilegx_opcode* opcode = inst->opcode;
	tilegx_bundle_bits bits = opcode->fixed_bit_values[inst->pipe];

	const struct tilegx_operand* operand = NULL;
	for (i = 0; i < opcode->num_operands; i++) {
		operand = &tilegx_operands[opcode->operands[inst->pipe][i]];
		val = inst->operand_value[i];

		bits |= operand->insert(val);
	}

	return bits;
}

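/* Emit the queued instructions: as a single bundle when assign_pipes()
   succeeds, otherwise split across multiple bundles as described in the
   comments below. */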
static sljit_s32 update_buffer(struct sljit_compiler *compiler)
{
	int i;
	int orig_index = inst_buf_index;
	struct jit_instr inst0 = inst_buf[0];
	struct jit_instr inst1 = inst_buf[1];
	struct jit_instr inst2 = inst_buf[2];
	tilegx_bundle_bits bits = 0;

	/* If the bundle is valid as is, encode it and emit the bundle. */
	if (assign_pipes() == 0) {
		for (i = 0; i < inst_buf_index; i++) {
			bits |= get_bundle_bit(inst_buf + i);
#ifdef TILEGX_JIT_DEBUG
			printf("|%04d", inst_buf[i].line);
#endif
		}
#ifdef TILEGX_JIT_DEBUG
		if (inst_buf_index == 3)
			printf("|M0|:\t");
		else
			printf("|M0|:\t\t");
		print_insn_tilegx(&bits);
#endif

		inst_buf_index = 0;

#ifdef TILEGX_JIT_DEBUG
		return push_inst_nodebug(compiler, bits);
#else
		return push_inst(compiler, bits);
#endif
	}

	/* If the bundle is invalid, split it in two. First encode the first two
	   (or possibly 1) instructions, and then the last, separately. Note that
	   assign_pipes may have re-ordered the instrs (by inserting no-ops in
	   lower slots) so we need to reset them. */

	inst_buf_index = orig_index - 1;
	inst_buf[0] = inst0;
	inst_buf[1] = inst1;
	inst_buf[2] = inst2;
	if (assign_pipes() == 0) {
		for (i = 0; i < inst_buf_index; i++) {
			bits |= get_bundle_bit(inst_buf + i);
#ifdef TILEGX_JIT_DEBUG
			printf("|%04d", inst_buf[i].line);
#endif
		}

#ifdef TILEGX_JIT_DEBUG
		if (inst_buf_index == 3)
			printf("|M1|:\t");
		else
			printf("|M1|:\t\t");
		print_insn_tilegx(&bits);
#endif

		if ((orig_index - 1) == 2) {
			inst_buf[0] = inst2;
			inst_buf_index = 1;
		} else if ((orig_index - 1) == 1) {
			inst_buf[0] = inst1;
			inst_buf_index = 1;
		} else
			SLJIT_UNREACHABLE();

#ifdef TILEGX_JIT_DEBUG
		return push_inst_nodebug(compiler, bits);
#else
		return push_inst(compiler, bits);
#endif
	} else {
		/* We had 3 instrs of which the first 2 can't live in the same bundle.
		   Split those two. Note that we don't try to then combine the second
		   and third instr into a single bundle.  First instruction: */
		inst_buf_index = 1;
		inst_buf[0] = inst0;
		inst_buf[1] = inst1;
		inst_buf[2] = inst2;
		if (assign_pipes() == 0) {
			for (i = 0; i < inst_buf_index; i++) {
				bits |= get_bundle_bit(inst_buf + i);
#ifdef TILEGX_JIT_DEBUG
				printf("|%04d", inst_buf[i].line);
#endif
			}

#ifdef TILEGX_JIT_DEBUG
			if (inst_buf_index == 3)
				printf("|M2|:\t");
			else
				printf("|M2|:\t\t");
			print_insn_tilegx(&bits);
#endif

			inst_buf[0] = inst1;
			inst_buf[1] = inst2;
			inst_buf_index = orig_index - 1;
#ifdef TILEGX_JIT_DEBUG
			return push_inst_nodebug(compiler, bits);
#else
			return push_inst(compiler, bits);
#endif
		} else
			SLJIT_UNREACHABLE();
	}

	SLJIT_UNREACHABLE();
}

static sljit_s32 flush_buffer(struct sljit_compiler *compiler)
{
	while (inst_buf_index != 0) {
		FAIL_IF(update_buffer(compiler));
	}
	return SLJIT_SUCCESS;
}

static sljit_s32 push_4_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int op1, int op2, int op3, int line)
{
	if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE)
		FAIL_IF(update_buffer(compiler));

	const struct tilegx_opcode* opcode = &tilegx_opcodes[opc];
	inst_buf[inst_buf_index].opcode = opcode;
	inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode);
	inst_buf[inst_buf_index].operand_value[0] = op0;
	inst_buf[inst_buf_index].operand_value[1] = op1;
	inst_buf[inst_buf_index].operand_value[2] = op2;
	inst_buf[inst_buf_index].operand_value[3] = op3;
	inst_buf[inst_buf_index].input_registers = 1L << op1;
	inst_buf[inst_buf_index].output_registers = 1L << op0;
	inst_buf[inst_buf_index].line = line;
	inst_buf_index++;

	return SLJIT_SUCCESS;
}

static sljit_s32 push_3_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int op1, int op2, int line)
{
	if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE)
		FAIL_IF(update_buffer(compiler));

	const struct tilegx_opcode* opcode = &tilegx_opcodes[opc];
	inst_buf[inst_buf_index].opcode = opcode;
	inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode);
	inst_buf[inst_buf_index].operand_value[0] = op0;
	inst_buf[inst_buf_index].operand_value[1] = op1;
	inst_buf[inst_buf_index].operand_value[2] = op2;
	inst_buf[inst_buf_index].line = line;

	switch (opc) {
	case TILEGX_OPC_ST_ADD:
		inst_buf[inst_buf_index].input_registers = (1L << op0) | (1L << op1);
		inst_buf[inst_buf_index].output_registers = 1L << op0;
		break;
	case TILEGX_OPC_LD_ADD:
		inst_buf[inst_buf_index].input_registers = 1L << op1;
		inst_buf[inst_buf_index].output_registers = (1L << op0) | (1L << op1);
		break;
	case TILEGX_OPC_ADD:
	case TILEGX_OPC_AND:
	case TILEGX_OPC_SUB:
	case TILEGX_OPC_MULX:
	case TILEGX_OPC_OR:
	case TILEGX_OPC_XOR:
	case TILEGX_OPC_NOR:
	case TILEGX_OPC_SHL:
	case TILEGX_OPC_SHRU:
	case TILEGX_OPC_SHRS:
	case TILEGX_OPC_CMPLTU:
	case TILEGX_OPC_CMPLTS:
	case TILEGX_OPC_CMOVEQZ:
	case TILEGX_OPC_CMOVNEZ:
		inst_buf[inst_buf_index].input_registers = (1L << op1) | (1L << op2);
		inst_buf[inst_buf_index].output_registers = 1L << op0;
		break;
	case TILEGX_OPC_ADDLI:
	case TILEGX_OPC_XORI:
	case TILEGX_OPC_ORI:
	case TILEGX_OPC_SHLI:
	case TILEGX_OPC_SHRUI:
	case TILEGX_OPC_SHRSI:
	case TILEGX_OPC_SHL16INSLI:
	case TILEGX_OPC_CMPLTUI:
	case TILEGX_OPC_CMPLTSI:
		inst_buf[inst_buf_index].input_registers = 1L << op1;
		inst_buf[inst_buf_index].output_registers = 1L << op0;
		break;
	default:
		printf("unrecognized opc: %s\n", opcode->name);
		SLJIT_UNREACHABLE();
	}

	inst_buf_index++;

	return SLJIT_SUCCESS;
}

static sljit_s32 push_2_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int op1, int line)
{
	if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE)
		FAIL_IF(update_buffer(compiler));

	const struct tilegx_opcode* opcode = &tilegx_opcodes[opc];
	inst_buf[inst_buf_index].opcode = opcode;
	inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode);
	inst_buf[inst_buf_index].operand_value[0] = op0;
	inst_buf[inst_buf_index].operand_value[1] = op1;
	inst_buf[inst_buf_index].line = line;

	switch (opc) {
	case TILEGX_OPC_BEQZ:
	case TILEGX_OPC_BNEZ:
		inst_buf[inst_buf_index].input_registers = 1L << op0;
		break;
	case TILEGX_OPC_ST:
	case TILEGX_OPC_ST1:
	case TILEGX_OPC_ST2:
	case TILEGX_OPC_ST4:
		inst_buf[inst_buf_index].input_registers = (1L << op0) | (1L << op1);
		inst_buf[inst_buf_index].output_registers = 0;
		break;
	case TILEGX_OPC_CLZ:
	case TILEGX_OPC_LD:
	case TILEGX_OPC_LD1U:
	case TILEGX_OPC_LD1S:
	case TILEGX_OPC_LD2U:
	case TILEGX_OPC_LD2S:
	case TILEGX_OPC_LD4U:
	case TILEGX_OPC_LD4S:
		inst_buf[inst_buf_index].input_registers = 1L << op1;
		inst_buf[inst_buf_index].output_registers = 1L << op0;
		break;
	default:
		printf("unrecognized opc: %s\n", opcode->name);
		SLJIT_UNREACHABLE();
	}

	inst_buf_index++;

	return SLJIT_SUCCESS;
}

static sljit_s32 push_0_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int line)
{
	if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE)
		FAIL_IF(update_buffer(compiler));

	const struct tilegx_opcode* opcode = &tilegx_opcodes[opc];
	inst_buf[inst_buf_index].opcode = opcode;
	inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode);
	inst_buf[inst_buf_index].input_registers = 0;
	inst_buf[inst_buf_index].output_registers = 0;
	inst_buf[inst_buf_index].line = line;
	inst_buf_index++;

	return SLJIT_SUCCESS;
}

static sljit_s32 push_jr_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int line)
{
	if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE)
		FAIL_IF(update_buffer(compiler));

	const struct tilegx_opcode* opcode = &tilegx_opcodes[opc];
	inst_buf[inst_buf_index].opcode = opcode;
	inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode);
	inst_buf[inst_buf_index].operand_value[0] = op0;
	inst_buf[inst_buf_index].input_registers = 1L << op0;
	inst_buf[inst_buf_index].output_registers = 0;
	inst_buf[inst_buf_index].line = line;
	inst_buf_index++;

	return flush_buffer(compiler);
}

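/* Try to shorten a jump once its target is known: a branch is used when
   the bundle offset fits in 17 bits, a direct J/JAL when the jump offset
   field can reach the target; otherwise the original 3-instruction
   address load is kept and patched later. */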
static SLJIT_INLINE sljit_ins * detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code)
{
	sljit_sw diff;
	sljit_uw target_addr;
	sljit_ins *inst;

	if (jump->flags & SLJIT_REWRITABLE_JUMP)
		return code_ptr;

	if (jump->flags & JUMP_ADDR)
		target_addr = jump->u.target;
	else {
		SLJIT_ASSERT(jump->flags & JUMP_LABEL);
		target_addr = (sljit_uw)(code + jump->u.label->size);
	}

	inst = (sljit_ins *)jump->addr;
	if (jump->flags & IS_COND)
		inst--;

	diff = ((sljit_sw) target_addr - (sljit_sw) inst) >> 3;
	if (diff <= SIMM_17BIT_MAX && diff >= SIMM_17BIT_MIN) {
		jump->flags |= PATCH_B;

		if (!(jump->flags & IS_COND)) {
			if (jump->flags & IS_JAL) {
				jump->flags &= ~(PATCH_B);
				jump->flags |= PATCH_J;
				inst[0] = JAL_X1;

#ifdef TILEGX_JIT_DEBUG
				printf("[runtime relocate]%04d:\t", __LINE__);
				print_insn_tilegx(inst);
#endif
			} else {
				inst[0] = BEQZ_X1 | SRCA_X1(ZERO);

#ifdef TILEGX_JIT_DEBUG
				printf("[runtime relocate]%04d:\t", __LINE__);
				print_insn_tilegx(inst);
#endif
			}

			return inst;
		}

		inst[0] = inst[0] ^ (0x7L << 55);

#ifdef TILEGX_JIT_DEBUG
		printf("[runtime relocate]%04d:\t", __LINE__);
		print_insn_tilegx(inst);
#endif
		jump->addr -= sizeof(sljit_ins);
		return inst;
	}

	if (jump->flags & IS_COND) {
		if ((target_addr & ~0x3FFFFFFFL) == ((jump->addr + sizeof(sljit_ins)) & ~0x3FFFFFFFL)) {
			jump->flags |= PATCH_J;
			inst[0] = (inst[0] & ~(BOFF_X1(-1))) | BOFF_X1(2);
			inst[1] = J_X1;
			return inst + 1;
		}

		return code_ptr;
	}

	if ((target_addr & ~0x3FFFFFFFL) == ((jump->addr + sizeof(sljit_ins)) & ~0x3FFFFFFFL)) {
		jump->flags |= PATCH_J;

		if (jump->flags & IS_JAL) {
			inst[0] = JAL_X1;

#ifdef TILEGX_JIT_DEBUG
			printf("[runtime relocate]%04d:\t", __LINE__);
			print_insn_tilegx(inst);
#endif

		} else {
			inst[0] = J_X1;

#ifdef TILEGX_JIT_DEBUG
			printf("[runtime relocate]%04d:\t", __LINE__);
			print_insn_tilegx(inst);
#endif
		}

		return inst;
	}

	return code_ptr;
}

SLJIT_API_FUNC_ATTRIBUTE void * sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_memory_fragment *buf;
	sljit_ins *code;
	sljit_ins *code_ptr;
	sljit_ins *buf_ptr;
	sljit_ins *buf_end;
	sljit_uw word_count;
	sljit_uw addr;

	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_generate_code(compiler));
	reverse_buf(compiler);

	code = (sljit_ins *)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins));
	PTR_FAIL_WITH_EXEC_IF(code);
	buf = compiler->buf;

	code_ptr = code;
	word_count = 0;
	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
	do {
		buf_ptr = (sljit_ins *)buf->memory;
		buf_end = buf_ptr + (buf->used_size >> 3);
		do {
			*code_ptr = *buf_ptr++;
			SLJIT_ASSERT(!label || label->size >= word_count);
			SLJIT_ASSERT(!jump || jump->addr >= word_count);
			SLJIT_ASSERT(!const_ || const_->addr >= word_count);
			/* These structures are ordered by their address. */
			if (label && label->size == word_count) {
				/* Just recording the address. */
				label->addr = (sljit_uw) code_ptr;
				label->size = code_ptr - code;
				label = label->next;
			}

			if (jump && jump->addr == word_count) {
				if (jump->flags & IS_JAL)
					jump->addr = (sljit_uw)(code_ptr - 4);
				else
					jump->addr = (sljit_uw)(code_ptr - 3);

				code_ptr = detect_jump_type(jump, code_ptr, code);
				jump = jump->next;
			}

			if (const_ && const_->addr == word_count) {
				/* Just recording the address. */
				const_->addr = (sljit_uw) code_ptr;
				const_ = const_->next;
			}

			code_ptr++;
			word_count++;
		} while (buf_ptr < buf_end);

		buf = buf->next;
	} while (buf);

	if (label && label->size == word_count) {
		label->addr = (sljit_uw) code_ptr;
		label->size = code_ptr - code;
		label = label->next;
	}

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!const_);
	SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);

	jump = compiler->jumps;
	while (jump) {
		do {
			addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
			buf_ptr = (sljit_ins *)jump->addr;

			if (jump->flags & PATCH_B) {
				addr = (sljit_sw)(addr - (jump->addr)) >> 3;
				SLJIT_ASSERT((sljit_sw) addr <= SIMM_17BIT_MAX && (sljit_sw) addr >= SIMM_17BIT_MIN);
				buf_ptr[0] = (buf_ptr[0] & ~(BOFF_X1(-1))) | BOFF_X1(addr);

#ifdef TILEGX_JIT_DEBUG
				printf("[runtime relocate]%04d:\t", __LINE__);
				print_insn_tilegx(buf_ptr);
#endif
				break;
			}

			if (jump->flags & PATCH_J) {
				SLJIT_ASSERT((addr & ~0x3FFFFFFFL) == ((jump->addr + sizeof(sljit_ins)) & ~0x3FFFFFFFL));
				addr = (sljit_sw)(addr - (jump->addr)) >> 3;
				buf_ptr[0] = (buf_ptr[0] & ~(JOFF_X1(-1))) | JOFF_X1(addr);

#ifdef TILEGX_JIT_DEBUG
				printf("[runtime relocate]%04d:\t", __LINE__);
				print_insn_tilegx(buf_ptr);
#endif
				break;
			}

			SLJIT_ASSERT(!(jump->flags & IS_JAL));

			/* Set the fields of immediate loads. */
			buf_ptr[0] = (buf_ptr[0] & ~(0xFFFFL << 43)) | (((addr >> 32) & 0xFFFFL) << 43);
			buf_ptr[1] = (buf_ptr[1] & ~(0xFFFFL << 43)) | (((addr >> 16) & 0xFFFFL) << 43);
			buf_ptr[2] = (buf_ptr[2] & ~(0xFFFFL << 43)) | ((addr & 0xFFFFL) << 43);
		} while (0);

		jump = jump->next;
	}

	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
	SLJIT_CACHE_FLUSH(code, code_ptr);
	return code;
}

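/* Materialize an immediate with the shortest ADDLI/SHL16INSLI sequence:
   one instruction for 16-bit values, and one extra SHL16INSLI for each
   additional 16 bits, up to four instructions for a full 64-bit value. */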
   1122  1.3  alnsn static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_ar, sljit_sw imm)
   1123  1.1  alnsn {
   1124  1.1  alnsn 
   1125  1.1  alnsn 	if (imm <= SIMM_16BIT_MAX && imm >= SIMM_16BIT_MIN)
   1126  1.1  alnsn 		return ADDLI(dst_ar, ZERO, imm);
   1127  1.1  alnsn 
   1128  1.1  alnsn 	if (imm <= SIMM_32BIT_MAX && imm >= SIMM_32BIT_MIN) {
   1129  1.1  alnsn 		FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 16));
   1130  1.1  alnsn 		return SHL16INSLI(dst_ar, dst_ar, imm);
   1131  1.1  alnsn 	}
   1132  1.1  alnsn 
   1133  1.1  alnsn 	if (imm <= SIMM_48BIT_MAX && imm >= SIMM_48BIT_MIN) {
   1134  1.1  alnsn 		FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 32));
   1135  1.1  alnsn 		FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 16));
   1136  1.1  alnsn 		return SHL16INSLI(dst_ar, dst_ar, imm);
   1137  1.1  alnsn 	}
   1138  1.1  alnsn 
   1139  1.1  alnsn 	FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 48));
   1140  1.1  alnsn 	FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 32));
   1141  1.1  alnsn 	FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 16));
   1142  1.1  alnsn 	return SHL16INSLI(dst_ar, dst_ar, imm);
   1143  1.1  alnsn }
   1144  1.1  alnsn 
   1145  1.3  alnsn static sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst_ar, sljit_sw imm, int flush)
   1146  1.1  alnsn {
    1147  1.1  alnsn 	/* Should *not* be optimized as load_immediate, as the pcre relocation
   1148  1.1  alnsn 	   mechanism will match this fixed 4-instruction pattern. */
   1149  1.1  alnsn 	if (flush) {
   1150  1.1  alnsn 		FAIL_IF(ADDLI_SOLO(dst_ar, ZERO, imm >> 32));
   1151  1.1  alnsn 		FAIL_IF(SHL16INSLI_SOLO(dst_ar, dst_ar, imm >> 16));
   1152  1.1  alnsn 		return SHL16INSLI_SOLO(dst_ar, dst_ar, imm);
   1153  1.1  alnsn 	}
   1154  1.1  alnsn 
   1155  1.1  alnsn 	FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 32));
   1156  1.1  alnsn 	FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 16));
   1157  1.1  alnsn 	return SHL16INSLI(dst_ar, dst_ar, imm);
   1158  1.1  alnsn }
   1159  1.1  alnsn 
   1160  1.3  alnsn static sljit_s32 emit_const_64(struct sljit_compiler *compiler, sljit_s32 dst_ar, sljit_sw imm, int flush)
   1161  1.1  alnsn {
    1162  1.1  alnsn 	/* Should *not* be optimized as load_immediate, as the pcre relocation
   1163  1.1  alnsn 	   mechanism will match this fixed 4-instruction pattern. */
   1164  1.1  alnsn 	if (flush) {
   1165  1.1  alnsn 		FAIL_IF(ADDLI_SOLO(reg_map[dst_ar], ZERO, imm >> 48));
   1166  1.1  alnsn 		FAIL_IF(SHL16INSLI_SOLO(reg_map[dst_ar], reg_map[dst_ar], imm >> 32));
   1167  1.1  alnsn 		FAIL_IF(SHL16INSLI_SOLO(reg_map[dst_ar], reg_map[dst_ar], imm >> 16));
   1168  1.1  alnsn 		return SHL16INSLI_SOLO(reg_map[dst_ar], reg_map[dst_ar], imm);
   1169  1.1  alnsn 	}
   1170  1.1  alnsn 
   1171  1.1  alnsn 	FAIL_IF(ADDLI(reg_map[dst_ar], ZERO, imm >> 48));
   1172  1.1  alnsn 	FAIL_IF(SHL16INSLI(reg_map[dst_ar], reg_map[dst_ar], imm >> 32));
   1173  1.1  alnsn 	FAIL_IF(SHL16INSLI(reg_map[dst_ar], reg_map[dst_ar], imm >> 16));
   1174  1.1  alnsn 	return SHL16INSLI(reg_map[dst_ar], reg_map[dst_ar], imm);
   1175  1.1  alnsn }
   1176  1.1  alnsn 
   1177  1.3  alnsn SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
   1178  1.3  alnsn 	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
   1179  1.3  alnsn 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
   1180  1.1  alnsn {
   1181  1.1  alnsn 	sljit_ins base;
   1182  1.3  alnsn 	sljit_s32 i, tmp;
   1183  1.3  alnsn 
   1184  1.1  alnsn 	CHECK_ERROR();
   1185  1.3  alnsn 	CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
   1186  1.3  alnsn 	set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
   1187  1.1  alnsn 
   1188  1.3  alnsn 	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
   1189  1.1  alnsn 	local_size = (local_size + 7) & ~7;
   1190  1.1  alnsn 	compiler->local_size = local_size;
   1191  1.1  alnsn 
   1192  1.1  alnsn 	if (local_size <= SIMM_16BIT_MAX) {
   1193  1.1  alnsn 		/* Frequent case. */
   1194  1.1  alnsn 		FAIL_IF(ADDLI(SLJIT_LOCALS_REG_mapped, SLJIT_LOCALS_REG_mapped, -local_size));
   1195  1.1  alnsn 		base = SLJIT_LOCALS_REG_mapped;
   1196  1.1  alnsn 	} else {
   1197  1.1  alnsn 		FAIL_IF(load_immediate(compiler, TMP_REG1_mapped, local_size));
   1198  1.1  alnsn 		FAIL_IF(ADD(TMP_REG2_mapped, SLJIT_LOCALS_REG_mapped, ZERO));
   1199  1.1  alnsn 		FAIL_IF(SUB(SLJIT_LOCALS_REG_mapped, SLJIT_LOCALS_REG_mapped, TMP_REG1_mapped));
   1200  1.1  alnsn 		base = TMP_REG2_mapped;
   1201  1.1  alnsn 		local_size = 0;
   1202  1.1  alnsn 	}
   1203  1.1  alnsn 
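                     	/* Frame layout used below (a sketch): the return address is stored at
                     	   [base + local_size - 8], and the saved S registers and the reserved
                     	   R registers follow at descending 8-byte offsets. */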
   1204  1.3  alnsn 	/* Save the return address. */
   1205  1.1  alnsn 	FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 8));
   1206  1.1  alnsn 	FAIL_IF(ST_ADD(ADDR_TMP_mapped, RA, -8));
   1207  1.1  alnsn 
   1208  1.3  alnsn 	/* Save the S registers. */
   1209  1.3  alnsn 	tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
   1210  1.3  alnsn 	for (i = SLJIT_S0; i >= tmp; i--) {
   1211  1.3  alnsn 		FAIL_IF(ST_ADD(ADDR_TMP_mapped, reg_map[i], -8));
   1212  1.3  alnsn 	}
   1213  1.1  alnsn 
   1214  1.3  alnsn 	/* Save the R registers that need to be reserved. */
   1215  1.3  alnsn 	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
   1216  1.3  alnsn 		FAIL_IF(ST_ADD(ADDR_TMP_mapped, reg_map[i], -8));
   1217  1.3  alnsn 	}
   1218  1.1  alnsn 
   1219  1.3  alnsn 	/* Move the arguments to S registers. */
   1220  1.3  alnsn 	for (i = 0; i < args; i++) {
   1221  1.3  alnsn 		FAIL_IF(ADD(reg_map[SLJIT_S0 - i], i, ZERO));
   1222  1.3  alnsn 	}
   1223  1.1  alnsn 
   1224  1.1  alnsn 	return SLJIT_SUCCESS;
   1225  1.1  alnsn }
   1226  1.1  alnsn 
   1227  1.3  alnsn SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
   1228  1.3  alnsn 	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
   1229  1.3  alnsn 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
   1230  1.1  alnsn {
   1231  1.3  alnsn 	CHECK_ERROR();
   1232  1.3  alnsn 	CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
   1233  1.3  alnsn 	set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
   1234  1.1  alnsn 
   1235  1.3  alnsn 	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
   1236  1.3  alnsn 	compiler->local_size = (local_size + 7) & ~7;
   1237  1.1  alnsn 
   1238  1.3  alnsn 	return SLJIT_SUCCESS;
   1239  1.1  alnsn }
   1240  1.1  alnsn 
   1241  1.3  alnsn SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
   1242  1.1  alnsn {
   1243  1.3  alnsn 	sljit_s32 local_size;
   1244  1.1  alnsn 	sljit_ins base;
   1245  1.3  alnsn 	sljit_s32 i, tmp;
   1246  1.3  alnsn 	sljit_s32 saveds;
   1247  1.1  alnsn 
   1248  1.1  alnsn 	CHECK_ERROR();
   1249  1.3  alnsn 	CHECK(check_sljit_emit_return(compiler, op, src, srcw));
   1250  1.1  alnsn 
   1251  1.1  alnsn 	FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
   1252  1.1  alnsn 
   1253  1.1  alnsn 	local_size = compiler->local_size;
   1254  1.1  alnsn 	if (local_size <= SIMM_16BIT_MAX)
   1255  1.1  alnsn 		base = SLJIT_LOCALS_REG_mapped;
   1256  1.1  alnsn 	else {
   1257  1.1  alnsn 		FAIL_IF(load_immediate(compiler, TMP_REG1_mapped, local_size));
   1258  1.1  alnsn 		FAIL_IF(ADD(TMP_REG1_mapped, SLJIT_LOCALS_REG_mapped, TMP_REG1_mapped));
   1259  1.1  alnsn 		base = TMP_REG1_mapped;
   1260  1.1  alnsn 		local_size = 0;
   1261  1.1  alnsn 	}
   1262  1.1  alnsn 
   1263  1.3  alnsn 	/* Restore the return address. */
   1264  1.1  alnsn 	FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 8));
   1265  1.3  alnsn 	FAIL_IF(LD_ADD(RA, ADDR_TMP_mapped, -8));
   1266  1.1  alnsn 
   1267  1.3  alnsn 	/* Restore the S registers. */
   1268  1.3  alnsn 	saveds = compiler->saveds;
   1269  1.3  alnsn 	tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
   1270  1.3  alnsn 	for (i = SLJIT_S0; i >= tmp; i--) {
   1271  1.3  alnsn 		FAIL_IF(LD_ADD(reg_map[i], ADDR_TMP_mapped, -8));
   1272  1.1  alnsn 	}
   1273  1.1  alnsn 
   1274  1.3  alnsn 	/* Restore the R registers that need to be reserved. */
   1275  1.3  alnsn 	for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
   1276  1.3  alnsn 		FAIL_IF(LD_ADD(reg_map[i], ADDR_TMP_mapped, -8));
   1277  1.1  alnsn 	}
   1278  1.1  alnsn 
   1279  1.1  alnsn 	if (compiler->local_size <= SIMM_16BIT_MAX)
   1280  1.1  alnsn 		FAIL_IF(ADDLI(SLJIT_LOCALS_REG_mapped, SLJIT_LOCALS_REG_mapped, compiler->local_size));
   1281  1.1  alnsn 	else
   1282  1.1  alnsn 		FAIL_IF(ADD(SLJIT_LOCALS_REG_mapped, TMP_REG1_mapped, ZERO));
   1283  1.1  alnsn 
   1284  1.1  alnsn 	return JR(RA);
   1285  1.1  alnsn }
   1286  1.1  alnsn 
    1287  1.1  alnsn /* reg_ar is an absolute register! */
   1288  1.1  alnsn 
   1289  1.1  alnsn /* Can perform an operation using at most 1 instruction. */
   1290  1.3  alnsn static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw)
   1291  1.1  alnsn {
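                     	/* Returns non-zero when the access is (or, with ARG_TEST, could be)
                     	   handled by this short form; zero means the caller must fall back to
                     	   the generic getput_arg path. */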
   1292  1.1  alnsn 	SLJIT_ASSERT(arg & SLJIT_MEM);
   1293  1.1  alnsn 
   1294  1.1  alnsn 	if ((!(flags & WRITE_BACK) || !(arg & REG_MASK))
   1295  1.1  alnsn 			&& !(arg & OFFS_REG_MASK) && argw <= SIMM_16BIT_MAX && argw >= SIMM_16BIT_MIN) {
    1296  1.1  alnsn 		/* Works for both absolute and relative addresses. */
   1297  1.1  alnsn 		if (SLJIT_UNLIKELY(flags & ARG_TEST))
   1298  1.1  alnsn 			return 1;
   1299  1.1  alnsn 
   1300  1.1  alnsn 		FAIL_IF(ADDLI(ADDR_TMP_mapped, reg_map[arg & REG_MASK], argw));
   1301  1.1  alnsn 
   1302  1.1  alnsn 		if (flags & LOAD_DATA)
   1303  1.1  alnsn 			FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, ADDR_TMP_mapped));
   1304  1.1  alnsn 		else
   1305  1.1  alnsn 			FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], ADDR_TMP_mapped, reg_ar));
   1306  1.1  alnsn 
   1307  1.1  alnsn 		return -1;
   1308  1.1  alnsn 	}
   1309  1.1  alnsn 
   1310  1.1  alnsn 	return 0;
   1311  1.1  alnsn }
   1312  1.1  alnsn 
   1313  1.1  alnsn /* See getput_arg below.
   1314  1.1  alnsn    Note: can_cache is called only for binary operators. Those
    1315  1.1  alnsn    operators always use word arguments without write back. */
   1316  1.3  alnsn static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
   1317  1.1  alnsn {
   1318  1.1  alnsn 	SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM));
   1319  1.1  alnsn 
   1320  1.1  alnsn 	/* Simple operation except for updates. */
   1321  1.1  alnsn 	if (arg & OFFS_REG_MASK) {
   1322  1.1  alnsn 		argw &= 0x3;
   1323  1.1  alnsn 		next_argw &= 0x3;
   1324  1.1  alnsn 		if (argw && argw == next_argw
   1325  1.1  alnsn 				&& (arg == next_arg || (arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK)))
   1326  1.1  alnsn 			return 1;
   1327  1.1  alnsn 		return 0;
   1328  1.1  alnsn 	}
   1329  1.1  alnsn 
   1330  1.1  alnsn 	if (arg == next_arg) {
   1331  1.1  alnsn 		if (((next_argw - argw) <= SIMM_16BIT_MAX
   1332  1.1  alnsn 				&& (next_argw - argw) >= SIMM_16BIT_MIN))
   1333  1.1  alnsn 			return 1;
   1334  1.1  alnsn 
   1335  1.1  alnsn 		return 0;
   1336  1.1  alnsn 	}
   1337  1.1  alnsn 
   1338  1.1  alnsn 	return 0;
   1339  1.1  alnsn }
   1340  1.1  alnsn 
   1341  1.1  alnsn /* Emit the necessary instructions. See can_cache above. */
   1342  1.3  alnsn static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
   1343  1.1  alnsn {
   1344  1.3  alnsn 	sljit_s32 tmp_ar, base;
   1345  1.1  alnsn 
   1346  1.1  alnsn 	SLJIT_ASSERT(arg & SLJIT_MEM);
   1347  1.1  alnsn 	if (!(next_arg & SLJIT_MEM)) {
   1348  1.1  alnsn 		next_arg = 0;
   1349  1.1  alnsn 		next_argw = 0;
   1350  1.1  alnsn 	}
   1351  1.1  alnsn 
   1352  1.1  alnsn 	if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA))
   1353  1.1  alnsn 		tmp_ar = reg_ar;
   1354  1.1  alnsn 	else
   1355  1.1  alnsn 		tmp_ar = TMP_REG1_mapped;
   1356  1.1  alnsn 
   1357  1.1  alnsn 	base = arg & REG_MASK;
   1358  1.1  alnsn 
   1359  1.1  alnsn 	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
   1360  1.1  alnsn 		argw &= 0x3;
   1361  1.1  alnsn 
   1362  1.1  alnsn 		if ((flags & WRITE_BACK) && reg_ar == reg_map[base]) {
   1363  1.1  alnsn 			SLJIT_ASSERT(!(flags & LOAD_DATA) && reg_map[TMP_REG1] != reg_ar);
   1364  1.1  alnsn 			FAIL_IF(ADD(TMP_REG1_mapped, reg_ar, ZERO));
   1365  1.1  alnsn 			reg_ar = TMP_REG1_mapped;
   1366  1.1  alnsn 		}
   1367  1.1  alnsn 
   1368  1.1  alnsn 		/* Using the cache. */
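                     		/* compiler->cache_arg / cache_argw roughly describe what TMP_REG3
                     		   currently holds (a shifted index register or a fully formed
                     		   address), so a matching access can reuse it instead of
                     		   recomputing it. */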
   1369  1.1  alnsn 		if (argw == compiler->cache_argw) {
   1370  1.1  alnsn 			if (!(flags & WRITE_BACK)) {
   1371  1.1  alnsn 				if (arg == compiler->cache_arg) {
   1372  1.1  alnsn 					if (flags & LOAD_DATA)
   1373  1.1  alnsn 						return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped);
   1374  1.1  alnsn 					else
   1375  1.1  alnsn 						return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar);
   1376  1.1  alnsn 				}
   1377  1.1  alnsn 
   1378  1.1  alnsn 				if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) {
   1379  1.1  alnsn 					if (arg == next_arg && argw == (next_argw & 0x3)) {
   1380  1.1  alnsn 						compiler->cache_arg = arg;
   1381  1.1  alnsn 						compiler->cache_argw = argw;
   1382  1.1  alnsn 						FAIL_IF(ADD(TMP_REG3_mapped, reg_map[base], TMP_REG3_mapped));
   1383  1.1  alnsn 						if (flags & LOAD_DATA)
   1384  1.1  alnsn 							return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped);
   1385  1.1  alnsn 						else
   1386  1.1  alnsn 							return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar);
   1387  1.1  alnsn 					}
   1388  1.1  alnsn 
   1389  1.1  alnsn 					FAIL_IF(ADD(tmp_ar, reg_map[base], TMP_REG3_mapped));
   1390  1.1  alnsn 					if (flags & LOAD_DATA)
   1391  1.1  alnsn 						return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, tmp_ar);
   1392  1.1  alnsn 					else
   1393  1.1  alnsn 						return PB2(data_transfer_insts[flags & MEM_MASK], tmp_ar, reg_ar);
   1394  1.1  alnsn 				}
   1395  1.1  alnsn 			} else {
   1396  1.1  alnsn 				if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) {
   1397  1.1  alnsn 					FAIL_IF(ADD(reg_map[base], reg_map[base], TMP_REG3_mapped));
   1398  1.1  alnsn 					if (flags & LOAD_DATA)
   1399  1.1  alnsn 						return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, reg_map[base]);
   1400  1.1  alnsn 					else
   1401  1.1  alnsn 						return PB2(data_transfer_insts[flags & MEM_MASK], reg_map[base], reg_ar);
   1402  1.1  alnsn 				}
   1403  1.1  alnsn 			}
   1404  1.1  alnsn 		}
   1405  1.1  alnsn 
   1406  1.1  alnsn 		if (SLJIT_UNLIKELY(argw)) {
   1407  1.1  alnsn 			compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK);
   1408  1.1  alnsn 			compiler->cache_argw = argw;
   1409  1.1  alnsn 			FAIL_IF(SHLI(TMP_REG3_mapped, reg_map[OFFS_REG(arg)], argw));
   1410  1.1  alnsn 		}
   1411  1.1  alnsn 
   1412  1.1  alnsn 		if (!(flags & WRITE_BACK)) {
   1413  1.1  alnsn 			if (arg == next_arg && argw == (next_argw & 0x3)) {
   1414  1.1  alnsn 				compiler->cache_arg = arg;
   1415  1.1  alnsn 				compiler->cache_argw = argw;
   1416  1.1  alnsn 				FAIL_IF(ADD(TMP_REG3_mapped, reg_map[base], reg_map[!argw ? OFFS_REG(arg) : TMP_REG3]));
   1417  1.1  alnsn 				tmp_ar = TMP_REG3_mapped;
   1418  1.1  alnsn 			} else
   1419  1.1  alnsn 				FAIL_IF(ADD(tmp_ar, reg_map[base], reg_map[!argw ? OFFS_REG(arg) : TMP_REG3]));
   1420  1.1  alnsn 
   1421  1.1  alnsn 			if (flags & LOAD_DATA)
   1422  1.1  alnsn 				return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, tmp_ar);
   1423  1.1  alnsn 			else
   1424  1.1  alnsn 				return PB2(data_transfer_insts[flags & MEM_MASK], tmp_ar, reg_ar);
   1425  1.1  alnsn 		}
   1426  1.1  alnsn 
   1427  1.1  alnsn 		FAIL_IF(ADD(reg_map[base], reg_map[base], reg_map[!argw ? OFFS_REG(arg) : TMP_REG3]));
   1428  1.1  alnsn 
   1429  1.1  alnsn 		if (flags & LOAD_DATA)
   1430  1.1  alnsn 			return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, reg_map[base]);
   1431  1.1  alnsn 		else
   1432  1.1  alnsn 			return PB2(data_transfer_insts[flags & MEM_MASK], reg_map[base], reg_ar);
   1433  1.1  alnsn 	}
   1434  1.1  alnsn 
   1435  1.1  alnsn 	if (SLJIT_UNLIKELY(flags & WRITE_BACK) && base) {
   1436  1.1  alnsn 		/* Update only applies if a base register exists. */
   1437  1.1  alnsn 		if (reg_ar == reg_map[base]) {
   1438  1.1  alnsn 			SLJIT_ASSERT(!(flags & LOAD_DATA) && TMP_REG1_mapped != reg_ar);
   1439  1.1  alnsn 			if (argw <= SIMM_16BIT_MAX && argw >= SIMM_16BIT_MIN) {
   1440  1.1  alnsn 				FAIL_IF(ADDLI(ADDR_TMP_mapped, reg_map[base], argw));
   1441  1.1  alnsn 				if (flags & LOAD_DATA)
   1442  1.1  alnsn 					FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, ADDR_TMP_mapped));
   1443  1.1  alnsn 				else
   1444  1.1  alnsn 					FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], ADDR_TMP_mapped, reg_ar));
   1445  1.1  alnsn 
   1446  1.1  alnsn 				if (argw)
   1447  1.1  alnsn 					return ADDLI(reg_map[base], reg_map[base], argw);
   1448  1.1  alnsn 
   1449  1.1  alnsn 				return SLJIT_SUCCESS;
   1450  1.1  alnsn 			}
   1451  1.1  alnsn 
   1452  1.1  alnsn 			FAIL_IF(ADD(TMP_REG1_mapped, reg_ar, ZERO));
   1453  1.1  alnsn 			reg_ar = TMP_REG1_mapped;
   1454  1.1  alnsn 		}
   1455  1.1  alnsn 
   1456  1.1  alnsn 		if (argw <= SIMM_16BIT_MAX && argw >= SIMM_16BIT_MIN) {
   1457  1.1  alnsn 			if (argw)
   1458  1.1  alnsn 				FAIL_IF(ADDLI(reg_map[base], reg_map[base], argw));
   1459  1.1  alnsn 		} else {
   1460  1.1  alnsn 			if (compiler->cache_arg == SLJIT_MEM
   1461  1.1  alnsn 					&& argw - compiler->cache_argw <= SIMM_16BIT_MAX
   1462  1.1  alnsn 					&& argw - compiler->cache_argw >= SIMM_16BIT_MIN) {
   1463  1.1  alnsn 				if (argw != compiler->cache_argw) {
   1464  1.1  alnsn 					FAIL_IF(ADD(TMP_REG3_mapped, TMP_REG3_mapped, argw - compiler->cache_argw));
   1465  1.1  alnsn 					compiler->cache_argw = argw;
   1466  1.1  alnsn 				}
   1467  1.1  alnsn 
   1468  1.1  alnsn 				FAIL_IF(ADD(reg_map[base], reg_map[base], TMP_REG3_mapped));
   1469  1.1  alnsn 			} else {
   1470  1.1  alnsn 				compiler->cache_arg = SLJIT_MEM;
   1471  1.1  alnsn 				compiler->cache_argw = argw;
   1472  1.1  alnsn 				FAIL_IF(load_immediate(compiler, TMP_REG3_mapped, argw));
   1473  1.1  alnsn 				FAIL_IF(ADD(reg_map[base], reg_map[base], TMP_REG3_mapped));
   1474  1.1  alnsn 			}
   1475  1.1  alnsn 		}
   1476  1.1  alnsn 
   1477  1.1  alnsn 		if (flags & LOAD_DATA)
   1478  1.1  alnsn 			return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, reg_map[base]);
   1479  1.1  alnsn 		else
   1480  1.1  alnsn 			return PB2(data_transfer_insts[flags & MEM_MASK], reg_map[base], reg_ar);
   1481  1.1  alnsn 	}
   1482  1.1  alnsn 
   1483  1.1  alnsn 	if (compiler->cache_arg == arg
   1484  1.1  alnsn 			&& argw - compiler->cache_argw <= SIMM_16BIT_MAX
   1485  1.1  alnsn 			&& argw - compiler->cache_argw >= SIMM_16BIT_MIN) {
   1486  1.1  alnsn 		if (argw != compiler->cache_argw) {
   1487  1.1  alnsn 			FAIL_IF(ADDLI(TMP_REG3_mapped, TMP_REG3_mapped, argw - compiler->cache_argw));
   1488  1.1  alnsn 			compiler->cache_argw = argw;
   1489  1.1  alnsn 		}
   1490  1.1  alnsn 
   1491  1.1  alnsn 		if (flags & LOAD_DATA)
   1492  1.1  alnsn 			return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped);
   1493  1.1  alnsn 		else
   1494  1.1  alnsn 			return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar);
   1495  1.1  alnsn 	}
   1496  1.1  alnsn 
   1497  1.1  alnsn 	if (compiler->cache_arg == SLJIT_MEM
   1498  1.1  alnsn 			&& argw - compiler->cache_argw <= SIMM_16BIT_MAX
   1499  1.1  alnsn 			&& argw - compiler->cache_argw >= SIMM_16BIT_MIN) {
   1500  1.1  alnsn 		if (argw != compiler->cache_argw)
   1501  1.1  alnsn 			FAIL_IF(ADDLI(TMP_REG3_mapped, TMP_REG3_mapped, argw - compiler->cache_argw));
   1502  1.1  alnsn 	} else {
   1503  1.1  alnsn 		compiler->cache_arg = SLJIT_MEM;
   1504  1.1  alnsn 		FAIL_IF(load_immediate(compiler, TMP_REG3_mapped, argw));
   1505  1.1  alnsn 	}
   1506  1.1  alnsn 
   1507  1.1  alnsn 	compiler->cache_argw = argw;
   1508  1.1  alnsn 
   1509  1.1  alnsn 	if (!base) {
   1510  1.1  alnsn 		if (flags & LOAD_DATA)
   1511  1.1  alnsn 			return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped);
   1512  1.1  alnsn 		else
   1513  1.1  alnsn 			return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar);
   1514  1.1  alnsn 	}
   1515  1.1  alnsn 
   1516  1.1  alnsn 	if (arg == next_arg
   1517  1.1  alnsn 			&& next_argw - argw <= SIMM_16BIT_MAX
   1518  1.1  alnsn 			&& next_argw - argw >= SIMM_16BIT_MIN) {
   1519  1.1  alnsn 		compiler->cache_arg = arg;
   1520  1.1  alnsn 		FAIL_IF(ADD(TMP_REG3_mapped, TMP_REG3_mapped, reg_map[base]));
   1521  1.1  alnsn 		if (flags & LOAD_DATA)
   1522  1.1  alnsn 			return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped);
   1523  1.1  alnsn 		else
   1524  1.1  alnsn 			return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar);
   1525  1.1  alnsn 	}
   1526  1.1  alnsn 
   1527  1.1  alnsn 	FAIL_IF(ADD(tmp_ar, TMP_REG3_mapped, reg_map[base]));
   1528  1.1  alnsn 
   1529  1.1  alnsn 	if (flags & LOAD_DATA)
   1530  1.1  alnsn 		return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, tmp_ar);
   1531  1.1  alnsn 	else
   1532  1.1  alnsn 		return PB2(data_transfer_insts[flags & MEM_MASK], tmp_ar, reg_ar);
   1533  1.1  alnsn }
   1534  1.1  alnsn 
   1535  1.3  alnsn static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw)
   1536  1.1  alnsn {
   1537  1.1  alnsn 	if (getput_arg_fast(compiler, flags, reg_ar, arg, argw))
   1538  1.1  alnsn 		return compiler->error;
   1539  1.1  alnsn 
   1540  1.1  alnsn 	compiler->cache_arg = 0;
   1541  1.1  alnsn 	compiler->cache_argw = 0;
   1542  1.1  alnsn 	return getput_arg(compiler, flags, reg_ar, arg, argw, 0, 0);
   1543  1.1  alnsn }
   1544  1.1  alnsn 
   1545  1.3  alnsn static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w)
   1546  1.1  alnsn {
   1547  1.1  alnsn 	if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
   1548  1.1  alnsn 		return compiler->error;
   1549  1.1  alnsn 	return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
   1550  1.1  alnsn }
   1551  1.1  alnsn 
   1552  1.3  alnsn SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
   1553  1.1  alnsn {
   1554  1.1  alnsn 	CHECK_ERROR();
   1555  1.3  alnsn 	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
   1556  1.1  alnsn 	ADJUST_LOCAL_OFFSET(dst, dstw);
   1557  1.1  alnsn 
   1558  1.1  alnsn 	/* For UNUSED dst. Uncommon, but possible. */
   1559  1.1  alnsn 	if (dst == SLJIT_UNUSED)
   1560  1.1  alnsn 		return SLJIT_SUCCESS;
   1561  1.1  alnsn 
   1562  1.1  alnsn 	if (FAST_IS_REG(dst))
   1563  1.1  alnsn 		return ADD(reg_map[dst], RA, ZERO);
   1564  1.1  alnsn 
   1565  1.1  alnsn 	/* Memory. */
   1566  1.1  alnsn 	return emit_op_mem(compiler, WORD_DATA, RA, dst, dstw);
   1567  1.1  alnsn }
   1568  1.1  alnsn 
   1569  1.3  alnsn SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
   1570  1.1  alnsn {
   1571  1.1  alnsn 	CHECK_ERROR();
   1572  1.3  alnsn 	CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
   1573  1.1  alnsn 	ADJUST_LOCAL_OFFSET(src, srcw);
   1574  1.1  alnsn 
   1575  1.1  alnsn 	if (FAST_IS_REG(src))
   1576  1.1  alnsn 		FAIL_IF(ADD(RA, reg_map[src], ZERO));
   1577  1.1  alnsn 
   1578  1.1  alnsn 	else if (src & SLJIT_MEM)
   1579  1.1  alnsn 		FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RA, src, srcw));
   1580  1.1  alnsn 
   1581  1.1  alnsn 	else if (src & SLJIT_IMM)
   1582  1.1  alnsn 		FAIL_IF(load_immediate(compiler, RA, srcw));
   1583  1.1  alnsn 
   1584  1.1  alnsn 	return JR(RA);
   1585  1.1  alnsn }
   1586  1.1  alnsn 
   1587  1.3  alnsn static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, sljit_s32 dst, sljit_s32 src1, sljit_sw src2)
   1588  1.1  alnsn {
   1589  1.3  alnsn 	sljit_s32 overflow_ra = 0;
   1590  1.1  alnsn 
   1591  1.1  alnsn 	switch (GET_OPCODE(op)) {
   1592  1.1  alnsn 	case SLJIT_MOV:
   1593  1.1  alnsn 	case SLJIT_MOV_P:
   1594  1.1  alnsn 		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
   1595  1.1  alnsn 		if (dst != src2)
   1596  1.1  alnsn 			return ADD(reg_map[dst], reg_map[src2], ZERO);
   1597  1.1  alnsn 		return SLJIT_SUCCESS;
   1598  1.1  alnsn 
   1599  1.3  alnsn 	case SLJIT_MOV_U32:
   1600  1.3  alnsn 	case SLJIT_MOV_S32:
   1601  1.1  alnsn 		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
   1602  1.1  alnsn 		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
   1603  1.3  alnsn 			if (op == SLJIT_MOV_S32)
   1604  1.1  alnsn 				return BFEXTS(reg_map[dst], reg_map[src2], 0, 31);
   1605  1.1  alnsn 
   1606  1.3  alnsn 			return BFEXTU(reg_map[dst], reg_map[src2], 0, 31);
   1607  1.3  alnsn 		} else if (dst != src2) {
   1608  1.3  alnsn 			SLJIT_ASSERT(src2 == 0);
   1609  1.3  alnsn 			return ADD(reg_map[dst], reg_map[src2], ZERO);
   1610  1.3  alnsn 		}
   1611  1.1  alnsn 
   1612  1.1  alnsn 		return SLJIT_SUCCESS;
   1613  1.1  alnsn 
   1614  1.3  alnsn 	case SLJIT_MOV_U8:
   1615  1.3  alnsn 	case SLJIT_MOV_S8:
   1616  1.1  alnsn 		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
   1617  1.1  alnsn 		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
   1618  1.3  alnsn 			if (op == SLJIT_MOV_S8)
   1619  1.1  alnsn 				return BFEXTS(reg_map[dst], reg_map[src2], 0, 7);
   1620  1.1  alnsn 
   1621  1.1  alnsn 			return BFEXTU(reg_map[dst], reg_map[src2], 0, 7);
   1622  1.3  alnsn 		} else if (dst != src2) {
   1623  1.3  alnsn 			SLJIT_ASSERT(src2 == 0);
   1624  1.3  alnsn 			return ADD(reg_map[dst], reg_map[src2], ZERO);
   1625  1.3  alnsn 		}
   1626  1.1  alnsn 
   1627  1.1  alnsn 		return SLJIT_SUCCESS;
   1628  1.1  alnsn 
   1629  1.3  alnsn 	case SLJIT_MOV_U16:
   1630  1.3  alnsn 	case SLJIT_MOV_S16:
   1631  1.1  alnsn 		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
   1632  1.1  alnsn 		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
   1633  1.3  alnsn 			if (op == SLJIT_MOV_S16)
   1634  1.1  alnsn 				return BFEXTS(reg_map[dst], reg_map[src2], 0, 15);
   1635  1.1  alnsn 
   1636  1.1  alnsn 			return BFEXTU(reg_map[dst], reg_map[src2], 0, 15);
   1637  1.3  alnsn 		} else if (dst != src2) {
   1638  1.3  alnsn 			SLJIT_ASSERT(src2 == 0);
   1639  1.3  alnsn 			return ADD(reg_map[dst], reg_map[src2], ZERO);
   1640  1.3  alnsn 		}
   1641  1.1  alnsn 
   1642  1.1  alnsn 		return SLJIT_SUCCESS;
   1643  1.1  alnsn 
   1644  1.1  alnsn 	case SLJIT_NOT:
   1645  1.1  alnsn 		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
   1646  1.1  alnsn 		if (op & SLJIT_SET_E)
   1647  1.1  alnsn 			FAIL_IF(NOR(EQUAL_FLAG, reg_map[src2], reg_map[src2]));
   1648  1.1  alnsn 		if (CHECK_FLAGS(SLJIT_SET_E))
   1649  1.1  alnsn 			FAIL_IF(NOR(reg_map[dst], reg_map[src2], reg_map[src2]));
   1650  1.1  alnsn 
   1651  1.1  alnsn 		return SLJIT_SUCCESS;
   1652  1.1  alnsn 
   1653  1.1  alnsn 	case SLJIT_CLZ:
   1654  1.1  alnsn 		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
   1655  1.1  alnsn 		if (op & SLJIT_SET_E)
   1656  1.1  alnsn 			FAIL_IF(CLZ(EQUAL_FLAG, reg_map[src2]));
   1657  1.1  alnsn 		if (CHECK_FLAGS(SLJIT_SET_E))
   1658  1.1  alnsn 			FAIL_IF(CLZ(reg_map[dst], reg_map[src2]));
   1659  1.1  alnsn 
   1660  1.1  alnsn 		return SLJIT_SUCCESS;
   1661  1.1  alnsn 
   1662  1.1  alnsn 	case SLJIT_ADD:
   1663  1.1  alnsn 		if (flags & SRC2_IMM) {
   1664  1.1  alnsn 			if (op & SLJIT_SET_O) {
   1665  1.1  alnsn 				FAIL_IF(SHRUI(TMP_EREG1, reg_map[src1], 63));
   1666  1.1  alnsn 				if (src2 < 0)
   1667  1.1  alnsn 					FAIL_IF(XORI(TMP_EREG1, TMP_EREG1, 1));
   1668  1.1  alnsn 			}
   1669  1.1  alnsn 
   1670  1.1  alnsn 			if (op & SLJIT_SET_E)
   1671  1.1  alnsn 				FAIL_IF(ADDLI(EQUAL_FLAG, reg_map[src1], src2));
   1672  1.1  alnsn 
   1673  1.1  alnsn 			if (op & SLJIT_SET_C) {
   1674  1.1  alnsn 				if (src2 >= 0)
    1675  1.1  alnsn 					FAIL_IF(ORI(ULESS_FLAG, reg_map[src1], src2));
    1676  1.1  alnsn 				else {
    1677  1.1  alnsn 					FAIL_IF(ADDLI(ULESS_FLAG, ZERO, src2));
    1678  1.1  alnsn 					FAIL_IF(OR(ULESS_FLAG, reg_map[src1], ULESS_FLAG));
   1679  1.1  alnsn 				}
   1680  1.1  alnsn 			}
   1681  1.1  alnsn 
   1682  1.1  alnsn 			/* dst may be the same as src1 or src2. */
   1683  1.1  alnsn 			if (CHECK_FLAGS(SLJIT_SET_E))
   1684  1.1  alnsn 				FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], src2));
   1685  1.1  alnsn 
   1686  1.1  alnsn 			if (op & SLJIT_SET_O) {
   1687  1.1  alnsn 				FAIL_IF(SHRUI(OVERFLOW_FLAG, reg_map[dst], 63));
   1688  1.1  alnsn 
   1689  1.1  alnsn 				if (src2 < 0)
   1690  1.1  alnsn 					FAIL_IF(XORI(OVERFLOW_FLAG, OVERFLOW_FLAG, 1));
   1691  1.1  alnsn 			}
   1692  1.1  alnsn 		} else {
   1693  1.1  alnsn 			if (op & SLJIT_SET_O) {
   1694  1.1  alnsn 				FAIL_IF(XOR(TMP_EREG1, reg_map[src1], reg_map[src2]));
   1695  1.1  alnsn 				FAIL_IF(SHRUI(TMP_EREG1, TMP_EREG1, 63));
   1696  1.1  alnsn 
   1697  1.1  alnsn 				if (src1 != dst)
   1698  1.1  alnsn 					overflow_ra = reg_map[src1];
   1699  1.1  alnsn 				else if (src2 != dst)
   1700  1.1  alnsn 					overflow_ra = reg_map[src2];
   1701  1.1  alnsn 				else {
    1702  1.1  alnsn 					/* Rare occasion. */
   1703  1.1  alnsn 					FAIL_IF(ADD(TMP_EREG2, reg_map[src1], ZERO));
   1704  1.1  alnsn 					overflow_ra = TMP_EREG2;
   1705  1.1  alnsn 				}
   1706  1.1  alnsn 			}
   1707  1.1  alnsn 
   1708  1.1  alnsn 			if (op & SLJIT_SET_E)
    1709  1.1  alnsn 				FAIL_IF(ADD(EQUAL_FLAG, reg_map[src1], reg_map[src2]));
   1710  1.1  alnsn 
   1711  1.1  alnsn 			if (op & SLJIT_SET_C)
    1712  1.1  alnsn 				FAIL_IF(OR(ULESS_FLAG, reg_map[src1], reg_map[src2]));
   1713  1.1  alnsn 
   1714  1.1  alnsn 			/* dst may be the same as src1 or src2. */
   1715  1.1  alnsn 			if (CHECK_FLAGS(SLJIT_SET_E))
    1716  1.1  alnsn 				FAIL_IF(ADD(reg_map[dst], reg_map[src1], reg_map[src2]));
   1717  1.1  alnsn 
   1718  1.1  alnsn 			if (op & SLJIT_SET_O) {
    1719  1.1  alnsn 				FAIL_IF(XOR(OVERFLOW_FLAG, reg_map[dst], overflow_ra));
   1720  1.1  alnsn 				FAIL_IF(SHRUI(OVERFLOW_FLAG, OVERFLOW_FLAG, 63));
   1721  1.1  alnsn 			}
   1722  1.1  alnsn 		}
   1723  1.1  alnsn 
    1724  1.1  alnsn 		/* Without a carry, a + b >= a | b always holds; set the carry when the result is below (a | b). */
    1725  1.1  alnsn 		if (op & SLJIT_SET_C)
    1726  1.1  alnsn 			FAIL_IF(CMPLTU(ULESS_FLAG, reg_map[dst], ULESS_FLAG));
   1727  1.1  alnsn 
   1728  1.1  alnsn 		if (op & SLJIT_SET_O)
   1729  1.1  alnsn 			return CMOVNEZ(OVERFLOW_FLAG, TMP_EREG1, ZERO);
   1730  1.1  alnsn 
   1731  1.1  alnsn 		return SLJIT_SUCCESS;
   1732  1.1  alnsn 
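                     	/* ADDC computes dst = src1 + src2 + carry, where the incoming carry is
                     	   the 0/1 value left in ULESS_FLAG by a preceding addition. */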
   1733  1.1  alnsn 	case SLJIT_ADDC:
   1734  1.1  alnsn 		if (flags & SRC2_IMM) {
   1735  1.1  alnsn 			if (op & SLJIT_SET_C) {
   1736  1.1  alnsn 				if (src2 >= 0)
   1737  1.1  alnsn 					FAIL_IF(ORI(TMP_EREG1, reg_map[src1], src2));
   1738  1.1  alnsn 				else {
   1739  1.1  alnsn 					FAIL_IF(ADDLI(TMP_EREG1, ZERO, src2));
   1740  1.1  alnsn 					FAIL_IF(OR(TMP_EREG1, reg_map[src1], TMP_EREG1));
   1741  1.1  alnsn 				}
   1742  1.1  alnsn 			}
   1743  1.1  alnsn 
   1744  1.1  alnsn 			FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], src2));
   1745  1.1  alnsn 
   1746  1.1  alnsn 		} else {
   1747  1.1  alnsn 			if (op & SLJIT_SET_C)
   1748  1.1  alnsn 				FAIL_IF(OR(TMP_EREG1, reg_map[src1], reg_map[src2]));
   1749  1.1  alnsn 
   1750  1.1  alnsn 			/* dst may be the same as src1 or src2. */
   1751  1.1  alnsn 			FAIL_IF(ADD(reg_map[dst], reg_map[src1], reg_map[src2]));
   1752  1.1  alnsn 		}
   1753  1.1  alnsn 
   1754  1.1  alnsn 		if (op & SLJIT_SET_C)
   1755  1.1  alnsn 			FAIL_IF(CMPLTU(TMP_EREG1, reg_map[dst], TMP_EREG1));
   1756  1.1  alnsn 
   1757  1.1  alnsn 		FAIL_IF(ADD(reg_map[dst], reg_map[dst], ULESS_FLAG));
   1758  1.1  alnsn 
   1759  1.1  alnsn 		if (!(op & SLJIT_SET_C))
   1760  1.1  alnsn 			return SLJIT_SUCCESS;
   1761  1.1  alnsn 
    1762  1.1  alnsn 		/* Set TMP_EREG2 if (dst == 0) && (ULESS_FLAG == 1). */
   1763  1.1  alnsn 		FAIL_IF(CMPLTUI(TMP_EREG2, reg_map[dst], 1));
   1764  1.1  alnsn 		FAIL_IF(AND(TMP_EREG2, TMP_EREG2, ULESS_FLAG));
   1765  1.1  alnsn 		/* Set carry flag. */
   1766  1.1  alnsn 		return OR(ULESS_FLAG, TMP_EREG2, TMP_EREG1);
   1767  1.1  alnsn 
   1768  1.1  alnsn 	case SLJIT_SUB:
   1769  1.1  alnsn 		if ((flags & SRC2_IMM) && ((op & (SLJIT_SET_U | SLJIT_SET_S)) || src2 == SIMM_16BIT_MIN)) {
   1770  1.1  alnsn 			FAIL_IF(ADDLI(TMP_REG2_mapped, ZERO, src2));
   1771  1.1  alnsn 			src2 = TMP_REG2;
   1772  1.1  alnsn 			flags &= ~SRC2_IMM;
   1773  1.1  alnsn 		}
   1774  1.1  alnsn 
   1775  1.1  alnsn 		if (flags & SRC2_IMM) {
   1776  1.1  alnsn 			if (op & SLJIT_SET_O) {
    1777  1.1  alnsn 				FAIL_IF(SHRUI(TMP_EREG1, reg_map[src1], 63));
   1778  1.1  alnsn 
   1779  1.1  alnsn 				if (src2 < 0)
   1780  1.1  alnsn 					FAIL_IF(XORI(TMP_EREG1, TMP_EREG1, 1));
   1781  1.1  alnsn 
   1782  1.1  alnsn 				if (src1 != dst)
   1783  1.1  alnsn 					overflow_ra = reg_map[src1];
   1784  1.1  alnsn 				else {
    1785  1.1  alnsn 					/* Rare occasion. */
   1786  1.1  alnsn 					FAIL_IF(ADD(TMP_EREG2, reg_map[src1], ZERO));
   1787  1.1  alnsn 					overflow_ra = TMP_EREG2;
   1788  1.1  alnsn 				}
   1789  1.1  alnsn 			}
   1790  1.1  alnsn 
   1791  1.1  alnsn 			if (op & SLJIT_SET_E)
   1792  1.1  alnsn 				FAIL_IF(ADDLI(EQUAL_FLAG, reg_map[src1], -src2));
   1793  1.1  alnsn 
   1794  1.1  alnsn 			if (op & SLJIT_SET_C) {
   1795  1.1  alnsn 				FAIL_IF(load_immediate(compiler, ADDR_TMP_mapped, src2));
   1796  1.1  alnsn 				FAIL_IF(CMPLTU(ULESS_FLAG, reg_map[src1], ADDR_TMP_mapped));
   1797  1.1  alnsn 			}
   1798  1.1  alnsn 
   1799  1.1  alnsn 			/* dst may be the same as src1 or src2. */
   1800  1.1  alnsn 			if (CHECK_FLAGS(SLJIT_SET_E))
   1801  1.1  alnsn 				FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], -src2));
   1802  1.1  alnsn 
   1803  1.1  alnsn 		} else {
   1804  1.1  alnsn 
   1805  1.1  alnsn 			if (op & SLJIT_SET_O) {
   1806  1.1  alnsn 				FAIL_IF(XOR(TMP_EREG1, reg_map[src1], reg_map[src2]));
   1807  1.1  alnsn 				FAIL_IF(SHRUI(TMP_EREG1, TMP_EREG1, 63));
   1808  1.1  alnsn 
   1809  1.1  alnsn 				if (src1 != dst)
   1810  1.1  alnsn 					overflow_ra = reg_map[src1];
   1811  1.1  alnsn 				else {
    1812  1.1  alnsn 					/* Rare occasion. */
   1813  1.1  alnsn 					FAIL_IF(ADD(TMP_EREG2, reg_map[src1], ZERO));
   1814  1.1  alnsn 					overflow_ra = TMP_EREG2;
   1815  1.1  alnsn 				}
   1816  1.1  alnsn 			}
   1817  1.1  alnsn 
   1818  1.1  alnsn 			if (op & SLJIT_SET_E)
   1819  1.1  alnsn 				FAIL_IF(SUB(EQUAL_FLAG, reg_map[src1], reg_map[src2]));
   1820  1.1  alnsn 
   1821  1.1  alnsn 			if (op & (SLJIT_SET_U | SLJIT_SET_C))
   1822  1.1  alnsn 				FAIL_IF(CMPLTU(ULESS_FLAG, reg_map[src1], reg_map[src2]));
   1823  1.1  alnsn 
   1824  1.1  alnsn 			if (op & SLJIT_SET_U)
   1825  1.1  alnsn 				FAIL_IF(CMPLTU(UGREATER_FLAG, reg_map[src2], reg_map[src1]));
   1826  1.1  alnsn 
   1827  1.1  alnsn 			if (op & SLJIT_SET_S) {
    1828  1.1  alnsn 				FAIL_IF(CMPLTS(LESS_FLAG, reg_map[src1], reg_map[src2]));
    1829  1.1  alnsn 				FAIL_IF(CMPLTS(GREATER_FLAG, reg_map[src2], reg_map[src1]));
   1830  1.1  alnsn 			}
   1831  1.1  alnsn 
   1832  1.1  alnsn 			/* dst may be the same as src1 or src2. */
   1833  1.1  alnsn 			if (CHECK_FLAGS(SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C))
   1834  1.1  alnsn 				FAIL_IF(SUB(reg_map[dst], reg_map[src1], reg_map[src2]));
   1835  1.1  alnsn 		}
   1836  1.1  alnsn 
   1837  1.1  alnsn 		if (op & SLJIT_SET_O) {
   1838  1.1  alnsn 			FAIL_IF(XOR(OVERFLOW_FLAG, reg_map[dst], overflow_ra));
   1839  1.1  alnsn 			FAIL_IF(SHRUI(OVERFLOW_FLAG, OVERFLOW_FLAG, 63));
   1840  1.1  alnsn 			return CMOVEQZ(OVERFLOW_FLAG, TMP_EREG1, ZERO);
   1841  1.1  alnsn 		}
   1842  1.1  alnsn 
   1843  1.1  alnsn 		return SLJIT_SUCCESS;
   1844  1.1  alnsn 
   1845  1.1  alnsn 	case SLJIT_SUBC:
   1846  1.1  alnsn 		if ((flags & SRC2_IMM) && src2 == SIMM_16BIT_MIN) {
   1847  1.1  alnsn 			FAIL_IF(ADDLI(TMP_REG2_mapped, ZERO, src2));
   1848  1.1  alnsn 			src2 = TMP_REG2;
   1849  1.1  alnsn 			flags &= ~SRC2_IMM;
   1850  1.1  alnsn 		}
   1851  1.1  alnsn 
   1852  1.1  alnsn 		if (flags & SRC2_IMM) {
   1853  1.1  alnsn 			if (op & SLJIT_SET_C) {
   1854  1.1  alnsn 				FAIL_IF(load_immediate(compiler, ADDR_TMP_mapped, -src2));
   1855  1.1  alnsn 				FAIL_IF(CMPLTU(TMP_EREG1, reg_map[src1], ADDR_TMP_mapped));
   1856  1.1  alnsn 			}
   1857  1.1  alnsn 
   1858  1.1  alnsn 			/* dst may be the same as src1 or src2. */
   1859  1.1  alnsn 			FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], -src2));
   1860  1.1  alnsn 
   1861  1.1  alnsn 		} else {
   1862  1.1  alnsn 			if (op & SLJIT_SET_C)
   1863  1.1  alnsn 				FAIL_IF(CMPLTU(TMP_EREG1, reg_map[src1], reg_map[src2]));
    1864  1.1  alnsn 			/* dst may be the same as src1 or src2. */
   1865  1.1  alnsn 			FAIL_IF(SUB(reg_map[dst], reg_map[src1], reg_map[src2]));
   1866  1.1  alnsn 		}
   1867  1.1  alnsn 
   1868  1.1  alnsn 		if (op & SLJIT_SET_C)
   1869  1.1  alnsn 			FAIL_IF(CMOVEQZ(TMP_EREG1, reg_map[dst], ULESS_FLAG));
   1870  1.1  alnsn 
   1871  1.1  alnsn 		FAIL_IF(SUB(reg_map[dst], reg_map[dst], ULESS_FLAG));
   1872  1.1  alnsn 
   1873  1.1  alnsn 		if (op & SLJIT_SET_C)
   1874  1.1  alnsn 			FAIL_IF(ADD(ULESS_FLAG, TMP_EREG1, ZERO));
   1875  1.1  alnsn 
   1876  1.1  alnsn 		return SLJIT_SUCCESS;
   1877  1.1  alnsn 
   1878  1.3  alnsn 	case SLJIT_MUL:
   1879  1.3  alnsn 		if (flags & SRC2_IMM) {
   1880  1.3  alnsn 			FAIL_IF(load_immediate(compiler, TMP_REG2_mapped, src2));
   1881  1.3  alnsn 			src2 = TMP_REG2;
   1882  1.3  alnsn 			flags &= ~SRC2_IMM;
   1883  1.3  alnsn 		}
   1884  1.3  alnsn 
   1885  1.3  alnsn 		FAIL_IF(MUL(reg_map[dst], reg_map[src1], reg_map[src2]));
   1886  1.3  alnsn 
   1887  1.3  alnsn 		return SLJIT_SUCCESS;
   1888  1.3  alnsn 
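                     /* EMIT_LOGICAL and EMIT_SHIFT (below) emit the operation up to twice:
                        into EQUAL_FLAG when the E flag is requested, and into the destination
                        register when the result itself is needed (CHECK_FLAGS). */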
   1889  1.1  alnsn #define EMIT_LOGICAL(op_imm, op_norm) \
   1890  1.1  alnsn 	if (flags & SRC2_IMM) { \
   1891  1.1  alnsn 		FAIL_IF(load_immediate(compiler, ADDR_TMP_mapped, src2)); \
   1892  1.1  alnsn 		if (op & SLJIT_SET_E) \
   1893  1.1  alnsn 			FAIL_IF(push_3_buffer( \
   1894  1.1  alnsn 				compiler, op_norm, EQUAL_FLAG, reg_map[src1], \
   1895  1.1  alnsn 				ADDR_TMP_mapped, __LINE__)); \
   1896  1.1  alnsn 		if (CHECK_FLAGS(SLJIT_SET_E)) \
   1897  1.1  alnsn 			FAIL_IF(push_3_buffer( \
   1898  1.1  alnsn 				compiler, op_norm, reg_map[dst], reg_map[src1], \
   1899  1.1  alnsn 				ADDR_TMP_mapped, __LINE__)); \
   1900  1.1  alnsn 	} else { \
   1901  1.1  alnsn 		if (op & SLJIT_SET_E) \
   1902  1.1  alnsn 			FAIL_IF(push_3_buffer( \
   1903  1.1  alnsn 				compiler, op_norm, EQUAL_FLAG, reg_map[src1], \
   1904  1.1  alnsn 				reg_map[src2], __LINE__)); \
   1905  1.1  alnsn 		if (CHECK_FLAGS(SLJIT_SET_E)) \
   1906  1.1  alnsn 			FAIL_IF(push_3_buffer( \
   1907  1.1  alnsn 				compiler, op_norm, reg_map[dst], reg_map[src1], \
   1908  1.1  alnsn 				reg_map[src2], __LINE__)); \
   1909  1.1  alnsn 	}
   1910  1.1  alnsn 
   1911  1.1  alnsn 	case SLJIT_AND:
   1912  1.1  alnsn 		EMIT_LOGICAL(TILEGX_OPC_ANDI, TILEGX_OPC_AND);
   1913  1.1  alnsn 		return SLJIT_SUCCESS;
   1914  1.1  alnsn 
   1915  1.1  alnsn 	case SLJIT_OR:
   1916  1.1  alnsn 		EMIT_LOGICAL(TILEGX_OPC_ORI, TILEGX_OPC_OR);
   1917  1.1  alnsn 		return SLJIT_SUCCESS;
   1918  1.1  alnsn 
   1919  1.1  alnsn 	case SLJIT_XOR:
   1920  1.1  alnsn 		EMIT_LOGICAL(TILEGX_OPC_XORI, TILEGX_OPC_XOR);
   1921  1.1  alnsn 		return SLJIT_SUCCESS;
   1922  1.1  alnsn 
   1923  1.1  alnsn #define EMIT_SHIFT(op_imm, op_norm) \
   1924  1.1  alnsn 	if (flags & SRC2_IMM) { \
   1925  1.1  alnsn 		if (op & SLJIT_SET_E) \
   1926  1.1  alnsn 			FAIL_IF(push_3_buffer( \
   1927  1.1  alnsn 				compiler, op_imm, EQUAL_FLAG, reg_map[src1], \
   1928  1.1  alnsn 				src2 & 0x3F, __LINE__)); \
   1929  1.1  alnsn 		if (CHECK_FLAGS(SLJIT_SET_E)) \
   1930  1.1  alnsn 			FAIL_IF(push_3_buffer( \
   1931  1.1  alnsn 				compiler, op_imm, reg_map[dst], reg_map[src1], \
   1932  1.1  alnsn 				src2 & 0x3F, __LINE__)); \
   1933  1.1  alnsn 	} else { \
   1934  1.1  alnsn 		if (op & SLJIT_SET_E) \
   1935  1.1  alnsn 			FAIL_IF(push_3_buffer( \
   1936  1.3  alnsn 				compiler, op_norm, EQUAL_FLAG, reg_map[src1], \
   1937  1.3  alnsn 				reg_map[src2], __LINE__)); \
   1938  1.1  alnsn 		if (CHECK_FLAGS(SLJIT_SET_E)) \
   1939  1.1  alnsn 			FAIL_IF(push_3_buffer( \
   1940  1.1  alnsn 				compiler, op_norm, reg_map[dst], reg_map[src1], \
   1941  1.1  alnsn 				reg_map[src2], __LINE__)); \
   1942  1.1  alnsn 	}
   1943  1.1  alnsn 
   1944  1.1  alnsn 	case SLJIT_SHL:
   1945  1.1  alnsn 		EMIT_SHIFT(TILEGX_OPC_SHLI, TILEGX_OPC_SHL);
   1946  1.1  alnsn 		return SLJIT_SUCCESS;
   1947  1.1  alnsn 
   1948  1.1  alnsn 	case SLJIT_LSHR:
   1949  1.1  alnsn 		EMIT_SHIFT(TILEGX_OPC_SHRUI, TILEGX_OPC_SHRU);
   1950  1.1  alnsn 		return SLJIT_SUCCESS;
   1951  1.1  alnsn 
   1952  1.1  alnsn 	case SLJIT_ASHR:
   1953  1.1  alnsn 		EMIT_SHIFT(TILEGX_OPC_SHRSI, TILEGX_OPC_SHRS);
   1954  1.1  alnsn 		return SLJIT_SUCCESS;
   1955  1.1  alnsn 	}
   1956  1.1  alnsn 
   1957  1.4  alnsn 	SLJIT_UNREACHABLE();
   1958  1.1  alnsn 	return SLJIT_SUCCESS;
   1959  1.1  alnsn }
   1960  1.1  alnsn 
   1961  1.3  alnsn static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, sljit_s32 dst, sljit_sw dstw, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w)
   1962  1.1  alnsn {
   1963  1.1  alnsn 	/* arg1 goes to TMP_REG1 or src reg.
   1964  1.1  alnsn 	   arg2 goes to TMP_REG2, imm or src reg.
   1965  1.1  alnsn 	   TMP_REG3 can be used for caching.
    1966  1.1  alnsn 	   result goes to TMP_REG2, so storing the result can use TMP_REG1 and TMP_REG3. */
   1967  1.3  alnsn 	sljit_s32 dst_r = TMP_REG2;
   1968  1.3  alnsn 	sljit_s32 src1_r;
   1969  1.1  alnsn 	sljit_sw src2_r = 0;
   1970  1.3  alnsn 	sljit_s32 sugg_src2_r = TMP_REG2;
   1971  1.1  alnsn 
   1972  1.1  alnsn 	if (!(flags & ALT_KEEP_CACHE)) {
   1973  1.1  alnsn 		compiler->cache_arg = 0;
   1974  1.1  alnsn 		compiler->cache_argw = 0;
   1975  1.1  alnsn 	}
   1976  1.1  alnsn 
   1977  1.1  alnsn 	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
   1978  1.3  alnsn 		if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32 && !(src2 & SLJIT_MEM))
   1979  1.1  alnsn 			return SLJIT_SUCCESS;
   1980  1.1  alnsn 		if (GET_FLAGS(op))
   1981  1.1  alnsn 			flags |= UNUSED_DEST;
   1982  1.1  alnsn 	} else if (FAST_IS_REG(dst)) {
   1983  1.1  alnsn 		dst_r = dst;
   1984  1.1  alnsn 		flags |= REG_DEST;
   1985  1.3  alnsn 		if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32)
   1986  1.1  alnsn 			sugg_src2_r = dst_r;
   1987  1.1  alnsn 	} else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1_mapped, dst, dstw))
   1988  1.1  alnsn 		flags |= SLOW_DEST;
   1989  1.1  alnsn 
   1990  1.1  alnsn 	if (flags & IMM_OP) {
   1991  1.1  alnsn 		if ((src2 & SLJIT_IMM) && src2w) {
   1992  1.1  alnsn 			if ((!(flags & LOGICAL_OP)
   1993  1.1  alnsn 					&& (src2w <= SIMM_16BIT_MAX && src2w >= SIMM_16BIT_MIN))
   1994  1.1  alnsn 					|| ((flags & LOGICAL_OP) && !(src2w & ~UIMM_16BIT_MAX))) {
   1995  1.1  alnsn 				flags |= SRC2_IMM;
   1996  1.1  alnsn 				src2_r = src2w;
   1997  1.1  alnsn 			}
   1998  1.1  alnsn 		}
   1999  1.1  alnsn 
   2000  1.1  alnsn 		if (!(flags & SRC2_IMM) && (flags & CUMULATIVE_OP) && (src1 & SLJIT_IMM) && src1w) {
   2001  1.1  alnsn 			if ((!(flags & LOGICAL_OP)
   2002  1.1  alnsn 					&& (src1w <= SIMM_16BIT_MAX && src1w >= SIMM_16BIT_MIN))
   2003  1.1  alnsn 					|| ((flags & LOGICAL_OP) && !(src1w & ~UIMM_16BIT_MAX))) {
   2004  1.1  alnsn 				flags |= SRC2_IMM;
   2005  1.1  alnsn 				src2_r = src1w;
   2006  1.1  alnsn 
   2007  1.1  alnsn 				/* And swap arguments. */
   2008  1.1  alnsn 				src1 = src2;
   2009  1.1  alnsn 				src1w = src2w;
   2010  1.1  alnsn 				src2 = SLJIT_IMM;
   2011  1.1  alnsn 				/* src2w = src2_r unneeded. */
   2012  1.1  alnsn 			}
   2013  1.1  alnsn 		}
   2014  1.1  alnsn 	}
   2015  1.1  alnsn 
   2016  1.1  alnsn 	/* Source 1. */
   2017  1.1  alnsn 	if (FAST_IS_REG(src1)) {
   2018  1.1  alnsn 		src1_r = src1;
   2019  1.1  alnsn 		flags |= REG1_SOURCE;
   2020  1.1  alnsn 	} else if (src1 & SLJIT_IMM) {
   2021  1.1  alnsn 		if (src1w) {
   2022  1.1  alnsn 			FAIL_IF(load_immediate(compiler, TMP_REG1_mapped, src1w));
   2023  1.1  alnsn 			src1_r = TMP_REG1;
   2024  1.1  alnsn 		} else
   2025  1.1  alnsn 			src1_r = 0;
   2026  1.1  alnsn 	} else {
   2027  1.1  alnsn 		if (getput_arg_fast(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w))
   2028  1.1  alnsn 			FAIL_IF(compiler->error);
   2029  1.1  alnsn 		else
   2030  1.1  alnsn 			flags |= SLOW_SRC1;
   2031  1.1  alnsn 		src1_r = TMP_REG1;
   2032  1.1  alnsn 	}
   2033  1.1  alnsn 
   2034  1.1  alnsn 	/* Source 2. */
   2035  1.1  alnsn 	if (FAST_IS_REG(src2)) {
   2036  1.1  alnsn 		src2_r = src2;
   2037  1.1  alnsn 		flags |= REG2_SOURCE;
   2038  1.3  alnsn 		if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_S32)
   2039  1.1  alnsn 			dst_r = src2_r;
   2040  1.1  alnsn 	} else if (src2 & SLJIT_IMM) {
   2041  1.1  alnsn 		if (!(flags & SRC2_IMM)) {
   2042  1.1  alnsn 			if (src2w) {
   2043  1.1  alnsn 				FAIL_IF(load_immediate(compiler, reg_map[sugg_src2_r], src2w));
   2044  1.1  alnsn 				src2_r = sugg_src2_r;
   2045  1.1  alnsn 			} else {
   2046  1.1  alnsn 				src2_r = 0;
   2047  1.3  alnsn 				if ((op >= SLJIT_MOV && op <= SLJIT_MOVU_S32) && (dst & SLJIT_MEM))
   2048  1.1  alnsn 					dst_r = 0;
   2049  1.1  alnsn 			}
   2050  1.1  alnsn 		}
   2051  1.1  alnsn 	} else {
   2052  1.1  alnsn 		if (getput_arg_fast(compiler, flags | LOAD_DATA, reg_map[sugg_src2_r], src2, src2w))
   2053  1.1  alnsn 			FAIL_IF(compiler->error);
   2054  1.1  alnsn 		else
   2055  1.1  alnsn 			flags |= SLOW_SRC2;
   2056  1.1  alnsn 		src2_r = sugg_src2_r;
   2057  1.1  alnsn 	}
   2058  1.1  alnsn 
   2059  1.1  alnsn 	if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
   2060  1.1  alnsn 		SLJIT_ASSERT(src2_r == TMP_REG2);
   2061  1.1  alnsn 		if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
   2062  1.1  alnsn 			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2_mapped, src2, src2w, src1, src1w));
   2063  1.1  alnsn 			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w, dst, dstw));
   2064  1.1  alnsn 		} else {
   2065  1.1  alnsn 			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w, src2, src2w));
   2066  1.1  alnsn 			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2_mapped, src2, src2w, dst, dstw));
   2067  1.1  alnsn 		}
   2068  1.1  alnsn 	} else if (flags & SLOW_SRC1)
   2069  1.1  alnsn 		FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w, dst, dstw));
   2070  1.1  alnsn 	else if (flags & SLOW_SRC2)
   2071  1.1  alnsn 		FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, reg_map[sugg_src2_r], src2, src2w, dst, dstw));
   2072  1.1  alnsn 
   2073  1.1  alnsn 	FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));
   2074  1.1  alnsn 
   2075  1.1  alnsn 	if (dst & SLJIT_MEM) {
   2076  1.1  alnsn 		if (!(flags & SLOW_DEST)) {
   2077  1.1  alnsn 			getput_arg_fast(compiler, flags, reg_map[dst_r], dst, dstw);
   2078  1.1  alnsn 			return compiler->error;
   2079  1.1  alnsn 		}
   2080  1.1  alnsn 
   2081  1.1  alnsn 		return getput_arg(compiler, flags, reg_map[dst_r], dst, dstw, 0, 0);
   2082  1.1  alnsn 	}
   2083  1.1  alnsn 
   2084  1.1  alnsn 	return SLJIT_SUCCESS;
   2085  1.1  alnsn }
   2086  1.1  alnsn 
   2087  1.3  alnsn SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw, sljit_s32 type)
   2088  1.1  alnsn {
   2089  1.3  alnsn 	sljit_s32 sugg_dst_ar, dst_ar;
   2090  1.3  alnsn 	sljit_s32 flags = GET_ALL_FLAGS(op);
   2091  1.3  alnsn 	sljit_s32 mem_type = (op & SLJIT_I32_OP) ? (INT_DATA | SIGNED_DATA) : WORD_DATA;
   2092  1.1  alnsn 
   2093  1.1  alnsn 	CHECK_ERROR();
   2094  1.3  alnsn 	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
   2095  1.1  alnsn 	ADJUST_LOCAL_OFFSET(dst, dstw);
   2096  1.1  alnsn 
   2097  1.1  alnsn 	if (dst == SLJIT_UNUSED)
   2098  1.1  alnsn 		return SLJIT_SUCCESS;
   2099  1.1  alnsn 
   2100  1.1  alnsn 	op = GET_OPCODE(op);
   2101  1.3  alnsn 	if (op == SLJIT_MOV_S32 || op == SLJIT_MOV_U32)
   2102  1.3  alnsn 		mem_type = INT_DATA | SIGNED_DATA;
   2103  1.1  alnsn 	sugg_dst_ar = reg_map[(op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2];
   2104  1.1  alnsn 
   2105  1.1  alnsn 	compiler->cache_arg = 0;
   2106  1.1  alnsn 	compiler->cache_argw = 0;
   2107  1.1  alnsn 	if (op >= SLJIT_ADD && (src & SLJIT_MEM)) {
   2108  1.1  alnsn 		ADJUST_LOCAL_OFFSET(src, srcw);
   2109  1.3  alnsn 		FAIL_IF(emit_op_mem2(compiler, mem_type | LOAD_DATA, TMP_REG1_mapped, src, srcw, dst, dstw));
   2110  1.1  alnsn 		src = TMP_REG1;
   2111  1.1  alnsn 		srcw = 0;
   2112  1.1  alnsn 	}
   2113  1.1  alnsn 
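                     	/* The switch below picks the dedicated flag register that already holds
                     	   the tested condition; EQUAL and MUL_OVERFLOW first normalize the flag
                     	   to 0/1 with CMPLTUI, and odd type values are inverted by the XORI
                     	   that follows. */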
   2114  1.3  alnsn 	switch (type & 0xff) {
   2115  1.3  alnsn 	case SLJIT_EQUAL:
   2116  1.3  alnsn 	case SLJIT_NOT_EQUAL:
   2117  1.1  alnsn 		FAIL_IF(CMPLTUI(sugg_dst_ar, EQUAL_FLAG, 1));
   2118  1.1  alnsn 		dst_ar = sugg_dst_ar;
   2119  1.1  alnsn 		break;
   2120  1.3  alnsn 	case SLJIT_LESS:
   2121  1.3  alnsn 	case SLJIT_GREATER_EQUAL:
   2122  1.1  alnsn 		dst_ar = ULESS_FLAG;
   2123  1.1  alnsn 		break;
   2124  1.3  alnsn 	case SLJIT_GREATER:
   2125  1.3  alnsn 	case SLJIT_LESS_EQUAL:
   2126  1.1  alnsn 		dst_ar = UGREATER_FLAG;
   2127  1.1  alnsn 		break;
   2128  1.3  alnsn 	case SLJIT_SIG_LESS:
   2129  1.3  alnsn 	case SLJIT_SIG_GREATER_EQUAL:
   2130  1.1  alnsn 		dst_ar = LESS_FLAG;
   2131  1.1  alnsn 		break;
   2132  1.3  alnsn 	case SLJIT_SIG_GREATER:
   2133  1.3  alnsn 	case SLJIT_SIG_LESS_EQUAL:
   2134  1.1  alnsn 		dst_ar = GREATER_FLAG;
   2135  1.1  alnsn 		break;
   2136  1.3  alnsn 	case SLJIT_OVERFLOW:
   2137  1.3  alnsn 	case SLJIT_NOT_OVERFLOW:
   2138  1.1  alnsn 		dst_ar = OVERFLOW_FLAG;
   2139  1.1  alnsn 		break;
   2140  1.3  alnsn 	case SLJIT_MUL_OVERFLOW:
   2141  1.3  alnsn 	case SLJIT_MUL_NOT_OVERFLOW:
   2142  1.1  alnsn 		FAIL_IF(CMPLTUI(sugg_dst_ar, OVERFLOW_FLAG, 1));
   2143  1.1  alnsn 		dst_ar = sugg_dst_ar;
   2144  1.1  alnsn 		type ^= 0x1; /* Flip type bit for the XORI below. */
   2145  1.1  alnsn 		break;
   2146  1.1  alnsn 
   2147  1.1  alnsn 	default:
   2148  1.4  alnsn 		SLJIT_UNREACHABLE();
   2149  1.1  alnsn 		dst_ar = sugg_dst_ar;
   2150  1.1  alnsn 		break;
   2151  1.1  alnsn 	}
   2152  1.1  alnsn 
   2153  1.1  alnsn 	if (type & 0x1) {
   2154  1.1  alnsn 		FAIL_IF(XORI(sugg_dst_ar, dst_ar, 1));
   2155  1.1  alnsn 		dst_ar = sugg_dst_ar;
   2156  1.1  alnsn 	}
   2157  1.1  alnsn 
   2158  1.1  alnsn 	if (op >= SLJIT_ADD) {
   2159  1.1  alnsn 		if (TMP_REG2_mapped != dst_ar)
   2160  1.1  alnsn 			FAIL_IF(ADD(TMP_REG2_mapped, dst_ar, ZERO));
   2161  1.3  alnsn 		return emit_op(compiler, op | flags, mem_type | CUMULATIVE_OP | LOGICAL_OP | IMM_OP | ALT_KEEP_CACHE, dst, dstw, src, srcw, TMP_REG2, 0);
   2162  1.1  alnsn 	}
   2163  1.1  alnsn 
   2164  1.1  alnsn 	if (dst & SLJIT_MEM)
   2165  1.3  alnsn 		return emit_op_mem(compiler, mem_type, dst_ar, dst, dstw);
   2166  1.1  alnsn 
   2167  1.1  alnsn 	if (sugg_dst_ar != dst_ar)
   2168  1.1  alnsn 		return ADD(sugg_dst_ar, dst_ar, ZERO);
   2169  1.1  alnsn 
   2170  1.1  alnsn 	return SLJIT_SUCCESS;
   2171  1.1  alnsn }
   2172  1.1  alnsn 
   2173  1.3  alnsn SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) {
   2174  1.1  alnsn 	CHECK_ERROR();
   2175  1.3  alnsn 	CHECK(check_sljit_emit_op0(compiler, op));
   2176  1.1  alnsn 
   2177  1.1  alnsn 	op = GET_OPCODE(op);
   2178  1.1  alnsn 	switch (op) {
   2179  1.1  alnsn 	case SLJIT_NOP:
   2180  1.1  alnsn 		return push_0_buffer(compiler, TILEGX_OPC_FNOP, __LINE__);
   2181  1.1  alnsn 
   2182  1.1  alnsn 	case SLJIT_BREAKPOINT:
   2183  1.1  alnsn 		return PI(BPT);
   2184  1.1  alnsn 
   2185  1.3  alnsn 	case SLJIT_LMUL_UW:
   2186  1.3  alnsn 	case SLJIT_LMUL_SW:
   2187  1.3  alnsn 	case SLJIT_DIVMOD_UW:
   2188  1.3  alnsn 	case SLJIT_DIVMOD_SW:
   2189  1.3  alnsn 	case SLJIT_DIV_UW:
   2190  1.3  alnsn 	case SLJIT_DIV_SW:
   2191  1.4  alnsn 		SLJIT_UNREACHABLE();
   2192  1.1  alnsn 	}
   2193  1.1  alnsn 
   2194  1.1  alnsn 	return SLJIT_SUCCESS;
   2195  1.1  alnsn }
   2196  1.1  alnsn 
   2197  1.3  alnsn SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw)
   2198  1.1  alnsn {
   2199  1.1  alnsn 	CHECK_ERROR();
   2200  1.3  alnsn 	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
   2201  1.1  alnsn 	ADJUST_LOCAL_OFFSET(dst, dstw);
   2202  1.1  alnsn 	ADJUST_LOCAL_OFFSET(src, srcw);
   2203  1.1  alnsn 
   2204  1.1  alnsn 	switch (GET_OPCODE(op)) {
   2205  1.1  alnsn 	case SLJIT_MOV:
   2206  1.1  alnsn 	case SLJIT_MOV_P:
   2207  1.1  alnsn 		return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
   2208  1.1  alnsn 
   2209  1.3  alnsn 	case SLJIT_MOV_U32:
   2210  1.3  alnsn 		return emit_op(compiler, SLJIT_MOV_U32, INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
   2211  1.1  alnsn 
   2212  1.3  alnsn 	case SLJIT_MOV_S32:
   2213  1.3  alnsn 		return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
   2214  1.1  alnsn 
   2215  1.3  alnsn 	case SLJIT_MOV_U8:
   2216  1.3  alnsn 		return emit_op(compiler, SLJIT_MOV_U8, BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8) srcw : srcw);
   2217  1.1  alnsn 
   2218  1.3  alnsn 	case SLJIT_MOV_S8:
   2219  1.3  alnsn 		return emit_op(compiler, SLJIT_MOV_S8, BYTE_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8) srcw : srcw);
   2220  1.1  alnsn 
   2221  1.3  alnsn 	case SLJIT_MOV_U16:
   2222  1.3  alnsn 		return emit_op(compiler, SLJIT_MOV_U16, HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16) srcw : srcw);
   2223  1.1  alnsn 
   2224  1.3  alnsn 	case SLJIT_MOV_S16:
   2225  1.3  alnsn 		return emit_op(compiler, SLJIT_MOV_S16, HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16) srcw : srcw);
   2226  1.1  alnsn 
   2227  1.1  alnsn 	case SLJIT_MOVU:
   2228  1.1  alnsn 	case SLJIT_MOVU_P:
   2229  1.1  alnsn 		return emit_op(compiler, SLJIT_MOV, WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
   2230  1.1  alnsn 
   2231  1.3  alnsn 	case SLJIT_MOVU_U32:
   2232  1.3  alnsn 		return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
   2233  1.1  alnsn 
   2234  1.3  alnsn 	case SLJIT_MOVU_S32:
   2235  1.3  alnsn 		return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
   2236  1.1  alnsn 
   2237  1.3  alnsn 	case SLJIT_MOVU_U8:
   2238  1.3  alnsn 		return emit_op(compiler, SLJIT_MOV_U8, BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8) srcw : srcw);
   2239  1.1  alnsn 
   2240  1.3  alnsn 	case SLJIT_MOVU_S8:
   2241  1.3  alnsn 		return emit_op(compiler, SLJIT_MOV_S8, BYTE_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8) srcw : srcw);
   2242  1.1  alnsn 
   2243  1.3  alnsn 	case SLJIT_MOVU_U16:
   2244  1.3  alnsn 		return emit_op(compiler, SLJIT_MOV_U16, HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16) srcw : srcw);
   2245  1.1  alnsn 
   2246  1.3  alnsn 	case SLJIT_MOVU_S16:
   2247  1.3  alnsn 		return emit_op(compiler, SLJIT_MOV_S16, HALF_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16) srcw : srcw);
   2248  1.1  alnsn 
   2249  1.1  alnsn 	case SLJIT_NOT:
   2250  1.1  alnsn 		return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw);
   2251  1.1  alnsn 
   2252  1.1  alnsn 	case SLJIT_NEG:
   2253  1.1  alnsn 		return emit_op(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw);
   2254  1.1  alnsn 
   2255  1.1  alnsn 	case SLJIT_CLZ:
   2256  1.3  alnsn 		return emit_op(compiler, op, (op & SLJIT_I32_OP) ? INT_DATA : WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
   2257  1.1  alnsn 	}
   2258  1.1  alnsn 
   2259  1.1  alnsn 	return SLJIT_SUCCESS;
   2260  1.1  alnsn }
   2261  1.1  alnsn 
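                     /*
                      * For the two-operand ops, the extra bits passed to emit_op describe how the
                      * operands may be arranged: CUMULATIVE_OP marks the commutative operations,
                      * IMM_OP permits an immediate second operand and LOGICAL_OP distinguishes
                      * the bitwise ops; the exact handling lives in emit_op earlier in this file.
                      * Hypothetical caller:
                      *   sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0,
                      *                  SLJIT_IMM, 42);
                      */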
   2262  1.3  alnsn SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w)
   2263  1.1  alnsn {
   2264  1.1  alnsn 	CHECK_ERROR();
   2265  1.3  alnsn 	CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
   2266  1.1  alnsn 	ADJUST_LOCAL_OFFSET(dst, dstw);
   2267  1.1  alnsn 	ADJUST_LOCAL_OFFSET(src1, src1w);
   2268  1.1  alnsn 	ADJUST_LOCAL_OFFSET(src2, src2w);
   2269  1.1  alnsn 
   2270  1.1  alnsn 	switch (GET_OPCODE(op)) {
   2271  1.1  alnsn 	case SLJIT_ADD:
   2272  1.1  alnsn 	case SLJIT_ADDC:
   2273  1.1  alnsn 		return emit_op(compiler, op, CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
   2274  1.1  alnsn 
   2275  1.1  alnsn 	case SLJIT_SUB:
   2276  1.1  alnsn 	case SLJIT_SUBC:
   2277  1.1  alnsn 		return emit_op(compiler, op, IMM_OP, dst, dstw, src1, src1w, src2, src2w);
   2278  1.1  alnsn 
   2279  1.1  alnsn 	case SLJIT_MUL:
   2280  1.1  alnsn 		return emit_op(compiler, op, CUMULATIVE_OP, dst, dstw, src1, src1w, src2, src2w);
   2281  1.1  alnsn 
   2282  1.1  alnsn 	case SLJIT_AND:
   2283  1.1  alnsn 	case SLJIT_OR:
   2284  1.1  alnsn 	case SLJIT_XOR:
   2285  1.1  alnsn 		return emit_op(compiler, op, CUMULATIVE_OP | LOGICAL_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
   2286  1.1  alnsn 
   2287  1.1  alnsn 	case SLJIT_SHL:
   2288  1.1  alnsn 	case SLJIT_LSHR:
   2289  1.1  alnsn 	case SLJIT_ASHR:
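                     		/* Shift counts are reduced modulo the operand width: 6 bits for
                     		   64-bit operations, 5 bits when SLJIT_I32_OP is set. */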
   2290  1.1  alnsn 		if (src2 & SLJIT_IMM)
   2291  1.1  alnsn 			src2w &= 0x3f;
   2292  1.3  alnsn 		if (op & SLJIT_I32_OP)
   2293  1.1  alnsn 			src2w &= 0x1f;
   2294  1.1  alnsn 
   2295  1.1  alnsn 		return emit_op(compiler, op, IMM_OP, dst, dstw, src1, src1w, src2, src2w);
   2296  1.1  alnsn 	}
   2297  1.1  alnsn 
   2298  1.1  alnsn 	return SLJIT_SUCCESS;
   2299  1.1  alnsn }
   2300  1.1  alnsn 
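                     /*
                      * A label simply records the current code size once the instruction buffer
                      * has been flushed; if nothing was emitted since the previous label, that
                      * label is reused instead of allocating a new one.
                      */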
   2301  1.1  alnsn SLJIT_API_FUNC_ATTRIBUTE struct sljit_label * sljit_emit_label(struct sljit_compiler *compiler)
   2302  1.1  alnsn {
   2303  1.1  alnsn 	struct sljit_label *label;
   2304  1.1  alnsn 
   2305  1.1  alnsn 	flush_buffer(compiler);
   2306  1.1  alnsn 
   2307  1.1  alnsn 	CHECK_ERROR_PTR();
   2308  1.3  alnsn 	CHECK_PTR(check_sljit_emit_label(compiler));
   2309  1.1  alnsn 
   2310  1.1  alnsn 	if (compiler->last_label && compiler->last_label->size == compiler->size)
   2311  1.1  alnsn 		return compiler->last_label;
   2312  1.1  alnsn 
   2313  1.1  alnsn 	label = (struct sljit_label *)ensure_abuf(compiler, sizeof(struct sljit_label));
   2314  1.1  alnsn 	PTR_FAIL_IF(!label);
   2315  1.1  alnsn 	set_label(label, compiler);
   2316  1.1  alnsn 	return label;
   2317  1.1  alnsn }
   2318  1.1  alnsn 
   2319  1.3  alnsn SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
   2320  1.1  alnsn {
   2321  1.3  alnsn 	sljit_s32 src_r = TMP_REG2;
   2322  1.1  alnsn 	struct sljit_jump *jump = NULL;
   2323  1.1  alnsn 
   2324  1.1  alnsn 	flush_buffer(compiler);
   2325  1.1  alnsn 
   2326  1.1  alnsn 	CHECK_ERROR();
   2327  1.3  alnsn 	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
   2328  1.1  alnsn 	ADJUST_LOCAL_OFFSET(src, srcw);
   2329  1.1  alnsn 
   2330  1.1  alnsn 	if (FAST_IS_REG(src)) {
   2331  1.1  alnsn 		if (reg_map[src] != 0)
   2332  1.1  alnsn 			src_r = src;
   2333  1.1  alnsn 		else
   2334  1.1  alnsn 			FAIL_IF(ADD_SOLO(TMP_REG2_mapped, reg_map[src], ZERO));
   2335  1.1  alnsn 	}
   2336  1.1  alnsn 
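                     	/*
                     	 * Calls: the target address ends up in PIC_ADDR_REG (asserted to map
                     	 * to r16 below), SLJIT_R0 is copied into hardware r0 to satisfy the
                     	 * argument-passing convention, and the stack pointer (r54) is dropped
                     	 * by 16 bytes around the jalr, apparently to preserve the callee's
                     	 * reserved stack area required by the TILE-Gx ABI.
                     	 */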
   2337  1.1  alnsn 	if (type >= SLJIT_CALL0) {
   2338  1.1  alnsn 		SLJIT_ASSERT(reg_map[PIC_ADDR_REG] == 16 && PIC_ADDR_REG == TMP_REG2);
   2339  1.1  alnsn 		if (src & (SLJIT_IMM | SLJIT_MEM)) {
   2340  1.1  alnsn 			if (src & SLJIT_IMM)
   2341  1.1  alnsn 				FAIL_IF(emit_const(compiler, reg_map[PIC_ADDR_REG], srcw, 1));
   2342  1.1  alnsn 			else {
   2343  1.1  alnsn 				SLJIT_ASSERT(src_r == TMP_REG2 && (src & SLJIT_MEM));
   2344  1.1  alnsn 				FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
   2345  1.1  alnsn 			}
   2346  1.1  alnsn 
   2347  1.3  alnsn 			FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_R0], ZERO));
   2348  1.1  alnsn 
   2349  1.1  alnsn 			FAIL_IF(ADDI_SOLO(54, 54, -16));
   2350  1.1  alnsn 
   2351  1.1  alnsn 			FAIL_IF(JALR_SOLO(reg_map[PIC_ADDR_REG]));
   2352  1.1  alnsn 
   2353  1.1  alnsn 			return ADDI_SOLO(54, 54, 16);
   2354  1.1  alnsn 		}
   2355  1.1  alnsn 
   2356  1.1  alnsn 		/* Register input. */
   2357  1.1  alnsn 		if (type >= SLJIT_CALL1)
   2358  1.3  alnsn 			FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_R0], ZERO));
   2359  1.1  alnsn 
   2360  1.1  alnsn 		FAIL_IF(ADD_SOLO(reg_map[PIC_ADDR_REG], reg_map[src_r], ZERO));
   2361  1.1  alnsn 
   2362  1.1  alnsn 		FAIL_IF(ADDI_SOLO(54, 54, -16));
   2363  1.1  alnsn 
   2364  1.1  alnsn 		FAIL_IF(JALR_SOLO(reg_map[src_r]));
   2365  1.1  alnsn 
   2366  1.1  alnsn 		return ADDI_SOLO(54, 54, 16);
   2367  1.1  alnsn 	}
   2368  1.1  alnsn 
   2369  1.1  alnsn 	if (src & SLJIT_IMM) {
   2370  1.1  alnsn 		jump = (struct sljit_jump *)ensure_abuf(compiler, sizeof(struct sljit_jump));
   2371  1.1  alnsn 		FAIL_IF(!jump);
   2372  1.1  alnsn 		set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_JAL : 0));
   2373  1.1  alnsn 		jump->u.target = srcw;
   2374  1.1  alnsn 		FAIL_IF(emit_const(compiler, TMP_REG2_mapped, 0, 1));
   2375  1.1  alnsn 
   2376  1.1  alnsn 		if (type >= SLJIT_FAST_CALL) {
   2377  1.1  alnsn 			FAIL_IF(ADD_SOLO(ZERO, ZERO, ZERO));
   2378  1.1  alnsn 			jump->addr = compiler->size;
   2379  1.1  alnsn 			FAIL_IF(JR_SOLO(reg_map[src_r]));
   2380  1.1  alnsn 		} else {
   2381  1.1  alnsn 			jump->addr = compiler->size;
   2382  1.1  alnsn 			FAIL_IF(JR_SOLO(reg_map[src_r]));
   2383  1.1  alnsn 		}
   2384  1.1  alnsn 
   2385  1.1  alnsn 		return SLJIT_SUCCESS;
   2386  1.1  alnsn 
   2387  1.3  alnsn 	} else if (src & SLJIT_MEM) {
   2388  1.1  alnsn 		FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
   2389  1.3  alnsn 		flush_buffer(compiler);
   2390  1.3  alnsn 	}
   2391  1.1  alnsn 
   2392  1.1  alnsn 	FAIL_IF(JR_SOLO(reg_map[src_r]));
   2393  1.1  alnsn 
   2394  1.1  alnsn 	if (jump)
   2395  1.1  alnsn 		jump->addr = compiler->size;
   2396  1.1  alnsn 
   2397  1.1  alnsn 	return SLJIT_SUCCESS;
   2398  1.1  alnsn }
   2399  1.1  alnsn 
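                     /*
                      * BR_Z/BR_NZ build the guard branch used by sljit_emit_jump below: the
                      * branch tests one of the flag registers on the X1 pipe and, when the
                      * requested SLJIT condition does not hold, skips over the constant load and
                      * indirect jump that follow, so each case picks the test with the opposite
                      * sense of the condition.  IS_COND marks the jump as conditional for the
                      * later patching pass.
                      */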
   2400  1.1  alnsn #define BR_Z(src) \
   2401  1.1  alnsn 	inst = BEQZ_X1 | SRCA_X1(src); \
   2402  1.1  alnsn 	flags = IS_COND;
   2403  1.1  alnsn 
   2404  1.1  alnsn #define BR_NZ(src) \
   2405  1.1  alnsn 	inst = BNEZ_X1 | SRCA_X1(src); \
   2406  1.1  alnsn 	flags = IS_COND;
   2407  1.1  alnsn 
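                     /*
                      * Emit a patchable (direct or conditional) jump.  The target address is
                      * materialized into TMP_REG2 with emit_const and is filled in later via
                      * sljit_set_label() or sljit_set_target().  Hypothetical caller:
                      *   struct sljit_jump *j = sljit_emit_jump(compiler, SLJIT_EQUAL);
                      *   ...
                      *   sljit_set_label(j, sljit_emit_label(compiler));
                      */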
   2408  1.3  alnsn SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump * sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
   2409  1.1  alnsn {
   2410  1.1  alnsn 	struct sljit_jump *jump;
   2411  1.1  alnsn 	sljit_ins inst;
   2412  1.3  alnsn 	sljit_s32 flags = 0;
   2413  1.1  alnsn 
   2414  1.1  alnsn 	flush_buffer(compiler);
   2415  1.1  alnsn 
   2416  1.1  alnsn 	CHECK_ERROR_PTR();
   2417  1.3  alnsn 	CHECK_PTR(check_sljit_emit_jump(compiler, type));
   2418  1.1  alnsn 
   2419  1.1  alnsn 	jump = (struct sljit_jump *)ensure_abuf(compiler, sizeof(struct sljit_jump));
   2420  1.1  alnsn 	PTR_FAIL_IF(!jump);
   2421  1.1  alnsn 	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
   2422  1.1  alnsn 	type &= 0xff;
   2423  1.1  alnsn 
   2424  1.1  alnsn 	switch (type) {
   2425  1.3  alnsn 	case SLJIT_EQUAL:
   2426  1.1  alnsn 		BR_NZ(EQUAL_FLAG);
   2427  1.1  alnsn 		break;
   2428  1.3  alnsn 	case SLJIT_NOT_EQUAL:
   2429  1.1  alnsn 		BR_Z(EQUAL_FLAG);
   2430  1.1  alnsn 		break;
   2431  1.3  alnsn 	case SLJIT_LESS:
   2432  1.1  alnsn 		BR_Z(ULESS_FLAG);
   2433  1.1  alnsn 		break;
   2434  1.3  alnsn 	case SLJIT_GREATER_EQUAL:
   2435  1.1  alnsn 		BR_NZ(ULESS_FLAG);
   2436  1.1  alnsn 		break;
   2437  1.3  alnsn 	case SLJIT_GREATER:
   2438  1.1  alnsn 		BR_Z(UGREATER_FLAG);
   2439  1.1  alnsn 		break;
   2440  1.3  alnsn 	case SLJIT_LESS_EQUAL:
   2441  1.1  alnsn 		BR_NZ(UGREATER_FLAG);
   2442  1.1  alnsn 		break;
   2443  1.3  alnsn 	case SLJIT_SIG_LESS:
   2444  1.1  alnsn 		BR_Z(LESS_FLAG);
   2445  1.1  alnsn 		break;
   2446  1.3  alnsn 	case SLJIT_SIG_GREATER_EQUAL:
   2447  1.1  alnsn 		BR_NZ(LESS_FLAG);
   2448  1.1  alnsn 		break;
   2449  1.3  alnsn 	case SLJIT_SIG_GREATER:
   2450  1.1  alnsn 		BR_Z(GREATER_FLAG);
   2451  1.1  alnsn 		break;
   2452  1.3  alnsn 	case SLJIT_SIG_LESS_EQUAL:
   2453  1.1  alnsn 		BR_NZ(GREATER_FLAG);
   2454  1.1  alnsn 		break;
   2455  1.3  alnsn 	case SLJIT_OVERFLOW:
   2456  1.3  alnsn 	case SLJIT_MUL_OVERFLOW:
   2457  1.1  alnsn 		BR_Z(OVERFLOW_FLAG);
   2458  1.1  alnsn 		break;
   2459  1.3  alnsn 	case SLJIT_NOT_OVERFLOW:
   2460  1.3  alnsn 	case SLJIT_MUL_NOT_OVERFLOW:
   2461  1.1  alnsn 		BR_NZ(OVERFLOW_FLAG);
   2462  1.1  alnsn 		break;
   2463  1.1  alnsn 	default:
    2464  1.1  alnsn 		/* Not a conditional branch. */
   2465  1.1  alnsn 		inst = 0;
   2466  1.1  alnsn 		break;
   2467  1.1  alnsn 	}
   2468  1.1  alnsn 
   2469  1.1  alnsn 	jump->flags |= flags;
   2470  1.1  alnsn 
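                     	/* The guard branch built above skips the constant load and the jump
                     	   emitted below; the call form inserts one extra bundle (the argument
                     	   move into r0), hence BOFF_X1(6) instead of BOFF_X1(5). */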
   2471  1.1  alnsn 	if (inst) {
   2472  1.1  alnsn 		inst = inst | ((type <= SLJIT_JUMP) ? BOFF_X1(5) : BOFF_X1(6));
   2473  1.1  alnsn 		PTR_FAIL_IF(PI(inst));
   2474  1.1  alnsn 	}
   2475  1.1  alnsn 
   2476  1.1  alnsn 	PTR_FAIL_IF(emit_const(compiler, TMP_REG2_mapped, 0, 1));
   2477  1.1  alnsn 	if (type <= SLJIT_JUMP) {
   2478  1.1  alnsn 		jump->addr = compiler->size;
   2479  1.1  alnsn 		PTR_FAIL_IF(JR_SOLO(TMP_REG2_mapped));
   2480  1.1  alnsn 	} else {
   2481  1.1  alnsn 		SLJIT_ASSERT(reg_map[PIC_ADDR_REG] == 16 && PIC_ADDR_REG == TMP_REG2);
   2482  1.1  alnsn 		/* Cannot be optimized out if type is >= CALL0. */
   2483  1.1  alnsn 		jump->flags |= IS_JAL | (type >= SLJIT_CALL0 ? SLJIT_REWRITABLE_JUMP : 0);
   2484  1.3  alnsn 		PTR_FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_R0], ZERO));
   2485  1.1  alnsn 		jump->addr = compiler->size;
   2486  1.1  alnsn 		PTR_FAIL_IF(JALR_SOLO(TMP_REG2_mapped));
   2487  1.1  alnsn 	}
   2488  1.1  alnsn 
   2489  1.1  alnsn 	return jump;
   2490  1.1  alnsn }
   2491  1.1  alnsn 
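                     /*
                      * No FPU support is implemented for this port: sljit_is_fpu_available()
                      * reports 0, so the fop1/fop2 entry points below should never be reached by
                      * a well-behaved caller.
                      */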
   2492  1.3  alnsn SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
   2493  1.1  alnsn {
   2494  1.1  alnsn 	return 0;
   2495  1.1  alnsn }
   2496  1.1  alnsn 
   2497  1.3  alnsn SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw)
   2498  1.1  alnsn {
    2499  1.4  alnsn 	SLJIT_UNREACHABLE();
                     	return SLJIT_ERR_UNSUPPORTED;
   2500  1.1  alnsn }
   2501  1.1  alnsn 
   2502  1.3  alnsn SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w)
   2503  1.1  alnsn {
    2504  1.4  alnsn 	SLJIT_UNREACHABLE();
                     	return SLJIT_ERR_UNSUPPORTED;
   2505  1.1  alnsn }
   2506  1.1  alnsn 
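                     /*
                      * Reserve a run-time patchable 64-bit constant: emit_const_64 loads the
                      * value with a four-bundle sequence whose immediate fields can later be
                      * rewritten through sljit_set_const().  Hypothetical caller:
                      *   struct sljit_const *c = sljit_emit_const(compiler, SLJIT_R0, 0, 0);
                      *   ...
                      *   sljit_set_const(sljit_get_const_addr(c), new_value);
                      */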
   2507  1.3  alnsn SLJIT_API_FUNC_ATTRIBUTE struct sljit_const * sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
   2508  1.1  alnsn {
   2509  1.1  alnsn 	struct sljit_const *const_;
   2510  1.3  alnsn 	sljit_s32 reg;
   2511  1.1  alnsn 
   2512  1.1  alnsn 	flush_buffer(compiler);
   2513  1.1  alnsn 
   2514  1.1  alnsn 	CHECK_ERROR_PTR();
   2515  1.3  alnsn 	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
   2516  1.1  alnsn 	ADJUST_LOCAL_OFFSET(dst, dstw);
   2517  1.1  alnsn 
   2518  1.1  alnsn 	const_ = (struct sljit_const *)ensure_abuf(compiler, sizeof(struct sljit_const));
   2519  1.1  alnsn 	PTR_FAIL_IF(!const_);
   2520  1.1  alnsn 	set_const(const_, compiler);
   2521  1.1  alnsn 
   2522  1.1  alnsn 	reg = FAST_IS_REG(dst) ? dst : TMP_REG2;
   2523  1.1  alnsn 
   2524  1.1  alnsn 	PTR_FAIL_IF(emit_const_64(compiler, reg, init_value, 1));
   2525  1.1  alnsn 
   2526  1.1  alnsn 	if (dst & SLJIT_MEM)
   2527  1.1  alnsn 		PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0));
   2528  1.1  alnsn 	return const_;
   2529  1.1  alnsn }
   2530  1.1  alnsn 
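                     /*
                      * Run-time patching of emitted code.  Jump targets are loaded 16 bits at a
                      * time, so the three bundles of a 48-bit target (and, below, the four
                      * bundles of a full 64-bit constant) are rewritten in place by replacing the
                      * 16-bit immediate field at bits 43..58 of each bundle (presumably the X1
                      * moveli/shl16insli immediate field) and flushing the instruction cache.
                      */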
   2531  1.4  alnsn SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target)
   2532  1.1  alnsn {
   2533  1.1  alnsn 	sljit_ins *inst = (sljit_ins *)addr;
   2534  1.1  alnsn 
   2535  1.4  alnsn 	inst[0] = (inst[0] & ~(0xFFFFL << 43)) | (((new_target >> 32) & 0xffff) << 43);
   2536  1.4  alnsn 	inst[1] = (inst[1] & ~(0xFFFFL << 43)) | (((new_target >> 16) & 0xffff) << 43);
   2537  1.4  alnsn 	inst[2] = (inst[2] & ~(0xFFFFL << 43)) | ((new_target & 0xffff) << 43);
   2538  1.1  alnsn 	SLJIT_CACHE_FLUSH(inst, inst + 3);
   2539  1.1  alnsn }
   2540  1.1  alnsn 
   2541  1.1  alnsn SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
   2542  1.1  alnsn {
   2543  1.1  alnsn 	sljit_ins *inst = (sljit_ins *)addr;
   2544  1.1  alnsn 
   2545  1.1  alnsn 	inst[0] = (inst[0] & ~(0xFFFFL << 43)) | (((new_constant >> 48) & 0xFFFFL) << 43);
   2546  1.1  alnsn 	inst[1] = (inst[1] & ~(0xFFFFL << 43)) | (((new_constant >> 32) & 0xFFFFL) << 43);
   2547  1.1  alnsn 	inst[2] = (inst[2] & ~(0xFFFFL << 43)) | (((new_constant >> 16) & 0xFFFFL) << 43);
   2548  1.1  alnsn 	inst[3] = (inst[3] & ~(0xFFFFL << 43)) | ((new_constant & 0xFFFFL) << 43);
   2549  1.1  alnsn 	SLJIT_CACHE_FLUSH(inst, inst + 4);
   2550  1.1  alnsn }
   2551  1.3  alnsn 
   2552  1.3  alnsn SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
   2553  1.3  alnsn {
   2554  1.3  alnsn 	CHECK_REG_INDEX(check_sljit_get_register_index(reg));
   2555  1.3  alnsn 	return reg_map[reg];
   2556  1.3  alnsn }
   2557  1.3  alnsn 
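                     /*
                      * Injecting raw machine code through sljit_emit_op_custom is not supported
                      * on this port; the arguments are only validated and the call is rejected.
                      */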
   2558  1.3  alnsn SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
   2559  1.3  alnsn 	void *instruction, sljit_s32 size)
   2560  1.3  alnsn {
   2561  1.3  alnsn 	CHECK_ERROR();
   2562  1.3  alnsn 	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
   2563  1.3  alnsn 	return SLJIT_ERR_UNSUPPORTED;
   2564  1.3  alnsn }
   2565  1.3  alnsn 