/* $NetBSD: sljitNativeTILEGX_64.c,v 1.2 2014/06/17 19:33:20 alnsn Exp $ */

/*
 * Stack-less Just-In-Time compiler
 *
 * Copyright 2013-2013 Tilera Corporation (jiwang@tilera.com). All rights reserved.
 * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* TileGX architecture. */
/* Contributed by Tilera Corporation. */
#include "sljitNativeTILEGX-encoder.c"

#define SIMM_8BIT_MAX (0x7f)
#define SIMM_8BIT_MIN (-0x80)
#define SIMM_16BIT_MAX (0x7fff)
#define SIMM_16BIT_MIN (-0x8000)
#define SIMM_17BIT_MAX (0xffff)
#define SIMM_17BIT_MIN (-0x10000)
#define SIMM_32BIT_MIN (-0x80000000)
#define SIMM_32BIT_MAX (0x7fffffff)
#define SIMM_48BIT_MIN (-0x800000000000L)
#define SIMM_48BIT_MAX (0x7fffffff0000L)
#define IMM16(imm) ((imm) & 0xffff)

#define UIMM_16BIT_MAX (0xffff)

#define TMP_REG1 (SLJIT_NO_REGISTERS + 1)
#define TMP_REG2 (SLJIT_NO_REGISTERS + 2)
#define TMP_REG3 (SLJIT_NO_REGISTERS + 3)
#define ADDR_TMP (SLJIT_NO_REGISTERS + 4)
#define PIC_ADDR_REG TMP_REG2

static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = {
  63, 0, 1, 2, 3, 4, 30, 31, 32, 33, 34, 54, 5, 16, 6, 7
};

#define SLJIT_LOCALS_REG_mapped 54
#define TMP_REG1_mapped 5
#define TMP_REG2_mapped 16
#define TMP_REG3_mapped 6
#define ADDR_TMP_mapped 7
#define SLJIT_SAVED_REG1_mapped 30
#define SLJIT_SAVED_REG2_mapped 31
#define SLJIT_SAVED_REG3_mapped 32
#define SLJIT_SAVED_EREG1_mapped 33
#define SLJIT_SAVED_EREG2_mapped 34

/* Flags are kept in volatile registers. */
#define EQUAL_FLAG 8
/* And the carry flag as well. */
#define ULESS_FLAG 9
#define UGREATER_FLAG 10
#define LESS_FLAG 11
#define GREATER_FLAG 12
#define OVERFLOW_FLAG 13

#define ZERO 63
#define RA 55
#define TMP_EREG1 14
#define TMP_EREG2 15

#define LOAD_DATA 0x01
#define WORD_DATA 0x00
#define BYTE_DATA 0x02
#define HALF_DATA 0x04
#define INT_DATA 0x06
#define SIGNED_DATA 0x08
#define DOUBLE_DATA 0x10

/* Separates integer and floating point registers. */
#define GPR_REG 0xf

#define MEM_MASK 0x1f

#define WRITE_BACK 0x00020
#define ARG_TEST 0x00040
#define ALT_KEEP_CACHE 0x00080
#define CUMULATIVE_OP 0x00100
#define LOGICAL_OP 0x00200
#define IMM_OP 0x00400
#define SRC2_IMM 0x00800

#define UNUSED_DEST 0x01000
#define REG_DEST 0x02000
#define REG1_SOURCE 0x04000
#define REG2_SOURCE 0x08000
#define SLOW_SRC1 0x10000
#define SLOW_SRC2 0x20000
#define SLOW_DEST 0x40000

/* Only these flags are set. UNUSED_DEST is not set when no flags should be set. */
#define CHECK_FLAGS(list) (!(flags & UNUSED_DEST) || (op & GET_FLAGS(~(list))))

SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char *sljit_get_platform_name(void)
{
  return "TileGX" SLJIT_CPUINFO;
}

/* Length of an instruction word. */
typedef sljit_uw sljit_ins;

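/* One decoded instruction waiting to be bundled. input_registers and
   output_registers are bitmasks indexed by hardware register number,
   used for intra-bundle hazard checks; operand_value holds the raw
   operand fields in the order they were pushed. */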
struct jit_instr {
  const struct tilegx_opcode* opcode;
  tilegx_pipeline pipe;
  unsigned long input_registers;
  unsigned long output_registers;
  int operand_value[4];
  int line;
};

/* Opcode Helper Macros */
#define TILEGX_X_MODE 0

#define X_MODE create_Mode(TILEGX_X_MODE)

#define FNOP_X0 \
  create_Opcode_X0(RRR_0_OPCODE_X0) | \
  create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) | \
  create_UnaryOpcodeExtension_X0(FNOP_UNARY_OPCODE_X0)

#define FNOP_X1 \
  create_Opcode_X1(RRR_0_OPCODE_X1) | \
  create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \
  create_UnaryOpcodeExtension_X1(FNOP_UNARY_OPCODE_X1)

#define NOP \
  create_Mode(TILEGX_X_MODE) | FNOP_X0 | FNOP_X1

#define ANOP_X0 \
  create_Opcode_X0(RRR_0_OPCODE_X0) | \
  create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) | \
  create_UnaryOpcodeExtension_X0(NOP_UNARY_OPCODE_X0)

#define BPT create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
  create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \
  create_UnaryOpcodeExtension_X1(ILL_UNARY_OPCODE_X1) | \
  create_Dest_X1(0x1C) | create_SrcA_X1(0x25) | ANOP_X0

#define ADD_X1 \
  create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
  create_RRROpcodeExtension_X1(ADD_RRR_0_OPCODE_X1) | FNOP_X0

#define ADDI_X1 \
  create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \
  create_Imm8OpcodeExtension_X1(ADDI_IMM8_OPCODE_X1) | FNOP_X0

#define SUB_X1 \
  create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
  create_RRROpcodeExtension_X1(SUB_RRR_0_OPCODE_X1) | FNOP_X0

#define NOR_X1 \
  create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
  create_RRROpcodeExtension_X1(NOR_RRR_0_OPCODE_X1) | FNOP_X0

#define OR_X1 \
  create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
  create_RRROpcodeExtension_X1(OR_RRR_0_OPCODE_X1) | FNOP_X0

#define AND_X1 \
  create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
  create_RRROpcodeExtension_X1(AND_RRR_0_OPCODE_X1) | FNOP_X0

#define XOR_X1 \
  create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
  create_RRROpcodeExtension_X1(XOR_RRR_0_OPCODE_X1) | FNOP_X0

#define CMOVNEZ_X0 \
  create_Mode(TILEGX_X_MODE) | create_Opcode_X0(RRR_0_OPCODE_X0) | \
  create_RRROpcodeExtension_X0(CMOVNEZ_RRR_0_OPCODE_X0) | FNOP_X1

#define CMOVEQZ_X0 \
  create_Mode(TILEGX_X_MODE) | create_Opcode_X0(RRR_0_OPCODE_X0) | \
  create_RRROpcodeExtension_X0(CMOVEQZ_RRR_0_OPCODE_X0) | FNOP_X1

#define ADDLI_X1 \
  create_Mode(TILEGX_X_MODE) | create_Opcode_X1(ADDLI_OPCODE_X1) | FNOP_X0

#define V4INT_L_X1 \
  create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
  create_RRROpcodeExtension_X1(V4INT_L_RRR_0_OPCODE_X1) | FNOP_X0

#define BFEXTU_X0 \
  create_Mode(TILEGX_X_MODE) | create_Opcode_X0(BF_OPCODE_X0) | \
  create_BFOpcodeExtension_X0(BFEXTU_BF_OPCODE_X0) | FNOP_X1

#define BFEXTS_X0 \
  create_Mode(TILEGX_X_MODE) | create_Opcode_X0(BF_OPCODE_X0) | \
  create_BFOpcodeExtension_X0(BFEXTS_BF_OPCODE_X0) | FNOP_X1

#define SHL16INSLI_X1 \
  create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHL16INSLI_OPCODE_X1) | FNOP_X0

#define ST_X1 \
  create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
  create_RRROpcodeExtension_X1(ST_RRR_0_OPCODE_X1) | create_Dest_X1(0x0) | FNOP_X0

#define LD_X1 \
  create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
  create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \
  create_UnaryOpcodeExtension_X1(LD_UNARY_OPCODE_X1) | FNOP_X0

#define JR_X1 \
  create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
  create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \
  create_UnaryOpcodeExtension_X1(JR_UNARY_OPCODE_X1) | FNOP_X0

#define JALR_X1 \
  create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
  create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \
  create_UnaryOpcodeExtension_X1(JALR_UNARY_OPCODE_X1) | FNOP_X0

#define CLZ_X0 \
  create_Mode(TILEGX_X_MODE) | create_Opcode_X0(RRR_0_OPCODE_X0) | \
  create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) | \
  create_UnaryOpcodeExtension_X0(CNTLZ_UNARY_OPCODE_X0) | FNOP_X1

#define CMPLTUI_X1 \
  create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \
  create_Imm8OpcodeExtension_X1(CMPLTUI_IMM8_OPCODE_X1) | FNOP_X0

#define CMPLTU_X1 \
  create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
  create_RRROpcodeExtension_X1(CMPLTU_RRR_0_OPCODE_X1) | FNOP_X0

#define CMPLTS_X1 \
  create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
  create_RRROpcodeExtension_X1(CMPLTS_RRR_0_OPCODE_X1) | FNOP_X0

#define XORI_X1 \
  create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \
  create_Imm8OpcodeExtension_X1(XORI_IMM8_OPCODE_X1) | FNOP_X0

#define ORI_X1 \
  create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \
  create_Imm8OpcodeExtension_X1(ORI_IMM8_OPCODE_X1) | FNOP_X0

#define ANDI_X1 \
  create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \
  create_Imm8OpcodeExtension_X1(ANDI_IMM8_OPCODE_X1) | FNOP_X0

#define SHLI_X1 \
  create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHIFT_OPCODE_X1) | \
  create_ShiftOpcodeExtension_X1(SHLI_SHIFT_OPCODE_X1) | FNOP_X0

#define SHL_X1 \
  create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
  create_RRROpcodeExtension_X1(SHL_RRR_0_OPCODE_X1) | FNOP_X0

#define SHRSI_X1 \
  create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHIFT_OPCODE_X1) | \
  create_ShiftOpcodeExtension_X1(SHRSI_SHIFT_OPCODE_X1) | FNOP_X0

#define SHRS_X1 \
  create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
  create_RRROpcodeExtension_X1(SHRS_RRR_0_OPCODE_X1) | FNOP_X0

#define SHRUI_X1 \
  create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHIFT_OPCODE_X1) | \
  create_ShiftOpcodeExtension_X1(SHRUI_SHIFT_OPCODE_X1) | FNOP_X0

#define SHRU_X1 \
  create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
  create_RRROpcodeExtension_X1(SHRU_RRR_0_OPCODE_X1) | FNOP_X0

#define BEQZ_X1 \
  create_Mode(TILEGX_X_MODE) | create_Opcode_X1(BRANCH_OPCODE_X1) | \
  create_BrType_X1(BEQZ_BRANCH_OPCODE_X1) | FNOP_X0

#define BNEZ_X1 \
  create_Mode(TILEGX_X_MODE) | create_Opcode_X1(BRANCH_OPCODE_X1) | \
  create_BrType_X1(BNEZ_BRANCH_OPCODE_X1) | FNOP_X0

#define J_X1 \
  create_Mode(TILEGX_X_MODE) | create_Opcode_X1(JUMP_OPCODE_X1) | \
  create_JumpOpcodeExtension_X1(J_JUMP_OPCODE_X1) | FNOP_X0

#define JAL_X1 \
  create_Mode(TILEGX_X_MODE) | create_Opcode_X1(JUMP_OPCODE_X1) | \
  create_JumpOpcodeExtension_X1(JAL_JUMP_OPCODE_X1) | FNOP_X0

#define DEST_X0(x) create_Dest_X0(x)
#define SRCA_X0(x) create_SrcA_X0(x)
#define SRCB_X0(x) create_SrcB_X0(x)
#define DEST_X1(x) create_Dest_X1(x)
#define SRCA_X1(x) create_SrcA_X1(x)
#define SRCB_X1(x) create_SrcB_X1(x)
#define IMM16_X1(x) create_Imm16_X1(x)
#define IMM8_X1(x) create_Imm8_X1(x)
#define BFSTART_X0(x) create_BFStart_X0(x)
#define BFEND_X0(x) create_BFEnd_X0(x)
#define SHIFTIMM_X1(x) create_ShAmt_X1(x)
#define JOFF_X1(x) create_JumpOff_X1(x)
#define BOFF_X1(x) create_BrOff_X1(x)

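/* Load/store opcode table, indexed by (flags & MEM_MASK): bit 0 selects
   load vs. store, bits 1-2 select the access width (word, byte, half,
   int) and bit 3 selects sign extension on loads. Stores ignore the
   signedness bit, so both halves of the table share the same st
   opcodes. */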
static SLJIT_CONST tilegx_mnemonic data_transfer_insts[16] = {
  /* u w s */ TILEGX_OPC_ST /* st */,
  /* u w l */ TILEGX_OPC_LD /* ld */,
  /* u b s */ TILEGX_OPC_ST1 /* st1 */,
  /* u b l */ TILEGX_OPC_LD1U /* ld1u */,
  /* u h s */ TILEGX_OPC_ST2 /* st2 */,
  /* u h l */ TILEGX_OPC_LD2U /* ld2u */,
  /* u i s */ TILEGX_OPC_ST4 /* st4 */,
  /* u i l */ TILEGX_OPC_LD4U /* ld4u */,
  /* s w s */ TILEGX_OPC_ST /* st */,
  /* s w l */ TILEGX_OPC_LD /* ld */,
  /* s b s */ TILEGX_OPC_ST1 /* st1 */,
  /* s b l */ TILEGX_OPC_LD1S /* ld1s */,
  /* s h s */ TILEGX_OPC_ST2 /* st2 */,
  /* s h l */ TILEGX_OPC_LD2S /* ld2s */,
  /* s i s */ TILEGX_OPC_ST4 /* st4 */,
  /* s i l */ TILEGX_OPC_LD4S /* ld4s */,
};

#ifdef TILEGX_JIT_DEBUG
static sljit_si push_inst_debug(struct sljit_compiler *compiler, sljit_ins ins, int line)
{
  sljit_ins *ptr = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins));
  FAIL_IF(!ptr);
  *ptr = ins;
  compiler->size++;
  printf("|%04d|S0|:\t\t", line);
  print_insn_tilegx(ptr);
  return SLJIT_SUCCESS;
}

static sljit_si push_inst_nodebug(struct sljit_compiler *compiler, sljit_ins ins)
{
  sljit_ins *ptr = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins));
  FAIL_IF(!ptr);
  *ptr = ins;
  compiler->size++;
  return SLJIT_SUCCESS;
}

#define push_inst(a, b) push_inst_debug(a, b, __LINE__)
#else
static sljit_si push_inst(struct sljit_compiler *compiler, sljit_ins ins)
{
  sljit_ins *ptr = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins));
  FAIL_IF(!ptr);
  *ptr = ins;
  compiler->size++;
  return SLJIT_SUCCESS;
}
#endif

#define BUNDLE_FORMAT_MASK(p0, p1, p2) \
  ((p0) | ((p1) << 8) | ((p2) << 16))

#define BUNDLE_FORMAT(p0, p1, p2) \
  { \
    { \
      (tilegx_pipeline)(p0), \
      (tilegx_pipeline)(p1), \
      (tilegx_pipeline)(p2) \
    }, \
    BUNDLE_FORMAT_MASK(1 << (p0), 1 << (p1), (1 << (p2))) \
  }

#define NO_PIPELINE TILEGX_NUM_PIPELINE_ENCODINGS

#define tilegx_is_x_pipeline(p) ((int)(p) <= (int)TILEGX_PIPELINE_X1)

#define PI(encoding) \
  push_inst(compiler, encoding)

#define PB3(opcode, dst, srca, srcb) \
  push_3_buffer(compiler, opcode, dst, srca, srcb, __LINE__)

#define PB2(opcode, dst, src) \
  push_2_buffer(compiler, opcode, dst, src, __LINE__)

#define JR(reg) \
  push_jr_buffer(compiler, TILEGX_OPC_JR, reg, __LINE__)

#define ADD(dst, srca, srcb) \
  push_3_buffer(compiler, TILEGX_OPC_ADD, dst, srca, srcb, __LINE__)

#define SUB(dst, srca, srcb) \
  push_3_buffer(compiler, TILEGX_OPC_SUB, dst, srca, srcb, __LINE__)

#define NOR(dst, srca, srcb) \
  push_3_buffer(compiler, TILEGX_OPC_NOR, dst, srca, srcb, __LINE__)

#define OR(dst, srca, srcb) \
  push_3_buffer(compiler, TILEGX_OPC_OR, dst, srca, srcb, __LINE__)

#define XOR(dst, srca, srcb) \
  push_3_buffer(compiler, TILEGX_OPC_XOR, dst, srca, srcb, __LINE__)

#define AND(dst, srca, srcb) \
  push_3_buffer(compiler, TILEGX_OPC_AND, dst, srca, srcb, __LINE__)

#define CLZ(dst, src) \
  push_2_buffer(compiler, TILEGX_OPC_CLZ, dst, src, __LINE__)

#define SHLI(dst, srca, srcb) \
  push_3_buffer(compiler, TILEGX_OPC_SHLI, dst, srca, srcb, __LINE__)

#define SHRUI(dst, srca, imm) \
  push_3_buffer(compiler, TILEGX_OPC_SHRUI, dst, srca, imm, __LINE__)

#define XORI(dst, srca, imm) \
  push_3_buffer(compiler, TILEGX_OPC_XORI, dst, srca, imm, __LINE__)

#define ORI(dst, srca, imm) \
  push_3_buffer(compiler, TILEGX_OPC_ORI, dst, srca, imm, __LINE__)

#define CMPLTU(dst, srca, srcb) \
  push_3_buffer(compiler, TILEGX_OPC_CMPLTU, dst, srca, srcb, __LINE__)

#define CMPLTS(dst, srca, srcb) \
  push_3_buffer(compiler, TILEGX_OPC_CMPLTS, dst, srca, srcb, __LINE__)

#define CMPLTUI(dst, srca, imm) \
  push_3_buffer(compiler, TILEGX_OPC_CMPLTUI, dst, srca, imm, __LINE__)

#define CMOVNEZ(dst, srca, srcb) \
  push_3_buffer(compiler, TILEGX_OPC_CMOVNEZ, dst, srca, srcb, __LINE__)

#define CMOVEQZ(dst, srca, srcb) \
  push_3_buffer(compiler, TILEGX_OPC_CMOVEQZ, dst, srca, srcb, __LINE__)

#define ADDLI(dst, srca, srcb) \
  push_3_buffer(compiler, TILEGX_OPC_ADDLI, dst, srca, srcb, __LINE__)

#define SHL16INSLI(dst, srca, srcb) \
  push_3_buffer(compiler, TILEGX_OPC_SHL16INSLI, dst, srca, srcb, __LINE__)

#define LD_ADD(dst, addr, adjust) \
  push_3_buffer(compiler, TILEGX_OPC_LD_ADD, dst, addr, adjust, __LINE__)

#define ST_ADD(src, addr, adjust) \
  push_3_buffer(compiler, TILEGX_OPC_ST_ADD, src, addr, adjust, __LINE__)

#define LD(dst, addr) \
  push_2_buffer(compiler, TILEGX_OPC_LD, dst, addr, __LINE__)

#define BFEXTU(dst, src, start, end) \
  push_4_buffer(compiler, TILEGX_OPC_BFEXTU, dst, src, start, end, __LINE__)

#define BFEXTS(dst, src, start, end) \
  push_4_buffer(compiler, TILEGX_OPC_BFEXTS, dst, src, start, end, __LINE__)

#define ADD_SOLO(dest, srca, srcb) \
  push_inst(compiler, ADD_X1 | DEST_X1(dest) | SRCA_X1(srca) | SRCB_X1(srcb))

#define ADDI_SOLO(dest, srca, imm) \
  push_inst(compiler, ADDI_X1 | DEST_X1(dest) | SRCA_X1(srca) | IMM8_X1(imm))

#define ADDLI_SOLO(dest, srca, imm) \
  push_inst(compiler, ADDLI_X1 | DEST_X1(dest) | SRCA_X1(srca) | IMM16_X1(imm))

#define SHL16INSLI_SOLO(dest, srca, imm) \
  push_inst(compiler, SHL16INSLI_X1 | DEST_X1(dest) | SRCA_X1(srca) | IMM16_X1(imm))

#define JALR_SOLO(reg) \
  push_inst(compiler, JALR_X1 | SRCA_X1(reg))

#define JR_SOLO(reg) \
  push_inst(compiler, JR_X1 | SRCA_X1(reg))

struct Format {
  /* Mapping of bundle issue slot to assigned pipe. */
  tilegx_pipeline pipe[TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE];

  /* Mask of pipes used by this bundle. */
  unsigned int pipe_mask;
};

const struct Format formats[] =
{
  /* In Y format we must always have something in Y2, since it has
   * no fnop, so this conveys that Y2 must always be used. */
  BUNDLE_FORMAT(TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y2, NO_PIPELINE),
  BUNDLE_FORMAT(TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y2, NO_PIPELINE),
  BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y0, NO_PIPELINE),
  BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y1, NO_PIPELINE),

  /* Y format has three instructions. */
  BUNDLE_FORMAT(TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y2),
  BUNDLE_FORMAT(TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y1),
  BUNDLE_FORMAT(TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y2),
  BUNDLE_FORMAT(TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y0),
  BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y1),
  BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y0),

  /* X format has only two instructions. */
  BUNDLE_FORMAT(TILEGX_PIPELINE_X0, TILEGX_PIPELINE_X1, NO_PIPELINE),
  BUNDLE_FORMAT(TILEGX_PIPELINE_X1, TILEGX_PIPELINE_X0, NO_PIPELINE)
};


struct jit_instr inst_buf[TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE];
unsigned long inst_buf_index;

tilegx_pipeline get_any_valid_pipe(const struct tilegx_opcode* opcode)
{
  /* FIXME: tile: we could pregenerate this. */
  int pipe;
  for (pipe = 0; pipe < TILEGX_NUM_PIPELINE_ENCODINGS && (opcode->pipes & (1 << pipe)) == 0; pipe++)
    ;
  return (tilegx_pipeline)(pipe);
}

void insert_nop(tilegx_mnemonic opc, int line)
{
  const struct tilegx_opcode* opcode = NULL;

  memmove(&inst_buf[1], &inst_buf[0], inst_buf_index * sizeof inst_buf[0]);

  opcode = &tilegx_opcodes[opc];
  inst_buf[0].opcode = opcode;
  inst_buf[0].pipe = get_any_valid_pipe(opcode);
  inst_buf[0].input_registers = 0;
  inst_buf[0].output_registers = 0;
  inst_buf[0].line = line;
  ++inst_buf_index;
}

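/* Find a bundle format whose pipe assignment can accommodate every
   buffered instruction. A missing third instruction is modelled as
   NO_PIPELINE so the two-wide X formats can still match. Returns NULL
   when no format fits. */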
const struct Format* compute_format(void)
{
  unsigned int compatible_pipes = BUNDLE_FORMAT_MASK(
    inst_buf[0].opcode->pipes,
    inst_buf[1].opcode->pipes,
    (inst_buf_index == 3 ? inst_buf[2].opcode->pipes : (1 << NO_PIPELINE)));

  const struct Format* match = NULL;
  const struct Format *b = NULL;
  unsigned int i = 0;
  for (i = 0; i < sizeof formats / sizeof formats[0]; i++) {
    b = &formats[i];
    if ((b->pipe_mask & compatible_pipes) == b->pipe_mask) {
      match = b;
      break;
    }
  }

  return match;
}

sljit_si assign_pipes(void)
{
  unsigned long output_registers = 0;
  unsigned int i = 0;

  if (inst_buf_index == 1) {
    tilegx_mnemonic opc = inst_buf[0].opcode->can_bundle
      ? TILEGX_OPC_FNOP : TILEGX_OPC_NOP;
    insert_nop(opc, __LINE__);
  }

  const struct Format* match = compute_format();

  if (match == NULL)
    return -1;

  for (i = 0; i < inst_buf_index; i++) {

    if ((i > 0) && ((inst_buf[i].input_registers & output_registers) != 0))
      return -1;

    if ((i > 0) && ((inst_buf[i].output_registers & output_registers) != 0))
      return -1;

    /* Don't include Rzero in the match set, to avoid triggering
       needlessly on 'prefetch' instrs. */

    output_registers |= inst_buf[i].output_registers & 0xFFFFFFFFFFFFFFL;

    inst_buf[i].pipe = match->pipe[i];
  }

  /* If only 2 instrs, and in Y-mode, insert a nop. */
  if (inst_buf_index == 2 && !tilegx_is_x_pipeline(match->pipe[0])) {
    insert_nop(TILEGX_OPC_FNOP, __LINE__);

    /* Select the yet unassigned pipe. */
    tilegx_pipeline pipe = (tilegx_pipeline)(((TILEGX_PIPELINE_Y0
      + TILEGX_PIPELINE_Y1 + TILEGX_PIPELINE_Y2)
      - (inst_buf[1].pipe + inst_buf[2].pipe)));

    inst_buf[0].pipe = pipe;
  }

  return 0;
}

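/* Encode one buffered instruction into its assigned issue slot: start
   from the opcode's fixed bits for that pipe, then merge in each
   operand field via the encoder's per-operand insert routine. */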
tilegx_bundle_bits get_bundle_bit(struct jit_instr *inst)
{
  int i, val;
  const struct tilegx_opcode* opcode = inst->opcode;
  tilegx_bundle_bits bits = opcode->fixed_bit_values[inst->pipe];

  const struct tilegx_operand* operand = NULL;
  for (i = 0; i < opcode->num_operands; i++) {
    operand = &tilegx_operands[opcode->operands[inst->pipe][i]];
    val = inst->operand_value[i];

    bits |= operand->insert(val);
  }

  return bits;
}

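/* Try to emit the buffered instructions as one bundle. If they cannot
   legally share a bundle (no matching format, or a register hazard
   between slots), retry with progressively fewer leading instructions
   and carry the remainder over into the next bundle. */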
static sljit_si update_buffer(struct sljit_compiler *compiler)
{
  int i;
  int orig_index = inst_buf_index;
  struct jit_instr inst0 = inst_buf[0];
  struct jit_instr inst1 = inst_buf[1];
  struct jit_instr inst2 = inst_buf[2];
  tilegx_bundle_bits bits = 0;

  /* If the bundle is valid as is, perform the encoding and return. */
  if (assign_pipes() == 0) {
    for (i = 0; i < inst_buf_index; i++) {
      bits |= get_bundle_bit(inst_buf + i);
#ifdef TILEGX_JIT_DEBUG
      printf("|%04d", inst_buf[i].line);
#endif
    }
#ifdef TILEGX_JIT_DEBUG
    if (inst_buf_index == 3)
      printf("|M0|:\t");
    else
      printf("|M0|:\t\t");
    print_insn_tilegx(&bits);
#endif

    inst_buf_index = 0;

#ifdef TILEGX_JIT_DEBUG
    return push_inst_nodebug(compiler, bits);
#else
    return push_inst(compiler, bits);
#endif
  }

  /* If the bundle is invalid, split it in two. First encode the first two
     (or possibly 1) instructions, and then the last, separately. Note that
     assign_pipes may have re-ordered the instrs (by inserting no-ops in
     lower slots) so we need to reset them. */

  inst_buf_index = orig_index - 1;
  inst_buf[0] = inst0;
  inst_buf[1] = inst1;
  inst_buf[2] = inst2;
  if (assign_pipes() == 0) {
    for (i = 0; i < inst_buf_index; i++) {
      bits |= get_bundle_bit(inst_buf + i);
#ifdef TILEGX_JIT_DEBUG
      printf("|%04d", inst_buf[i].line);
#endif
    }

#ifdef TILEGX_JIT_DEBUG
    if (inst_buf_index == 3)
      printf("|M1|:\t");
    else
      printf("|M1|:\t\t");
    print_insn_tilegx(&bits);
#endif

    if ((orig_index - 1) == 2) {
      inst_buf[0] = inst2;
      inst_buf_index = 1;
    } else if ((orig_index - 1) == 1) {
      inst_buf[0] = inst1;
      inst_buf_index = 1;
    } else
      SLJIT_ASSERT_STOP();

#ifdef TILEGX_JIT_DEBUG
    return push_inst_nodebug(compiler, bits);
#else
    return push_inst(compiler, bits);
#endif
  } else {
    /* We had 3 instrs of which the first 2 can't live in the same bundle.
       Split those two. Note that we don't try to then combine the second
       and third instr into a single bundle. First instruction: */
    inst_buf_index = 1;
    inst_buf[0] = inst0;
    inst_buf[1] = inst1;
    inst_buf[2] = inst2;
    if (assign_pipes() == 0) {
      for (i = 0; i < inst_buf_index; i++) {
        bits |= get_bundle_bit(inst_buf + i);
#ifdef TILEGX_JIT_DEBUG
        printf("|%04d", inst_buf[i].line);
#endif
      }

#ifdef TILEGX_JIT_DEBUG
      if (inst_buf_index == 3)
        printf("|M2|:\t");
      else
        printf("|M2|:\t\t");
      print_insn_tilegx(&bits);
#endif

      inst_buf[0] = inst1;
      inst_buf[1] = inst2;
      inst_buf_index = orig_index - 1;
#ifdef TILEGX_JIT_DEBUG
      return push_inst_nodebug(compiler, bits);
#else
      return push_inst(compiler, bits);
#endif
    } else
      SLJIT_ASSERT_STOP();
  }

  SLJIT_ASSERT_STOP();
}

static sljit_si flush_buffer(struct sljit_compiler *compiler)
{
  while (inst_buf_index != 0)
    FAIL_IF(update_buffer(compiler));

  return SLJIT_SUCCESS;
}

static sljit_si push_4_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int op1, int op2, int op3, int line)
{
  if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE)
    FAIL_IF(update_buffer(compiler));

  const struct tilegx_opcode* opcode = &tilegx_opcodes[opc];
  inst_buf[inst_buf_index].opcode = opcode;
  inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode);
  inst_buf[inst_buf_index].operand_value[0] = op0;
  inst_buf[inst_buf_index].operand_value[1] = op1;
  inst_buf[inst_buf_index].operand_value[2] = op2;
  inst_buf[inst_buf_index].operand_value[3] = op3;
  inst_buf[inst_buf_index].input_registers = 1L << op1;
  inst_buf[inst_buf_index].output_registers = 1L << op0;
  inst_buf[inst_buf_index].line = line;
  inst_buf_index++;

  return SLJIT_SUCCESS;
}

static sljit_si push_3_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int op1, int op2, int line)
{
  if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE)
    FAIL_IF(update_buffer(compiler));

  const struct tilegx_opcode* opcode = &tilegx_opcodes[opc];
  inst_buf[inst_buf_index].opcode = opcode;
  inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode);
  inst_buf[inst_buf_index].operand_value[0] = op0;
  inst_buf[inst_buf_index].operand_value[1] = op1;
  inst_buf[inst_buf_index].operand_value[2] = op2;
  inst_buf[inst_buf_index].line = line;

  switch (opc) {
  case TILEGX_OPC_ST_ADD:
    /* Reads the value (op0) and address (op1); writes back the updated
       address register. */
    inst_buf[inst_buf_index].input_registers = (1L << op0) | (1L << op1);
    inst_buf[inst_buf_index].output_registers = 1L << op1;
    break;
  case TILEGX_OPC_LD_ADD:
    inst_buf[inst_buf_index].input_registers = 1L << op1;
    inst_buf[inst_buf_index].output_registers = (1L << op0) | (1L << op1);
    break;
  case TILEGX_OPC_ADD:
  case TILEGX_OPC_AND:
  case TILEGX_OPC_SUB:
  case TILEGX_OPC_OR:
  case TILEGX_OPC_XOR:
  case TILEGX_OPC_NOR:
  case TILEGX_OPC_SHL:
  case TILEGX_OPC_SHRU:
  case TILEGX_OPC_SHRS:
  case TILEGX_OPC_CMPLTU:
  case TILEGX_OPC_CMPLTS:
  case TILEGX_OPC_CMOVEQZ:
  case TILEGX_OPC_CMOVNEZ:
    inst_buf[inst_buf_index].input_registers = (1L << op1) | (1L << op2);
    inst_buf[inst_buf_index].output_registers = 1L << op0;
    break;
  case TILEGX_OPC_ADDLI:
  case TILEGX_OPC_XORI:
  case TILEGX_OPC_ORI:
  case TILEGX_OPC_SHLI:
  case TILEGX_OPC_SHRUI:
  case TILEGX_OPC_SHRSI:
  case TILEGX_OPC_SHL16INSLI:
  case TILEGX_OPC_CMPLTUI:
  case TILEGX_OPC_CMPLTSI:
    inst_buf[inst_buf_index].input_registers = 1L << op1;
    inst_buf[inst_buf_index].output_registers = 1L << op0;
    break;
  default:
    printf("unrecognized opc: %s\n", opcode->name);
    SLJIT_ASSERT_STOP();
  }

  inst_buf_index++;

  return SLJIT_SUCCESS;
}

static sljit_si push_2_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int op1, int line)
{
  if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE)
    FAIL_IF(update_buffer(compiler));

  const struct tilegx_opcode* opcode = &tilegx_opcodes[opc];
  inst_buf[inst_buf_index].opcode = opcode;
  inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode);
  inst_buf[inst_buf_index].operand_value[0] = op0;
  inst_buf[inst_buf_index].operand_value[1] = op1;
  inst_buf[inst_buf_index].line = line;

  switch (opc) {
  case TILEGX_OPC_BEQZ:
  case TILEGX_OPC_BNEZ:
    inst_buf[inst_buf_index].input_registers = 1L << op0;
    inst_buf[inst_buf_index].output_registers = 0;
    break;
  case TILEGX_OPC_ST:
  case TILEGX_OPC_ST1:
  case TILEGX_OPC_ST2:
  case TILEGX_OPC_ST4:
    inst_buf[inst_buf_index].input_registers = (1L << op0) | (1L << op1);
    inst_buf[inst_buf_index].output_registers = 0;
    break;
  case TILEGX_OPC_CLZ:
  case TILEGX_OPC_LD:
  case TILEGX_OPC_LD1U:
  case TILEGX_OPC_LD1S:
  case TILEGX_OPC_LD2U:
  case TILEGX_OPC_LD2S:
  case TILEGX_OPC_LD4U:
  case TILEGX_OPC_LD4S:
    inst_buf[inst_buf_index].input_registers = 1L << op1;
    inst_buf[inst_buf_index].output_registers = 1L << op0;
    break;
  default:
    printf("unrecognized opc: %s\n", opcode->name);
    SLJIT_ASSERT_STOP();
  }

  inst_buf_index++;

  return SLJIT_SUCCESS;
}

static sljit_si push_0_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int line)
{
  if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE)
    FAIL_IF(update_buffer(compiler));

  const struct tilegx_opcode* opcode = &tilegx_opcodes[opc];
  inst_buf[inst_buf_index].opcode = opcode;
  inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode);
  inst_buf[inst_buf_index].input_registers = 0;
  inst_buf[inst_buf_index].output_registers = 0;
  inst_buf[inst_buf_index].line = line;
  inst_buf_index++;

  return SLJIT_SUCCESS;
}

static sljit_si push_jr_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int line)
{
  if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE)
    FAIL_IF(update_buffer(compiler));

  const struct tilegx_opcode* opcode = &tilegx_opcodes[opc];
  inst_buf[inst_buf_index].opcode = opcode;
  inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode);
  inst_buf[inst_buf_index].operand_value[0] = op0;
  inst_buf[inst_buf_index].input_registers = 1L << op0;
  inst_buf[inst_buf_index].output_registers = 0;
  inst_buf[inst_buf_index].line = line;
  inst_buf_index++;

  return flush_buffer(compiler);
}

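/* Decide how a jump will be patched. Short branches whose bundle offset
   fits in 17 signed bits get PATCH_B; direct jumps whose target lies in
   the same 1GB-aligned region get PATCH_J; everything else keeps the
   full 3-bundle immediate load of the target address emitted earlier. */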
static SLJIT_INLINE sljit_ins * detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code)
{
  sljit_sw diff;
  sljit_uw target_addr;
  sljit_ins *inst;

  if (jump->flags & SLJIT_REWRITABLE_JUMP)
    return code_ptr;

  if (jump->flags & JUMP_ADDR)
    target_addr = jump->u.target;
  else {
    SLJIT_ASSERT(jump->flags & JUMP_LABEL);
    target_addr = (sljit_uw)(code + jump->u.label->size);
  }

  inst = (sljit_ins *)jump->addr;
  if (jump->flags & IS_COND)
    inst--;

  diff = ((sljit_sw) target_addr - (sljit_sw) inst) >> 3;
  if (diff <= SIMM_17BIT_MAX && diff >= SIMM_17BIT_MIN) {
    jump->flags |= PATCH_B;

    if (!(jump->flags & IS_COND)) {
      if (jump->flags & IS_JAL) {
        jump->flags &= ~(PATCH_B);
        jump->flags |= PATCH_J;
        inst[0] = JAL_X1;

#ifdef TILEGX_JIT_DEBUG
        printf("[runtime relocate]%04d:\t", __LINE__);
        print_insn_tilegx(inst);
#endif
      } else {
        inst[0] = BEQZ_X1 | SRCA_X1(ZERO);

#ifdef TILEGX_JIT_DEBUG
        printf("[runtime relocate]%04d:\t", __LINE__);
        print_insn_tilegx(inst);
#endif
      }

      return inst;
    }

    inst[0] = inst[0] ^ (0x7L << 55);

#ifdef TILEGX_JIT_DEBUG
    printf("[runtime relocate]%04d:\t", __LINE__);
    print_insn_tilegx(inst);
#endif
    jump->addr -= sizeof(sljit_ins);
    return inst;
  }

  if (jump->flags & IS_COND) {
    if ((target_addr & ~0x3FFFFFFFL) == ((jump->addr + sizeof(sljit_ins)) & ~0x3FFFFFFFL)) {
      jump->flags |= PATCH_J;
      inst[0] = (inst[0] & ~(BOFF_X1(-1))) | BOFF_X1(2);
      inst[1] = J_X1;
      return inst + 1;
    }

    return code_ptr;
  }

  if ((target_addr & ~0x3FFFFFFFL) == ((jump->addr + sizeof(sljit_ins)) & ~0x3FFFFFFFL)) {
    jump->flags |= PATCH_J;

    if (jump->flags & IS_JAL) {
      inst[0] = JAL_X1;

#ifdef TILEGX_JIT_DEBUG
      printf("[runtime relocate]%04d:\t", __LINE__);
      print_insn_tilegx(inst);
#endif

    } else {
      inst[0] = J_X1;

#ifdef TILEGX_JIT_DEBUG
      printf("[runtime relocate]%04d:\t", __LINE__);
      print_insn_tilegx(inst);
#endif
    }

    return inst;
  }

  return code_ptr;
}

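/* Second pass: copy the instruction stream into executable memory,
   resolving label addresses, shortening jumps where detect_jump_type
   allows it, and finally patching branch offsets and immediate-load
   sequences in place. */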
SLJIT_API_FUNC_ATTRIBUTE void * sljit_generate_code(struct sljit_compiler *compiler)
{
  struct sljit_memory_fragment *buf;
  sljit_ins *code;
  sljit_ins *code_ptr;
  sljit_ins *buf_ptr;
  sljit_ins *buf_end;
  sljit_uw word_count;
  sljit_uw addr;

  struct sljit_label *label;
  struct sljit_jump *jump;
  struct sljit_const *const_;

  CHECK_ERROR_PTR();
  check_sljit_generate_code(compiler);
  reverse_buf(compiler);

  code = (sljit_ins *)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins));
  PTR_FAIL_WITH_EXEC_IF(code);
  buf = compiler->buf;

  code_ptr = code;
  word_count = 0;
  label = compiler->labels;
  jump = compiler->jumps;
  const_ = compiler->consts;
  do {
    buf_ptr = (sljit_ins *)buf->memory;
    buf_end = buf_ptr + (buf->used_size >> 3);
    do {
      *code_ptr = *buf_ptr++;
      SLJIT_ASSERT(!label || label->size >= word_count);
      SLJIT_ASSERT(!jump || jump->addr >= word_count);
      SLJIT_ASSERT(!const_ || const_->addr >= word_count);
      /* These structures are ordered by their address. */
      if (label && label->size == word_count) {
        /* Just recording the address. */
        label->addr = (sljit_uw) code_ptr;
        label->size = code_ptr - code;
        label = label->next;
      }

      if (jump && jump->addr == word_count) {
        if (jump->flags & IS_JAL)
          jump->addr = (sljit_uw)(code_ptr - 4);
        else
          jump->addr = (sljit_uw)(code_ptr - 3);

        code_ptr = detect_jump_type(jump, code_ptr, code);
        jump = jump->next;
      }

      if (const_ && const_->addr == word_count) {
        /* Just recording the address. */
        const_->addr = (sljit_uw) code_ptr;
        const_ = const_->next;
      }

      code_ptr++;
      word_count++;
    } while (buf_ptr < buf_end);

    buf = buf->next;
  } while (buf);

  if (label && label->size == word_count) {
    label->addr = (sljit_uw) code_ptr;
    label->size = code_ptr - code;
    label = label->next;
  }

  SLJIT_ASSERT(!label);
  SLJIT_ASSERT(!jump);
  SLJIT_ASSERT(!const_);
  SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);

  jump = compiler->jumps;
  while (jump) {
    do {
      addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
      buf_ptr = (sljit_ins *)jump->addr;

      if (jump->flags & PATCH_B) {
        addr = (sljit_sw)(addr - (jump->addr)) >> 3;
        SLJIT_ASSERT((sljit_sw) addr <= SIMM_17BIT_MAX && (sljit_sw) addr >= SIMM_17BIT_MIN);
        buf_ptr[0] = (buf_ptr[0] & ~(BOFF_X1(-1))) | BOFF_X1(addr);

#ifdef TILEGX_JIT_DEBUG
        printf("[runtime relocate]%04d:\t", __LINE__);
        print_insn_tilegx(buf_ptr);
#endif
        break;
      }

      if (jump->flags & PATCH_J) {
        SLJIT_ASSERT((addr & ~0x3FFFFFFFL) == ((jump->addr + sizeof(sljit_ins)) & ~0x3FFFFFFFL));
        addr = (sljit_sw)(addr - (jump->addr)) >> 3;
        buf_ptr[0] = (buf_ptr[0] & ~(JOFF_X1(-1))) | JOFF_X1(addr);

#ifdef TILEGX_JIT_DEBUG
        printf("[runtime relocate]%04d:\t", __LINE__);
        print_insn_tilegx(buf_ptr);
#endif
        break;
      }

      SLJIT_ASSERT(!(jump->flags & IS_JAL));

      /* Set the fields of immediate loads. */
      buf_ptr[0] = (buf_ptr[0] & ~(0xFFFFL << 43)) | (((addr >> 32) & 0xFFFFL) << 43);
      buf_ptr[1] = (buf_ptr[1] & ~(0xFFFFL << 43)) | (((addr >> 16) & 0xFFFFL) << 43);
      buf_ptr[2] = (buf_ptr[2] & ~(0xFFFFL << 43)) | ((addr & 0xFFFFL) << 43);
    } while (0);

    jump = jump->next;
  }

  compiler->error = SLJIT_ERR_COMPILED;
  compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
  SLJIT_CACHE_FLUSH(code, code_ptr);
  return code;
}

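/* Materialize a signed 64-bit constant with an ADDLI / SHL16INSLI
   chain: ADDLI sets the highest needed 16-bit chunk (sign-extended),
   and each SHL16INSLI shifts the partial result left by 16 and inserts
   the next chunk, so one instruction is needed per significant 16-bit
   group. For example, a 48-bit immediate such as 0x123456789abc would
   be built (with r5 standing in for the destination register) as:
     addli r5, zero, 0x1234
     shl16insli r5, r5, 0x5678
     shl16insli r5, r5, 0x9abc */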
static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si dst_ar, sljit_sw imm)
{
  if (imm <= SIMM_16BIT_MAX && imm >= SIMM_16BIT_MIN)
    return ADDLI(dst_ar, ZERO, imm);

  if (imm <= SIMM_32BIT_MAX && imm >= SIMM_32BIT_MIN) {
    FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 16));
    return SHL16INSLI(dst_ar, dst_ar, imm);
  }

  if (imm <= SIMM_48BIT_MAX && imm >= SIMM_48BIT_MIN) {
    FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 32));
    FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 16));
    return SHL16INSLI(dst_ar, dst_ar, imm);
  }

  FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 48));
  FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 32));
  FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 16));
  return SHL16INSLI(dst_ar, dst_ar, imm);
}

static sljit_si emit_const(struct sljit_compiler *compiler, sljit_si dst_ar, sljit_sw imm, int flush)
{
  /* Should *not* be optimized as load_immediate, as pcre relocation
     mechanism will match this fixed 3-instruction pattern. */
  if (flush) {
    FAIL_IF(ADDLI_SOLO(dst_ar, ZERO, imm >> 32));
    FAIL_IF(SHL16INSLI_SOLO(dst_ar, dst_ar, imm >> 16));
    return SHL16INSLI_SOLO(dst_ar, dst_ar, imm);
  }

  FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 32));
  FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 16));
  return SHL16INSLI(dst_ar, dst_ar, imm);
}

static sljit_si emit_const_64(struct sljit_compiler *compiler, sljit_si dst_ar, sljit_sw imm, int flush)
{
  /* Should *not* be optimized as load_immediate, as pcre relocation
     mechanism will match this fixed 4-instruction pattern. */
  if (flush) {
    FAIL_IF(ADDLI_SOLO(reg_map[dst_ar], ZERO, imm >> 48));
    FAIL_IF(SHL16INSLI_SOLO(reg_map[dst_ar], reg_map[dst_ar], imm >> 32));
    FAIL_IF(SHL16INSLI_SOLO(reg_map[dst_ar], reg_map[dst_ar], imm >> 16));
    return SHL16INSLI_SOLO(reg_map[dst_ar], reg_map[dst_ar], imm);
  }

  FAIL_IF(ADDLI(reg_map[dst_ar], ZERO, imm >> 48));
  FAIL_IF(SHL16INSLI(reg_map[dst_ar], reg_map[dst_ar], imm >> 32));
  FAIL_IF(SHL16INSLI(reg_map[dst_ar], reg_map[dst_ar], imm >> 16));
  return SHL16INSLI(reg_map[dst_ar], reg_map[dst_ar], imm);
}

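/* Function prologue. The frame stores the return address at the top of
   the newly allocated area (base + local_size - 8) and the saved
   registers in descending 8-byte slots below it, then moves the
   incoming argument registers (r0-r2) into their saved homes. */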
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
{
  sljit_ins base;

  CHECK_ERROR();
  check_sljit_emit_enter(compiler, args, scratches, saveds, local_size);

  compiler->scratches = scratches;
  compiler->saveds = saveds;
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
  compiler->logical_local_size = local_size;
#endif

  local_size += (saveds + 1) * sizeof(sljit_sw);
  local_size = (local_size + 7) & ~7;
  compiler->local_size = local_size;

  if (local_size <= SIMM_16BIT_MAX) {
    /* Frequent case. */
    FAIL_IF(ADDLI(SLJIT_LOCALS_REG_mapped, SLJIT_LOCALS_REG_mapped, -local_size));
    base = SLJIT_LOCALS_REG_mapped;
  } else {
    FAIL_IF(load_immediate(compiler, TMP_REG1_mapped, local_size));
    FAIL_IF(ADD(TMP_REG2_mapped, SLJIT_LOCALS_REG_mapped, ZERO));
    FAIL_IF(SUB(SLJIT_LOCALS_REG_mapped, SLJIT_LOCALS_REG_mapped, TMP_REG1_mapped));
    base = TMP_REG2_mapped;
    local_size = 0;
  }

  FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 8));
  FAIL_IF(ST_ADD(ADDR_TMP_mapped, RA, -8));

  if (saveds >= 1)
    FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_REG1_mapped, -8));

  if (saveds >= 2)
    FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_REG2_mapped, -8));

  if (saveds >= 3)
    FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_REG3_mapped, -8));

  if (saveds >= 4)
    FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_EREG1_mapped, -8));

  if (saveds >= 5)
    FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_EREG2_mapped, -8));

  if (args >= 1)
    FAIL_IF(ADD(SLJIT_SAVED_REG1_mapped, 0, ZERO));

  if (args >= 2)
    FAIL_IF(ADD(SLJIT_SAVED_REG2_mapped, 1, ZERO));

  if (args >= 3)
    FAIL_IF(ADD(SLJIT_SAVED_REG3_mapped, 2, ZERO));

  return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
{
  CHECK_ERROR_VOID();
  check_sljit_set_context(compiler, args, scratches, saveds, local_size);

  compiler->scratches = scratches;
  compiler->saveds = saveds;
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
  compiler->logical_local_size = local_size;
#endif

  local_size += (saveds + 1) * sizeof(sljit_sw);
  compiler->local_size = (local_size + 7) & ~7;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
{
  sljit_si local_size;
  sljit_ins base;
  int addr_initialized = 0;

  CHECK_ERROR();
  check_sljit_emit_return(compiler, op, src, srcw);

  FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));

  local_size = compiler->local_size;
  if (local_size <= SIMM_16BIT_MAX)
    base = SLJIT_LOCALS_REG_mapped;
  else {
    FAIL_IF(load_immediate(compiler, TMP_REG1_mapped, local_size));
    FAIL_IF(ADD(TMP_REG1_mapped, SLJIT_LOCALS_REG_mapped, TMP_REG1_mapped));
    base = TMP_REG1_mapped;
    local_size = 0;
  }

  FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 8));
  FAIL_IF(LD(RA, ADDR_TMP_mapped));

  if (compiler->saveds >= 5) {
    FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 48));
    addr_initialized = 1;

    FAIL_IF(LD_ADD(SLJIT_SAVED_EREG2_mapped, ADDR_TMP_mapped, 8));
  }

  if (compiler->saveds >= 4) {
    if (addr_initialized == 0) {
      FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 40));
      addr_initialized = 1;
    }

    FAIL_IF(LD_ADD(SLJIT_SAVED_EREG1_mapped, ADDR_TMP_mapped, 8));
  }

  if (compiler->saveds >= 3) {
    if (addr_initialized == 0) {
      FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 32));
      addr_initialized = 1;
    }

    FAIL_IF(LD_ADD(SLJIT_SAVED_REG3_mapped, ADDR_TMP_mapped, 8));
  }

  if (compiler->saveds >= 2) {
    if (addr_initialized == 0) {
      FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 24));
      addr_initialized = 1;
    }

    FAIL_IF(LD_ADD(SLJIT_SAVED_REG2_mapped, ADDR_TMP_mapped, 8));
  }

  if (compiler->saveds >= 1) {
    if (addr_initialized == 0) {
      FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 16));
      /* addr_initialized = 1; no need to initialize as it's the last one. */
    }

    FAIL_IF(LD_ADD(SLJIT_SAVED_REG1_mapped, ADDR_TMP_mapped, 8));
  }

  if (compiler->local_size <= SIMM_16BIT_MAX)
    FAIL_IF(ADDLI(SLJIT_LOCALS_REG_mapped, SLJIT_LOCALS_REG_mapped, compiler->local_size));
  else
    FAIL_IF(ADD(SLJIT_LOCALS_REG_mapped, TMP_REG1_mapped, ZERO));

  return JR(RA);
}

/* reg_ar is an absolute register! */

/* Fast path: the access needs only an address-setup ADDLI plus the
   load/store itself. */
static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg_ar, sljit_si arg, sljit_sw argw)
{
  SLJIT_ASSERT(arg & SLJIT_MEM);

  if ((!(flags & WRITE_BACK) || !(arg & REG_MASK))
      && !(arg & OFFS_REG_MASK) && argw <= SIMM_16BIT_MAX && argw >= SIMM_16BIT_MIN) {
    /* Works for both absolute and relative addresses. */
    if (SLJIT_UNLIKELY(flags & ARG_TEST))
      return 1;

    FAIL_IF(ADDLI(ADDR_TMP_mapped, reg_map[arg & REG_MASK], argw));

    if (flags & LOAD_DATA)
      FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, ADDR_TMP_mapped));
    else
      FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], ADDR_TMP_mapped, reg_ar));

    return -1;
  }

  return 0;
}

/* See getput_arg below.
   Note: can_cache is called only for binary operators. Those
   operators always use word arguments without write back. */
static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
{
  SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM));

  /* Simple operation except for updates. */
  if (arg & OFFS_REG_MASK) {
    argw &= 0x3;
    next_argw &= 0x3;
    if (argw && argw == next_argw
        && (arg == next_arg || (arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK)))
      return 1;
    return 0;
  }

  if (arg == next_arg) {
    if (((next_argw - argw) <= SIMM_16BIT_MAX
        && (next_argw - argw) >= SIMM_16BIT_MIN))
      return 1;

    return 0;
  }

  return 0;
}

/* Emit the necessary instructions. See can_cache above. */
static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg_ar, sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
{
  sljit_si tmp_ar, base;

  SLJIT_ASSERT(arg & SLJIT_MEM);
  if (!(next_arg & SLJIT_MEM)) {
    next_arg = 0;
    next_argw = 0;
  }

  if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA))
    tmp_ar = reg_ar;
  else
    tmp_ar = TMP_REG1_mapped;

  base = arg & REG_MASK;

  if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
    argw &= 0x3;

    if ((flags & WRITE_BACK) && reg_ar == reg_map[base]) {
      SLJIT_ASSERT(!(flags & LOAD_DATA) && reg_map[TMP_REG1] != reg_ar);
      FAIL_IF(ADD(TMP_REG1_mapped, reg_ar, ZERO));
      reg_ar = TMP_REG1_mapped;
    }

    /* Using the cache. */
    if (argw == compiler->cache_argw) {
      if (!(flags & WRITE_BACK)) {
        if (arg == compiler->cache_arg) {
          if (flags & LOAD_DATA)
            return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped);
          else
            return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar);
        }

        if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) {
          if (arg == next_arg && argw == (next_argw & 0x3)) {
            compiler->cache_arg = arg;
            compiler->cache_argw = argw;
            FAIL_IF(ADD(TMP_REG3_mapped, reg_map[base], TMP_REG3_mapped));
            if (flags & LOAD_DATA)
              return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped);
            else
              return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar);
          }

          FAIL_IF(ADD(tmp_ar, reg_map[base], TMP_REG3_mapped));
          if (flags & LOAD_DATA)
            return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, tmp_ar);
          else
            return PB2(data_transfer_insts[flags & MEM_MASK], tmp_ar, reg_ar);
        }
      } else {
        if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) {
          FAIL_IF(ADD(reg_map[base], reg_map[base], TMP_REG3_mapped));
          if (flags & LOAD_DATA)
            return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, reg_map[base]);
          else
            return PB2(data_transfer_insts[flags & MEM_MASK], reg_map[base], reg_ar);
        }
      }
    }

    if (SLJIT_UNLIKELY(argw)) {
      compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK);
      compiler->cache_argw = argw;
      FAIL_IF(SHLI(TMP_REG3_mapped, reg_map[OFFS_REG(arg)], argw));
    }

    if (!(flags & WRITE_BACK)) {
      if (arg == next_arg && argw == (next_argw & 0x3)) {
        compiler->cache_arg = arg;
        compiler->cache_argw = argw;
        FAIL_IF(ADD(TMP_REG3_mapped, reg_map[base], reg_map[!argw ? OFFS_REG(arg) : TMP_REG3]));
        tmp_ar = TMP_REG3_mapped;
      } else
        FAIL_IF(ADD(tmp_ar, reg_map[base], reg_map[!argw ? OFFS_REG(arg) : TMP_REG3]));

      if (flags & LOAD_DATA)
        return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, tmp_ar);
      else
        return PB2(data_transfer_insts[flags & MEM_MASK], tmp_ar, reg_ar);
    }

    FAIL_IF(ADD(reg_map[base], reg_map[base], reg_map[!argw ? OFFS_REG(arg) : TMP_REG3]));

    if (flags & LOAD_DATA)
      return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, reg_map[base]);
    else
      return PB2(data_transfer_insts[flags & MEM_MASK], reg_map[base], reg_ar);
  }

  if (SLJIT_UNLIKELY(flags & WRITE_BACK) && base) {
    /* Update only applies if a base register exists. */
    if (reg_ar == reg_map[base]) {
      SLJIT_ASSERT(!(flags & LOAD_DATA) && TMP_REG1_mapped != reg_ar);
      if (argw <= SIMM_16BIT_MAX && argw >= SIMM_16BIT_MIN) {
        FAIL_IF(ADDLI(ADDR_TMP_mapped, reg_map[base], argw));
        if (flags & LOAD_DATA)
          FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, ADDR_TMP_mapped));
        else
          FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], ADDR_TMP_mapped, reg_ar));

        if (argw)
          return ADDLI(reg_map[base], reg_map[base], argw);

        return SLJIT_SUCCESS;
      }

      FAIL_IF(ADD(TMP_REG1_mapped, reg_ar, ZERO));
      reg_ar = TMP_REG1_mapped;
    }

    if (argw <= SIMM_16BIT_MAX && argw >= SIMM_16BIT_MIN) {
      if (argw)
        FAIL_IF(ADDLI(reg_map[base], reg_map[base], argw));
    } else {
      if (compiler->cache_arg == SLJIT_MEM
          && argw - compiler->cache_argw <= SIMM_16BIT_MAX
          && argw - compiler->cache_argw >= SIMM_16BIT_MIN) {
        if (argw != compiler->cache_argw) {
          FAIL_IF(ADDLI(TMP_REG3_mapped, TMP_REG3_mapped, argw - compiler->cache_argw));
          compiler->cache_argw = argw;
        }

        FAIL_IF(ADD(reg_map[base], reg_map[base], TMP_REG3_mapped));
      } else {
        compiler->cache_arg = SLJIT_MEM;
        compiler->cache_argw = argw;
        FAIL_IF(load_immediate(compiler, TMP_REG3_mapped, argw));
        FAIL_IF(ADD(reg_map[base], reg_map[base], TMP_REG3_mapped));
      }
    }

    if (flags & LOAD_DATA)
      return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, reg_map[base]);
    else
      return PB2(data_transfer_insts[flags & MEM_MASK], reg_map[base], reg_ar);
  }

  if (compiler->cache_arg == arg
      && argw - compiler->cache_argw <= SIMM_16BIT_MAX
      && argw - compiler->cache_argw >= SIMM_16BIT_MIN) {
    if (argw != compiler->cache_argw) {
      FAIL_IF(ADDLI(TMP_REG3_mapped, TMP_REG3_mapped, argw - compiler->cache_argw));
      compiler->cache_argw = argw;
    }

    if (flags & LOAD_DATA)
      return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped);
    else
      return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar);
  }

  if (compiler->cache_arg == SLJIT_MEM
      && argw - compiler->cache_argw <= SIMM_16BIT_MAX
      && argw - compiler->cache_argw >= SIMM_16BIT_MIN) {
    if (argw != compiler->cache_argw)
      FAIL_IF(ADDLI(TMP_REG3_mapped, TMP_REG3_mapped, argw - compiler->cache_argw));
  } else {
    compiler->cache_arg = SLJIT_MEM;
    FAIL_IF(load_immediate(compiler, TMP_REG3_mapped, argw));
  }

  compiler->cache_argw = argw;

  if (!base) {
    if (flags & LOAD_DATA)
      return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped);
    else
      return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar);
  }

  if (arg == next_arg
      && next_argw - argw <= SIMM_16BIT_MAX
      && next_argw - argw >= SIMM_16BIT_MIN) {
    compiler->cache_arg = arg;
    FAIL_IF(ADD(TMP_REG3_mapped, TMP_REG3_mapped, reg_map[base]));
    if (flags & LOAD_DATA)
      return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped);
    else
      return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar);
  }

  FAIL_IF(ADD(tmp_ar, TMP_REG3_mapped, reg_map[base]));

  if (flags & LOAD_DATA)
    return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, tmp_ar);
  else
    return PB2(data_transfer_insts[flags & MEM_MASK], tmp_ar, reg_ar);
}

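/* getput_arg_fast returns nonzero when it handled (or, with ARG_TEST,
   could handle) the access itself: 1 for the test-only case, -1 when
   instructions were actually emitted, so emit_op_mem just reports the
   pending compiler->error. A zero return falls through to the general
   getput_arg path with the cache reset. */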
static SLJIT_INLINE sljit_si emit_op_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg_ar, sljit_si arg, sljit_sw argw)
{
  if (getput_arg_fast(compiler, flags, reg_ar, arg, argw))
    return compiler->error;

  compiler->cache_arg = 0;
  compiler->cache_argw = 0;
  return getput_arg(compiler, flags, reg_ar, arg, argw, 0, 0);
}

static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w)
{
  if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
    return compiler->error;
  return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw)
{
  CHECK_ERROR();
  check_sljit_emit_fast_enter(compiler, dst, dstw);
  ADJUST_LOCAL_OFFSET(dst, dstw);

  /* For UNUSED dst. Uncommon, but possible. */
  if (dst == SLJIT_UNUSED)
    return SLJIT_SUCCESS;

  if (FAST_IS_REG(dst))
    return ADD(reg_map[dst], RA, ZERO);

  /* Memory. */
  return emit_op_mem(compiler, WORD_DATA, RA, dst, dstw);
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw)
{
  CHECK_ERROR();
  check_sljit_emit_fast_return(compiler, src, srcw);
  ADJUST_LOCAL_OFFSET(src, srcw);

  if (FAST_IS_REG(src))
    FAIL_IF(ADD(RA, reg_map[src], ZERO));

  else if (src & SLJIT_MEM)
    FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RA, src, srcw));

  else if (src & SLJIT_IMM)
    FAIL_IF(load_immediate(compiler, RA, srcw));

  return JR(RA);
}

static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags, sljit_si dst, sljit_si src1, sljit_sw src2)
{
  sljit_si overflow_ra = 0;

  switch (GET_OPCODE(op)) {
  case SLJIT_MOV:
  case SLJIT_MOV_P:
    SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
    if (dst != src2)
      return ADD(reg_map[dst], reg_map[src2], ZERO);
    return SLJIT_SUCCESS;

  case SLJIT_MOV_UI:
  case SLJIT_MOV_SI:
    SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
    if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
      if (op == SLJIT_MOV_SI)
        return BFEXTS(reg_map[dst], reg_map[src2], 0, 31);

      return BFEXTU(reg_map[dst], reg_map[src2], 0, 31);
    } else if (dst != src2)
      SLJIT_ASSERT_STOP();

    return SLJIT_SUCCESS;

  case SLJIT_MOV_UB:
  case SLJIT_MOV_SB:
    SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
    if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
      if (op == SLJIT_MOV_SB)
        return BFEXTS(reg_map[dst], reg_map[src2], 0, 7);

      return BFEXTU(reg_map[dst], reg_map[src2], 0, 7);
    } else if (dst != src2)
      SLJIT_ASSERT_STOP();

    return SLJIT_SUCCESS;

  case SLJIT_MOV_UH:
  case SLJIT_MOV_SH:
    SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
    if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
      if (op == SLJIT_MOV_SH)
        return BFEXTS(reg_map[dst], reg_map[src2], 0, 15);

      return BFEXTU(reg_map[dst], reg_map[src2], 0, 15);
    } else if (dst != src2)
      SLJIT_ASSERT_STOP();

    return SLJIT_SUCCESS;

  case SLJIT_NOT:
    SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
    if (op & SLJIT_SET_E)
      FAIL_IF(NOR(EQUAL_FLAG, reg_map[src2], reg_map[src2]));
    if (CHECK_FLAGS(SLJIT_SET_E))
      FAIL_IF(NOR(reg_map[dst], reg_map[src2], reg_map[src2]));

    return SLJIT_SUCCESS;

  case SLJIT_CLZ:
    SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
    if (op & SLJIT_SET_E)
      FAIL_IF(CLZ(EQUAL_FLAG, reg_map[src2]));
    if (CHECK_FLAGS(SLJIT_SET_E))
      FAIL_IF(CLZ(reg_map[dst], reg_map[src2]));

    return SLJIT_SUCCESS;

1697 case SLJIT_ADD:
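        /* Added note on the flag computation below: TILE-Gx has no
           condition-code register, so the emulated flags live in
           dedicated GPRs.  Signed overflow is detected from the sign
           bits: it can only occur when both operands have the same sign
           (TMP_EREG1 = sign(src1) ^ sign(src2) is 0), and then only if
           the result's sign differs from the operands' common sign.
           The CMOVNEZ at the end of this case clears the overflow flag
           whenever the operand signs differ. */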
        if (flags & SRC2_IMM) {
            if (op & SLJIT_SET_O) {
                FAIL_IF(SHRUI(TMP_EREG1, reg_map[src1], 63));
                if (src2 < 0)
                    FAIL_IF(XORI(TMP_EREG1, TMP_EREG1, 1));
            }

            if (op & SLJIT_SET_E)
                FAIL_IF(ADDLI(EQUAL_FLAG, reg_map[src1], src2));

            if (op & SLJIT_SET_C) {
                if (src2 >= 0)
                    FAIL_IF(ORI(ULESS_FLAG, reg_map[src1], src2));
                else {
                    FAIL_IF(ADDLI(ULESS_FLAG, ZERO, src2));
                    FAIL_IF(OR(ULESS_FLAG, reg_map[src1], ULESS_FLAG));
                }
            }

            /* dst may be the same as src1 or src2. */
            if (CHECK_FLAGS(SLJIT_SET_E))
                FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], src2));

            if (op & SLJIT_SET_O) {
                FAIL_IF(SHRUI(OVERFLOW_FLAG, reg_map[dst], 63));
                if (src2 < 0)
                    FAIL_IF(XORI(OVERFLOW_FLAG, OVERFLOW_FLAG, 1));
            }
        } else {
            if (op & SLJIT_SET_O) {
                FAIL_IF(XOR(TMP_EREG1, reg_map[src1], reg_map[src2]));
                FAIL_IF(SHRUI(TMP_EREG1, TMP_EREG1, 63));

                if (src1 != dst)
                    overflow_ra = reg_map[src1];
                else if (src2 != dst)
                    overflow_ra = reg_map[src2];
                else {
                    /* Rare occasion: both operands alias dst, so keep a copy of src1. */
                    FAIL_IF(ADD(TMP_EREG2, reg_map[src1], ZERO));
                    overflow_ra = TMP_EREG2;
                }
            }

            if (op & SLJIT_SET_E)
                FAIL_IF(ADD(EQUAL_FLAG, reg_map[src1], reg_map[src2]));

            if (op & SLJIT_SET_C)
                FAIL_IF(OR(ULESS_FLAG, reg_map[src1], reg_map[src2]));

            /* dst may be the same as src1 or src2. */
            if (CHECK_FLAGS(SLJIT_SET_E))
                FAIL_IF(ADD(reg_map[dst], reg_map[src1], reg_map[src2]));

            if (op & SLJIT_SET_O) {
                FAIL_IF(XOR(OVERFLOW_FLAG, reg_map[dst], overflow_ra));
                FAIL_IF(SHRUI(OVERFLOW_FLAG, OVERFLOW_FLAG, 63));
            }
        }

        /* Unsigned a + b >= a | b holds exactly when no carry occurs,
           so the carry flag is set iff dst < (src1 | src2), which
           ULESS_FLAG holds at this point.  E.g. 0xff..ff + 1 gives
           dst = 0 < 0xff..ff, so the carry is set. */
        if (op & SLJIT_SET_C)
            FAIL_IF(CMPLTU(ULESS_FLAG, reg_map[dst], ULESS_FLAG));

        if (op & SLJIT_SET_O)
            return CMOVNEZ(OVERFLOW_FLAG, TMP_EREG1, ZERO);

        return SLJIT_SUCCESS;

    case SLJIT_ADDC:
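        /* Added note, an add-with-carry sketch: dst is computed as
           src1 + src2 + ULESS_FLAG (the incoming carry).  A carry out
           can come from either step, so when SLJIT_SET_C is requested
           the code records the carry of src1 + src2 in TMP_EREG1 (the
           same dst < (src1 | src2) trick as above), detects a wrap of
           the "+ carry" step as (dst == 0 && carry_in == 1), and ORs
           the two into the new ULESS_FLAG. */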
        if (flags & SRC2_IMM) {
            if (op & SLJIT_SET_C) {
                if (src2 >= 0)
                    FAIL_IF(ORI(TMP_EREG1, reg_map[src1], src2));
                else {
                    FAIL_IF(ADDLI(TMP_EREG1, ZERO, src2));
                    FAIL_IF(OR(TMP_EREG1, reg_map[src1], TMP_EREG1));
                }
            }

            FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], src2));
        } else {
            if (op & SLJIT_SET_C)
                FAIL_IF(OR(TMP_EREG1, reg_map[src1], reg_map[src2]));

            /* dst may be the same as src1 or src2. */
            FAIL_IF(ADD(reg_map[dst], reg_map[src1], reg_map[src2]));
        }

        if (op & SLJIT_SET_C)
            FAIL_IF(CMPLTU(TMP_EREG1, reg_map[dst], TMP_EREG1));

        FAIL_IF(ADD(reg_map[dst], reg_map[dst], ULESS_FLAG));

        if (!(op & SLJIT_SET_C))
            return SLJIT_SUCCESS;

        /* Set TMP_EREG2 if (dst == 0) && (ULESS_FLAG == 1). */
        FAIL_IF(CMPLTUI(TMP_EREG2, reg_map[dst], 1));
        FAIL_IF(AND(TMP_EREG2, TMP_EREG2, ULESS_FLAG));
        /* Set carry flag. */
        return OR(ULESS_FLAG, TMP_EREG2, TMP_EREG1);

    case SLJIT_SUB:
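        /* Added note: when signed/unsigned comparison flags are
           requested, both orderings (src1 < src2 and src2 < src1) are
           computed, which needs src2 in a register.  SIMM_16BIT_MIN is
           also forced into a register because -src2 would then exceed
           the 16-bit immediate range used by the ADDLI forms below. */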
        if ((flags & SRC2_IMM) && ((op & (SLJIT_SET_U | SLJIT_SET_S)) || src2 == SIMM_16BIT_MIN)) {
            FAIL_IF(ADDLI(TMP_REG2_mapped, ZERO, src2));
            src2 = TMP_REG2;
            flags &= ~SRC2_IMM;
        }

        if (flags & SRC2_IMM) {
            if (op & SLJIT_SET_O) {
                FAIL_IF(SHRUI(TMP_EREG1, reg_map[src1], 63));

                if (src2 < 0)
                    FAIL_IF(XORI(TMP_EREG1, TMP_EREG1, 1));

                if (src1 != dst)
                    overflow_ra = reg_map[src1];
                else {
                    /* Rare occasion: src1 aliases dst, so keep a copy. */
                    FAIL_IF(ADD(TMP_EREG2, reg_map[src1], ZERO));
                    overflow_ra = TMP_EREG2;
                }
            }

            if (op & SLJIT_SET_E)
                FAIL_IF(ADDLI(EQUAL_FLAG, reg_map[src1], -src2));

            if (op & SLJIT_SET_C) {
                FAIL_IF(load_immediate(compiler, ADDR_TMP_mapped, src2));
                FAIL_IF(CMPLTU(ULESS_FLAG, reg_map[src1], ADDR_TMP_mapped));
            }

            /* dst may be the same as src1 or src2. */
            if (CHECK_FLAGS(SLJIT_SET_E))
                FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], -src2));
        } else {
            if (op & SLJIT_SET_O) {
                FAIL_IF(XOR(TMP_EREG1, reg_map[src1], reg_map[src2]));
                FAIL_IF(SHRUI(TMP_EREG1, TMP_EREG1, 63));

                if (src1 != dst)
                    overflow_ra = reg_map[src1];
                else {
                    /* Rare occasion: src1 aliases dst, so keep a copy. */
                    FAIL_IF(ADD(TMP_EREG2, reg_map[src1], ZERO));
                    overflow_ra = TMP_EREG2;
                }
            }

            if (op & SLJIT_SET_E)
                FAIL_IF(SUB(EQUAL_FLAG, reg_map[src1], reg_map[src2]));

            if (op & (SLJIT_SET_U | SLJIT_SET_C))
                FAIL_IF(CMPLTU(ULESS_FLAG, reg_map[src1], reg_map[src2]));

            if (op & SLJIT_SET_U)
                FAIL_IF(CMPLTU(UGREATER_FLAG, reg_map[src2], reg_map[src1]));

            if (op & SLJIT_SET_S) {
                FAIL_IF(CMPLTS(LESS_FLAG, reg_map[src1], reg_map[src2]));
                FAIL_IF(CMPLTS(GREATER_FLAG, reg_map[src2], reg_map[src1]));
            }

            /* dst may be the same as src1 or src2. */
            if (CHECK_FLAGS(SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C))
                FAIL_IF(SUB(reg_map[dst], reg_map[src1], reg_map[src2]));
        }

        if (op & SLJIT_SET_O) {
            FAIL_IF(XOR(OVERFLOW_FLAG, reg_map[dst], overflow_ra));
            FAIL_IF(SHRUI(OVERFLOW_FLAG, OVERFLOW_FLAG, 63));
            return CMOVEQZ(OVERFLOW_FLAG, TMP_EREG1, ZERO);
        }

        return SLJIT_SUCCESS;

    case SLJIT_SUBC:
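        /* Added note, a subtract-with-borrow sketch: dst is computed as
           src1 - src2 - ULESS_FLAG (the incoming borrow).  TMP_EREG1
           records whether src1 - src2 itself borrows.  If that
           intermediate result is zero, the first step cannot have
           borrowed and the "- borrow" step wraps exactly when the
           incoming borrow was set, so the CMOVEQZ below substitutes
           ULESS_FLAG in that case; the result becomes the new
           ULESS_FLAG. */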
        if ((flags & SRC2_IMM) && src2 == SIMM_16BIT_MIN) {
            FAIL_IF(ADDLI(TMP_REG2_mapped, ZERO, src2));
            src2 = TMP_REG2;
            flags &= ~SRC2_IMM;
        }

        if (flags & SRC2_IMM) {
            if (op & SLJIT_SET_C) {
                FAIL_IF(load_immediate(compiler, ADDR_TMP_mapped, -src2));
                FAIL_IF(CMPLTU(TMP_EREG1, reg_map[src1], ADDR_TMP_mapped));
            }

            /* dst may be the same as src1 or src2. */
            FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], -src2));
        } else {
            if (op & SLJIT_SET_C)
                FAIL_IF(CMPLTU(TMP_EREG1, reg_map[src1], reg_map[src2]));

            /* dst may be the same as src1 or src2. */
            FAIL_IF(SUB(reg_map[dst], reg_map[src1], reg_map[src2]));
        }

        if (op & SLJIT_SET_C)
            FAIL_IF(CMOVEQZ(TMP_EREG1, reg_map[dst], ULESS_FLAG));

        FAIL_IF(SUB(reg_map[dst], reg_map[dst], ULESS_FLAG));

        if (op & SLJIT_SET_C)
            FAIL_IF(ADD(ULESS_FLAG, TMP_EREG1, ZERO));

        return SLJIT_SUCCESS;

#define EMIT_LOGICAL(op_imm, op_norm) \
    if (flags & SRC2_IMM) { \
        FAIL_IF(load_immediate(compiler, ADDR_TMP_mapped, src2)); \
        if (op & SLJIT_SET_E) \
            FAIL_IF(push_3_buffer( \
                compiler, op_norm, EQUAL_FLAG, reg_map[src1], \
                ADDR_TMP_mapped, __LINE__)); \
        if (CHECK_FLAGS(SLJIT_SET_E)) \
            FAIL_IF(push_3_buffer( \
                compiler, op_norm, reg_map[dst], reg_map[src1], \
                ADDR_TMP_mapped, __LINE__)); \
    } else { \
        if (op & SLJIT_SET_E) \
            FAIL_IF(push_3_buffer( \
                compiler, op_norm, EQUAL_FLAG, reg_map[src1], \
                reg_map[src2], __LINE__)); \
        if (CHECK_FLAGS(SLJIT_SET_E)) \
            FAIL_IF(push_3_buffer( \
                compiler, op_norm, reg_map[dst], reg_map[src1], \
                reg_map[src2], __LINE__)); \
    }
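
/* Added note: even in the immediate case the 16-bit constant is first
   loaded into ADDR_TMP and the register form (op_norm) is used, since
   the TILE-Gx ANDI/ORI/XORI encodings only take 8-bit immediates; the
   op_imm parameter is accepted for symmetry with EMIT_SHIFT but is
   apparently unused here. */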

    case SLJIT_AND:
        EMIT_LOGICAL(TILEGX_OPC_ANDI, TILEGX_OPC_AND);
        return SLJIT_SUCCESS;

    case SLJIT_OR:
        EMIT_LOGICAL(TILEGX_OPC_ORI, TILEGX_OPC_OR);
        return SLJIT_SUCCESS;

    case SLJIT_XOR:
        EMIT_LOGICAL(TILEGX_OPC_XORI, TILEGX_OPC_XOR);
        return SLJIT_SUCCESS;

#define EMIT_SHIFT(op_imm, op_norm) \
    if (flags & SRC2_IMM) { \
        if (op & SLJIT_SET_E) \
            FAIL_IF(push_3_buffer( \
                compiler, op_imm, EQUAL_FLAG, reg_map[src1], \
                src2 & 0x3F, __LINE__)); \
        if (CHECK_FLAGS(SLJIT_SET_E)) \
            FAIL_IF(push_3_buffer( \
                compiler, op_imm, reg_map[dst], reg_map[src1], \
                src2 & 0x3F, __LINE__)); \
    } else { \
        if (op & SLJIT_SET_E) \
            FAIL_IF(push_3_buffer( \
                compiler, op_norm, EQUAL_FLAG, reg_map[src1], \
                reg_map[src2], __LINE__)); \
        if (CHECK_FLAGS(SLJIT_SET_E)) \
            FAIL_IF(push_3_buffer( \
                compiler, op_norm, reg_map[dst], reg_map[src1], \
                reg_map[src2], __LINE__)); \
    }
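
/* Added note: in the register branch the shift count comes from
   reg_map[src2]; the immediate branch masks the count to 6 bits since
   these are 64-bit shifts.  sljit_emit_op2 additionally masks immediate
   counts to 5 bits under SLJIT_INT_OP. */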

    case SLJIT_SHL:
        EMIT_SHIFT(TILEGX_OPC_SHLI, TILEGX_OPC_SHL);
        return SLJIT_SUCCESS;

    case SLJIT_LSHR:
        EMIT_SHIFT(TILEGX_OPC_SHRUI, TILEGX_OPC_SHRU);
        return SLJIT_SUCCESS;

    case SLJIT_ASHR:
        EMIT_SHIFT(TILEGX_OPC_SHRSI, TILEGX_OPC_SHRS);
        return SLJIT_SUCCESS;
    }

    SLJIT_ASSERT_STOP();
    return SLJIT_SUCCESS;
}

static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags, sljit_si dst, sljit_sw dstw, sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w)
{
    /* arg1 goes to TMP_REG1 or src reg.
       arg2 goes to TMP_REG2, imm or src reg.
       TMP_REG3 can be used for caching.
       The result goes to TMP_REG2, so storing the result can use
       TMP_REG1 and TMP_REG3. */
    sljit_si dst_r = TMP_REG2;
    sljit_si src1_r;
    sljit_sw src2_r = 0;
    sljit_si sugg_src2_r = TMP_REG2;

    if (!(flags & ALT_KEEP_CACHE)) {
        compiler->cache_arg = 0;
        compiler->cache_argw = 0;
    }

    if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
        if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI && !(src2 & SLJIT_MEM))
            return SLJIT_SUCCESS;
        if (GET_FLAGS(op))
            flags |= UNUSED_DEST;
    } else if (FAST_IS_REG(dst)) {
        dst_r = dst;
        flags |= REG_DEST;
        if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI)
            sugg_src2_r = dst_r;
    } else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1_mapped, dst, dstw))
        flags |= SLOW_DEST;

    if (flags & IMM_OP) {
        if ((src2 & SLJIT_IMM) && src2w) {
            if ((!(flags & LOGICAL_OP)
                    && (src2w <= SIMM_16BIT_MAX && src2w >= SIMM_16BIT_MIN))
                    || ((flags & LOGICAL_OP) && !(src2w & ~UIMM_16BIT_MAX))) {
                flags |= SRC2_IMM;
                src2_r = src2w;
            }
        }

        if (!(flags & SRC2_IMM) && (flags & CUMULATIVE_OP) && (src1 & SLJIT_IMM) && src1w) {
            if ((!(flags & LOGICAL_OP)
                    && (src1w <= SIMM_16BIT_MAX && src1w >= SIMM_16BIT_MIN))
                    || ((flags & LOGICAL_OP) && !(src1w & ~UIMM_16BIT_MAX))) {
                flags |= SRC2_IMM;
                src2_r = src1w;

                /* And swap arguments. */
                src1 = src2;
                src1w = src2w;
                src2 = SLJIT_IMM;
                /* src2w = src2_r unneeded. */
            }
        }
    }

    /* Source 1. */
    if (FAST_IS_REG(src1)) {
        src1_r = src1;
        flags |= REG1_SOURCE;
    } else if (src1 & SLJIT_IMM) {
        if (src1w) {
            FAIL_IF(load_immediate(compiler, TMP_REG1_mapped, src1w));
            src1_r = TMP_REG1;
        } else
            src1_r = 0;
    } else {
        if (getput_arg_fast(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w))
            FAIL_IF(compiler->error);
        else
            flags |= SLOW_SRC1;
        src1_r = TMP_REG1;
    }

    /* Source 2. */
    if (FAST_IS_REG(src2)) {
        src2_r = src2;
        flags |= REG2_SOURCE;
        if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_SI)
            dst_r = src2_r;
    } else if (src2 & SLJIT_IMM) {
        if (!(flags & SRC2_IMM)) {
            if (src2w) {
                FAIL_IF(load_immediate(compiler, reg_map[sugg_src2_r], src2w));
                src2_r = sugg_src2_r;
            } else {
                src2_r = 0;
                if ((op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) && (dst & SLJIT_MEM))
                    dst_r = 0;
            }
        }
    } else {
        if (getput_arg_fast(compiler, flags | LOAD_DATA, reg_map[sugg_src2_r], src2, src2w))
            FAIL_IF(compiler->error);
        else
            flags |= SLOW_SRC2;
        src2_r = sugg_src2_r;
    }

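    /* Added note: when both sources need the slow (multi-instruction)
       address path, the load order below is chosen so that the address
       cache set up by the first getput_arg can serve either the second
       source or the later store to dst, whichever can_cache reports as
       profitable. */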
    if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
        SLJIT_ASSERT(src2_r == TMP_REG2);
        if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
            FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2_mapped, src2, src2w, src1, src1w));
            FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w, dst, dstw));
        } else {
            FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w, src2, src2w));
            FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2_mapped, src2, src2w, dst, dstw));
        }
    } else if (flags & SLOW_SRC1)
        FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w, dst, dstw));
    else if (flags & SLOW_SRC2)
        FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, reg_map[sugg_src2_r], src2, src2w, dst, dstw));

    FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));

    if (dst & SLJIT_MEM) {
        if (!(flags & SLOW_DEST)) {
            getput_arg_fast(compiler, flags, reg_map[dst_r], dst, dstw);
            return compiler->error;
        }

        return getput_arg(compiler, flags, reg_map[dst_r], dst, dstw, 0, 0);
    }

    return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw, sljit_si type)
{
    sljit_si sugg_dst_ar, dst_ar;
    sljit_si flags = GET_ALL_FLAGS(op);

    CHECK_ERROR();
    check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type);
    ADJUST_LOCAL_OFFSET(dst, dstw);

    if (dst == SLJIT_UNUSED)
        return SLJIT_SUCCESS;

    op = GET_OPCODE(op);
    sugg_dst_ar = reg_map[(op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2];

    compiler->cache_arg = 0;
    compiler->cache_argw = 0;
    if (op >= SLJIT_ADD && (src & SLJIT_MEM)) {
        ADJUST_LOCAL_OFFSET(src, srcw);
        FAIL_IF(emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, TMP_REG1_mapped, src, srcw, dst, dstw));
        src = TMP_REG1;
        srcw = 0;
    }

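    /* Added note: the comparison that preceded this call left its
       predicates in the dedicated flag registers (EQUAL_FLAG,
       ULESS_FLAG, ...), so most conditions simply pick the right
       register.  The low bit of type distinguishes a condition from its
       negation, handled by the XORI after the switch. */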
    switch (type) {
    case SLJIT_C_EQUAL:
    case SLJIT_C_NOT_EQUAL:
        FAIL_IF(CMPLTUI(sugg_dst_ar, EQUAL_FLAG, 1));
        dst_ar = sugg_dst_ar;
        break;
    case SLJIT_C_LESS:
    case SLJIT_C_GREATER_EQUAL:
    case SLJIT_C_FLOAT_LESS:
    case SLJIT_C_FLOAT_GREATER_EQUAL:
        dst_ar = ULESS_FLAG;
        break;
    case SLJIT_C_GREATER:
    case SLJIT_C_LESS_EQUAL:
    case SLJIT_C_FLOAT_GREATER:
    case SLJIT_C_FLOAT_LESS_EQUAL:
        dst_ar = UGREATER_FLAG;
        break;
    case SLJIT_C_SIG_LESS:
    case SLJIT_C_SIG_GREATER_EQUAL:
        dst_ar = LESS_FLAG;
        break;
    case SLJIT_C_SIG_GREATER:
    case SLJIT_C_SIG_LESS_EQUAL:
        dst_ar = GREATER_FLAG;
        break;
    case SLJIT_C_OVERFLOW:
    case SLJIT_C_NOT_OVERFLOW:
        dst_ar = OVERFLOW_FLAG;
        break;
    case SLJIT_C_MUL_OVERFLOW:
    case SLJIT_C_MUL_NOT_OVERFLOW:
        FAIL_IF(CMPLTUI(sugg_dst_ar, OVERFLOW_FLAG, 1));
        dst_ar = sugg_dst_ar;
        type ^= 0x1; /* Flip type bit for the XORI below. */
        break;
    case SLJIT_C_FLOAT_EQUAL:
    case SLJIT_C_FLOAT_NOT_EQUAL:
        dst_ar = EQUAL_FLAG;
        break;
    default:
        SLJIT_ASSERT_STOP();
        dst_ar = sugg_dst_ar;
        break;
    }

    if (type & 0x1) {
        FAIL_IF(XORI(sugg_dst_ar, dst_ar, 1));
        dst_ar = sugg_dst_ar;
    }

    if (op >= SLJIT_ADD) {
        if (TMP_REG2_mapped != dst_ar)
            FAIL_IF(ADD(TMP_REG2_mapped, dst_ar, ZERO));
        return emit_op(compiler, op | flags, CUMULATIVE_OP | LOGICAL_OP | IMM_OP | ALT_KEEP_CACHE, dst, dstw, src, srcw, TMP_REG2, 0);
    }

    if (dst & SLJIT_MEM)
        return emit_op_mem(compiler, WORD_DATA, dst_ar, dst, dstw);

    if (sugg_dst_ar != dst_ar)
        return ADD(sugg_dst_ar, dst_ar, ZERO);

    return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
{
    CHECK_ERROR();
    check_sljit_emit_op0(compiler, op);

    op = GET_OPCODE(op);
    switch (op) {
    case SLJIT_NOP:
        return push_0_buffer(compiler, TILEGX_OPC_FNOP, __LINE__);

    case SLJIT_BREAKPOINT:
        return PI(BPT);

    case SLJIT_UMUL:
    case SLJIT_SMUL:
    case SLJIT_UDIV:
    case SLJIT_SDIV:
        SLJIT_ASSERT_STOP();
    }

    return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw)
{
    CHECK_ERROR();
    check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
    ADJUST_LOCAL_OFFSET(dst, dstw);
    ADJUST_LOCAL_OFFSET(src, srcw);

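    /* Added note: the MOV variants below differ only in the data-size
       flags passed to emit_op and in how an immediate source is
       truncated (e.g. (sljit_ub) srcw for an 8-bit unsigned move); the
       MOVU forms additionally request write-back addressing. */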
    switch (GET_OPCODE(op)) {
    case SLJIT_MOV:
    case SLJIT_MOV_P:
        return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw);

    case SLJIT_MOV_UI:
        return emit_op(compiler, SLJIT_MOV_UI, INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw);

    case SLJIT_MOV_SI:
        return emit_op(compiler, SLJIT_MOV_SI, INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw);

    case SLJIT_MOV_UB:
        return emit_op(compiler, SLJIT_MOV_UB, BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ub) srcw : srcw);

    case SLJIT_MOV_SB:
        return emit_op(compiler, SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sb) srcw : srcw);

    case SLJIT_MOV_UH:
        return emit_op(compiler, SLJIT_MOV_UH, HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_uh) srcw : srcw);

    case SLJIT_MOV_SH:
        return emit_op(compiler, SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sh) srcw : srcw);

    case SLJIT_MOVU:
    case SLJIT_MOVU_P:
        return emit_op(compiler, SLJIT_MOV, WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);

    case SLJIT_MOVU_UI:
        return emit_op(compiler, SLJIT_MOV_UI, INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);

    case SLJIT_MOVU_SI:
        return emit_op(compiler, SLJIT_MOV_SI, INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);

    case SLJIT_MOVU_UB:
        return emit_op(compiler, SLJIT_MOV_UB, BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ub) srcw : srcw);

    case SLJIT_MOVU_SB:
        return emit_op(compiler, SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sb) srcw : srcw);

    case SLJIT_MOVU_UH:
        return emit_op(compiler, SLJIT_MOV_UH, HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_uh) srcw : srcw);

    case SLJIT_MOVU_SH:
        return emit_op(compiler, SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sh) srcw : srcw);

    case SLJIT_NOT:
        return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw);

    case SLJIT_NEG:
        return emit_op(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw);

    case SLJIT_CLZ:
        return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw);
    }

    return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w)
{
    CHECK_ERROR();
    check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
    ADJUST_LOCAL_OFFSET(dst, dstw);
    ADJUST_LOCAL_OFFSET(src1, src1w);
    ADJUST_LOCAL_OFFSET(src2, src2w);

    switch (GET_OPCODE(op)) {
    case SLJIT_ADD:
    case SLJIT_ADDC:
        return emit_op(compiler, op, CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);

    case SLJIT_SUB:
    case SLJIT_SUBC:
        return emit_op(compiler, op, IMM_OP, dst, dstw, src1, src1w, src2, src2w);

    case SLJIT_MUL:
        return emit_op(compiler, op, CUMULATIVE_OP, dst, dstw, src1, src1w, src2, src2w);

    case SLJIT_AND:
    case SLJIT_OR:
    case SLJIT_XOR:
        return emit_op(compiler, op, CUMULATIVE_OP | LOGICAL_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);

    case SLJIT_SHL:
    case SLJIT_LSHR:
    case SLJIT_ASHR:
        if (src2 & SLJIT_IMM)
            src2w &= 0x3f;
        if (op & SLJIT_INT_OP)
            src2w &= 0x1f;

        return emit_op(compiler, op, IMM_OP, dst, dstw, src1, src1w, src2, src2w);
    }

    return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE struct sljit_label * sljit_emit_label(struct sljit_compiler *compiler)
{
    struct sljit_label *label;

    flush_buffer(compiler);

    CHECK_ERROR_PTR();
    check_sljit_emit_label(compiler);

    if (compiler->last_label && compiler->last_label->size == compiler->size)
        return compiler->last_label;

    label = (struct sljit_label *)ensure_abuf(compiler, sizeof(struct sljit_label));
    PTR_FAIL_IF(!label);
    set_label(label, compiler);
    return label;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
{
    sljit_si src_r = TMP_REG2;
    struct sljit_jump *jump = NULL;

    flush_buffer(compiler);

    CHECK_ERROR();
    check_sljit_emit_ijump(compiler, type, src, srcw);
    ADJUST_LOCAL_OFFSET(src, srcw);

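    /* Added note: register 0 is also the first argument register of the
       TILE-Gx calling convention, so a jump target living in r0 would
       be clobbered by the argument set-up below; it is copied to
       TMP_REG2 first. */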
    if (FAST_IS_REG(src)) {
        if (reg_map[src] != 0)
            src_r = src;
        else
            FAIL_IF(ADD_SOLO(TMP_REG2_mapped, reg_map[src], ZERO));
    }

    if (type >= SLJIT_CALL0) {
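        /* Added note: calls load the target into PIC_ADDR_REG, move the
           first sljit argument into r0, and adjust r54 (the stack
           pointer, SLJIT_LOCALS_REG_mapped) by 16 bytes around the
           JALR, apparently to satisfy the callee's stack-frame
           expectations. */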
        SLJIT_ASSERT(reg_map[PIC_ADDR_REG] == 16 && PIC_ADDR_REG == TMP_REG2);
        if (src & (SLJIT_IMM | SLJIT_MEM)) {
            if (src & SLJIT_IMM)
                FAIL_IF(emit_const(compiler, reg_map[PIC_ADDR_REG], srcw, 1));
            else {
                SLJIT_ASSERT(src_r == TMP_REG2 && (src & SLJIT_MEM));
                FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
            }

            FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_SCRATCH_REG1], ZERO));
            FAIL_IF(ADDI_SOLO(54, 54, -16));
            FAIL_IF(JALR_SOLO(reg_map[PIC_ADDR_REG]));
            return ADDI_SOLO(54, 54, 16);
        }

        /* Register input. */
        if (type >= SLJIT_CALL1)
            FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_SCRATCH_REG1], ZERO));

        FAIL_IF(ADD_SOLO(reg_map[PIC_ADDR_REG], reg_map[src_r], ZERO));
        FAIL_IF(ADDI_SOLO(54, 54, -16));
        FAIL_IF(JALR_SOLO(reg_map[src_r]));
        return ADDI_SOLO(54, 54, 16);
    }

    if (src & SLJIT_IMM) {
        jump = (struct sljit_jump *)ensure_abuf(compiler, sizeof(struct sljit_jump));
        FAIL_IF(!jump);
        set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_JAL : 0));
        jump->u.target = srcw;
        FAIL_IF(emit_const(compiler, TMP_REG2_mapped, 0, 1));

        if (type >= SLJIT_FAST_CALL) {
            FAIL_IF(ADD_SOLO(ZERO, ZERO, ZERO));
            jump->addr = compiler->size;
            FAIL_IF(JR_SOLO(reg_map[src_r]));
        } else {
            jump->addr = compiler->size;
            FAIL_IF(JR_SOLO(reg_map[src_r]));
        }

        return SLJIT_SUCCESS;
    } else if (src & SLJIT_MEM)
        FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));

    FAIL_IF(JR_SOLO(reg_map[src_r]));

    if (jump)
        jump->addr = compiler->size;

    return SLJIT_SUCCESS;
}

#define BR_Z(src) \
    inst = BEQZ_X1 | SRCA_X1(src); \
    flags = IS_COND;

#define BR_NZ(src) \
    inst = BNEZ_X1 | SRCA_X1(src); \
    flags = IS_COND;

SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump * sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
{
    struct sljit_jump *jump;
    sljit_ins inst;
    sljit_si flags = 0;

    flush_buffer(compiler);

    CHECK_ERROR_PTR();
    check_sljit_emit_jump(compiler, type);

    jump = (struct sljit_jump *)ensure_abuf(compiler, sizeof(struct sljit_jump));
    PTR_FAIL_IF(!jump);
    set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
    type &= 0xff;

    switch (type) {
    case SLJIT_C_EQUAL:
    case SLJIT_C_FLOAT_NOT_EQUAL:
        BR_NZ(EQUAL_FLAG);
        break;
    case SLJIT_C_NOT_EQUAL:
    case SLJIT_C_FLOAT_EQUAL:
        BR_Z(EQUAL_FLAG);
        break;
    case SLJIT_C_LESS:
    case SLJIT_C_FLOAT_LESS:
        BR_Z(ULESS_FLAG);
        break;
    case SLJIT_C_GREATER_EQUAL:
    case SLJIT_C_FLOAT_GREATER_EQUAL:
        BR_NZ(ULESS_FLAG);
        break;
    case SLJIT_C_GREATER:
    case SLJIT_C_FLOAT_GREATER:
        BR_Z(UGREATER_FLAG);
        break;
    case SLJIT_C_LESS_EQUAL:
    case SLJIT_C_FLOAT_LESS_EQUAL:
        BR_NZ(UGREATER_FLAG);
        break;
    case SLJIT_C_SIG_LESS:
        BR_Z(LESS_FLAG);
        break;
    case SLJIT_C_SIG_GREATER_EQUAL:
        BR_NZ(LESS_FLAG);
        break;
    case SLJIT_C_SIG_GREATER:
        BR_Z(GREATER_FLAG);
        break;
    case SLJIT_C_SIG_LESS_EQUAL:
        BR_NZ(GREATER_FLAG);
        break;
    case SLJIT_C_OVERFLOW:
    case SLJIT_C_MUL_OVERFLOW:
        BR_Z(OVERFLOW_FLAG);
        break;
    case SLJIT_C_NOT_OVERFLOW:
    case SLJIT_C_MUL_NOT_OVERFLOW:
        BR_NZ(OVERFLOW_FLAG);
        break;
    default:
        /* Not a conditional branch. */
        inst = 0;
        break;
    }

    jump->flags |= flags;

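    /* Added note: for a conditional jump the branch above tests the
       inverse condition and hops over the long-form jump that follows;
       BOFF_X1(5) / BOFF_X1(6) presumably count the branch bundle plus
       the emit_const sequence and the JR (plus the extra argument-setup
       ADD for calls). */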
    if (inst) {
        inst = inst | ((type <= SLJIT_JUMP) ? BOFF_X1(5) : BOFF_X1(6));
        PTR_FAIL_IF(PI(inst));
    }

    PTR_FAIL_IF(emit_const(compiler, TMP_REG2_mapped, 0, 1));
    if (type <= SLJIT_JUMP) {
        jump->addr = compiler->size;
        PTR_FAIL_IF(JR_SOLO(TMP_REG2_mapped));
    } else {
        SLJIT_ASSERT(reg_map[PIC_ADDR_REG] == 16 && PIC_ADDR_REG == TMP_REG2);
        /* Cannot be optimized out if type is >= CALL0. */
        jump->flags |= IS_JAL | (type >= SLJIT_CALL0 ? SLJIT_REWRITABLE_JUMP : 0);
        PTR_FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_SCRATCH_REG1], ZERO));
        jump->addr = compiler->size;
        PTR_FAIL_IF(JALR_SOLO(TMP_REG2_mapped));
    }

    return jump;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
{
    return 0;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw)
{
    /* No FPU: reaching here is a bug, but return a value so the
       non-void function does not fall off its end. */
    SLJIT_ASSERT_STOP();
    return SLJIT_ERR_UNSUPPORTED;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w)
{
    SLJIT_ASSERT_STOP();
    return SLJIT_ERR_UNSUPPORTED;
}

SLJIT_API_FUNC_ATTRIBUTE struct sljit_const * sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
{
    struct sljit_const *const_;
    sljit_si reg;

    flush_buffer(compiler);

    CHECK_ERROR_PTR();
    check_sljit_emit_const(compiler, dst, dstw, init_value);
    ADJUST_LOCAL_OFFSET(dst, dstw);

    const_ = (struct sljit_const *)ensure_abuf(compiler, sizeof(struct sljit_const));
    PTR_FAIL_IF(!const_);
    set_const(const_, compiler);

    reg = FAST_IS_REG(dst) ? dst : TMP_REG2;

    PTR_FAIL_IF(emit_const_64(compiler, reg, init_value, 1));

    if (dst & SLJIT_MEM)
        PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0));
    return const_;
}

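/* Added note: the patch routines below rewrite the 16-bit X1-pipe
   immediate field (bits 43..58 of each bundle, hence the masks shifted
   by 43) in the constant-materialization sequences presumably emitted
   by emit_const and emit_const_64.  Jump addresses use three bundles
   (48 bits); constants use four (64 bits).  The icache is flushed after
   patching. */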
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
{
    sljit_ins *inst = (sljit_ins *)addr;

    inst[0] = (inst[0] & ~(0xFFFFL << 43)) | (((new_addr >> 32) & 0xFFFFL) << 43);
    inst[1] = (inst[1] & ~(0xFFFFL << 43)) | (((new_addr >> 16) & 0xFFFFL) << 43);
    inst[2] = (inst[2] & ~(0xFFFFL << 43)) | ((new_addr & 0xFFFFL) << 43);
    SLJIT_CACHE_FLUSH(inst, inst + 3);
}

SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
{
    sljit_ins *inst = (sljit_ins *)addr;

    inst[0] = (inst[0] & ~(0xFFFFL << 43)) | (((new_constant >> 48) & 0xFFFFL) << 43);
    inst[1] = (inst[1] & ~(0xFFFFL << 43)) | (((new_constant >> 32) & 0xFFFFL) << 43);
    inst[2] = (inst[2] & ~(0xFFFFL << 43)) | (((new_constant >> 16) & 0xFFFFL) << 43);
    inst[3] = (inst[3] & ~(0xFFFFL << 43)) | ((new_constant & 0xFFFFL) << 43);
    SLJIT_CACHE_FLUSH(inst, inst + 4);
}